Diffstat (limited to 'arch/x86/kernel')
167 files changed, 11607 insertions, 10943 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 38573340b143..21dc1a061bf1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,9 +1,93 @@ | |||
1 | ifeq ($(CONFIG_X86_32),y) | 1 | # |
2 | include ${srctree}/arch/x86/kernel/Makefile_32 | 2 | # Makefile for the linux kernel. |
3 | else | 3 | # |
4 | include ${srctree}/arch/x86/kernel/Makefile_64 | 4 | |
5 | extra-y := head_$(BITS).o init_task.o vmlinux.lds | ||
6 | extra-$(CONFIG_X86_64) += head64.o | ||
7 | |||
8 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | ||
9 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 | ||
10 | |||
11 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | ||
12 | obj-y += traps_$(BITS).o irq_$(BITS).o | ||
13 | obj-y += time_$(BITS).o ioport.o ldt.o | ||
14 | obj-y += setup_$(BITS).o i8259_$(BITS).o | ||
15 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | ||
16 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | ||
17 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o | ||
18 | obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o | ||
19 | obj-y += quirks.o i8237.o topology.o kdebugfs.o | ||
20 | obj-y += alternative.o i8253.o | ||
21 | obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o | ||
22 | obj-y += tsc_$(BITS).o io_delay.o rtc.o | ||
23 | |||
24 | obj-y += i387.o | ||
25 | obj-y += ptrace.o | ||
26 | obj-y += ds.o | ||
27 | obj-$(CONFIG_X86_32) += tls.o | ||
28 | obj-$(CONFIG_IA32_EMULATION) += tls.o | ||
29 | obj-y += step.o | ||
30 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | ||
31 | obj-y += cpu/ | ||
32 | obj-y += acpi/ | ||
33 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o | ||
34 | obj-$(CONFIG_X86_64) += reboot.o | ||
35 | obj-$(CONFIG_MCA) += mca_32.o | ||
36 | obj-$(CONFIG_X86_MSR) += msr.o | ||
37 | obj-$(CONFIG_X86_CPUID) += cpuid.o | ||
38 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
39 | obj-$(CONFIG_PCI) += early-quirks.o | ||
40 | apm-y := apm_32.o | ||
41 | obj-$(CONFIG_APM) += apm.o | ||
42 | obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o | ||
43 | obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o | ||
44 | obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o | ||
45 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o | ||
46 | obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o | ||
47 | obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o | ||
48 | obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o | ||
49 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | ||
50 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | ||
51 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | ||
52 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | ||
53 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | ||
54 | obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o | ||
55 | obj-$(CONFIG_X86_VSMP) += vsmp_64.o | ||
56 | obj-$(CONFIG_KPROBES) += kprobes.o | ||
57 | obj-$(CONFIG_MODULES) += module_$(BITS).o | ||
58 | obj-$(CONFIG_ACPI_SRAT) += srat_32.o | ||
59 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o | ||
60 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | ||
61 | obj-$(CONFIG_VM86) += vm86_32.o | ||
62 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | ||
63 | |||
64 | obj-$(CONFIG_HPET_TIMER) += hpet.o | ||
65 | |||
66 | obj-$(CONFIG_K8_NB) += k8.o | ||
67 | obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o | ||
68 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | ||
69 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | ||
70 | |||
71 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | ||
72 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | ||
73 | |||
74 | ifdef CONFIG_INPUT_PCSPKR | ||
75 | obj-y += pcspeaker.o | ||
5 | endif | 76 | endif |
6 | 77 | ||
7 | # Workaround to delete .lds files with make clean | 78 | obj-$(CONFIG_SCx200) += scx200.o |
8 | # The problem is that we do not enter Makefile_32 with make clean. | 79 | scx200-y += scx200_32.o |
9 | clean-files := vsyscall*.lds vsyscall*.so | 80 | |
81 | ### | ||
82 | # 64 bit specific files | ||
83 | ifeq ($(CONFIG_X86_64),y) | ||
84 | obj-y += genapic_64.o genapic_flat_64.o | ||
85 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | ||
86 | obj-$(CONFIG_AUDIT) += audit_64.o | ||
87 | obj-$(CONFIG_PM) += suspend_64.o | ||
88 | obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o | ||
89 | |||
90 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | ||
91 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | ||
92 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o | ||
93 | endif | ||
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
deleted file mode 100644
index a7bc93c27662..000000000000
--- a/arch/x86/kernel/Makefile_32
+++ /dev/null
@@ -1,88 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for the linux kernel. | ||
3 | # | ||
4 | |||
5 | extra-y := head_32.o init_task.o vmlinux.lds | ||
6 | CPPFLAGS_vmlinux.lds += -Ui386 | ||
7 | |||
8 | obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ | ||
9 | ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ | ||
10 | pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ | ||
11 | quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o | ||
12 | |||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | ||
14 | obj-y += cpu/ | ||
15 | obj-y += acpi/ | ||
16 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o | ||
17 | obj-$(CONFIG_MCA) += mca_32.o | ||
18 | obj-$(CONFIG_X86_MSR) += msr.o | ||
19 | obj-$(CONFIG_X86_CPUID) += cpuid.o | ||
20 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
21 | obj-$(CONFIG_PCI) += early-quirks.o | ||
22 | obj-$(CONFIG_APM) += apm_32.o | ||
23 | obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o | ||
24 | obj-$(CONFIG_SMP) += smpcommon_32.o | ||
25 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o | ||
26 | obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o | ||
27 | obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o | ||
28 | obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o | ||
29 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | ||
30 | obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash.o | ||
31 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o | ||
32 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | ||
33 | obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o | ||
34 | obj-$(CONFIG_KPROBES) += kprobes_32.o | ||
35 | obj-$(CONFIG_MODULES) += module_32.o | ||
36 | obj-y += sysenter_32.o vsyscall_32.o | ||
37 | obj-$(CONFIG_ACPI_SRAT) += srat_32.o | ||
38 | obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o | ||
39 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | ||
40 | obj-$(CONFIG_VM86) += vm86_32.o | ||
41 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | ||
42 | obj-$(CONFIG_HPET_TIMER) += hpet.o | ||
43 | obj-$(CONFIG_K8_NB) += k8.o | ||
44 | obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o | ||
45 | |||
46 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | ||
47 | obj-$(CONFIG_PARAVIRT) += paravirt_32.o | ||
48 | obj-y += pcspeaker.o | ||
49 | |||
50 | obj-$(CONFIG_SCx200) += scx200_32.o | ||
51 | |||
52 | # vsyscall_32.o contains the vsyscall DSO images as __initdata. | ||
53 | # We must build both images before we can assemble it. | ||
54 | # Note: kbuild does not track this dependency due to usage of .incbin | ||
55 | $(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so | ||
56 | targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so) | ||
57 | targets += vsyscall-note_32.o vsyscall_32.lds | ||
58 | |||
59 | # The DSO images are built using a special linker script. | ||
60 | quiet_cmd_syscall = SYSCALL $@ | ||
61 | cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ | ||
62 | -Wl,-T,$(filter-out FORCE,$^) -o $@ | ||
63 | |||
64 | export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386 | ||
65 | |||
66 | vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ | ||
67 | $(call ld-option, -Wl$(comma)--hash-style=sysv) | ||
68 | SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags) | ||
69 | SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags) | ||
70 | |||
71 | $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \ | ||
72 | $(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \ | ||
73 | $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE | ||
74 | $(call if_changed,syscall) | ||
75 | |||
76 | # We also create a special relocatable object that should mirror the symbol | ||
77 | # table and layout of the linked DSO. With ld -R we can then refer to | ||
78 | # these symbols in the kernel code rather than hand-coded addresses. | ||
79 | extra-y += vsyscall-syms.o | ||
80 | $(obj)/built-in.o: $(obj)/vsyscall-syms.o | ||
81 | $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o | ||
82 | |||
83 | SYSCFLAGS_vsyscall-syms.o = -r | ||
84 | $(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \ | ||
85 | $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE | ||
86 | $(call if_changed,syscall) | ||
87 | |||
88 | |||
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
deleted file mode 100644
index 5a88890d8ee9..000000000000
--- a/arch/x86/kernel/Makefile_64
+++ /dev/null
@@ -1,45 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for the linux kernel. | ||
3 | # | ||
4 | |||
5 | extra-y := head_64.o head64.o init_task.o vmlinux.lds | ||
6 | CPPFLAGS_vmlinux.lds += -Ux86_64 | ||
7 | EXTRA_AFLAGS := -traditional | ||
8 | |||
9 | obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ | ||
10 | ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ | ||
11 | x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ | ||
12 | setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ | ||
13 | pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ | ||
14 | i8253.o | ||
15 | |||
16 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | ||
17 | obj-y += cpu/ | ||
18 | obj-y += acpi/ | ||
19 | obj-$(CONFIG_X86_MSR) += msr.o | ||
20 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
21 | obj-$(CONFIG_X86_CPUID) += cpuid.o | ||
22 | obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o | ||
23 | obj-y += apic_64.o nmi_64.o | ||
24 | obj-y += io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o | ||
25 | obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash.o | ||
26 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o | ||
27 | obj-$(CONFIG_PM) += suspend_64.o | ||
28 | obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o | ||
29 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | ||
30 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | ||
31 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | ||
32 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o | ||
33 | obj-$(CONFIG_KPROBES) += kprobes_64.o | ||
34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | ||
35 | obj-$(CONFIG_X86_VSMP) += vsmp_64.o | ||
36 | obj-$(CONFIG_K8_NB) += k8.o | ||
37 | obj-$(CONFIG_AUDIT) += audit_64.o | ||
38 | |||
39 | obj-$(CONFIG_MODULES) += module_64.o | ||
40 | obj-$(CONFIG_PCI) += early-quirks.o | ||
41 | |||
42 | obj-y += topology.o | ||
43 | obj-y += pcspeaker.o | ||
44 | |||
45 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 | ||
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 1351c3982ee4..19d3d6e9d09b 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,5 @@ | |||
1 | obj-$(CONFIG_ACPI) += boot.o | 1 | obj-$(CONFIG_ACPI) += boot.o |
2 | obj-$(CONFIG_ACPI_SLEEP) += sleep_$(BITS).o wakeup_$(BITS).o | 2 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o |
3 | 3 | ||
4 | ifneq ($(CONFIG_ACPI_PROCESSOR),) | 4 | ifneq ($(CONFIG_ACPI_PROCESSOR),) |
5 | obj-y += cstate.o processor.o | 5 | obj-y += cstate.o processor.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0ca27c7b0e8d..fc8825d4b996 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -496,7 +496,8 @@ EXPORT_SYMBOL(acpi_register_gsi); | |||
496 | * ACPI based hotplug support for CPU | 496 | * ACPI based hotplug support for CPU |
497 | */ | 497 | */ |
498 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 498 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
499 | int acpi_map_lsapic(acpi_handle handle, int *pcpu) | 499 | |
500 | static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | ||
500 | { | 501 | { |
501 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; | 502 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; |
502 | union acpi_object *obj; | 503 | union acpi_object *obj; |
@@ -551,6 +552,11 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
551 | return 0; | 552 | return 0; |
552 | } | 553 | } |
553 | 554 | ||
555 | /* wrapper to silence section mismatch warning */ | ||
556 | int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu) | ||
557 | { | ||
558 | return _acpi_map_lsapic(handle, pcpu); | ||
559 | } | ||
554 | EXPORT_SYMBOL(acpi_map_lsapic); | 560 | EXPORT_SYMBOL(acpi_map_lsapic); |
555 | 561 | ||
556 | int acpi_unmap_lsapic(int cpu) | 562 | int acpi_unmap_lsapic(int cpu) |
@@ -581,25 +587,6 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) | |||
581 | 587 | ||
582 | EXPORT_SYMBOL(acpi_unregister_ioapic); | 588 | EXPORT_SYMBOL(acpi_unregister_ioapic); |
583 | 589 | ||
584 | static unsigned long __init | ||
585 | acpi_scan_rsdp(unsigned long start, unsigned long length) | ||
586 | { | ||
587 | unsigned long offset = 0; | ||
588 | unsigned long sig_len = sizeof("RSD PTR ") - 1; | ||
589 | |||
590 | /* | ||
591 | * Scan all 16-byte boundaries of the physical memory region for the | ||
592 | * RSDP signature. | ||
593 | */ | ||
594 | for (offset = 0; offset < length; offset += 16) { | ||
595 | if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) | ||
596 | continue; | ||
597 | return (start + offset); | ||
598 | } | ||
599 | |||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | static int __init acpi_parse_sbf(struct acpi_table_header *table) | 590 | static int __init acpi_parse_sbf(struct acpi_table_header *table) |
604 | { | 591 | { |
605 | struct acpi_table_boot *sb; | 592 | struct acpi_table_boot *sb; |
@@ -742,27 +729,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) | |||
742 | return 0; | 729 | return 0; |
743 | } | 730 | } |
744 | 731 | ||
745 | unsigned long __init acpi_find_rsdp(void) | ||
746 | { | ||
747 | unsigned long rsdp_phys = 0; | ||
748 | |||
749 | if (efi_enabled) { | ||
750 | if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) | ||
751 | return efi.acpi20; | ||
752 | else if (efi.acpi != EFI_INVALID_TABLE_ADDR) | ||
753 | return efi.acpi; | ||
754 | } | ||
755 | /* | ||
756 | * Scan memory looking for the RSDP signature. First search EBDA (low | ||
757 | * memory) paragraphs and then search upper memory (E0000-FFFFF). | ||
758 | */ | ||
759 | rsdp_phys = acpi_scan_rsdp(0, 0x400); | ||
760 | if (!rsdp_phys) | ||
761 | rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); | ||
762 | |||
763 | return rsdp_phys; | ||
764 | } | ||
765 | |||
766 | #ifdef CONFIG_X86_LOCAL_APIC | 732 | #ifdef CONFIG_X86_LOCAL_APIC |
767 | /* | 733 | /* |
768 | * Parse LAPIC entries in MADT | 734 | * Parse LAPIC entries in MADT |
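
The split of acpi_map_lsapic() above exists to silence a modpost section-mismatch warning: the body is now __cpuinit (so it can be discarded when CPU hotplug support is not configured in), while the exported entry point is a thin __ref wrapper that marks the call into that code as intentional. A minimal sketch of the same pattern, with made-up function names:

    #include <linux/init.h>

    /* may be placed in a discardable section; name is hypothetical */
    static int __cpuinit _do_cpu_bringup(int cpu)
    {
            return 0;
    }

    /* __ref tells modpost the reference into __cpuinit code is intentional */
    int __ref do_cpu_bringup(int cpu)
    {
            return _do_cpu_bringup(cpu);
    }
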
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
new file mode 100644
index 000000000000..6bc815cd8cb3
--- /dev/null
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * sleep.c - x86-specific ACPI sleep support. | ||
3 | * | ||
4 | * Copyright (C) 2001-2003 Patrick Mochel | ||
5 | * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> | ||
6 | */ | ||
7 | |||
8 | #include <linux/acpi.h> | ||
9 | #include <linux/bootmem.h> | ||
10 | #include <linux/dmi.h> | ||
11 | #include <linux/cpumask.h> | ||
12 | |||
13 | #include <asm/smp.h> | ||
14 | |||
15 | /* address in low memory of the wakeup routine. */ | ||
16 | unsigned long acpi_wakeup_address = 0; | ||
17 | unsigned long acpi_realmode_flags; | ||
18 | extern char wakeup_start, wakeup_end; | ||
19 | |||
20 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | ||
21 | |||
22 | /** | ||
23 | * acpi_save_state_mem - save kernel state | ||
24 | * | ||
25 | * Create an identity mapped page table and copy the wakeup routine to | ||
26 | * low memory. | ||
27 | */ | ||
28 | int acpi_save_state_mem(void) | ||
29 | { | ||
30 | if (!acpi_wakeup_address) { | ||
31 | printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n"); | ||
32 | return -ENOMEM; | ||
33 | } | ||
34 | memcpy((void *)acpi_wakeup_address, &wakeup_start, | ||
35 | &wakeup_end - &wakeup_start); | ||
36 | acpi_copy_wakeup_routine(acpi_wakeup_address); | ||
37 | |||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * acpi_restore_state - undo effects of acpi_save_state_mem | ||
43 | */ | ||
44 | void acpi_restore_state_mem(void) | ||
45 | { | ||
46 | } | ||
47 | |||
48 | |||
49 | /** | ||
50 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | ||
51 | * | ||
52 | * We allocate a page from the first 1MB of memory for the wakeup | ||
53 | * routine for when we come back from a sleep state. The | ||
54 | * runtime allocator allows specification of <16MB pages, but not | ||
55 | * <1MB pages. | ||
56 | */ | ||
57 | void __init acpi_reserve_bootmem(void) | ||
58 | { | ||
59 | if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) { | ||
60 | printk(KERN_ERR | ||
61 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | ||
62 | return; | ||
63 | } | ||
64 | |||
65 | acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); | ||
66 | if (!acpi_wakeup_address) | ||
67 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | ||
68 | } | ||
69 | |||
70 | |||
71 | static int __init acpi_sleep_setup(char *str) | ||
72 | { | ||
73 | while ((str != NULL) && (*str != '\0')) { | ||
74 | if (strncmp(str, "s3_bios", 7) == 0) | ||
75 | acpi_realmode_flags |= 1; | ||
76 | if (strncmp(str, "s3_mode", 7) == 0) | ||
77 | acpi_realmode_flags |= 2; | ||
78 | if (strncmp(str, "s3_beep", 7) == 0) | ||
79 | acpi_realmode_flags |= 4; | ||
80 | str = strchr(str, ','); | ||
81 | if (str != NULL) | ||
82 | str += strspn(str, ", \t"); | ||
83 | } | ||
84 | return 1; | ||
85 | } | ||
86 | |||
87 | __setup("acpi_sleep=", acpi_sleep_setup); | ||
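
The acpi_sleep= handling is now shared by 32-bit and 64-bit through this new sleep.c; the s3_bios, s3_mode and s3_beep keywords OR the values 1, 2 and 4 into acpi_realmode_flags. As a quick check of the parsing loop, here is a stand-alone user-space re-implementation (not kernel code) showing that acpi_sleep=s3_bios,s3_mode leaves the flags set to 3:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char cmdline[] = "s3_bios,s3_mode";   /* value given to acpi_sleep= */
            char *str = cmdline;
            unsigned long flags = 0;

            while (str != NULL && *str != '\0') {
                    if (strncmp(str, "s3_bios", 7) == 0)
                            flags |= 1;
                    if (strncmp(str, "s3_mode", 7) == 0)
                            flags |= 2;
                    if (strncmp(str, "s3_beep", 7) == 0)
                            flags |= 4;
                    str = strchr(str, ',');
                    if (str != NULL)
                            str += strspn(str, ", \t");
            }
            printf("acpi_realmode_flags = %lu\n", flags);   /* prints 3 */
            return 0;
    }
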
diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c
index 10699489cfe7..63fe5525e026 100644
--- a/arch/x86/kernel/acpi/sleep_32.c
+++ b/arch/x86/kernel/acpi/sleep_32.c
@@ -12,76 +12,6 @@ | |||
12 | 12 | ||
13 | #include <asm/smp.h> | 13 | #include <asm/smp.h> |
14 | 14 | ||
15 | /* address in low memory of the wakeup routine. */ | ||
16 | unsigned long acpi_wakeup_address = 0; | ||
17 | unsigned long acpi_realmode_flags; | ||
18 | extern char wakeup_start, wakeup_end; | ||
19 | |||
20 | extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); | ||
21 | |||
22 | /** | ||
23 | * acpi_save_state_mem - save kernel state | ||
24 | * | ||
25 | * Create an identity mapped page table and copy the wakeup routine to | ||
26 | * low memory. | ||
27 | */ | ||
28 | int acpi_save_state_mem(void) | ||
29 | { | ||
30 | if (!acpi_wakeup_address) | ||
31 | return 1; | ||
32 | memcpy((void *)acpi_wakeup_address, &wakeup_start, | ||
33 | &wakeup_end - &wakeup_start); | ||
34 | acpi_copy_wakeup_routine(acpi_wakeup_address); | ||
35 | |||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * acpi_restore_state - undo effects of acpi_save_state_mem | ||
41 | */ | ||
42 | void acpi_restore_state_mem(void) | ||
43 | { | ||
44 | } | ||
45 | |||
46 | /** | ||
47 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | ||
48 | * | ||
49 | * We allocate a page from the first 1MB of memory for the wakeup | ||
50 | * routine for when we come back from a sleep state. The | ||
51 | * runtime allocator allows specification of <16MB pages, but not | ||
52 | * <1MB pages. | ||
53 | */ | ||
54 | void __init acpi_reserve_bootmem(void) | ||
55 | { | ||
56 | if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { | ||
57 | printk(KERN_ERR | ||
58 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | ||
59 | return; | ||
60 | } | ||
61 | |||
62 | acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); | ||
63 | if (!acpi_wakeup_address) | ||
64 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | ||
65 | } | ||
66 | |||
67 | static int __init acpi_sleep_setup(char *str) | ||
68 | { | ||
69 | while ((str != NULL) && (*str != '\0')) { | ||
70 | if (strncmp(str, "s3_bios", 7) == 0) | ||
71 | acpi_realmode_flags |= 1; | ||
72 | if (strncmp(str, "s3_mode", 7) == 0) | ||
73 | acpi_realmode_flags |= 2; | ||
74 | if (strncmp(str, "s3_beep", 7) == 0) | ||
75 | acpi_realmode_flags |= 4; | ||
76 | str = strchr(str, ','); | ||
77 | if (str != NULL) | ||
78 | str += strspn(str, ", \t"); | ||
79 | } | ||
80 | return 1; | ||
81 | } | ||
82 | |||
83 | __setup("acpi_sleep=", acpi_sleep_setup); | ||
84 | |||
85 | /* Ouch, we want to delete this. We already have better version in userspace, in | 15 | /* Ouch, we want to delete this. We already have better version in userspace, in |
86 | s2ram from suspend.sf.net project */ | 16 | s2ram from suspend.sf.net project */ |
87 | static __init int reset_videomode_after_s3(const struct dmi_system_id *d) | 17 | static __init int reset_videomode_after_s3(const struct dmi_system_id *d) |
diff --git a/arch/x86/kernel/acpi/sleep_64.c b/arch/x86/kernel/acpi/sleep_64.c
deleted file mode 100644
index da42de261ba8..000000000000
--- a/arch/x86/kernel/acpi/sleep_64.c
+++ /dev/null
@@ -1,117 +0,0 @@ | |||
1 | /* | ||
2 | * acpi.c - Architecture-Specific Low-Level ACPI Support | ||
3 | * | ||
4 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | ||
5 | * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> | ||
6 | * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> | ||
7 | * Copyright (C) 2002 Andi Kleen, SuSE Labs (x86-64 port) | ||
8 | * Copyright (C) 2003 Pavel Machek, SuSE Labs | ||
9 | * | ||
10 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License as published by | ||
14 | * the Free Software Foundation; either version 2 of the License, or | ||
15 | * (at your option) any later version. | ||
16 | * | ||
17 | * This program is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; if not, write to the Free Software | ||
24 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
25 | * | ||
26 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
27 | */ | ||
28 | |||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/types.h> | ||
32 | #include <linux/stddef.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/bootmem.h> | ||
36 | #include <linux/acpi.h> | ||
37 | #include <linux/cpumask.h> | ||
38 | |||
39 | #include <asm/mpspec.h> | ||
40 | #include <asm/io.h> | ||
41 | #include <asm/apic.h> | ||
42 | #include <asm/apicdef.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/pgtable.h> | ||
45 | #include <asm/pgalloc.h> | ||
46 | #include <asm/io_apic.h> | ||
47 | #include <asm/proto.h> | ||
48 | #include <asm/tlbflush.h> | ||
49 | |||
50 | /* -------------------------------------------------------------------------- | ||
51 | Low-Level Sleep Support | ||
52 | -------------------------------------------------------------------------- */ | ||
53 | |||
54 | /* address in low memory of the wakeup routine. */ | ||
55 | unsigned long acpi_wakeup_address = 0; | ||
56 | unsigned long acpi_realmode_flags; | ||
57 | extern char wakeup_start, wakeup_end; | ||
58 | |||
59 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | ||
60 | |||
61 | /** | ||
62 | * acpi_save_state_mem - save kernel state | ||
63 | * | ||
64 | * Create an identity mapped page table and copy the wakeup routine to | ||
65 | * low memory. | ||
66 | */ | ||
67 | int acpi_save_state_mem(void) | ||
68 | { | ||
69 | memcpy((void *)acpi_wakeup_address, &wakeup_start, | ||
70 | &wakeup_end - &wakeup_start); | ||
71 | acpi_copy_wakeup_routine(acpi_wakeup_address); | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * acpi_restore_state | ||
78 | */ | ||
79 | void acpi_restore_state_mem(void) | ||
80 | { | ||
81 | } | ||
82 | |||
83 | /** | ||
84 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | ||
85 | * | ||
86 | * We allocate a page in low memory for the wakeup | ||
87 | * routine for when we come back from a sleep state. The | ||
88 | * runtime allocator allows specification of <16M pages, but not | ||
89 | * <1M pages. | ||
90 | */ | ||
91 | void __init acpi_reserve_bootmem(void) | ||
92 | { | ||
93 | acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); | ||
94 | if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) | ||
95 | printk(KERN_CRIT | ||
96 | "ACPI: Wakeup code way too big, will crash on attempt" | ||
97 | " to suspend\n"); | ||
98 | } | ||
99 | |||
100 | static int __init acpi_sleep_setup(char *str) | ||
101 | { | ||
102 | while ((str != NULL) && (*str != '\0')) { | ||
103 | if (strncmp(str, "s3_bios", 7) == 0) | ||
104 | acpi_realmode_flags |= 1; | ||
105 | if (strncmp(str, "s3_mode", 7) == 0) | ||
106 | acpi_realmode_flags |= 2; | ||
107 | if (strncmp(str, "s3_beep", 7) == 0) | ||
108 | acpi_realmode_flags |= 4; | ||
109 | str = strchr(str, ','); | ||
110 | if (str != NULL) | ||
111 | str += strspn(str, ", \t"); | ||
112 | } | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | __setup("acpi_sleep=", acpi_sleep_setup); | ||
117 | |||
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 1e931aaf2ef6..f53e3277f8e5 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@ | |||
1 | .text | 1 | .section .text.page_aligned |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
4 | #include <asm/page.h> | 4 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 5ed3bc5c61d7..2e1b9e0d0767 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -344,13 +344,13 @@ do_suspend_lowlevel: | |||
344 | call save_processor_state | 344 | call save_processor_state |
345 | 345 | ||
346 | movq $saved_context, %rax | 346 | movq $saved_context, %rax |
347 | movq %rsp, pt_regs_rsp(%rax) | 347 | movq %rsp, pt_regs_sp(%rax) |
348 | movq %rbp, pt_regs_rbp(%rax) | 348 | movq %rbp, pt_regs_bp(%rax) |
349 | movq %rsi, pt_regs_rsi(%rax) | 349 | movq %rsi, pt_regs_si(%rax) |
350 | movq %rdi, pt_regs_rdi(%rax) | 350 | movq %rdi, pt_regs_di(%rax) |
351 | movq %rbx, pt_regs_rbx(%rax) | 351 | movq %rbx, pt_regs_bx(%rax) |
352 | movq %rcx, pt_regs_rcx(%rax) | 352 | movq %rcx, pt_regs_cx(%rax) |
353 | movq %rdx, pt_regs_rdx(%rax) | 353 | movq %rdx, pt_regs_dx(%rax) |
354 | movq %r8, pt_regs_r8(%rax) | 354 | movq %r8, pt_regs_r8(%rax) |
355 | movq %r9, pt_regs_r9(%rax) | 355 | movq %r9, pt_regs_r9(%rax) |
356 | movq %r10, pt_regs_r10(%rax) | 356 | movq %r10, pt_regs_r10(%rax) |
@@ -360,7 +360,7 @@ do_suspend_lowlevel: | |||
360 | movq %r14, pt_regs_r14(%rax) | 360 | movq %r14, pt_regs_r14(%rax) |
361 | movq %r15, pt_regs_r15(%rax) | 361 | movq %r15, pt_regs_r15(%rax) |
362 | pushfq | 362 | pushfq |
363 | popq pt_regs_eflags(%rax) | 363 | popq pt_regs_flags(%rax) |
364 | 364 | ||
365 | movq $.L97, saved_rip(%rip) | 365 | movq $.L97, saved_rip(%rip) |
366 | 366 | ||
@@ -391,15 +391,15 @@ do_suspend_lowlevel: | |||
391 | movq %rbx, %cr2 | 391 | movq %rbx, %cr2 |
392 | movq saved_context_cr0(%rax), %rbx | 392 | movq saved_context_cr0(%rax), %rbx |
393 | movq %rbx, %cr0 | 393 | movq %rbx, %cr0 |
394 | pushq pt_regs_eflags(%rax) | 394 | pushq pt_regs_flags(%rax) |
395 | popfq | 395 | popfq |
396 | movq pt_regs_rsp(%rax), %rsp | 396 | movq pt_regs_sp(%rax), %rsp |
397 | movq pt_regs_rbp(%rax), %rbp | 397 | movq pt_regs_bp(%rax), %rbp |
398 | movq pt_regs_rsi(%rax), %rsi | 398 | movq pt_regs_si(%rax), %rsi |
399 | movq pt_regs_rdi(%rax), %rdi | 399 | movq pt_regs_di(%rax), %rdi |
400 | movq pt_regs_rbx(%rax), %rbx | 400 | movq pt_regs_bx(%rax), %rbx |
401 | movq pt_regs_rcx(%rax), %rcx | 401 | movq pt_regs_cx(%rax), %rcx |
402 | movq pt_regs_rdx(%rax), %rdx | 402 | movq pt_regs_dx(%rax), %rdx |
403 | movq pt_regs_r8(%rax), %r8 | 403 | movq pt_regs_r8(%rax), %r8 |
404 | movq pt_regs_r9(%rax), %r9 | 404 | movq pt_regs_r9(%rax), %r9 |
405 | movq pt_regs_r10(%rax), %r10 | 405 | movq pt_regs_r10(%rax), %r10 |
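
The operand renames above (pt_regs_rsp becomes pt_regs_sp, pt_regs_eflags becomes pt_regs_flags, and so on) track the x86 unification's renaming of the struct pt_regs members; the pt_regs_* constants used by this assembly are offsetof() values generated from that structure at build time via the asm-offsets mechanism. A stand-alone illustration using a trimmed stand-in structure (not the real pt_regs layout):

    #include <stdio.h>
    #include <stddef.h>

    /* trimmed stand-in, for illustration only */
    struct pt_regs_example {
            unsigned long bx, cx, dx, si, di, bp, sp, flags;
    };

    int main(void)
    {
            printf("pt_regs_sp    = %zu\n", offsetof(struct pt_regs_example, sp));
            printf("pt_regs_flags = %zu\n", offsetof(struct pt_regs_example, flags));
            return 0;
    }
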
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d6405e0842b5..45d79ea890ae 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -273,6 +273,7 @@ struct smp_alt_module { | |||
273 | }; | 273 | }; |
274 | static LIST_HEAD(smp_alt_modules); | 274 | static LIST_HEAD(smp_alt_modules); |
275 | static DEFINE_SPINLOCK(smp_alt); | 275 | static DEFINE_SPINLOCK(smp_alt); |
276 | static int smp_mode = 1; /* protected by smp_alt */ | ||
276 | 277 | ||
277 | void alternatives_smp_module_add(struct module *mod, char *name, | 278 | void alternatives_smp_module_add(struct module *mod, char *name, |
278 | void *locks, void *locks_end, | 279 | void *locks, void *locks_end, |
@@ -341,12 +342,13 @@ void alternatives_smp_switch(int smp) | |||
341 | 342 | ||
342 | #ifdef CONFIG_LOCKDEP | 343 | #ifdef CONFIG_LOCKDEP |
343 | /* | 344 | /* |
344 | * A not yet fixed binutils section handling bug prevents | 345 | * Older binutils section handling bug prevented |
345 | * alternatives-replacement from working reliably, so turn | 346 | * alternatives-replacement from working reliably. |
346 | * it off: | 347 | * |
348 | * If this still occurs then you should see a hang | ||
349 | * or crash shortly after this line: | ||
347 | */ | 350 | */ |
348 | printk("lockdep: not fixing up alternatives.\n"); | 351 | printk("lockdep: fixing up alternatives.\n"); |
349 | return; | ||
350 | #endif | 352 | #endif |
351 | 353 | ||
352 | if (noreplace_smp || smp_alt_once) | 354 | if (noreplace_smp || smp_alt_once) |
@@ -354,21 +356,29 @@ void alternatives_smp_switch(int smp) | |||
354 | BUG_ON(!smp && (num_online_cpus() > 1)); | 356 | BUG_ON(!smp && (num_online_cpus() > 1)); |
355 | 357 | ||
356 | spin_lock_irqsave(&smp_alt, flags); | 358 | spin_lock_irqsave(&smp_alt, flags); |
357 | if (smp) { | 359 | |
360 | /* | ||
361 | * Avoid unnecessary switches because it forces JIT based VMs to | ||
362 | * throw away all cached translations, which can be quite costly. | ||
363 | */ | ||
364 | if (smp == smp_mode) { | ||
365 | /* nothing */ | ||
366 | } else if (smp) { | ||
358 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 367 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); |
359 | clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 368 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
360 | clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); | 369 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
361 | list_for_each_entry(mod, &smp_alt_modules, next) | 370 | list_for_each_entry(mod, &smp_alt_modules, next) |
362 | alternatives_smp_lock(mod->locks, mod->locks_end, | 371 | alternatives_smp_lock(mod->locks, mod->locks_end, |
363 | mod->text, mod->text_end); | 372 | mod->text, mod->text_end); |
364 | } else { | 373 | } else { |
365 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 374 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
366 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 375 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
367 | set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); | 376 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
368 | list_for_each_entry(mod, &smp_alt_modules, next) | 377 | list_for_each_entry(mod, &smp_alt_modules, next) |
369 | alternatives_smp_unlock(mod->locks, mod->locks_end, | 378 | alternatives_smp_unlock(mod->locks, mod->locks_end, |
370 | mod->text, mod->text_end); | 379 | mod->text, mod->text_end); |
371 | } | 380 | } |
381 | smp_mode = smp; | ||
372 | spin_unlock_irqrestore(&smp_alt, flags); | 382 | spin_unlock_irqrestore(&smp_alt, flags); |
373 | } | 383 | } |
374 | 384 | ||
@@ -431,8 +441,9 @@ void __init alternative_instructions(void) | |||
431 | if (smp_alt_once) { | 441 | if (smp_alt_once) { |
432 | if (1 == num_possible_cpus()) { | 442 | if (1 == num_possible_cpus()) { |
433 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 443 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
434 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 444 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
435 | set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); | 445 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
446 | |||
436 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, | 447 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, |
437 | _text, _etext); | 448 | _text, _etext); |
438 | } | 449 | } |
@@ -440,7 +451,10 @@ void __init alternative_instructions(void) | |||
440 | alternatives_smp_module_add(NULL, "core kernel", | 451 | alternatives_smp_module_add(NULL, "core kernel", |
441 | __smp_locks, __smp_locks_end, | 452 | __smp_locks, __smp_locks_end, |
442 | _text, _etext); | 453 | _text, _etext); |
443 | alternatives_smp_switch(0); | 454 | |
455 | /* Only switch to UP mode if we don't immediately boot others */ | ||
456 | if (num_possible_cpus() == 1 || setup_max_cpus <= 1) | ||
457 | alternatives_smp_switch(0); | ||
444 | } | 458 | } |
445 | #endif | 459 | #endif |
446 | apply_paravirt(__parainstructions, __parainstructions_end); | 460 | apply_paravirt(__parainstructions, __parainstructions_end); |
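
Besides switching the X86_FEATURE_UP updates to the set_cpu_cap()/clear_cpu_cap() accessors, alternatives_smp_switch() now remembers the last mode it applied in smp_mode and returns early when asked to apply it again; as the hunk's comment notes, every re-patching of kernel text forces JIT-based VMs to drop their cached translations. The guard in isolation, as a small self-contained sketch with hypothetical names:

    #include <stdio.h>

    /* hypothetical stand-in for the real lock-prefix patching */
    static void patch_text_for(int smp)
    {
            printf("re-patching kernel text for %s\n", smp ? "SMP" : "UP");
    }

    static int current_mode = 1;    /* the kernel boots with SMP-safe code */

    static void smp_switch(int smp)
    {
            if (smp == current_mode)
                    return;         /* no change: skip the costly re-patching */
            patch_text_for(smp);
            current_mode = smp;
    }

    int main(void)
    {
            smp_switch(1);          /* no-op: already in SMP mode */
            smp_switch(0);          /* patches to UP */
            smp_switch(0);          /* no-op */
            return 0;
    }
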
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5b6992799c9d..608152a2a05e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,12 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * Firmware replacement code. | 2 | * Firmware replacement code. |
3 | * | 3 | * |
4 | * Work around broken BIOSes that don't set an aperture or only set the | 4 | * Work around broken BIOSes that don't set an aperture or only set the |
5 | * aperture in the AGP bridge. | 5 | * aperture in the AGP bridge. |
6 | * If all fails map the aperture over some low memory. This is cheaper than | 6 | * If all fails map the aperture over some low memory. This is cheaper than |
7 | * doing bounce buffering. The memory is lost. This is done at early boot | 7 | * doing bounce buffering. The memory is lost. This is done at early boot |
8 | * because only the bootmem allocator can allocate 32+MB. | 8 | * because only the bootmem allocator can allocate 32+MB. |
9 | * | 9 | * |
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | 10 | * Copyright 2002 Andi Kleen, SuSE Labs. |
11 | */ | 11 | */ |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
@@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initdata = 0; | |||
30 | int gart_iommu_aperture_allowed __initdata = 0; | 30 | int gart_iommu_aperture_allowed __initdata = 0; |
31 | 31 | ||
32 | int fallback_aper_order __initdata = 1; /* 64MB */ | 32 | int fallback_aper_order __initdata = 1; /* 64MB */ |
33 | int fallback_aper_force __initdata = 0; | 33 | int fallback_aper_force __initdata = 0; |
34 | 34 | ||
35 | int fix_aperture __initdata = 1; | 35 | int fix_aperture __initdata = 1; |
36 | 36 | ||
@@ -49,167 +49,270 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) | |||
49 | /* This code runs before the PCI subsystem is initialized, so just | 49 | /* This code runs before the PCI subsystem is initialized, so just |
50 | access the northbridge directly. */ | 50 | access the northbridge directly. */ |
51 | 51 | ||
52 | static u32 __init allocate_aperture(void) | 52 | static u32 __init allocate_aperture(void) |
53 | { | 53 | { |
54 | u32 aper_size; | 54 | u32 aper_size; |
55 | void *p; | 55 | void *p; |
56 | 56 | ||
57 | if (fallback_aper_order > 7) | 57 | if (fallback_aper_order > 7) |
58 | fallback_aper_order = 7; | 58 | fallback_aper_order = 7; |
59 | aper_size = (32 * 1024 * 1024) << fallback_aper_order; | 59 | aper_size = (32 * 1024 * 1024) << fallback_aper_order; |
60 | 60 | ||
61 | /* | 61 | /* |
62 | * Aperture has to be naturally aligned. This means an 2GB aperture won't | 62 | * Aperture has to be naturally aligned. This means a 2GB aperture |
63 | * have much chance of finding a place in the lower 4GB of memory. | 63 | * won't have much chance of finding a place in the lower 4GB of |
64 | * Unfortunately we cannot move it up because that would make the | 64 | * memory. Unfortunately we cannot move it up because that would |
65 | * IOMMU useless. | 65 | * make the IOMMU useless. |
66 | */ | 66 | */ |
67 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); | 67 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); |
68 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 68 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
69 | printk("Cannot allocate aperture memory hole (%p,%uK)\n", | 69 | printk(KERN_ERR |
70 | p, aper_size>>10); | 70 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
71 | p, aper_size>>10); | ||
71 | if (p) | 72 | if (p) |
72 | free_bootmem(__pa(p), aper_size); | 73 | free_bootmem(__pa(p), aper_size); |
73 | return 0; | 74 | return 0; |
74 | } | 75 | } |
75 | printk("Mapping aperture over %d KB of RAM @ %lx\n", | 76 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", |
76 | aper_size >> 10, __pa(p)); | 77 | aper_size >> 10, __pa(p)); |
77 | insert_aperture_resource((u32)__pa(p), aper_size); | 78 | insert_aperture_resource((u32)__pa(p), aper_size); |
78 | return (u32)__pa(p); | 79 | |
80 | return (u32)__pa(p); | ||
79 | } | 81 | } |
80 | 82 | ||
81 | static int __init aperture_valid(u64 aper_base, u32 aper_size) | 83 | static int __init aperture_valid(u64 aper_base, u32 aper_size) |
82 | { | 84 | { |
83 | if (!aper_base) | 85 | if (!aper_base) |
84 | return 0; | ||
85 | if (aper_size < 64*1024*1024) { | ||
86 | printk("Aperture too small (%d MB)\n", aper_size>>20); | ||
87 | return 0; | 86 | return 0; |
88 | } | 87 | |
89 | if (aper_base + aper_size > 0x100000000UL) { | 88 | if (aper_base + aper_size > 0x100000000UL) { |
90 | printk("Aperture beyond 4GB. Ignoring.\n"); | 89 | printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); |
91 | return 0; | 90 | return 0; |
92 | } | 91 | } |
93 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { | 92 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { |
94 | printk("Aperture pointing to e820 RAM. Ignoring.\n"); | 93 | printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); |
95 | return 0; | 94 | return 0; |
96 | } | 95 | } |
96 | if (aper_size < 64*1024*1024) { | ||
97 | printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); | ||
98 | return 0; | ||
99 | } | ||
100 | |||
97 | return 1; | 101 | return 1; |
98 | } | 102 | } |
99 | 103 | ||
100 | /* Find a PCI capability */ | 104 | /* Find a PCI capability */ |
101 | static __u32 __init find_cap(int num, int slot, int func, int cap) | 105 | static __u32 __init find_cap(int num, int slot, int func, int cap) |
102 | { | 106 | { |
103 | u8 pos; | ||
104 | int bytes; | 107 | int bytes; |
105 | if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) | 108 | u8 pos; |
109 | |||
110 | if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & | ||
111 | PCI_STATUS_CAP_LIST)) | ||
106 | return 0; | 112 | return 0; |
107 | pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); | 113 | |
108 | for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { | 114 | pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); |
115 | for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { | ||
109 | u8 id; | 116 | u8 id; |
110 | pos &= ~3; | 117 | |
111 | id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); | 118 | pos &= ~3; |
119 | id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); | ||
112 | if (id == 0xff) | 120 | if (id == 0xff) |
113 | break; | 121 | break; |
114 | if (id == cap) | 122 | if (id == cap) |
115 | return pos; | 123 | return pos; |
116 | pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); | 124 | pos = read_pci_config_byte(num, slot, func, |
117 | } | 125 | pos+PCI_CAP_LIST_NEXT); |
126 | } | ||
118 | return 0; | 127 | return 0; |
119 | } | 128 | } |
120 | 129 | ||
121 | /* Read a standard AGPv3 bridge header */ | 130 | /* Read a standard AGPv3 bridge header */ |
122 | static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) | 131 | static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) |
123 | { | 132 | { |
124 | u32 apsize; | 133 | u32 apsize; |
125 | u32 apsizereg; | 134 | u32 apsizereg; |
126 | int nbits; | 135 | int nbits; |
127 | u32 aper_low, aper_hi; | 136 | u32 aper_low, aper_hi; |
128 | u64 aper; | 137 | u64 aper; |
129 | 138 | ||
130 | printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); | 139 | printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); |
131 | apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); | 140 | apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); |
132 | if (apsizereg == 0xffffffff) { | 141 | if (apsizereg == 0xffffffff) { |
133 | printk("APSIZE in AGP bridge unreadable\n"); | 142 | printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); |
134 | return 0; | 143 | return 0; |
135 | } | 144 | } |
136 | 145 | ||
137 | apsize = apsizereg & 0xfff; | 146 | apsize = apsizereg & 0xfff; |
138 | /* Some BIOS use weird encodings not in the AGPv3 table. */ | 147 | /* Some BIOS use weird encodings not in the AGPv3 table. */ |
139 | if (apsize & 0xff) | 148 | if (apsize & 0xff) |
140 | apsize |= 0xf00; | 149 | apsize |= 0xf00; |
141 | nbits = hweight16(apsize); | 150 | nbits = hweight16(apsize); |
142 | *order = 7 - nbits; | 151 | *order = 7 - nbits; |
143 | if ((int)*order < 0) /* < 32MB */ | 152 | if ((int)*order < 0) /* < 32MB */ |
144 | *order = 0; | 153 | *order = 0; |
145 | 154 | ||
146 | aper_low = read_pci_config(num,slot,func, 0x10); | 155 | aper_low = read_pci_config(num, slot, func, 0x10); |
147 | aper_hi = read_pci_config(num,slot,func,0x14); | 156 | aper_hi = read_pci_config(num, slot, func, 0x14); |
148 | aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); | 157 | aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); |
149 | 158 | ||
150 | printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", | 159 | printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", |
151 | aper, 32 << *order, apsizereg); | 160 | aper, 32 << *order, apsizereg); |
152 | 161 | ||
153 | if (!aperture_valid(aper, (32*1024*1024) << *order)) | 162 | if (!aperture_valid(aper, (32*1024*1024) << *order)) |
154 | return 0; | 163 | return 0; |
155 | return (u32)aper; | 164 | return (u32)aper; |
156 | } | 165 | } |
157 | |||
158 | /* Look for an AGP bridge. Windows only expects the aperture in the | ||
159 | AGP bridge and some BIOS forget to initialize the Northbridge too. | ||
160 | Work around this here. | ||
161 | |||
162 | Do an PCI bus scan by hand because we're running before the PCI | ||
163 | subsystem. | ||
164 | 166 | ||
165 | All K8 AGP bridges are AGPv3 compliant, so we can do this scan | 167 | /* |
166 | generically. It's probably overkill to always scan all slots because | 168 | * Look for an AGP bridge. Windows only expects the aperture in the |
167 | the AGP bridges should be always an own bus on the HT hierarchy, | 169 | * AGP bridge and some BIOS forget to initialize the Northbridge too. |
168 | but do it here for future safety. */ | 170 | * Work around this here. |
171 | * | ||
172 | * Do an PCI bus scan by hand because we're running before the PCI | ||
173 | * subsystem. | ||
174 | * | ||
175 | * All K8 AGP bridges are AGPv3 compliant, so we can do this scan | ||
176 | * generically. It's probably overkill to always scan all slots because | ||
177 | * the AGP bridges should be always an own bus on the HT hierarchy, | ||
178 | * but do it here for future safety. | ||
179 | */ | ||
169 | static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) | 180 | static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) |
170 | { | 181 | { |
171 | int num, slot, func; | 182 | int num, slot, func; |
172 | 183 | ||
173 | /* Poor man's PCI discovery */ | 184 | /* Poor man's PCI discovery */ |
174 | for (num = 0; num < 256; num++) { | 185 | for (num = 0; num < 256; num++) { |
175 | for (slot = 0; slot < 32; slot++) { | 186 | for (slot = 0; slot < 32; slot++) { |
176 | for (func = 0; func < 8; func++) { | 187 | for (func = 0; func < 8; func++) { |
177 | u32 class, cap; | 188 | u32 class, cap; |
178 | u8 type; | 189 | u8 type; |
179 | class = read_pci_config(num,slot,func, | 190 | class = read_pci_config(num, slot, func, |
180 | PCI_CLASS_REVISION); | 191 | PCI_CLASS_REVISION); |
181 | if (class == 0xffffffff) | 192 | if (class == 0xffffffff) |
182 | break; | 193 | break; |
183 | 194 | ||
184 | switch (class >> 16) { | 195 | switch (class >> 16) { |
185 | case PCI_CLASS_BRIDGE_HOST: | 196 | case PCI_CLASS_BRIDGE_HOST: |
186 | case PCI_CLASS_BRIDGE_OTHER: /* needed? */ | 197 | case PCI_CLASS_BRIDGE_OTHER: /* needed? */ |
187 | /* AGP bridge? */ | 198 | /* AGP bridge? */ |
188 | cap = find_cap(num,slot,func,PCI_CAP_ID_AGP); | 199 | cap = find_cap(num, slot, func, |
200 | PCI_CAP_ID_AGP); | ||
189 | if (!cap) | 201 | if (!cap) |
190 | break; | 202 | break; |
191 | *valid_agp = 1; | 203 | *valid_agp = 1; |
192 | return read_agp(num,slot,func,cap,order); | 204 | return read_agp(num, slot, func, cap, |
193 | } | 205 | order); |
194 | 206 | } | |
207 | |||
195 | /* No multi-function device? */ | 208 | /* No multi-function device? */ |
196 | type = read_pci_config_byte(num,slot,func, | 209 | type = read_pci_config_byte(num, slot, func, |
197 | PCI_HEADER_TYPE); | 210 | PCI_HEADER_TYPE); |
198 | if (!(type & 0x80)) | 211 | if (!(type & 0x80)) |
199 | break; | 212 | break; |
200 | } | 213 | } |
201 | } | 214 | } |
202 | } | 215 | } |
203 | printk("No AGP bridge found\n"); | 216 | printk(KERN_INFO "No AGP bridge found\n"); |
217 | |||
204 | return 0; | 218 | return 0; |
205 | } | 219 | } |
206 | 220 | ||
221 | static int gart_fix_e820 __initdata = 1; | ||
222 | |||
223 | static int __init parse_gart_mem(char *p) | ||
224 | { | ||
225 | if (!p) | ||
226 | return -EINVAL; | ||
227 | |||
228 | if (!strncmp(p, "off", 3)) | ||
229 | gart_fix_e820 = 0; | ||
230 | else if (!strncmp(p, "on", 2)) | ||
231 | gart_fix_e820 = 1; | ||
232 | |||
233 | return 0; | ||
234 | } | ||
235 | early_param("gart_fix_e820", parse_gart_mem); | ||
236 | |||
237 | void __init early_gart_iommu_check(void) | ||
238 | { | ||
239 | /* | ||
240 | * in case it is enabled before, esp for kexec/kdump, | ||
241 | * previous kernel already enable that. memset called | ||
242 | * by allocate_aperture/__alloc_bootmem_nopanic cause restart. | ||
243 | * or second kernel have different position for GART hole. and new | ||
244 | * kernel could use hole as RAM that is still used by GART set by | ||
245 | * first kernel | ||
246 | * or BIOS forget to put that in reserved. | ||
247 | * try to update e820 to make that region as reserved. | ||
248 | */ | ||
249 | int fix, num; | ||
250 | u32 ctl; | ||
251 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; | ||
252 | u64 aper_base = 0, last_aper_base = 0; | ||
253 | int aper_enabled = 0, last_aper_enabled = 0; | ||
254 | |||
255 | if (!early_pci_allowed()) | ||
256 | return; | ||
257 | |||
258 | fix = 0; | ||
259 | for (num = 24; num < 32; num++) { | ||
260 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) | ||
261 | continue; | ||
262 | |||
263 | ctl = read_pci_config(0, num, 3, 0x90); | ||
264 | aper_enabled = ctl & 1; | ||
265 | aper_order = (ctl >> 1) & 7; | ||
266 | aper_size = (32 * 1024 * 1024) << aper_order; | ||
267 | aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; | ||
268 | aper_base <<= 25; | ||
269 | |||
270 | if ((last_aper_order && aper_order != last_aper_order) || | ||
271 | (last_aper_base && aper_base != last_aper_base) || | ||
272 | (last_aper_enabled && aper_enabled != last_aper_enabled)) { | ||
273 | fix = 1; | ||
274 | break; | ||
275 | } | ||
276 | last_aper_order = aper_order; | ||
277 | last_aper_base = aper_base; | ||
278 | last_aper_enabled = aper_enabled; | ||
279 | } | ||
280 | |||
281 | if (!fix && !aper_enabled) | ||
282 | return; | ||
283 | |||
284 | if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL) | ||
285 | fix = 1; | ||
286 | |||
287 | if (gart_fix_e820 && !fix && aper_enabled) { | ||
288 | if (e820_any_mapped(aper_base, aper_base + aper_size, | ||
289 | E820_RAM)) { | ||
290 | /* reserved it, so we can resuse it in second kernel */ | ||
291 | printk(KERN_INFO "update e820 for GART\n"); | ||
292 | add_memory_region(aper_base, aper_size, E820_RESERVED); | ||
293 | update_e820(); | ||
294 | } | ||
295 | return; | ||
296 | } | ||
297 | |||
298 | /* different nodes have different setting, disable them all at first*/ | ||
299 | for (num = 24; num < 32; num++) { | ||
300 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) | ||
301 | continue; | ||
302 | |||
303 | ctl = read_pci_config(0, num, 3, 0x90); | ||
304 | ctl &= ~1; | ||
305 | write_pci_config(0, num, 3, 0x90, ctl); | ||
306 | } | ||
307 | |||
308 | } | ||
309 | |||
207 | void __init gart_iommu_hole_init(void) | 310 | void __init gart_iommu_hole_init(void) |
208 | { | 311 | { |
209 | int fix, num; | ||
210 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; | 312 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; |
211 | u64 aper_base, last_aper_base = 0; | 313 | u64 aper_base, last_aper_base = 0; |
212 | int valid_agp = 0; | 314 | int fix, num, valid_agp = 0; |
315 | int node; | ||
213 | 316 | ||
214 | if (gart_iommu_aperture_disabled || !fix_aperture || | 317 | if (gart_iommu_aperture_disabled || !fix_aperture || |
215 | !early_pci_allowed()) | 318 | !early_pci_allowed()) |
@@ -218,24 +321,26 @@ void __init gart_iommu_hole_init(void) | |||
218 | printk(KERN_INFO "Checking aperture...\n"); | 321 | printk(KERN_INFO "Checking aperture...\n"); |
219 | 322 | ||
220 | fix = 0; | 323 | fix = 0; |
221 | for (num = 24; num < 32; num++) { | 324 | node = 0; |
325 | for (num = 24; num < 32; num++) { | ||
222 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) | 326 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
223 | continue; | 327 | continue; |
224 | 328 | ||
225 | iommu_detected = 1; | 329 | iommu_detected = 1; |
226 | gart_iommu_aperture = 1; | 330 | gart_iommu_aperture = 1; |
227 | 331 | ||
228 | aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; | 332 | aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; |
229 | aper_size = (32 * 1024 * 1024) << aper_order; | 333 | aper_size = (32 * 1024 * 1024) << aper_order; |
230 | aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; | 334 | aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; |
231 | aper_base <<= 25; | 335 | aper_base <<= 25; |
336 | |||
337 | printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", | ||
338 | node, aper_base, aper_size >> 20); | ||
339 | node++; | ||
232 | 340 | ||
233 | printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, | ||
234 | aper_base, aper_size>>20); | ||
235 | |||
236 | if (!aperture_valid(aper_base, aper_size)) { | 341 | if (!aperture_valid(aper_base, aper_size)) { |
237 | fix = 1; | 342 | fix = 1; |
238 | break; | 343 | break; |
239 | } | 344 | } |
240 | 345 | ||
241 | if ((last_aper_order && aper_order != last_aper_order) || | 346 | if ((last_aper_order && aper_order != last_aper_order) || |
@@ -245,55 +350,64 @@ void __init gart_iommu_hole_init(void) | |||
245 | } | 350 | } |
246 | last_aper_order = aper_order; | 351 | last_aper_order = aper_order; |
247 | last_aper_base = aper_base; | 352 | last_aper_base = aper_base; |
248 | } | 353 | } |
249 | 354 | ||
250 | if (!fix && !fallback_aper_force) { | 355 | if (!fix && !fallback_aper_force) { |
251 | if (last_aper_base) { | 356 | if (last_aper_base) { |
252 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; | 357 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; |
358 | |||
253 | insert_aperture_resource((u32)last_aper_base, n); | 359 | insert_aperture_resource((u32)last_aper_base, n); |
254 | } | 360 | } |
255 | return; | 361 | return; |
256 | } | 362 | } |
257 | 363 | ||
258 | if (!fallback_aper_force) | 364 | if (!fallback_aper_force) |
259 | aper_alloc = search_agp_bridge(&aper_order, &valid_agp); | 365 | aper_alloc = search_agp_bridge(&aper_order, &valid_agp); |
260 | 366 | ||
261 | if (aper_alloc) { | 367 | if (aper_alloc) { |
262 | /* Got the aperture from the AGP bridge */ | 368 | /* Got the aperture from the AGP bridge */ |
263 | } else if (swiotlb && !valid_agp) { | 369 | } else if (swiotlb && !valid_agp) { |
264 | /* Do nothing */ | 370 | /* Do nothing */ |
265 | } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || | 371 | } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || |
266 | force_iommu || | 372 | force_iommu || |
267 | valid_agp || | 373 | valid_agp || |
268 | fallback_aper_force) { | 374 | fallback_aper_force) { |
269 | printk("Your BIOS doesn't leave a aperture memory hole\n"); | 375 | printk(KERN_ERR |
270 | printk("Please enable the IOMMU option in the BIOS setup\n"); | 376 | "Your BIOS doesn't leave a aperture memory hole\n"); |
271 | printk("This costs you %d MB of RAM\n", | 377 | printk(KERN_ERR |
272 | 32 << fallback_aper_order); | 378 | "Please enable the IOMMU option in the BIOS setup\n"); |
379 | printk(KERN_ERR | ||
380 | "This costs you %d MB of RAM\n", | ||
381 | 32 << fallback_aper_order); | ||
273 | 382 | ||
274 | aper_order = fallback_aper_order; | 383 | aper_order = fallback_aper_order; |
275 | aper_alloc = allocate_aperture(); | 384 | aper_alloc = allocate_aperture(); |
276 | if (!aper_alloc) { | 385 | if (!aper_alloc) { |
277 | /* Could disable AGP and IOMMU here, but it's probably | 386 | /* |
278 | not worth it. But the later users cannot deal with | 387 | * Could disable AGP and IOMMU here, but it's |
279 | bad apertures and turning on the aperture over memory | 388 | * probably not worth it. But the later users |
280 | causes very strange problems, so it's better to | 389 | * cannot deal with bad apertures and turning |
281 | panic early. */ | 390 | * on the aperture over memory causes very |
391 | * strange problems, so it's better to panic | ||
392 | * early. | ||
393 | */ | ||
282 | panic("Not enough memory for aperture"); | 394 | panic("Not enough memory for aperture"); |
283 | } | 395 | } |
284 | } else { | 396 | } else { |
285 | return; | 397 | return; |
286 | } | 398 | } |
287 | 399 | ||
288 | /* Fix up the north bridges */ | 400 | /* Fix up the north bridges */ |
289 | for (num = 24; num < 32; num++) { | 401 | for (num = 24; num < 32; num++) { |
290 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) | 402 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
291 | continue; | 403 | continue; |
292 | 404 | ||
293 | /* Don't enable translation yet. That is done later. | 405 | /* |
294 | Assume this BIOS didn't initialise the GART so | 406 | * Don't enable translation yet. That is done later. |
295 | just overwrite all previous bits */ | 407 | * Assume this BIOS didn't initialise the GART so |
296 | write_pci_config(0, num, 3, 0x90, aper_order<<1); | 408 | * just overwrite all previous bits |
297 | write_pci_config(0, num, 3, 0x94, aper_alloc>>25); | 409 | */ |
298 | } | 410 | write_pci_config(0, num, 3, 0x90, aper_order<<1); |
299 | } | 411 | write_pci_config(0, num, 3, 0x94, aper_alloc>>25); |
412 | } | ||
413 | } | ||
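
Both the new early_gart_iommu_check() and gart_iommu_hole_init() decode the K8 northbridge aperture registers the same way: PCI config offset 0x90 carries the enable bit and the size order (32MB << order), and offset 0x94 carries the aperture base in 32MB units (value << 25). A stand-alone decode with made-up register values:

    #include <stdio.h>

    int main(void)
    {
            unsigned int ctl  = 0x03;       /* example value from config offset 0x90 */
            unsigned int base = 0x0020;     /* example value from config offset 0x94 */

            unsigned int enabled         = ctl & 1;
            unsigned int order           = (ctl >> 1) & 7;
            unsigned long long aper_size = (32ULL << 20) << order;
            unsigned long long aper_base = (unsigned long long)(base & 0x7fff) << 25;

            /* prints: enabled=1 size=64 MB base=0x40000000 */
            printf("enabled=%u size=%llu MB base=%#llx\n",
                   enabled, aper_size >> 20, aper_base);
            return 0;
    }
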
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index edb5108e5d0e..35a568ea8400 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -43,12 +43,10 @@ | |||
43 | #include <mach_apicdef.h> | 43 | #include <mach_apicdef.h> |
44 | #include <mach_ipi.h> | 44 | #include <mach_ipi.h> |
45 | 45 | ||
46 | #include "io_ports.h" | ||
47 | |||
48 | /* | 46 | /* |
49 | * Sanity check | 47 | * Sanity check |
50 | */ | 48 | */ |
51 | #if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F | 49 | #if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) |
52 | # error SPURIOUS_APIC_VECTOR definition error | 50 | # error SPURIOUS_APIC_VECTOR definition error |
53 | #endif | 51 | #endif |
54 | 52 | ||
@@ -57,7 +55,7 @@ | |||
57 | * | 55 | * |
58 | * -1=force-disable, +1=force-enable | 56 | * -1=force-disable, +1=force-enable |
59 | */ | 57 | */ |
60 | static int enable_local_apic __initdata = 0; | 58 | static int enable_local_apic __initdata; |
61 | 59 | ||
62 | /* Local APIC timer verification ok */ | 60 | /* Local APIC timer verification ok */ |
63 | static int local_apic_timer_verify_ok; | 61 | static int local_apic_timer_verify_ok; |
@@ -101,6 +99,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | |||
101 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | 99 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ |
102 | static int enabled_via_apicbase; | 100 | static int enabled_via_apicbase; |
103 | 101 | ||
102 | static unsigned long apic_phys; | ||
103 | |||
104 | /* | 104 | /* |
105 | * Get the LAPIC version | 105 | * Get the LAPIC version |
106 | */ | 106 | */ |
@@ -110,7 +110,7 @@ static inline int lapic_get_version(void) | |||
110 | } | 110 | } |
111 | 111 | ||
112 | /* | 112 | /* |
113 | * Check, if the APIC is integrated or a seperate chip | 113 | * Check, if the APIC is integrated or a separate chip |
114 | */ | 114 | */ |
115 | static inline int lapic_is_integrated(void) | 115 | static inline int lapic_is_integrated(void) |
116 | { | 116 | { |
@@ -135,9 +135,9 @@ void apic_wait_icr_idle(void) | |||
135 | cpu_relax(); | 135 | cpu_relax(); |
136 | } | 136 | } |
137 | 137 | ||
138 | unsigned long safe_apic_wait_icr_idle(void) | 138 | u32 safe_apic_wait_icr_idle(void) |
139 | { | 139 | { |
140 | unsigned long send_status; | 140 | u32 send_status; |
141 | int timeout; | 141 | int timeout; |
142 | 142 | ||
143 | timeout = 0; | 143 | timeout = 0; |
@@ -154,7 +154,7 @@ unsigned long safe_apic_wait_icr_idle(void) | |||
154 | /** | 154 | /** |
155 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | 155 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 |
156 | */ | 156 | */ |
157 | void enable_NMI_through_LVT0 (void * dummy) | 157 | void __cpuinit enable_NMI_through_LVT0(void) |
158 | { | 158 | { |
159 | unsigned int v = APIC_DM_NMI; | 159 | unsigned int v = APIC_DM_NMI; |
160 | 160 | ||
@@ -379,8 +379,10 @@ void __init setup_boot_APIC_clock(void) | |||
379 | */ | 379 | */ |
380 | if (local_apic_timer_disabled) { | 380 | if (local_apic_timer_disabled) { |
381 | /* No broadcast on UP ! */ | 381 | /* No broadcast on UP ! */ |
382 | if (num_possible_cpus() > 1) | 382 | if (num_possible_cpus() > 1) { |
383 | lapic_clockevent.mult = 1; | ||
383 | setup_APIC_timer(); | 384 | setup_APIC_timer(); |
385 | } | ||
384 | return; | 386 | return; |
385 | } | 387 | } |
386 | 388 | ||
@@ -434,7 +436,7 @@ void __init setup_boot_APIC_clock(void) | |||
434 | "with PM Timer: %ldms instead of 100ms\n", | 436 | "with PM Timer: %ldms instead of 100ms\n", |
435 | (long)res); | 437 | (long)res); |
436 | /* Correct the lapic counter value */ | 438 | /* Correct the lapic counter value */ |
437 | res = (((u64) delta ) * pm_100ms); | 439 | res = (((u64) delta) * pm_100ms); |
438 | do_div(res, deltapm); | 440 | do_div(res, deltapm); |
439 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | 441 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " |
440 | "%lu (%ld)\n", (unsigned long) res, delta); | 442 | "%lu (%ld)\n", (unsigned long) res, delta); |
@@ -472,6 +474,19 @@ void __init setup_boot_APIC_clock(void) | |||
472 | 474 | ||
473 | local_apic_timer_verify_ok = 1; | 475 | local_apic_timer_verify_ok = 1; |
474 | 476 | ||
477 | /* | ||
478 | * Do a sanity check on the APIC calibration result | ||
479 | */ | ||
480 | if (calibration_result < (1000000 / HZ)) { | ||
481 | local_irq_enable(); | ||
482 | printk(KERN_WARNING | ||
483 | "APIC frequency too slow, disabling apic timer\n"); | ||
484 | /* No broadcast on UP ! */ | ||
485 | if (num_possible_cpus() > 1) | ||
486 | setup_APIC_timer(); | ||
487 | return; | ||
488 | } | ||
489 | |||
475 | /* We trust the pm timer based calibration */ | 490 | /* We trust the pm timer based calibration */ |
476 | if (!pm_referenced) { | 491 | if (!pm_referenced) { |
477 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | 492 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); |
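The sanity check introduced just above treats calibration_result (APIC bus clocks per timer tick) as too slow when it falls below 1000000/HZ, i.e. when the APIC timer clock is under 1 MHz. As a quick illustration with an assumed HZ of 1000: the floor is 1000000 / 1000 = 1000 clocks per tick; anything less disables the APIC timer, and on SMP the clockevent is still registered so the broadcast mechanism can stand in.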
@@ -563,6 +578,9 @@ static void local_apic_timer_interrupt(void) | |||
563 | return; | 578 | return; |
564 | } | 579 | } |
565 | 580 | ||
581 | /* | ||
582 | * the NMI deadlock-detector uses this. | ||
583 | */ | ||
566 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | 584 | per_cpu(irq_stat, cpu).apic_timer_irqs++; |
567 | 585 | ||
568 | evt->event_handler(evt); | 586 | evt->event_handler(evt); |
@@ -576,8 +594,7 @@ static void local_apic_timer_interrupt(void) | |||
576 | * [ if a single-CPU system runs an SMP kernel then we call the local | 594 | * [ if a single-CPU system runs an SMP kernel then we call the local |
577 | * interrupt as well. Thus we cannot inline the local irq ... ] | 595 | * interrupt as well. Thus we cannot inline the local irq ... ] |
578 | */ | 596 | */ |
579 | 597 | void smp_apic_timer_interrupt(struct pt_regs *regs) | |
580 | void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) | ||
581 | { | 598 | { |
582 | struct pt_regs *old_regs = set_irq_regs(regs); | 599 | struct pt_regs *old_regs = set_irq_regs(regs); |
583 | 600 | ||
@@ -616,9 +633,14 @@ int setup_profiling_timer(unsigned int multiplier) | |||
616 | */ | 633 | */ |
617 | void clear_local_APIC(void) | 634 | void clear_local_APIC(void) |
618 | { | 635 | { |
619 | int maxlvt = lapic_get_maxlvt(); | 636 | int maxlvt; |
620 | unsigned long v; | 637 | u32 v; |
638 | |||
639 | /* APIC hasn't been mapped yet */ | ||
640 | if (!apic_phys) | ||
641 | return; | ||
621 | 642 | ||
643 | maxlvt = lapic_get_maxlvt(); | ||
622 | /* | 644 | /* |
623 | * Masking an LVT entry can trigger a local APIC error | 645 | * Masking an LVT entry can trigger a local APIC error |
624 | * if the vector is zero. Mask LVTERR first to prevent this. | 646 | * if the vector is zero. Mask LVTERR first to prevent this. |
@@ -976,7 +998,8 @@ void __cpuinit setup_local_APIC(void) | |||
976 | value |= APIC_LVT_LEVEL_TRIGGER; | 998 | value |= APIC_LVT_LEVEL_TRIGGER; |
977 | apic_write_around(APIC_LVT1, value); | 999 | apic_write_around(APIC_LVT1, value); |
978 | 1000 | ||
979 | if (integrated && !esr_disable) { /* !82489DX */ | 1001 | if (integrated && !esr_disable) { |
1002 | /* !82489DX */ | ||
980 | maxlvt = lapic_get_maxlvt(); | 1003 | maxlvt = lapic_get_maxlvt(); |
981 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 1004 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
982 | apic_write(APIC_ESR, 0); | 1005 | apic_write(APIC_ESR, 0); |
@@ -1020,7 +1043,7 @@ void __cpuinit setup_local_APIC(void) | |||
1020 | /* | 1043 | /* |
1021 | * Detect and initialize APIC | 1044 | * Detect and initialize APIC |
1022 | */ | 1045 | */ |
1023 | static int __init detect_init_APIC (void) | 1046 | static int __init detect_init_APIC(void) |
1024 | { | 1047 | { |
1025 | u32 h, l, features; | 1048 | u32 h, l, features; |
1026 | 1049 | ||
@@ -1077,7 +1100,7 @@ static int __init detect_init_APIC (void) | |||
1077 | printk(KERN_WARNING "Could not enable APIC!\n"); | 1100 | printk(KERN_WARNING "Could not enable APIC!\n"); |
1078 | return -1; | 1101 | return -1; |
1079 | } | 1102 | } |
1080 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1103 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1081 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | 1104 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; |
1082 | 1105 | ||
1083 | /* The BIOS may have set up the APIC at some other address */ | 1106 | /* The BIOS may have set up the APIC at some other address */ |
@@ -1104,8 +1127,6 @@ no_apic: | |||
1104 | */ | 1127 | */ |
1105 | void __init init_apic_mappings(void) | 1128 | void __init init_apic_mappings(void) |
1106 | { | 1129 | { |
1107 | unsigned long apic_phys; | ||
1108 | |||
1109 | /* | 1130 | /* |
1110 | * If no local APIC can be found then set up a fake all | 1131 | * If no local APIC can be found then set up a fake all |
1111 | * zeroes page to simulate the local APIC and another | 1132 | * zeroes page to simulate the local APIC and another |
@@ -1164,10 +1185,10 @@ fake_ioapic_page: | |||
1164 | * This initializes the IO-APIC and APIC hardware if this is | 1185 | * This initializes the IO-APIC and APIC hardware if this is |
1165 | * a UP kernel. | 1186 | * a UP kernel. |
1166 | */ | 1187 | */ |
1167 | int __init APIC_init_uniprocessor (void) | 1188 | int __init APIC_init_uniprocessor(void) |
1168 | { | 1189 | { |
1169 | if (enable_local_apic < 0) | 1190 | if (enable_local_apic < 0) |
1170 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1191 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1171 | 1192 | ||
1172 | if (!smp_found_config && !cpu_has_apic) | 1193 | if (!smp_found_config && !cpu_has_apic) |
1173 | return -1; | 1194 | return -1; |
@@ -1179,7 +1200,7 @@ int __init APIC_init_uniprocessor (void) | |||
1179 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1200 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1180 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1201 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", |
1181 | boot_cpu_physical_apicid); | 1202 | boot_cpu_physical_apicid); |
1182 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1203 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1183 | return -1; | 1204 | return -1; |
1184 | } | 1205 | } |
1185 | 1206 | ||
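Several hunks in this file replace open-coded set_bit()/clear_bit() on x86_capability with the set_cpu_cap()/clear_cpu_cap() helpers seen above. A rough sketch of what such wrappers amount to (an assumption about their definition, not something this patch shows):

	/* Assumed definitions, for illustration only. */
	#define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
	#define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))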
@@ -1210,50 +1231,6 @@ int __init APIC_init_uniprocessor (void) | |||
1210 | } | 1231 | } |
1211 | 1232 | ||
1212 | /* | 1233 | /* |
1213 | * APIC command line parameters | ||
1214 | */ | ||
1215 | static int __init parse_lapic(char *arg) | ||
1216 | { | ||
1217 | enable_local_apic = 1; | ||
1218 | return 0; | ||
1219 | } | ||
1220 | early_param("lapic", parse_lapic); | ||
1221 | |||
1222 | static int __init parse_nolapic(char *arg) | ||
1223 | { | ||
1224 | enable_local_apic = -1; | ||
1225 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
1226 | return 0; | ||
1227 | } | ||
1228 | early_param("nolapic", parse_nolapic); | ||
1229 | |||
1230 | static int __init parse_disable_lapic_timer(char *arg) | ||
1231 | { | ||
1232 | local_apic_timer_disabled = 1; | ||
1233 | return 0; | ||
1234 | } | ||
1235 | early_param("nolapic_timer", parse_disable_lapic_timer); | ||
1236 | |||
1237 | static int __init parse_lapic_timer_c2_ok(char *arg) | ||
1238 | { | ||
1239 | local_apic_timer_c2_ok = 1; | ||
1240 | return 0; | ||
1241 | } | ||
1242 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | ||
1243 | |||
1244 | static int __init apic_set_verbosity(char *str) | ||
1245 | { | ||
1246 | if (strcmp("debug", str) == 0) | ||
1247 | apic_verbosity = APIC_DEBUG; | ||
1248 | else if (strcmp("verbose", str) == 0) | ||
1249 | apic_verbosity = APIC_VERBOSE; | ||
1250 | return 1; | ||
1251 | } | ||
1252 | |||
1253 | __setup("apic=", apic_set_verbosity); | ||
1254 | |||
1255 | |||
1256 | /* | ||
1257 | * Local APIC interrupts | 1234 | * Local APIC interrupts |
1258 | */ | 1235 | */ |
1259 | 1236 | ||
@@ -1306,7 +1283,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1306 | 6: Received illegal vector | 1283 | 6: Received illegal vector |
1307 | 7: Illegal register address | 1284 | 7: Illegal register address |
1308 | */ | 1285 | */ |
1309 | printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", | 1286 | printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", |
1310 | smp_processor_id(), v , v1); | 1287 | smp_processor_id(), v , v1); |
1311 | irq_exit(); | 1288 | irq_exit(); |
1312 | } | 1289 | } |
@@ -1393,7 +1370,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1393 | value = apic_read(APIC_LVT0); | 1370 | value = apic_read(APIC_LVT0); |
1394 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | 1371 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
1395 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | 1372 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
1396 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | 1373 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
1397 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1374 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
1398 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | 1375 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
1399 | apic_write_around(APIC_LVT0, value); | 1376 | apic_write_around(APIC_LVT0, value); |
@@ -1530,7 +1507,7 @@ static int lapic_resume(struct sys_device *dev) | |||
1530 | */ | 1507 | */ |
1531 | 1508 | ||
1532 | static struct sysdev_class lapic_sysclass = { | 1509 | static struct sysdev_class lapic_sysclass = { |
1533 | set_kset_name("lapic"), | 1510 | .name = "lapic", |
1534 | .resume = lapic_resume, | 1511 | .resume = lapic_resume, |
1535 | .suspend = lapic_suspend, | 1512 | .suspend = lapic_suspend, |
1536 | }; | 1513 | }; |
@@ -1565,3 +1542,46 @@ device_initcall(init_lapic_sysfs); | |||
1565 | static void apic_pm_activate(void) { } | 1542 | static void apic_pm_activate(void) { } |
1566 | 1543 | ||
1567 | #endif /* CONFIG_PM */ | 1544 | #endif /* CONFIG_PM */ |
1545 | |||
1546 | /* | ||
1547 | * APIC command line parameters | ||
1548 | */ | ||
1549 | static int __init parse_lapic(char *arg) | ||
1550 | { | ||
1551 | enable_local_apic = 1; | ||
1552 | return 0; | ||
1553 | } | ||
1554 | early_param("lapic", parse_lapic); | ||
1555 | |||
1556 | static int __init parse_nolapic(char *arg) | ||
1557 | { | ||
1558 | enable_local_apic = -1; | ||
1559 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1560 | return 0; | ||
1561 | } | ||
1562 | early_param("nolapic", parse_nolapic); | ||
1563 | |||
1564 | static int __init parse_disable_lapic_timer(char *arg) | ||
1565 | { | ||
1566 | local_apic_timer_disabled = 1; | ||
1567 | return 0; | ||
1568 | } | ||
1569 | early_param("nolapic_timer", parse_disable_lapic_timer); | ||
1570 | |||
1571 | static int __init parse_lapic_timer_c2_ok(char *arg) | ||
1572 | { | ||
1573 | local_apic_timer_c2_ok = 1; | ||
1574 | return 0; | ||
1575 | } | ||
1576 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | ||
1577 | |||
1578 | static int __init apic_set_verbosity(char *str) | ||
1579 | { | ||
1580 | if (strcmp("debug", str) == 0) | ||
1581 | apic_verbosity = APIC_DEBUG; | ||
1582 | else if (strcmp("verbose", str) == 0) | ||
1583 | apic_verbosity = APIC_VERBOSE; | ||
1584 | return 1; | ||
1585 | } | ||
1586 | __setup("apic=", apic_set_verbosity); | ||
1587 | |||
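The handlers relocated in the hunk above all follow one pattern: a small __init parser registered with early_param() (or __setup() when the option carries a value, as with apic=), keyed to a kernel command-line token such as nolapic or lapic_timer_c2_ok. A minimal sketch of the same pattern with a hypothetical option name:

	/* Hypothetical boot option, not part of this patch. */
	static int example_flag __initdata;

	static int __init parse_example_flag(char *arg)
	{
		example_flag = 1;
		return 0;
	}
	early_param("example_flag", parse_example_flag);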
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index f28ccb588fba..d8d03e09dea2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -23,32 +23,37 @@ | |||
23 | #include <linux/mc146818rtc.h> | 23 | #include <linux/mc146818rtc.h> |
24 | #include <linux/kernel_stat.h> | 24 | #include <linux/kernel_stat.h> |
25 | #include <linux/sysdev.h> | 25 | #include <linux/sysdev.h> |
26 | #include <linux/module.h> | ||
27 | #include <linux/ioport.h> | 26 | #include <linux/ioport.h> |
28 | #include <linux/clockchips.h> | 27 | #include <linux/clockchips.h> |
28 | #include <linux/acpi_pmtmr.h> | ||
29 | #include <linux/module.h> | ||
29 | 30 | ||
30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
31 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
32 | #include <asm/mtrr.h> | 33 | #include <asm/mtrr.h> |
33 | #include <asm/mpspec.h> | 34 | #include <asm/mpspec.h> |
35 | #include <asm/hpet.h> | ||
34 | #include <asm/pgalloc.h> | 36 | #include <asm/pgalloc.h> |
35 | #include <asm/mach_apic.h> | 37 | #include <asm/mach_apic.h> |
36 | #include <asm/nmi.h> | 38 | #include <asm/nmi.h> |
37 | #include <asm/idle.h> | 39 | #include <asm/idle.h> |
38 | #include <asm/proto.h> | 40 | #include <asm/proto.h> |
39 | #include <asm/timex.h> | 41 | #include <asm/timex.h> |
40 | #include <asm/hpet.h> | ||
41 | #include <asm/apic.h> | 42 | #include <asm/apic.h> |
42 | 43 | ||
43 | int apic_verbosity; | ||
44 | int disable_apic_timer __cpuinitdata; | 44 | int disable_apic_timer __cpuinitdata; |
45 | static int apic_calibrate_pmtmr __initdata; | 45 | static int apic_calibrate_pmtmr __initdata; |
46 | int disable_apic; | ||
46 | 47 | ||
47 | /* Local APIC timer works in C2? */ | 48 | /* Local APIC timer works in C2 */ |
48 | int local_apic_timer_c2_ok; | 49 | int local_apic_timer_c2_ok; |
49 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | 50 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); |
50 | 51 | ||
51 | static struct resource *ioapic_resources; | 52 | /* |
53 | * Debug level, exported for io_apic.c | ||
54 | */ | ||
55 | int apic_verbosity; | ||
56 | |||
52 | static struct resource lapic_resource = { | 57 | static struct resource lapic_resource = { |
53 | .name = "Local APIC", | 58 | .name = "Local APIC", |
54 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, | 59 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, |
@@ -60,10 +65,8 @@ static int lapic_next_event(unsigned long delta, | |||
60 | struct clock_event_device *evt); | 65 | struct clock_event_device *evt); |
61 | static void lapic_timer_setup(enum clock_event_mode mode, | 66 | static void lapic_timer_setup(enum clock_event_mode mode, |
62 | struct clock_event_device *evt); | 67 | struct clock_event_device *evt); |
63 | |||
64 | static void lapic_timer_broadcast(cpumask_t mask); | 68 | static void lapic_timer_broadcast(cpumask_t mask); |
65 | 69 | static void apic_pm_activate(void); | |
66 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); | ||
67 | 70 | ||
68 | static struct clock_event_device lapic_clockevent = { | 71 | static struct clock_event_device lapic_clockevent = { |
69 | .name = "lapic", | 72 | .name = "lapic", |
@@ -78,6 +81,150 @@ static struct clock_event_device lapic_clockevent = { | |||
78 | }; | 81 | }; |
79 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | 82 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); |
80 | 83 | ||
84 | static unsigned long apic_phys; | ||
85 | |||
86 | /* | ||
87 | * Get the LAPIC version | ||
88 | */ | ||
89 | static inline int lapic_get_version(void) | ||
90 | { | ||
91 | return GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Check, if the APIC is integrated or a separate chip | ||
96 | */ | ||
97 | static inline int lapic_is_integrated(void) | ||
98 | { | ||
99 | return 1; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Check, whether this is a modern or a first generation APIC | ||
104 | */ | ||
105 | static int modern_apic(void) | ||
106 | { | ||
107 | /* AMD systems use old APIC versions, so check the CPU */ | ||
108 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
109 | boot_cpu_data.x86 >= 0xf) | ||
110 | return 1; | ||
111 | return lapic_get_version() >= 0x14; | ||
112 | } | ||
113 | |||
114 | void apic_wait_icr_idle(void) | ||
115 | { | ||
116 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | ||
117 | cpu_relax(); | ||
118 | } | ||
119 | |||
120 | u32 safe_apic_wait_icr_idle(void) | ||
121 | { | ||
122 | u32 send_status; | ||
123 | int timeout; | ||
124 | |||
125 | timeout = 0; | ||
126 | do { | ||
127 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
128 | if (!send_status) | ||
129 | break; | ||
130 | udelay(100); | ||
131 | } while (timeout++ < 1000); | ||
132 | |||
133 | return send_status; | ||
134 | } | ||
135 | |||
136 | /** | ||
137 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | ||
138 | */ | ||
139 | void __cpuinit enable_NMI_through_LVT0(void) | ||
140 | { | ||
141 | unsigned int v; | ||
142 | |||
143 | /* unmask and set to NMI */ | ||
144 | v = APIC_DM_NMI; | ||
145 | apic_write(APIC_LVT0, v); | ||
146 | } | ||
147 | |||
148 | /** | ||
149 | * lapic_get_maxlvt - get the maximum number of local vector table entries | ||
150 | */ | ||
151 | int lapic_get_maxlvt(void) | ||
152 | { | ||
153 | unsigned int v, maxlvt; | ||
154 | |||
155 | v = apic_read(APIC_LVR); | ||
156 | maxlvt = GET_APIC_MAXLVT(v); | ||
157 | return maxlvt; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * This function sets up the local APIC timer, with a timeout of | ||
162 | * 'clocks' APIC bus clock. During calibration we actually call | ||
163 | * this function twice on the boot CPU, once with a bogus timeout | ||
164 | * value, second time for real. The other (noncalibrating) CPUs | ||
165 | * call this function only once, with the real, calibrated value. | ||
166 | * | ||
167 | * We do reads before writes even if unnecessary, to get around the | ||
168 | * P5 APIC double write bug. | ||
169 | */ | ||
170 | |||
171 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | ||
172 | { | ||
173 | unsigned int lvtt_value, tmp_value; | ||
174 | |||
175 | lvtt_value = LOCAL_TIMER_VECTOR; | ||
176 | if (!oneshot) | ||
177 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | ||
178 | if (!irqen) | ||
179 | lvtt_value |= APIC_LVT_MASKED; | ||
180 | |||
181 | apic_write(APIC_LVTT, lvtt_value); | ||
182 | |||
183 | /* | ||
184 | * Divide PICLK by 16 | ||
185 | */ | ||
186 | tmp_value = apic_read(APIC_TDCR); | ||
187 | apic_write(APIC_TDCR, (tmp_value | ||
188 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
189 | | APIC_TDR_DIV_16); | ||
190 | |||
191 | if (!oneshot) | ||
192 | apic_write(APIC_TMICT, clocks); | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
197 | * | ||
198 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
199 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
200 | */ | ||
201 | |||
202 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
203 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
204 | |||
205 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
206 | { | ||
207 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
208 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
209 | |||
210 | apic_write(reg, v); | ||
211 | } | ||
212 | |||
213 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
214 | { | ||
215 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
216 | return APIC_EILVT_LVTOFF_MCE; | ||
217 | } | ||
218 | |||
219 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
220 | { | ||
221 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
222 | return APIC_EILVT_LVTOFF_IBS; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Program the next event, relative to now | ||
227 | */ | ||
81 | static int lapic_next_event(unsigned long delta, | 228 | static int lapic_next_event(unsigned long delta, |
82 | struct clock_event_device *evt) | 229 | struct clock_event_device *evt) |
83 | { | 230 | { |
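setup_APIC_eilvt_mce() and setup_APIC_eilvt_ibs(), added just above, hard-code the AMD extended-LVT offsets (0 for MCE, 1 for IBS) and hand back the offset they programmed. A hedged usage sketch; the vector and delivery-mode values are placeholders, not taken from this patch:

	/* Illustrative caller, e.g. an IBS profiling driver. */
	static void example_route_ibs(void)
	{
		u8 vector = 0xf8;	/* hypothetical free vector */
		u8 msg_type = 0;	/* fixed delivery, placeholder value */
		u8 offset;

		offset = setup_APIC_eilvt_ibs(vector, msg_type, 0 /* unmasked */);
		printk(KERN_DEBUG "IBS routed via extended LVT offset %u\n", offset);
	}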
@@ -85,6 +232,9 @@ static int lapic_next_event(unsigned long delta, | |||
85 | return 0; | 232 | return 0; |
86 | } | 233 | } |
87 | 234 | ||
235 | /* | ||
236 | * Setup the lapic timer in periodic or oneshot mode | ||
237 | */ | ||
88 | static void lapic_timer_setup(enum clock_event_mode mode, | 238 | static void lapic_timer_setup(enum clock_event_mode mode, |
89 | struct clock_event_device *evt) | 239 | struct clock_event_device *evt) |
90 | { | 240 | { |
@@ -127,75 +277,261 @@ static void lapic_timer_broadcast(cpumask_t mask) | |||
127 | #endif | 277 | #endif |
128 | } | 278 | } |
129 | 279 | ||
130 | static void apic_pm_activate(void); | 280 | /* |
281 | * Setup the local APIC timer for this CPU. Copy the initialized values | ||
282 | * of the boot CPU and register the clock event in the framework. | ||
283 | */ | ||
284 | static void setup_APIC_timer(void) | ||
285 | { | ||
286 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | ||
131 | 287 | ||
132 | void apic_wait_icr_idle(void) | 288 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
289 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | ||
290 | |||
291 | clockevents_register_device(levt); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * In this function we calibrate APIC bus clocks to the external | ||
296 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
297 | * to calibrate, since some later bootup code depends on getting | ||
298 | * the first irq? Ugh. | ||
299 | * | ||
300 | * We want to do the calibration only once since we | ||
301 | * want to have local timer irqs in sync. CPUs connected | ||
302 | * by the same APIC bus have the very same bus frequency. | ||
303 | * And we want to have irqs off anyways, no accidental | ||
304 | * APIC irq that way. | ||
305 | */ | ||
306 | |||
307 | #define TICK_COUNT 100000000 | ||
308 | |||
309 | static void __init calibrate_APIC_clock(void) | ||
133 | { | 310 | { |
134 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | 311 | unsigned apic, apic_start; |
135 | cpu_relax(); | 312 | unsigned long tsc, tsc_start; |
313 | int result; | ||
314 | |||
315 | local_irq_disable(); | ||
316 | |||
317 | /* | ||
318 | * Put whatever arbitrary (but long enough) timeout | ||
319 | * value into the APIC clock, we just want to get the | ||
320 | * counter running for calibration. | ||
321 | * | ||
322 | * No interrupt enable ! | ||
323 | */ | ||
324 | __setup_APIC_LVTT(250000000, 0, 0); | ||
325 | |||
326 | apic_start = apic_read(APIC_TMCCT); | ||
327 | #ifdef CONFIG_X86_PM_TIMER | ||
328 | if (apic_calibrate_pmtmr && pmtmr_ioport) { | ||
329 | pmtimer_wait(5000); /* 5ms wait */ | ||
330 | apic = apic_read(APIC_TMCCT); | ||
331 | result = (apic_start - apic) * 1000L / 5; | ||
332 | } else | ||
333 | #endif | ||
334 | { | ||
335 | rdtscll(tsc_start); | ||
336 | |||
337 | do { | ||
338 | apic = apic_read(APIC_TMCCT); | ||
339 | rdtscll(tsc); | ||
340 | } while ((tsc - tsc_start) < TICK_COUNT && | ||
341 | (apic_start - apic) < TICK_COUNT); | ||
342 | |||
343 | result = (apic_start - apic) * 1000L * tsc_khz / | ||
344 | (tsc - tsc_start); | ||
345 | } | ||
346 | |||
347 | local_irq_enable(); | ||
348 | |||
349 | printk(KERN_DEBUG "APIC timer calibration result %d\n", result); | ||
350 | |||
351 | printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", | ||
352 | result / 1000 / 1000, result / 1000 % 1000); | ||
353 | |||
354 | /* Calculate the scaled math multiplication factor */ | ||
355 | lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); | ||
356 | lapic_clockevent.max_delta_ns = | ||
357 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
358 | lapic_clockevent.min_delta_ns = | ||
359 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
360 | |||
361 | calibration_result = result / HZ; | ||
136 | } | 362 | } |
137 | 363 | ||
138 | unsigned int safe_apic_wait_icr_idle(void) | 364 | /* |
365 | * Setup the boot APIC | ||
366 | * | ||
367 | * Calibrate and verify the result. | ||
368 | */ | ||
369 | void __init setup_boot_APIC_clock(void) | ||
139 | { | 370 | { |
140 | unsigned int send_status; | 371 | /* |
141 | int timeout; | 372 | * The local apic timer can be disabled via the kernel commandline. |
373 | * Register the lapic timer as a dummy clock event source on SMP | ||
374 | * systems, so the broadcast mechanism is used. On UP systems simply | ||
375 | * ignore it. | ||
376 | */ | ||
377 | if (disable_apic_timer) { | ||
378 | printk(KERN_INFO "Disabling APIC timer\n"); | ||
379 | /* No broadcast on UP ! */ | ||
380 | if (num_possible_cpus() > 1) { | ||
381 | lapic_clockevent.mult = 1; | ||
382 | setup_APIC_timer(); | ||
383 | } | ||
384 | return; | ||
385 | } | ||
142 | 386 | ||
143 | timeout = 0; | 387 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); |
144 | do { | 388 | calibrate_APIC_clock(); |
145 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
146 | if (!send_status) | ||
147 | break; | ||
148 | udelay(100); | ||
149 | } while (timeout++ < 1000); | ||
150 | 389 | ||
151 | return send_status; | 390 | /* |
391 | * Do a sanity check on the APIC calibration result | ||
392 | */ | ||
393 | if (calibration_result < (1000000 / HZ)) { | ||
394 | printk(KERN_WARNING | ||
395 | "APIC frequency too slow, disabling apic timer\n"); | ||
396 | /* No broadcast on UP ! */ | ||
397 | if (num_possible_cpus() > 1) | ||
398 | setup_APIC_timer(); | ||
399 | return; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * If nmi_watchdog is set to IO_APIC, we need the | ||
404 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
405 | * device. | ||
406 | */ | ||
407 | if (nmi_watchdog != NMI_IO_APIC) | ||
408 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
409 | else | ||
410 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
411 | " due to nmi_watchdog=1!\n"); | ||
412 | |||
413 | setup_APIC_timer(); | ||
152 | } | 414 | } |
153 | 415 | ||
154 | void enable_NMI_through_LVT0 (void * dummy) | 416 | /* |
417 | * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the | ||
418 | * C1E flag only in the secondary CPU, so when we detect the wreckage | ||
419 | * we already have enabled the boot CPU local apic timer. Check, if | ||
420 | * disable_apic_timer is set and the DUMMY flag is cleared. If yes, | ||
421 | * set the DUMMY flag again and force the broadcast mode in the | ||
422 | * clockevents layer. | ||
423 | */ | ||
424 | void __cpuinit check_boot_apic_timer_broadcast(void) | ||
155 | { | 425 | { |
156 | unsigned int v; | 426 | if (!disable_apic_timer || |
427 | (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) | ||
428 | return; | ||
157 | 429 | ||
158 | /* unmask and set to NMI */ | 430 | printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); |
159 | v = APIC_DM_NMI; | 431 | lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; |
160 | apic_write(APIC_LVT0, v); | 432 | |
433 | local_irq_enable(); | ||
434 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); | ||
435 | local_irq_disable(); | ||
161 | } | 436 | } |
162 | 437 | ||
163 | int get_maxlvt(void) | 438 | void __cpuinit setup_secondary_APIC_clock(void) |
164 | { | 439 | { |
165 | unsigned int v, maxlvt; | 440 | check_boot_apic_timer_broadcast(); |
441 | setup_APIC_timer(); | ||
442 | } | ||
166 | 443 | ||
167 | v = apic_read(APIC_LVR); | 444 | /* |
168 | maxlvt = GET_APIC_MAXLVT(v); | 445 | * The guts of the apic timer interrupt |
169 | return maxlvt; | 446 | */ |
447 | static void local_apic_timer_interrupt(void) | ||
448 | { | ||
449 | int cpu = smp_processor_id(); | ||
450 | struct clock_event_device *evt = &per_cpu(lapic_events, cpu); | ||
451 | |||
452 | /* | ||
453 | * Normally we should not be here till LAPIC has been initialized but | ||
454 | * in some cases like kdump, it's possible that there is a pending LAPIC | ||
455 | * timer interrupt from previous kernel's context and is delivered in | ||
456 | * new kernel the moment interrupts are enabled. | ||
457 | * | ||
458 | * Interrupts are enabled early and LAPIC is setup much later, hence | ||
459 | * it's possible that when we get here evt->event_handler is NULL. | ||
460 | * Check for event_handler being NULL and discard the interrupt as | ||
461 | * spurious. | ||
462 | */ | ||
463 | if (!evt->event_handler) { | ||
464 | printk(KERN_WARNING | ||
465 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
466 | /* Switch it off */ | ||
467 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * the NMI deadlock-detector uses this. | ||
473 | */ | ||
474 | add_pda(apic_timer_irqs, 1); | ||
475 | |||
476 | evt->event_handler(evt); | ||
170 | } | 477 | } |
171 | 478 | ||
172 | /* | 479 | /* |
173 | * 'what should we do if we get a hw irq event on an illegal vector'. | 480 | * Local APIC timer interrupt. This is the most natural way for doing |
174 | * each architecture has to answer this themselves. | 481 | * local interrupts, but local timer interrupts can be emulated by |
482 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
483 | * | ||
484 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
485 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
175 | */ | 486 | */ |
176 | void ack_bad_irq(unsigned int irq) | 487 | void smp_apic_timer_interrupt(struct pt_regs *regs) |
177 | { | 488 | { |
178 | printk("unexpected IRQ trap at vector %02x\n", irq); | 489 | struct pt_regs *old_regs = set_irq_regs(regs); |
490 | |||
179 | /* | 491 | /* |
180 | * Currently unexpected vectors happen only on SMP and APIC. | 492 | * NOTE! We'd better ACK the irq immediately, |
181 | * We _must_ ack these because every local APIC has only N | 493 | * because timer handling can be slow. |
182 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
183 | * holds up an irq slot - in excessive cases (when multiple | ||
184 | * unexpected vectors occur) that might lock up the APIC | ||
185 | * completely. | ||
186 | * But don't ack when the APIC is disabled. -AK | ||
187 | */ | 494 | */ |
188 | if (!disable_apic) | 495 | ack_APIC_irq(); |
189 | ack_APIC_irq(); | 496 | /* |
497 | * update_process_times() expects us to have done irq_enter(). | ||
498 | * Besides, if we don't timer interrupts ignore the global | ||
499 | * interrupt lock, which is the WrongThing (tm) to do. | ||
500 | */ | ||
501 | exit_idle(); | ||
502 | irq_enter(); | ||
503 | local_apic_timer_interrupt(); | ||
504 | irq_exit(); | ||
505 | set_irq_regs(old_regs); | ||
506 | } | ||
507 | |||
508 | int setup_profiling_timer(unsigned int multiplier) | ||
509 | { | ||
510 | return -EINVAL; | ||
190 | } | 511 | } |
191 | 512 | ||
513 | |||
514 | /* | ||
515 | * Local APIC start and shutdown | ||
516 | */ | ||
517 | |||
518 | /** | ||
519 | * clear_local_APIC - shutdown the local APIC | ||
520 | * | ||
521 | * This is called, when a CPU is disabled and before rebooting, so the state of | ||
522 | * the local APIC has no dangling leftovers. Also used to clean out any BIOS | ||
523 | * leftovers during boot. | ||
524 | */ | ||
192 | void clear_local_APIC(void) | 525 | void clear_local_APIC(void) |
193 | { | 526 | { |
194 | int maxlvt; | 527 | int maxlvt = lapic_get_maxlvt(); |
195 | unsigned int v; | 528 | u32 v; |
196 | 529 | ||
197 | maxlvt = get_maxlvt(); | 530 | /* APIC hasn't been mapped yet */ |
531 | if (!apic_phys) | ||
532 | return; | ||
198 | 533 | ||
534 | maxlvt = lapic_get_maxlvt(); | ||
199 | /* | 535 | /* |
200 | * Masking an LVT entry can trigger a local APIC error | 536 | * Masking an LVT entry can trigger a local APIC error |
201 | * if the vector is zero. Mask LVTERR first to prevent this. | 537 | * if the vector is zero. Mask LVTERR first to prevent this. |
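In calibrate_APIC_clock() above, the TSC branch converts the two observed deltas into an APIC timer frequency: result = apic_ticks * 1000 * tsc_khz / tsc_ticks. A worked example with assumed numbers:

	/*
	 * Illustrative numbers only: with tsc_khz == 2000000 (a 2 GHz TSC),
	 * if the loop sees the TSC advance by 100,000,000 cycles (~50 ms)
	 * while the APIC counter drops by 50,000,000, then
	 *
	 *	result = 50000000 * 1000L * 2000000 / 100000000 = 1,000,000,000
	 *
	 * i.e. a 1 GHz APIC timer clock; with HZ == 250, calibration_result
	 * ends up as 1000000000 / 250 == 4,000,000 bus clocks per tick.
	 */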
@@ -233,45 +569,9 @@ void clear_local_APIC(void) | |||
233 | apic_read(APIC_ESR); | 569 | apic_read(APIC_ESR); |
234 | } | 570 | } |
235 | 571 | ||
236 | void disconnect_bsp_APIC(int virt_wire_setup) | 572 | /** |
237 | { | 573 | * disable_local_APIC - clear and disable the local APIC |
238 | /* Go back to Virtual Wire compatibility mode */ | 574 | */ |
239 | unsigned long value; | ||
240 | |||
241 | /* For the spurious interrupt use vector F, and enable it */ | ||
242 | value = apic_read(APIC_SPIV); | ||
243 | value &= ~APIC_VECTOR_MASK; | ||
244 | value |= APIC_SPIV_APIC_ENABLED; | ||
245 | value |= 0xf; | ||
246 | apic_write(APIC_SPIV, value); | ||
247 | |||
248 | if (!virt_wire_setup) { | ||
249 | /* | ||
250 | * For LVT0 make it edge triggered, active high, | ||
251 | * external and enabled | ||
252 | */ | ||
253 | value = apic_read(APIC_LVT0); | ||
254 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
255 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
256 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | ||
257 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
258 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
259 | apic_write(APIC_LVT0, value); | ||
260 | } else { | ||
261 | /* Disable LVT0 */ | ||
262 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
263 | } | ||
264 | |||
265 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
266 | value = apic_read(APIC_LVT1); | ||
267 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
268 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
269 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
270 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
271 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
272 | apic_write(APIC_LVT1, value); | ||
273 | } | ||
274 | |||
275 | void disable_local_APIC(void) | 575 | void disable_local_APIC(void) |
276 | { | 576 | { |
277 | unsigned int value; | 577 | unsigned int value; |
@@ -333,7 +633,7 @@ int __init verify_local_APIC(void) | |||
333 | reg1 = GET_APIC_VERSION(reg0); | 633 | reg1 = GET_APIC_VERSION(reg0); |
334 | if (reg1 == 0x00 || reg1 == 0xff) | 634 | if (reg1 == 0x00 || reg1 == 0xff) |
335 | return 0; | 635 | return 0; |
336 | reg1 = get_maxlvt(); | 636 | reg1 = lapic_get_maxlvt(); |
337 | if (reg1 < 0x02 || reg1 == 0xff) | 637 | if (reg1 < 0x02 || reg1 == 0xff) |
338 | return 0; | 638 | return 0; |
339 | 639 | ||
@@ -355,18 +655,20 @@ int __init verify_local_APIC(void) | |||
355 | * compatibility mode, but most boxes are anymore. | 655 | * compatibility mode, but most boxes are anymore. |
356 | */ | 656 | */ |
357 | reg0 = apic_read(APIC_LVT0); | 657 | reg0 = apic_read(APIC_LVT0); |
358 | apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); | 658 | apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); |
359 | reg1 = apic_read(APIC_LVT1); | 659 | reg1 = apic_read(APIC_LVT1); |
360 | apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); | 660 | apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); |
361 | 661 | ||
362 | return 1; | 662 | return 1; |
363 | } | 663 | } |
364 | 664 | ||
665 | /** | ||
666 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs | ||
667 | */ | ||
365 | void __init sync_Arb_IDs(void) | 668 | void __init sync_Arb_IDs(void) |
366 | { | 669 | { |
367 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ | 670 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ |
368 | unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | 671 | if (modern_apic()) |
369 | if (ver >= 0x14) /* P4 or higher */ | ||
370 | return; | 672 | return; |
371 | 673 | ||
372 | /* | 674 | /* |
@@ -418,9 +720,12 @@ void __init init_bsp_APIC(void) | |||
418 | apic_write(APIC_LVT1, value); | 720 | apic_write(APIC_LVT1, value); |
419 | } | 721 | } |
420 | 722 | ||
421 | void __cpuinit setup_local_APIC (void) | 723 | /** |
724 | * setup_local_APIC - setup the local APIC | ||
725 | */ | ||
726 | void __cpuinit setup_local_APIC(void) | ||
422 | { | 727 | { |
423 | unsigned int value, maxlvt; | 728 | unsigned int value; |
424 | int i, j; | 729 | int i, j; |
425 | 730 | ||
426 | value = apic_read(APIC_LVR); | 731 | value = apic_read(APIC_LVR); |
@@ -516,30 +821,217 @@ void __cpuinit setup_local_APIC (void) | |||
516 | else | 821 | else |
517 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 822 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
518 | apic_write(APIC_LVT1, value); | 823 | apic_write(APIC_LVT1, value); |
824 | } | ||
519 | 825 | ||
520 | { | 826 | void __cpuinit lapic_setup_esr(void) |
521 | unsigned oldvalue; | 827 | { |
522 | maxlvt = get_maxlvt(); | 828 | unsigned maxlvt = lapic_get_maxlvt(); |
523 | oldvalue = apic_read(APIC_ESR); | 829 | |
524 | value = ERROR_APIC_VECTOR; // enables sending errors | 830 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); |
525 | apic_write(APIC_LVTERR, value); | 831 | /* |
526 | /* | 832 | * spec says clear errors after enabling vector. |
527 | * spec says clear errors after enabling vector. | 833 | */ |
528 | */ | 834 | if (maxlvt > 3) |
529 | if (maxlvt > 3) | 835 | apic_write(APIC_ESR, 0); |
530 | apic_write(APIC_ESR, 0); | 836 | } |
531 | value = apic_read(APIC_ESR); | ||
532 | if (value != oldvalue) | ||
533 | apic_printk(APIC_VERBOSE, | ||
534 | "ESR value after enabling vector: %08x, after %08x\n", | ||
535 | oldvalue, value); | ||
536 | } | ||
537 | 837 | ||
838 | void __cpuinit end_local_APIC_setup(void) | ||
839 | { | ||
840 | lapic_setup_esr(); | ||
538 | nmi_watchdog_default(); | 841 | nmi_watchdog_default(); |
539 | setup_apic_nmi_watchdog(NULL); | 842 | setup_apic_nmi_watchdog(NULL); |
540 | apic_pm_activate(); | 843 | apic_pm_activate(); |
541 | } | 844 | } |
542 | 845 | ||
846 | /* | ||
847 | * Detect and enable local APICs on non-SMP boards. | ||
848 | * Original code written by Keir Fraser. | ||
849 | * On AMD64 we trust the BIOS - if it says no APIC it is likely | ||
850 | * not correctly set up (usually the APIC timer won't work etc.) | ||
851 | */ | ||
852 | static int __init detect_init_APIC(void) | ||
853 | { | ||
854 | if (!cpu_has_apic) { | ||
855 | printk(KERN_INFO "No local APIC present\n"); | ||
856 | return -1; | ||
857 | } | ||
858 | |||
859 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
860 | boot_cpu_id = 0; | ||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | /** | ||
865 | * init_apic_mappings - initialize APIC mappings | ||
866 | */ | ||
867 | void __init init_apic_mappings(void) | ||
868 | { | ||
869 | /* | ||
870 | * If no local APIC can be found then set up a fake all | ||
871 | * zeroes page to simulate the local APIC and another | ||
872 | * one for the IO-APIC. | ||
873 | */ | ||
874 | if (!smp_found_config && detect_init_APIC()) { | ||
875 | apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
876 | apic_phys = __pa(apic_phys); | ||
877 | } else | ||
878 | apic_phys = mp_lapic_addr; | ||
879 | |||
880 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | ||
881 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | ||
882 | APIC_BASE, apic_phys); | ||
883 | |||
884 | /* Put local APIC into the resource map. */ | ||
885 | lapic_resource.start = apic_phys; | ||
886 | lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; | ||
887 | insert_resource(&iomem_resource, &lapic_resource); | ||
888 | |||
889 | /* | ||
890 | * Fetch the APIC ID of the BSP in case we have a | ||
891 | * default configuration (or the MP table is broken). | ||
892 | */ | ||
893 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); | ||
894 | } | ||
895 | |||
896 | /* | ||
897 | * This initializes the IO-APIC and APIC hardware if this is | ||
898 | * a UP kernel. | ||
899 | */ | ||
900 | int __init APIC_init_uniprocessor(void) | ||
901 | { | ||
902 | if (disable_apic) { | ||
903 | printk(KERN_INFO "Apic disabled\n"); | ||
904 | return -1; | ||
905 | } | ||
906 | if (!cpu_has_apic) { | ||
907 | disable_apic = 1; | ||
908 | printk(KERN_INFO "Apic disabled by BIOS\n"); | ||
909 | return -1; | ||
910 | } | ||
911 | |||
912 | verify_local_APIC(); | ||
913 | |||
914 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | ||
915 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); | ||
916 | |||
917 | setup_local_APIC(); | ||
918 | |||
919 | /* | ||
920 | * Now enable IO-APICs, actually call clear_IO_APIC | ||
921 | * We need clear_IO_APIC before enabling vector on BP | ||
922 | */ | ||
923 | if (!skip_ioapic_setup && nr_ioapics) | ||
924 | enable_IO_APIC(); | ||
925 | |||
926 | end_local_APIC_setup(); | ||
927 | |||
928 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | ||
929 | setup_IO_APIC(); | ||
930 | else | ||
931 | nr_ioapics = 0; | ||
932 | setup_boot_APIC_clock(); | ||
933 | check_nmi_watchdog(); | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | /* | ||
938 | * Local APIC interrupts | ||
939 | */ | ||
940 | |||
941 | /* | ||
942 | * This interrupt should _never_ happen with our APIC/SMP architecture | ||
943 | */ | ||
944 | asmlinkage void smp_spurious_interrupt(void) | ||
945 | { | ||
946 | unsigned int v; | ||
947 | exit_idle(); | ||
948 | irq_enter(); | ||
949 | /* | ||
950 | * Check if this really is a spurious interrupt and ACK it | ||
951 | * if it is a vectored one. Just in case... | ||
952 | * Spurious interrupts should not be ACKed. | ||
953 | */ | ||
954 | v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); | ||
955 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | ||
956 | ack_APIC_irq(); | ||
957 | |||
958 | add_pda(irq_spurious_count, 1); | ||
959 | irq_exit(); | ||
960 | } | ||
961 | |||
962 | /* | ||
963 | * This interrupt should never happen with our APIC/SMP architecture | ||
964 | */ | ||
965 | asmlinkage void smp_error_interrupt(void) | ||
966 | { | ||
967 | unsigned int v, v1; | ||
968 | |||
969 | exit_idle(); | ||
970 | irq_enter(); | ||
971 | /* First tickle the hardware, only then report what went on. -- REW */ | ||
972 | v = apic_read(APIC_ESR); | ||
973 | apic_write(APIC_ESR, 0); | ||
974 | v1 = apic_read(APIC_ESR); | ||
975 | ack_APIC_irq(); | ||
976 | atomic_inc(&irq_err_count); | ||
977 | |||
978 | /* Here is what the APIC error bits mean: | ||
979 | 0: Send CS error | ||
980 | 1: Receive CS error | ||
981 | 2: Send accept error | ||
982 | 3: Receive accept error | ||
983 | 4: Reserved | ||
984 | 5: Send illegal vector | ||
985 | 6: Received illegal vector | ||
986 | 7: Illegal register address | ||
987 | */ | ||
988 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | ||
989 | smp_processor_id(), v , v1); | ||
990 | irq_exit(); | ||
991 | } | ||
992 | |||
993 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
994 | { | ||
995 | /* Go back to Virtual Wire compatibility mode */ | ||
996 | unsigned long value; | ||
997 | |||
998 | /* For the spurious interrupt use vector F, and enable it */ | ||
999 | value = apic_read(APIC_SPIV); | ||
1000 | value &= ~APIC_VECTOR_MASK; | ||
1001 | value |= APIC_SPIV_APIC_ENABLED; | ||
1002 | value |= 0xf; | ||
1003 | apic_write(APIC_SPIV, value); | ||
1004 | |||
1005 | if (!virt_wire_setup) { | ||
1006 | /* | ||
1007 | * For LVT0 make it edge triggered, active high, | ||
1008 | * external and enabled | ||
1009 | */ | ||
1010 | value = apic_read(APIC_LVT0); | ||
1011 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
1012 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
1013 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
1014 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1015 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
1016 | apic_write(APIC_LVT0, value); | ||
1017 | } else { | ||
1018 | /* Disable LVT0 */ | ||
1019 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
1020 | } | ||
1021 | |||
1022 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
1023 | value = apic_read(APIC_LVT1); | ||
1024 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
1025 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
1026 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
1027 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1028 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
1029 | apic_write(APIC_LVT1, value); | ||
1030 | } | ||
1031 | |||
1032 | /* | ||
1033 | * Power management | ||
1034 | */ | ||
543 | #ifdef CONFIG_PM | 1035 | #ifdef CONFIG_PM |
544 | 1036 | ||
545 | static struct { | 1037 | static struct { |
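smp_error_interrupt() in the hunk above prints only the raw ESR values; the comment before the printk lists what each bit means. A small illustrative decoder built from that table (not part of the patch):

	/* Illustrative only: expand the ESR bits listed in the comment above. */
	static const char *apic_esr_bits[8] = {
		"Send CS error", "Receive CS error", "Send accept error",
		"Receive accept error", "Reserved", "Send illegal vector",
		"Received illegal vector", "Illegal register address",
	};

	static void decode_apic_esr(unsigned int v)
	{
		int i;

		for (i = 0; i < 8; i++)
			if (v & (1 << i))
				printk(KERN_DEBUG "  APIC error: %s\n", apic_esr_bits[i]);
	}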
@@ -571,7 +1063,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
571 | if (!apic_pm_state.active) | 1063 | if (!apic_pm_state.active) |
572 | return 0; | 1064 | return 0; |
573 | 1065 | ||
574 | maxlvt = get_maxlvt(); | 1066 | maxlvt = lapic_get_maxlvt(); |
575 | 1067 | ||
576 | apic_pm_state.apic_id = apic_read(APIC_ID); | 1068 | apic_pm_state.apic_id = apic_read(APIC_ID); |
577 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | 1069 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); |
@@ -605,7 +1097,7 @@ static int lapic_resume(struct sys_device *dev) | |||
605 | if (!apic_pm_state.active) | 1097 | if (!apic_pm_state.active) |
606 | return 0; | 1098 | return 0; |
607 | 1099 | ||
608 | maxlvt = get_maxlvt(); | 1100 | maxlvt = lapic_get_maxlvt(); |
609 | 1101 | ||
610 | local_irq_save(flags); | 1102 | local_irq_save(flags); |
611 | rdmsr(MSR_IA32_APICBASE, l, h); | 1103 | rdmsr(MSR_IA32_APICBASE, l, h); |
@@ -639,14 +1131,14 @@ static int lapic_resume(struct sys_device *dev) | |||
639 | } | 1131 | } |
640 | 1132 | ||
641 | static struct sysdev_class lapic_sysclass = { | 1133 | static struct sysdev_class lapic_sysclass = { |
642 | set_kset_name("lapic"), | 1134 | .name = "lapic", |
643 | .resume = lapic_resume, | 1135 | .resume = lapic_resume, |
644 | .suspend = lapic_suspend, | 1136 | .suspend = lapic_suspend, |
645 | }; | 1137 | }; |
646 | 1138 | ||
647 | static struct sys_device device_lapic = { | 1139 | static struct sys_device device_lapic = { |
648 | .id = 0, | 1140 | .id = 0, |
649 | .cls = &lapic_sysclass, | 1141 | .cls = &lapic_sysclass, |
650 | }; | 1142 | }; |
651 | 1143 | ||
652 | static void __cpuinit apic_pm_activate(void) | 1144 | static void __cpuinit apic_pm_activate(void) |
@@ -657,9 +1149,11 @@ static void __cpuinit apic_pm_activate(void) | |||
657 | static int __init init_lapic_sysfs(void) | 1149 | static int __init init_lapic_sysfs(void) |
658 | { | 1150 | { |
659 | int error; | 1151 | int error; |
1152 | |||
660 | if (!cpu_has_apic) | 1153 | if (!cpu_has_apic) |
661 | return 0; | 1154 | return 0; |
662 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | 1155 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ |
1156 | |||
663 | error = sysdev_class_register(&lapic_sysclass); | 1157 | error = sysdev_class_register(&lapic_sysclass); |
664 | if (!error) | 1158 | if (!error) |
665 | error = sysdev_register(&device_lapic); | 1159 | error = sysdev_register(&device_lapic); |
@@ -673,423 +1167,6 @@ static void apic_pm_activate(void) { } | |||
673 | 1167 | ||
674 | #endif /* CONFIG_PM */ | 1168 | #endif /* CONFIG_PM */ |
675 | 1169 | ||
676 | static int __init apic_set_verbosity(char *str) | ||
677 | { | ||
678 | if (str == NULL) { | ||
679 | skip_ioapic_setup = 0; | ||
680 | ioapic_force = 1; | ||
681 | return 0; | ||
682 | } | ||
683 | if (strcmp("debug", str) == 0) | ||
684 | apic_verbosity = APIC_DEBUG; | ||
685 | else if (strcmp("verbose", str) == 0) | ||
686 | apic_verbosity = APIC_VERBOSE; | ||
687 | else { | ||
688 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
689 | " use apic=verbose or apic=debug\n", str); | ||
690 | return -EINVAL; | ||
691 | } | ||
692 | |||
693 | return 0; | ||
694 | } | ||
695 | early_param("apic", apic_set_verbosity); | ||
696 | |||
697 | /* | ||
698 | * Detect and enable local APICs on non-SMP boards. | ||
699 | * Original code written by Keir Fraser. | ||
700 | * On AMD64 we trust the BIOS - if it says no APIC it is likely | ||
701 | * not correctly set up (usually the APIC timer won't work etc.) | ||
702 | */ | ||
703 | |||
704 | static int __init detect_init_APIC (void) | ||
705 | { | ||
706 | if (!cpu_has_apic) { | ||
707 | printk(KERN_INFO "No local APIC present\n"); | ||
708 | return -1; | ||
709 | } | ||
710 | |||
711 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
712 | boot_cpu_id = 0; | ||
713 | return 0; | ||
714 | } | ||
715 | |||
716 | #ifdef CONFIG_X86_IO_APIC | ||
717 | static struct resource * __init ioapic_setup_resources(void) | ||
718 | { | ||
719 | #define IOAPIC_RESOURCE_NAME_SIZE 11 | ||
720 | unsigned long n; | ||
721 | struct resource *res; | ||
722 | char *mem; | ||
723 | int i; | ||
724 | |||
725 | if (nr_ioapics <= 0) | ||
726 | return NULL; | ||
727 | |||
728 | n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); | ||
729 | n *= nr_ioapics; | ||
730 | |||
731 | mem = alloc_bootmem(n); | ||
732 | res = (void *)mem; | ||
733 | |||
734 | if (mem != NULL) { | ||
735 | memset(mem, 0, n); | ||
736 | mem += sizeof(struct resource) * nr_ioapics; | ||
737 | |||
738 | for (i = 0; i < nr_ioapics; i++) { | ||
739 | res[i].name = mem; | ||
740 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
741 | sprintf(mem, "IOAPIC %u", i); | ||
742 | mem += IOAPIC_RESOURCE_NAME_SIZE; | ||
743 | } | ||
744 | } | ||
745 | |||
746 | ioapic_resources = res; | ||
747 | |||
748 | return res; | ||
749 | } | ||
750 | |||
751 | static int __init ioapic_insert_resources(void) | ||
752 | { | ||
753 | int i; | ||
754 | struct resource *r = ioapic_resources; | ||
755 | |||
756 | if (!r) { | ||
757 | printk("IO APIC resources could be not be allocated.\n"); | ||
758 | return -1; | ||
759 | } | ||
760 | |||
761 | for (i = 0; i < nr_ioapics; i++) { | ||
762 | insert_resource(&iomem_resource, r); | ||
763 | r++; | ||
764 | } | ||
765 | |||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | /* Insert the IO APIC resources after PCI initialization has occured to handle | ||
770 | * IO APICS that are mapped in on a BAR in PCI space. */ | ||
771 | late_initcall(ioapic_insert_resources); | ||
772 | #endif | ||
773 | |||
774 | void __init init_apic_mappings(void) | ||
775 | { | ||
776 | unsigned long apic_phys; | ||
777 | |||
778 | /* | ||
779 | * If no local APIC can be found then set up a fake all | ||
780 | * zeroes page to simulate the local APIC and another | ||
781 | * one for the IO-APIC. | ||
782 | */ | ||
783 | if (!smp_found_config && detect_init_APIC()) { | ||
784 | apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
785 | apic_phys = __pa(apic_phys); | ||
786 | } else | ||
787 | apic_phys = mp_lapic_addr; | ||
788 | |||
789 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | ||
790 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | ||
791 | APIC_BASE, apic_phys); | ||
792 | |||
793 | /* Put local APIC into the resource map. */ | ||
794 | lapic_resource.start = apic_phys; | ||
795 | lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; | ||
796 | insert_resource(&iomem_resource, &lapic_resource); | ||
797 | |||
798 | /* | ||
799 | * Fetch the APIC ID of the BSP in case we have a | ||
800 | * default configuration (or the MP table is broken). | ||
801 | */ | ||
802 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); | ||
803 | |||
804 | { | ||
805 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | ||
806 | int i; | ||
807 | struct resource *ioapic_res; | ||
808 | |||
809 | ioapic_res = ioapic_setup_resources(); | ||
810 | for (i = 0; i < nr_ioapics; i++) { | ||
811 | if (smp_found_config) { | ||
812 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | ||
813 | } else { | ||
814 | ioapic_phys = (unsigned long) | ||
815 | alloc_bootmem_pages(PAGE_SIZE); | ||
816 | ioapic_phys = __pa(ioapic_phys); | ||
817 | } | ||
818 | set_fixmap_nocache(idx, ioapic_phys); | ||
819 | apic_printk(APIC_VERBOSE, | ||
820 | "mapped IOAPIC to %016lx (%016lx)\n", | ||
821 | __fix_to_virt(idx), ioapic_phys); | ||
822 | idx++; | ||
823 | |||
824 | if (ioapic_res != NULL) { | ||
825 | ioapic_res->start = ioapic_phys; | ||
826 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | ||
827 | ioapic_res++; | ||
828 | } | ||
829 | } | ||
830 | } | ||
831 | } | ||
832 | |||
833 | /* | ||
834 | * This function sets up the local APIC timer, with a timeout of | ||
835 | * 'clocks' APIC bus clock. During calibration we actually call | ||
836 | * this function twice on the boot CPU, once with a bogus timeout | ||
837 | * value, second time for real. The other (noncalibrating) CPUs | ||
838 | * call this function only once, with the real, calibrated value. | ||
839 | * | ||
840 | * We do reads before writes even if unnecessary, to get around the | ||
841 | * P5 APIC double write bug. | ||
842 | */ | ||
843 | |||
844 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | ||
845 | { | ||
846 | unsigned int lvtt_value, tmp_value; | ||
847 | |||
848 | lvtt_value = LOCAL_TIMER_VECTOR; | ||
849 | if (!oneshot) | ||
850 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | ||
851 | if (!irqen) | ||
852 | lvtt_value |= APIC_LVT_MASKED; | ||
853 | |||
854 | apic_write(APIC_LVTT, lvtt_value); | ||
855 | |||
856 | /* | ||
857 | * Divide PICLK by 16 | ||
858 | */ | ||
859 | tmp_value = apic_read(APIC_TDCR); | ||
860 | apic_write(APIC_TDCR, (tmp_value | ||
861 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
862 | | APIC_TDR_DIV_16); | ||
863 | |||
864 | if (!oneshot) | ||
865 | apic_write(APIC_TMICT, clocks); | ||
866 | } | ||
867 | |||
868 | static void setup_APIC_timer(void) | ||
869 | { | ||
870 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | ||
871 | |||
872 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); | ||
873 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | ||
874 | |||
875 | clockevents_register_device(levt); | ||
876 | } | ||
877 | |||
878 | /* | ||
879 | * In this function we calibrate APIC bus clocks to the external | ||
880 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
881 | * to calibrate, since some later bootup code depends on getting | ||
882 | * the first irq? Ugh. | ||
883 | * | ||
884 | * We want to do the calibration only once, since we | ||
885 | * want the local timer irqs to be in sync. CPUs connected | ||
886 | * by the same APIC bus have the very same bus frequency. | ||
887 | * And we want to have irqs off anyway, so no accidental | ||
888 | * APIC irq can occur that way. | ||
889 | */ | ||
890 | |||
891 | #define TICK_COUNT 100000000 | ||
892 | |||
893 | static void __init calibrate_APIC_clock(void) | ||
894 | { | ||
895 | unsigned apic, apic_start; | ||
896 | unsigned long tsc, tsc_start; | ||
897 | int result; | ||
898 | |||
899 | local_irq_disable(); | ||
900 | |||
901 | /* | ||
902 | * Put whatever arbitrary (but long enough) timeout | ||
903 | * value into the APIC clock; we just want to get the | ||
904 | * counter running for calibration. | ||
905 | * | ||
906 | * No interrupt enable ! | ||
907 | */ | ||
908 | __setup_APIC_LVTT(250000000, 0, 0); | ||
909 | |||
910 | apic_start = apic_read(APIC_TMCCT); | ||
911 | #ifdef CONFIG_X86_PM_TIMER | ||
912 | if (apic_calibrate_pmtmr && pmtmr_ioport) { | ||
913 | pmtimer_wait(5000); /* 5ms wait */ | ||
914 | apic = apic_read(APIC_TMCCT); | ||
915 | result = (apic_start - apic) * 1000L / 5; | ||
916 | } else | ||
917 | #endif | ||
918 | { | ||
919 | rdtscll(tsc_start); | ||
920 | |||
921 | do { | ||
922 | apic = apic_read(APIC_TMCCT); | ||
923 | rdtscll(tsc); | ||
924 | } while ((tsc - tsc_start) < TICK_COUNT && | ||
925 | (apic_start - apic) < TICK_COUNT); | ||
926 | |||
927 | result = (apic_start - apic) * 1000L * tsc_khz / | ||
928 | (tsc - tsc_start); | ||
929 | } | ||
930 | |||
931 | local_irq_enable(); | ||
932 | |||
933 | printk(KERN_DEBUG "APIC timer calibration result %d\n", result); | ||
934 | |||
935 | printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", | ||
936 | result / 1000 / 1000, result / 1000 % 1000); | ||
937 | |||
938 | /* Calculate the scaled math multiplication factor */ | ||
939 | lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); | ||
940 | lapic_clockevent.max_delta_ns = | ||
941 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
942 | lapic_clockevent.min_delta_ns = | ||
943 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
944 | |||
945 | calibration_result = result / HZ; | ||
946 | } | ||
947 | |||
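To make the TSC branch above concrete (every figure is hypothetical): suppose the TSC runs at 2 GHz (tsc_khz = 2,000,000) and the loop exits after tsc - tsc_start = 100,000,000 cycles, i.e. 50 ms, during which the APIC count register dropped by 3,125,000. Then result = 3,125,000 * 1000 * 2,000,000 / 100,000,000 = 62,500,000 timer ticks per second (a 1 GHz bus clock behind the divide-by-16), and with HZ=250 calibration_result becomes 250,000 ticks per jiffy. The PM-timer branch is the same idea over a fixed 5 ms wait, hence its * 1000 / 5 scaling.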
948 | void __init setup_boot_APIC_clock (void) | ||
949 | { | ||
950 | /* | ||
951 | * The local apic timer can be disabled via the kernel commandline. | ||
952 | * Register the lapic timer as a dummy clock event source on SMP | ||
953 | * systems, so the broadcast mechanism is used. On UP systems simply | ||
954 | * ignore it. | ||
955 | */ | ||
956 | if (disable_apic_timer) { | ||
957 | printk(KERN_INFO "Disabling APIC timer\n"); | ||
958 | /* No broadcast on UP ! */ | ||
959 | if (num_possible_cpus() > 1) | ||
960 | setup_APIC_timer(); | ||
961 | return; | ||
962 | } | ||
963 | |||
964 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); | ||
965 | calibrate_APIC_clock(); | ||
966 | |||
967 | /* | ||
968 | * If nmi_watchdog is set to IO_APIC, we need the | ||
969 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
970 | * device. | ||
971 | */ | ||
972 | if (nmi_watchdog != NMI_IO_APIC) | ||
973 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
974 | else | ||
975 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
976 | " due to nmi_watchdog=1!\n"); | ||
977 | |||
978 | setup_APIC_timer(); | ||
979 | } | ||
980 | |||
981 | /* | ||
982 | * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the | ||
983 | * C1E flag only in the secondary CPU, so when we detect the wreckage | ||
984 | * we already have enabled the boot CPU local apic timer. Check, if | ||
985 | * disable_apic_timer is set and the DUMMY flag is cleared. If yes, | ||
986 | * set the DUMMY flag again and force the broadcast mode in the | ||
987 | * clockevents layer. | ||
988 | */ | ||
989 | void __cpuinit check_boot_apic_timer_broadcast(void) | ||
990 | { | ||
991 | if (!disable_apic_timer || | ||
992 | (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) | ||
993 | return; | ||
994 | |||
995 | printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); | ||
996 | lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; | ||
997 | |||
998 | local_irq_enable(); | ||
999 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); | ||
1000 | local_irq_disable(); | ||
1001 | } | ||
1002 | |||
1003 | void __cpuinit setup_secondary_APIC_clock(void) | ||
1004 | { | ||
1005 | check_boot_apic_timer_broadcast(); | ||
1006 | setup_APIC_timer(); | ||
1007 | } | ||
1008 | |||
1009 | int setup_profiling_timer(unsigned int multiplier) | ||
1010 | { | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, | ||
1015 | unsigned char msg_type, unsigned char mask) | ||
1016 | { | ||
1017 | unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; | ||
1018 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
1019 | apic_write(reg, v); | ||
1020 | } | ||
1021 | |||
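To illustrate the packing performed above (the field values are invented for the example and do not come from any real caller of this helper):

	/* Hypothetical inputs: lvt_off = 0, vector = 0xf0, msg_type = 0x4, mask = 1 */
	unsigned long reg = (0 << 4) + K8_APIC_EXT_LVT_BASE; /* first extended LVT register    */
	unsigned int  v   = (1 << 16) | (0x4 << 8) | 0xf0;   /* mask|msg_type|vector = 0x104f0 */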
1022 | /* | ||
1023 | * Local timer interrupt handler. It does both profiling and | ||
1024 | * process statistics/rescheduling. | ||
1025 | * | ||
1026 | * We do profiling in every local tick, statistics/rescheduling | ||
1027 | * happen only every 'profiling multiplier' ticks. The default | ||
1028 | * multiplier is 1 and it can be changed by writing the new multiplier | ||
1029 | * value into /proc/profile. | ||
1030 | */ | ||
1031 | |||
1032 | void smp_local_timer_interrupt(void) | ||
1033 | { | ||
1034 | int cpu = smp_processor_id(); | ||
1035 | struct clock_event_device *evt = &per_cpu(lapic_events, cpu); | ||
1036 | |||
1037 | /* | ||
1038 | * Normally we should not be here till LAPIC has been initialized but | ||
1039 | * in some cases like kdump, it's possible that there is a pending LAPIC | ||
1040 | * timer interrupt from the previous kernel's context which is delivered in | ||
1041 | * the new kernel the moment interrupts are enabled. | ||
1042 | * | ||
1043 | * Interrupts are enabled early and the LAPIC is set up much later, hence | ||
1044 | * it's possible that when we get here evt->event_handler is NULL. | ||
1045 | * Check for event_handler being NULL and discard the interrupt as | ||
1046 | * spurious. | ||
1047 | */ | ||
1048 | if (!evt->event_handler) { | ||
1049 | printk(KERN_WARNING | ||
1050 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
1051 | /* Switch it off */ | ||
1052 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | ||
1053 | return; | ||
1054 | } | ||
1055 | |||
1056 | /* | ||
1057 | * the NMI deadlock-detector uses this. | ||
1058 | */ | ||
1059 | add_pda(apic_timer_irqs, 1); | ||
1060 | |||
1061 | evt->event_handler(evt); | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1065 | * Local APIC timer interrupt. This is the most natural way for doing | ||
1066 | * local interrupts, but local timer interrupts can be emulated by | ||
1067 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
1068 | * | ||
1069 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
1070 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
1071 | */ | ||
1072 | void smp_apic_timer_interrupt(struct pt_regs *regs) | ||
1073 | { | ||
1074 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
1075 | |||
1076 | /* | ||
1077 | * NOTE! We'd better ACK the irq immediately, | ||
1078 | * because timer handling can be slow. | ||
1079 | */ | ||
1080 | ack_APIC_irq(); | ||
1081 | /* | ||
1082 | * update_process_times() expects us to have done irq_enter(). | ||
1083 | * Besides, if we don't, timer interrupts ignore the global | ||
1084 | * interrupt lock, which is the WrongThing (tm) to do. | ||
1085 | */ | ||
1086 | exit_idle(); | ||
1087 | irq_enter(); | ||
1088 | smp_local_timer_interrupt(); | ||
1089 | irq_exit(); | ||
1090 | set_irq_regs(old_regs); | ||
1091 | } | ||
1092 | |||
1093 | /* | 1170 | /* |
1094 | * apic_is_clustered_box() -- Check if we can expect good TSC | 1171 | * apic_is_clustered_box() -- Check if we can expect good TSC |
1095 | * | 1172 | * |
@@ -1103,21 +1180,34 @@ __cpuinit int apic_is_clustered_box(void) | |||
1103 | { | 1180 | { |
1104 | int i, clusters, zeros; | 1181 | int i, clusters, zeros; |
1105 | unsigned id; | 1182 | unsigned id; |
1183 | u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; | ||
1106 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); | 1184 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); |
1107 | 1185 | ||
1108 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | 1186 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
1109 | 1187 | ||
1110 | for (i = 0; i < NR_CPUS; i++) { | 1188 | for (i = 0; i < NR_CPUS; i++) { |
1111 | id = bios_cpu_apicid[i]; | 1189 | /* are we being called early in kernel startup? */ |
1190 | if (bios_cpu_apicid) { | ||
1191 | id = bios_cpu_apicid[i]; | ||
1192 | } | ||
1193 | else if (i < nr_cpu_ids) { | ||
1194 | if (cpu_present(i)) | ||
1195 | id = per_cpu(x86_bios_cpu_apicid, i); | ||
1196 | else | ||
1197 | continue; | ||
1198 | } | ||
1199 | else | ||
1200 | break; | ||
1201 | |||
1112 | if (id != BAD_APICID) | 1202 | if (id != BAD_APICID) |
1113 | __set_bit(APIC_CLUSTERID(id), clustermap); | 1203 | __set_bit(APIC_CLUSTERID(id), clustermap); |
1114 | } | 1204 | } |
1115 | 1205 | ||
1116 | /* Problem: Partially populated chassis may not have CPUs in some of | 1206 | /* Problem: Partially populated chassis may not have CPUs in some of |
1117 | * the APIC clusters they have been allocated. Only present CPUs have | 1207 | * the APIC clusters they have been allocated. Only present CPUs have |
1118 | * bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since | 1208 | * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. |
1119 | * clusters are allocated sequentially, count zeros only if they are | 1209 | * Since clusters are allocated sequentially, count zeros only if |
1120 | * bounded by ones. | 1210 | * they are bounded by ones. |
1121 | */ | 1211 | */ |
1122 | clusters = 0; | 1212 | clusters = 0; |
1123 | zeros = 0; | 1213 | zeros = 0; |
@@ -1138,96 +1228,33 @@ __cpuinit int apic_is_clustered_box(void) | |||
1138 | } | 1228 | } |
1139 | 1229 | ||
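As a purely illustrative reading of the "bounded by ones" rule above: a clustermap of 1,1,0,1 has its hole surrounded by populated clusters, so that zero counts, whereas the trailing zeros of 1,1,0,0 are ignored, matching the partially populated chassis case the comment describes.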
1140 | /* | 1230 | /* |
1141 | * This interrupt should _never_ happen with our APIC/SMP architecture | 1231 | * APIC command line parameters |
1142 | */ | ||
1143 | asmlinkage void smp_spurious_interrupt(void) | ||
1144 | { | ||
1145 | unsigned int v; | ||
1146 | exit_idle(); | ||
1147 | irq_enter(); | ||
1148 | /* | ||
1149 | * Check if this really is a spurious interrupt and ACK it | ||
1150 | * if it is a vectored one. Just in case... | ||
1151 | * Spurious interrupts should not be ACKed. | ||
1152 | */ | ||
1153 | v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); | ||
1154 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | ||
1155 | ack_APIC_irq(); | ||
1156 | |||
1157 | add_pda(irq_spurious_count, 1); | ||
1158 | irq_exit(); | ||
1159 | } | ||
1160 | |||
1161 | /* | ||
1162 | * This interrupt should never happen with our APIC/SMP architecture | ||
1163 | */ | 1232 | */ |
1164 | 1233 | static int __init apic_set_verbosity(char *str) | |
1165 | asmlinkage void smp_error_interrupt(void) | ||
1166 | { | ||
1167 | unsigned int v, v1; | ||
1168 | |||
1169 | exit_idle(); | ||
1170 | irq_enter(); | ||
1171 | /* First tickle the hardware, only then report what went on. -- REW */ | ||
1172 | v = apic_read(APIC_ESR); | ||
1173 | apic_write(APIC_ESR, 0); | ||
1174 | v1 = apic_read(APIC_ESR); | ||
1175 | ack_APIC_irq(); | ||
1176 | atomic_inc(&irq_err_count); | ||
1177 | |||
1178 | /* Here is what the APIC error bits mean: | ||
1179 | 0: Send CS error | ||
1180 | 1: Receive CS error | ||
1181 | 2: Send accept error | ||
1182 | 3: Receive accept error | ||
1183 | 4: Reserved | ||
1184 | 5: Send illegal vector | ||
1185 | 6: Received illegal vector | ||
1186 | 7: Illegal register address | ||
1187 | */ | ||
1188 | printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | ||
1189 | smp_processor_id(), v , v1); | ||
1190 | irq_exit(); | ||
1191 | } | ||
1192 | |||
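Decoding the printk above is straightforward, since the two values are the ESR bitmask before and after the write: a hypothetical "APIC error on CPU0: 08(00)" would mean bit 3 was set, a receive accept error, and the re-read after clearing came back clean.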
1193 | int disable_apic; | ||
1194 | |||
1195 | /* | ||
1196 | * This initializes the IO-APIC and APIC hardware if this is | ||
1197 | * a UP kernel. | ||
1198 | */ | ||
1199 | int __init APIC_init_uniprocessor (void) | ||
1200 | { | 1234 | { |
1201 | if (disable_apic) { | 1235 | if (str == NULL) { |
1202 | printk(KERN_INFO "Apic disabled\n"); | 1236 | skip_ioapic_setup = 0; |
1203 | return -1; | 1237 | ioapic_force = 1; |
1238 | return 0; | ||
1204 | } | 1239 | } |
1205 | if (!cpu_has_apic) { | 1240 | if (strcmp("debug", str) == 0) |
1206 | disable_apic = 1; | 1241 | apic_verbosity = APIC_DEBUG; |
1207 | printk(KERN_INFO "Apic disabled by BIOS\n"); | 1242 | else if (strcmp("verbose", str) == 0) |
1208 | return -1; | 1243 | apic_verbosity = APIC_VERBOSE; |
1244 | else { | ||
1245 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
1246 | ", use apic=verbose or apic=debug\n", str); | ||
1247 | return -EINVAL; | ||
1209 | } | 1248 | } |
1210 | 1249 | ||
1211 | verify_local_APIC(); | ||
1212 | |||
1213 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | ||
1214 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); | ||
1215 | |||
1216 | setup_local_APIC(); | ||
1217 | |||
1218 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | ||
1219 | setup_IO_APIC(); | ||
1220 | else | ||
1221 | nr_ioapics = 0; | ||
1222 | setup_boot_APIC_clock(); | ||
1223 | check_nmi_watchdog(); | ||
1224 | return 0; | 1250 | return 0; |
1225 | } | 1251 | } |
1252 | early_param("apic", apic_set_verbosity); | ||
1226 | 1253 | ||
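Because apic_set_verbosity() is registered with early_param() above, the choice is made on the kernel command line; matching the branches shown, a sketch of the accepted forms:

	apic=verbose    more APIC bring-up messages (apic_verbosity = APIC_VERBOSE)
	apic=debug      maximum verbosity (apic_verbosity = APIC_DEBUG)
	apic            bare parameter, the NULL branch: force IO-APIC setup (ioapic_force = 1)

Anything else hits the warning and returns -EINVAL.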
1227 | static __init int setup_disableapic(char *str) | 1254 | static __init int setup_disableapic(char *str) |
1228 | { | 1255 | { |
1229 | disable_apic = 1; | 1256 | disable_apic = 1; |
1230 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1257 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1231 | return 0; | 1258 | return 0; |
1232 | } | 1259 | } |
1233 | early_param("disableapic", setup_disableapic); | 1260 | early_param("disableapic", setup_disableapic); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 17089a041028..d4438ef296d8 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -227,6 +227,7 @@ | |||
227 | #include <linux/dmi.h> | 227 | #include <linux/dmi.h> |
228 | #include <linux/suspend.h> | 228 | #include <linux/suspend.h> |
229 | #include <linux/kthread.h> | 229 | #include <linux/kthread.h> |
230 | #include <linux/jiffies.h> | ||
230 | 231 | ||
231 | #include <asm/system.h> | 232 | #include <asm/system.h> |
232 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
@@ -235,8 +236,6 @@ | |||
235 | #include <asm/paravirt.h> | 236 | #include <asm/paravirt.h> |
236 | #include <asm/reboot.h> | 237 | #include <asm/reboot.h> |
237 | 238 | ||
238 | #include "io_ports.h" | ||
239 | |||
240 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | 239 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) |
241 | extern int (*console_blank_hook)(int); | 240 | extern int (*console_blank_hook)(int); |
242 | #endif | 241 | #endif |
@@ -324,7 +323,7 @@ extern int (*console_blank_hook)(int); | |||
324 | /* | 323 | /* |
325 | * Ignore suspend events for this amount of time after a resume | 324 | * Ignore suspend events for this amount of time after a resume |
326 | */ | 325 | */ |
327 | #define DEFAULT_BOUNCE_INTERVAL (3 * HZ) | 326 | #define DEFAULT_BOUNCE_INTERVAL (3 * HZ) |
328 | 327 | ||
329 | /* | 328 | /* |
330 | * Maximum number of events stored | 329 | * Maximum number of events stored |
@@ -336,7 +335,7 @@ extern int (*console_blank_hook)(int); | |||
336 | */ | 335 | */ |
337 | struct apm_user { | 336 | struct apm_user { |
338 | int magic; | 337 | int magic; |
339 | struct apm_user * next; | 338 | struct apm_user *next; |
340 | unsigned int suser: 1; | 339 | unsigned int suser: 1; |
341 | unsigned int writer: 1; | 340 | unsigned int writer: 1; |
342 | unsigned int reader: 1; | 341 | unsigned int reader: 1; |
@@ -372,44 +371,44 @@ struct apm_user { | |||
372 | static struct { | 371 | static struct { |
373 | unsigned long offset; | 372 | unsigned long offset; |
374 | unsigned short segment; | 373 | unsigned short segment; |
375 | } apm_bios_entry; | 374 | } apm_bios_entry; |
376 | static int clock_slowed; | 375 | static int clock_slowed; |
377 | static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; | 376 | static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; |
378 | static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; | 377 | static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; |
379 | static int set_pm_idle; | 378 | static int set_pm_idle; |
380 | static int suspends_pending; | 379 | static int suspends_pending; |
381 | static int standbys_pending; | 380 | static int standbys_pending; |
382 | static int ignore_sys_suspend; | 381 | static int ignore_sys_suspend; |
383 | static int ignore_normal_resume; | 382 | static int ignore_normal_resume; |
384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; | 383 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; |
385 | 384 | ||
386 | static int debug __read_mostly; | 385 | static int debug __read_mostly; |
387 | static int smp __read_mostly; | 386 | static int smp __read_mostly; |
388 | static int apm_disabled = -1; | 387 | static int apm_disabled = -1; |
389 | #ifdef CONFIG_SMP | 388 | #ifdef CONFIG_SMP |
390 | static int power_off; | 389 | static int power_off; |
391 | #else | 390 | #else |
392 | static int power_off = 1; | 391 | static int power_off = 1; |
393 | #endif | 392 | #endif |
394 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | 393 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF |
395 | static int realmode_power_off = 1; | 394 | static int realmode_power_off = 1; |
396 | #else | 395 | #else |
397 | static int realmode_power_off; | 396 | static int realmode_power_off; |
398 | #endif | 397 | #endif |
399 | #ifdef CONFIG_APM_ALLOW_INTS | 398 | #ifdef CONFIG_APM_ALLOW_INTS |
400 | static int allow_ints = 1; | 399 | static int allow_ints = 1; |
401 | #else | 400 | #else |
402 | static int allow_ints; | 401 | static int allow_ints; |
403 | #endif | 402 | #endif |
404 | static int broken_psr; | 403 | static int broken_psr; |
405 | 404 | ||
406 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | 405 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); |
407 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 406 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
408 | static struct apm_user * user_list; | 407 | static struct apm_user *user_list; |
409 | static DEFINE_SPINLOCK(user_list_lock); | 408 | static DEFINE_SPINLOCK(user_list_lock); |
410 | static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; | 409 | static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; |
411 | 410 | ||
412 | static const char driver_version[] = "1.16ac"; /* no spaces */ | 411 | static const char driver_version[] = "1.16ac"; /* no spaces */ |
413 | 412 | ||
414 | static struct task_struct *kapmd_task; | 413 | static struct task_struct *kapmd_task; |
415 | 414 | ||
@@ -417,7 +416,7 @@ static struct task_struct *kapmd_task; | |||
417 | * APM event names taken from the APM 1.2 specification. These are | 416 | * APM event names taken from the APM 1.2 specification. These are |
418 | * the message codes that the BIOS uses to tell us about events | 417 | * the message codes that the BIOS uses to tell us about events |
419 | */ | 418 | */ |
420 | static const char * const apm_event_name[] = { | 419 | static const char * const apm_event_name[] = { |
421 | "system standby", | 420 | "system standby", |
422 | "system suspend", | 421 | "system suspend", |
423 | "normal resume", | 422 | "normal resume", |
@@ -435,14 +434,14 @@ static const char * const apm_event_name[] = { | |||
435 | 434 | ||
436 | typedef struct lookup_t { | 435 | typedef struct lookup_t { |
437 | int key; | 436 | int key; |
438 | char * msg; | 437 | char *msg; |
439 | } lookup_t; | 438 | } lookup_t; |
440 | 439 | ||
441 | /* | 440 | /* |
442 | * The BIOS returns a set of standard error codes in AX when the | 441 | * The BIOS returns a set of standard error codes in AX when the |
443 | * carry flag is set. | 442 | * carry flag is set. |
444 | */ | 443 | */ |
445 | 444 | ||
446 | static const lookup_t error_table[] = { | 445 | static const lookup_t error_table[] = { |
447 | /* N/A { APM_SUCCESS, "Operation succeeded" }, */ | 446 | /* N/A { APM_SUCCESS, "Operation succeeded" }, */ |
448 | { APM_DISABLED, "Power management disabled" }, | 447 | { APM_DISABLED, "Power management disabled" }, |
@@ -472,24 +471,25 @@ static const lookup_t error_table[] = { | |||
472 | * Write a meaningful log entry to the kernel log in the event of | 471 | * Write a meaningful log entry to the kernel log in the event of |
473 | * an APM error. | 472 | * an APM error. |
474 | */ | 473 | */ |
475 | 474 | ||
476 | static void apm_error(char *str, int err) | 475 | static void apm_error(char *str, int err) |
477 | { | 476 | { |
478 | int i; | 477 | int i; |
479 | 478 | ||
480 | for (i = 0; i < ERROR_COUNT; i++) | 479 | for (i = 0; i < ERROR_COUNT; i++) |
481 | if (error_table[i].key == err) break; | 480 | if (error_table[i].key == err) |
481 | break; | ||
482 | if (i < ERROR_COUNT) | 482 | if (i < ERROR_COUNT) |
483 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); | 483 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); |
484 | else | 484 | else |
485 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", | 485 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", |
486 | str, err); | 486 | str, err); |
487 | } | 487 | } |
488 | 488 | ||
489 | /* | 489 | /* |
490 | * Lock APM functionality to physical CPU 0 | 490 | * Lock APM functionality to physical CPU 0 |
491 | */ | 491 | */ |
492 | 492 | ||
493 | #ifdef CONFIG_SMP | 493 | #ifdef CONFIG_SMP |
494 | 494 | ||
495 | static cpumask_t apm_save_cpus(void) | 495 | static cpumask_t apm_save_cpus(void) |
@@ -511,7 +511,7 @@ static inline void apm_restore_cpus(cpumask_t mask) | |||
511 | /* | 511 | /* |
512 | * No CPU lockdown needed on a uniprocessor | 512 | * No CPU lockdown needed on a uniprocessor |
513 | */ | 513 | */ |
514 | 514 | ||
515 | #define apm_save_cpus() (current->cpus_allowed) | 515 | #define apm_save_cpus() (current->cpus_allowed) |
516 | #define apm_restore_cpus(x) (void)(x) | 516 | #define apm_restore_cpus(x) (void)(x) |
517 | 517 | ||
@@ -590,7 +590,7 @@ static inline void apm_irq_restore(unsigned long flags) | |||
590 | * code is returned in AH (bits 8-15 of eax) and this function | 590 | * code is returned in AH (bits 8-15 of eax) and this function |
591 | * returns non-zero. | 591 | * returns non-zero. |
592 | */ | 592 | */ |
593 | 593 | ||
594 | static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, | 594 | static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, |
595 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) | 595 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) |
596 | { | 596 | { |
@@ -602,7 +602,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, | |||
602 | struct desc_struct *gdt; | 602 | struct desc_struct *gdt; |
603 | 603 | ||
604 | cpus = apm_save_cpus(); | 604 | cpus = apm_save_cpus(); |
605 | 605 | ||
606 | cpu = get_cpu(); | 606 | cpu = get_cpu(); |
607 | gdt = get_cpu_gdt_table(cpu); | 607 | gdt = get_cpu_gdt_table(cpu); |
608 | save_desc_40 = gdt[0x40 / 8]; | 608 | save_desc_40 = gdt[0x40 / 8]; |
@@ -616,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, | |||
616 | gdt[0x40 / 8] = save_desc_40; | 616 | gdt[0x40 / 8] = save_desc_40; |
617 | put_cpu(); | 617 | put_cpu(); |
618 | apm_restore_cpus(cpus); | 618 | apm_restore_cpus(cpus); |
619 | 619 | ||
620 | return *eax & 0xff; | 620 | return *eax & 0xff; |
621 | } | 621 | } |
622 | 622 | ||
@@ -645,7 +645,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) | |||
645 | struct desc_struct *gdt; | 645 | struct desc_struct *gdt; |
646 | 646 | ||
647 | cpus = apm_save_cpus(); | 647 | cpus = apm_save_cpus(); |
648 | 648 | ||
649 | cpu = get_cpu(); | 649 | cpu = get_cpu(); |
650 | gdt = get_cpu_gdt_table(cpu); | 650 | gdt = get_cpu_gdt_table(cpu); |
651 | save_desc_40 = gdt[0x40 / 8]; | 651 | save_desc_40 = gdt[0x40 / 8]; |
@@ -680,7 +680,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) | |||
680 | 680 | ||
681 | static int apm_driver_version(u_short *val) | 681 | static int apm_driver_version(u_short *val) |
682 | { | 682 | { |
683 | u32 eax; | 683 | u32 eax; |
684 | 684 | ||
685 | if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) | 685 | if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) |
686 | return (eax >> 8) & 0xff; | 686 | return (eax >> 8) & 0xff; |
@@ -704,16 +704,16 @@ static int apm_driver_version(u_short *val) | |||
704 | * that APM 1.2 is in use. If no messages are pending the value 0x80 | 704 | * that APM 1.2 is in use. If no messages are pending the value 0x80 |
705 | * is returned (No power management events pending). | 705 | * is returned (No power management events pending). |
706 | */ | 706 | */ |
707 | 707 | ||
708 | static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) | 708 | static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) |
709 | { | 709 | { |
710 | u32 eax; | 710 | u32 eax; |
711 | u32 ebx; | 711 | u32 ebx; |
712 | u32 ecx; | 712 | u32 ecx; |
713 | u32 dummy; | 713 | u32 dummy; |
714 | 714 | ||
715 | if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, | 715 | if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, |
716 | &dummy, &dummy)) | 716 | &dummy, &dummy)) |
717 | return (eax >> 8) & 0xff; | 717 | return (eax >> 8) & 0xff; |
718 | *event = ebx; | 718 | *event = ebx; |
719 | if (apm_info.connection_version < 0x0102) | 719 | if (apm_info.connection_version < 0x0102) |
@@ -736,10 +736,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) | |||
736 | * The state holds the state to transition to, which may in fact | 736 | * The state holds the state to transition to, which may in fact |
737 | * be an acceptance of a BIOS requested state change. | 737 | * be an acceptance of a BIOS requested state change. |
738 | */ | 738 | */ |
739 | 739 | ||
740 | static int set_power_state(u_short what, u_short state) | 740 | static int set_power_state(u_short what, u_short state) |
741 | { | 741 | { |
742 | u32 eax; | 742 | u32 eax; |
743 | 743 | ||
744 | if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) | 744 | if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) |
745 | return (eax >> 8) & 0xff; | 745 | return (eax >> 8) & 0xff; |
@@ -752,7 +752,7 @@ static int set_power_state(u_short what, u_short state) | |||
752 | * | 752 | * |
753 | * Transition the entire system into a new APM power state. | 753 | * Transition the entire system into a new APM power state. |
754 | */ | 754 | */ |
755 | 755 | ||
756 | static int set_system_power_state(u_short state) | 756 | static int set_system_power_state(u_short state) |
757 | { | 757 | { |
758 | return set_power_state(APM_DEVICE_ALL, state); | 758 | return set_power_state(APM_DEVICE_ALL, state); |
@@ -766,13 +766,13 @@ static int set_system_power_state(u_short state) | |||
766 | * to handle the idle request. On a success the function returns 1 | 766 | * to handle the idle request. On a success the function returns 1 |
767 | * if the BIOS did clock slowing or 0 otherwise. | 767 | * if the BIOS did clock slowing or 0 otherwise. |
768 | */ | 768 | */ |
769 | 769 | ||
770 | static int apm_do_idle(void) | 770 | static int apm_do_idle(void) |
771 | { | 771 | { |
772 | u32 eax; | 772 | u32 eax; |
773 | u8 ret = 0; | 773 | u8 ret = 0; |
774 | int idled = 0; | 774 | int idled = 0; |
775 | int polling; | 775 | int polling; |
776 | 776 | ||
777 | polling = !!(current_thread_info()->status & TS_POLLING); | 777 | polling = !!(current_thread_info()->status & TS_POLLING); |
778 | if (polling) { | 778 | if (polling) { |
@@ -799,10 +799,9 @@ static int apm_do_idle(void) | |||
799 | /* This always fails on some SMP boards running UP kernels. | 799 | /* This always fails on some SMP boards running UP kernels. |
800 | * Only report the failure the first 5 times. | 800 | * Only report the failure the first 5 times. |
801 | */ | 801 | */ |
802 | if (++t < 5) | 802 | if (++t < 5) { |
803 | { | ||
804 | printk(KERN_DEBUG "apm_do_idle failed (%d)\n", | 803 | printk(KERN_DEBUG "apm_do_idle failed (%d)\n", |
805 | (eax >> 8) & 0xff); | 804 | (eax >> 8) & 0xff); |
806 | t = jiffies; | 805 | t = jiffies; |
807 | } | 806 | } |
808 | return -1; | 807 | return -1; |
@@ -814,15 +813,15 @@ static int apm_do_idle(void) | |||
814 | /** | 813 | /** |
815 | * apm_do_busy - inform the BIOS the CPU is busy | 814 | * apm_do_busy - inform the BIOS the CPU is busy |
816 | * | 815 | * |
817 | * Request that the BIOS brings the CPU back to full performance. | 816 | * Request that the BIOS brings the CPU back to full performance. |
818 | */ | 817 | */ |
819 | 818 | ||
820 | static void apm_do_busy(void) | 819 | static void apm_do_busy(void) |
821 | { | 820 | { |
822 | u32 dummy; | 821 | u32 dummy; |
823 | 822 | ||
824 | if (clock_slowed || ALWAYS_CALL_BUSY) { | 823 | if (clock_slowed || ALWAYS_CALL_BUSY) { |
825 | (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); | 824 | (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); |
826 | clock_slowed = 0; | 825 | clock_slowed = 0; |
827 | } | 826 | } |
828 | } | 827 | } |
@@ -833,15 +832,15 @@ static void apm_do_busy(void) | |||
833 | * power management - we probably want | 832 | * power management - we probably want |
834 | * to conserve power. | 833 | * to conserve power. |
835 | */ | 834 | */ |
836 | #define IDLE_CALC_LIMIT (HZ * 100) | 835 | #define IDLE_CALC_LIMIT (HZ * 100) |
837 | #define IDLE_LEAKY_MAX 16 | 836 | #define IDLE_LEAKY_MAX 16 |
838 | 837 | ||
839 | static void (*original_pm_idle)(void) __read_mostly; | 838 | static void (*original_pm_idle)(void) __read_mostly; |
840 | 839 | ||
841 | /** | 840 | /** |
842 | * apm_cpu_idle - cpu idling for APM capable Linux | 841 | * apm_cpu_idle - cpu idling for APM capable Linux |
843 | * | 842 | * |
844 | * This is the idling function the kernel executes when APM is available. It | 843 | * This is the idling function the kernel executes when APM is available. It |
845 | * tries to do BIOS powermanagement based on the average system idle time. | 844 | * tries to do BIOS powermanagement based on the average system idle time. |
846 | * Furthermore it calls the system default idle routine. | 845 | * Furthermore it calls the system default idle routine. |
847 | */ | 846 | */ |
@@ -882,7 +881,8 @@ recalc: | |||
882 | 881 | ||
883 | t = jiffies; | 882 | t = jiffies; |
884 | switch (apm_do_idle()) { | 883 | switch (apm_do_idle()) { |
885 | case 0: apm_idle_done = 1; | 884 | case 0: |
885 | apm_idle_done = 1; | ||
886 | if (t != jiffies) { | 886 | if (t != jiffies) { |
887 | if (bucket) { | 887 | if (bucket) { |
888 | bucket = IDLE_LEAKY_MAX; | 888 | bucket = IDLE_LEAKY_MAX; |
@@ -893,7 +893,8 @@ recalc: | |||
893 | continue; | 893 | continue; |
894 | } | 894 | } |
895 | break; | 895 | break; |
896 | case 1: apm_idle_done = 1; | 896 | case 1: |
897 | apm_idle_done = 1; | ||
897 | break; | 898 | break; |
898 | default: /* BIOS refused */ | 899 | default: /* BIOS refused */ |
899 | break; | 900 | break; |
@@ -921,10 +922,10 @@ recalc: | |||
921 | * the SMP call on CPU0 as some systems will only honour this call | 922 | * the SMP call on CPU0 as some systems will only honour this call |
922 | * on their first cpu. | 923 | * on their first cpu. |
923 | */ | 924 | */ |
924 | 925 | ||
925 | static void apm_power_off(void) | 926 | static void apm_power_off(void) |
926 | { | 927 | { |
927 | unsigned char po_bios_call[] = { | 928 | unsigned char po_bios_call[] = { |
928 | 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ | 929 | 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ |
929 | 0x8e, 0xd0, /* movw ax,ss */ | 930 | 0x8e, 0xd0, /* movw ax,ss */ |
930 | 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ | 931 | 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ |
@@ -935,13 +936,12 @@ static void apm_power_off(void) | |||
935 | }; | 936 | }; |
936 | 937 | ||
937 | /* Some bioses don't like being called from CPU != 0 */ | 938 | /* Some bioses don't like being called from CPU != 0 */ |
938 | if (apm_info.realmode_power_off) | 939 | if (apm_info.realmode_power_off) { |
939 | { | ||
940 | (void)apm_save_cpus(); | 940 | (void)apm_save_cpus(); |
941 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); | 941 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); |
942 | } else { | ||
943 | (void)set_system_power_state(APM_STATE_OFF); | ||
942 | } | 944 | } |
943 | else | ||
944 | (void) set_system_power_state(APM_STATE_OFF); | ||
945 | } | 945 | } |
946 | 946 | ||
947 | #ifdef CONFIG_APM_DO_ENABLE | 947 | #ifdef CONFIG_APM_DO_ENABLE |
@@ -950,17 +950,17 @@ static void apm_power_off(void) | |||
950 | * apm_enable_power_management - enable BIOS APM power management | 950 | * apm_enable_power_management - enable BIOS APM power management |
951 | * @enable: enable yes/no | 951 | * @enable: enable yes/no |
952 | * | 952 | * |
953 | * Enable or disable the APM BIOS power services. | 953 | * Enable or disable the APM BIOS power services. |
954 | */ | 954 | */ |
955 | 955 | ||
956 | static int apm_enable_power_management(int enable) | 956 | static int apm_enable_power_management(int enable) |
957 | { | 957 | { |
958 | u32 eax; | 958 | u32 eax; |
959 | 959 | ||
960 | if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) | 960 | if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) |
961 | return APM_NOT_ENGAGED; | 961 | return APM_NOT_ENGAGED; |
962 | if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, | 962 | if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, |
963 | enable, &eax)) | 963 | enable, &eax)) |
964 | return (eax >> 8) & 0xff; | 964 | return (eax >> 8) & 0xff; |
965 | if (enable) | 965 | if (enable) |
966 | apm_info.bios.flags &= ~APM_BIOS_DISABLED; | 966 | apm_info.bios.flags &= ~APM_BIOS_DISABLED; |
@@ -983,19 +983,19 @@ static int apm_enable_power_management(int enable) | |||
983 | * if reported is a lifetime in seconds/minutes at current power | 983 | * if reported is a lifetime in seconds/minutes at current power |
984 | * consumption. | 984 | * consumption. |
985 | */ | 985 | */ |
986 | 986 | ||
987 | static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) | 987 | static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) |
988 | { | 988 | { |
989 | u32 eax; | 989 | u32 eax; |
990 | u32 ebx; | 990 | u32 ebx; |
991 | u32 ecx; | 991 | u32 ecx; |
992 | u32 edx; | 992 | u32 edx; |
993 | u32 dummy; | 993 | u32 dummy; |
994 | 994 | ||
995 | if (apm_info.get_power_status_broken) | 995 | if (apm_info.get_power_status_broken) |
996 | return APM_32_UNSUPPORTED; | 996 | return APM_32_UNSUPPORTED; |
997 | if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, | 997 | if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, |
998 | &eax, &ebx, &ecx, &edx, &dummy)) | 998 | &eax, &ebx, &ecx, &edx, &dummy)) |
999 | return (eax >> 8) & 0xff; | 999 | return (eax >> 8) & 0xff; |
1000 | *status = ebx; | 1000 | *status = ebx; |
1001 | *bat = ecx; | 1001 | *bat = ecx; |
@@ -1011,11 +1011,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) | |||
1011 | static int apm_get_battery_status(u_short which, u_short *status, | 1011 | static int apm_get_battery_status(u_short which, u_short *status, |
1012 | u_short *bat, u_short *life, u_short *nbat) | 1012 | u_short *bat, u_short *life, u_short *nbat) |
1013 | { | 1013 | { |
1014 | u32 eax; | 1014 | u32 eax; |
1015 | u32 ebx; | 1015 | u32 ebx; |
1016 | u32 ecx; | 1016 | u32 ecx; |
1017 | u32 edx; | 1017 | u32 edx; |
1018 | u32 esi; | 1018 | u32 esi; |
1019 | 1019 | ||
1020 | if (apm_info.connection_version < 0x0102) { | 1020 | if (apm_info.connection_version < 0x0102) { |
1021 | /* pretend we only have one battery. */ | 1021 | /* pretend we only have one battery. */ |
@@ -1026,7 +1026,7 @@ static int apm_get_battery_status(u_short which, u_short *status, | |||
1026 | } | 1026 | } |
1027 | 1027 | ||
1028 | if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, | 1028 | if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, |
1029 | &ebx, &ecx, &edx, &esi)) | 1029 | &ebx, &ecx, &edx, &esi)) |
1030 | return (eax >> 8) & 0xff; | 1030 | return (eax >> 8) & 0xff; |
1031 | *status = ebx; | 1031 | *status = ebx; |
1032 | *bat = ecx; | 1032 | *bat = ecx; |
@@ -1044,10 +1044,10 @@ static int apm_get_battery_status(u_short which, u_short *status, | |||
1044 | * Activate or deactivate power management on either a specific device | 1044 | * Activate or deactivate power management on either a specific device |
1045 | * or the entire system (%APM_DEVICE_ALL). | 1045 | * or the entire system (%APM_DEVICE_ALL). |
1046 | */ | 1046 | */ |
1047 | 1047 | ||
1048 | static int apm_engage_power_management(u_short device, int enable) | 1048 | static int apm_engage_power_management(u_short device, int enable) |
1049 | { | 1049 | { |
1050 | u32 eax; | 1050 | u32 eax; |
1051 | 1051 | ||
1052 | if ((enable == 0) && (device == APM_DEVICE_ALL) | 1052 | if ((enable == 0) && (device == APM_DEVICE_ALL) |
1053 | && (apm_info.bios.flags & APM_BIOS_DISABLED)) | 1053 | && (apm_info.bios.flags & APM_BIOS_DISABLED)) |
@@ -1074,7 +1074,7 @@ static int apm_engage_power_management(u_short device, int enable) | |||
1074 | * all video devices. Typically the BIOS will do laptop backlight and | 1074 | * all video devices. Typically the BIOS will do laptop backlight and |
1075 | * monitor powerdown for us. | 1075 | * monitor powerdown for us. |
1076 | */ | 1076 | */ |
1077 | 1077 | ||
1078 | static int apm_console_blank(int blank) | 1078 | static int apm_console_blank(int blank) |
1079 | { | 1079 | { |
1080 | int error = APM_NOT_ENGAGED; /* silence gcc */ | 1080 | int error = APM_NOT_ENGAGED; /* silence gcc */ |
@@ -1126,7 +1126,7 @@ static apm_event_t get_queued_event(struct apm_user *as) | |||
1126 | 1126 | ||
1127 | static void queue_event(apm_event_t event, struct apm_user *sender) | 1127 | static void queue_event(apm_event_t event, struct apm_user *sender) |
1128 | { | 1128 | { |
1129 | struct apm_user * as; | 1129 | struct apm_user *as; |
1130 | 1130 | ||
1131 | spin_lock(&user_list_lock); | 1131 | spin_lock(&user_list_lock); |
1132 | if (user_list == NULL) | 1132 | if (user_list == NULL) |
@@ -1174,11 +1174,11 @@ static void reinit_timer(void) | |||
1174 | 1174 | ||
1175 | spin_lock_irqsave(&i8253_lock, flags); | 1175 | spin_lock_irqsave(&i8253_lock, flags); |
1176 | /* set the clock to HZ */ | 1176 | /* set the clock to HZ */ |
1177 | outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1177 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
1178 | udelay(10); | 1178 | udelay(10); |
1179 | outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ | 1179 | outb_pit(LATCH & 0xff, PIT_CH0); /* LSB */ |
1180 | udelay(10); | 1180 | udelay(10); |
1181 | outb(LATCH >> 8, PIT_CH0); /* MSB */ | 1181 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ |
1182 | udelay(10); | 1182 | udelay(10); |
1183 | spin_unlock_irqrestore(&i8253_lock, flags); | 1183 | spin_unlock_irqrestore(&i8253_lock, flags); |
1184 | #endif | 1184 | #endif |
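For reference, assuming the usual 1.193182 MHz i8253/i8254 input clock and HZ=100, LATCH comes out to 1193182 / 100, roughly 11932 = 0x2e9c, so the two data writes above would send 0x9c (LSB) followed by 0x2e (MSB) to channel 0.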
@@ -1186,7 +1186,7 @@ static void reinit_timer(void) | |||
1186 | 1186 | ||
1187 | static int suspend(int vetoable) | 1187 | static int suspend(int vetoable) |
1188 | { | 1188 | { |
1189 | int err; | 1189 | int err; |
1190 | struct apm_user *as; | 1190 | struct apm_user *as; |
1191 | 1191 | ||
1192 | if (pm_send_all(PM_SUSPEND, (void *)3)) { | 1192 | if (pm_send_all(PM_SUSPEND, (void *)3)) { |
@@ -1239,7 +1239,7 @@ static int suspend(int vetoable) | |||
1239 | 1239 | ||
1240 | static void standby(void) | 1240 | static void standby(void) |
1241 | { | 1241 | { |
1242 | int err; | 1242 | int err; |
1243 | 1243 | ||
1244 | local_irq_disable(); | 1244 | local_irq_disable(); |
1245 | device_power_down(PMSG_SUSPEND); | 1245 | device_power_down(PMSG_SUSPEND); |
@@ -1256,8 +1256,8 @@ static void standby(void) | |||
1256 | 1256 | ||
1257 | static apm_event_t get_event(void) | 1257 | static apm_event_t get_event(void) |
1258 | { | 1258 | { |
1259 | int error; | 1259 | int error; |
1260 | apm_event_t event = APM_NO_EVENTS; /* silence gcc */ | 1260 | apm_event_t event = APM_NO_EVENTS; /* silence gcc */ |
1261 | apm_eventinfo_t info; | 1261 | apm_eventinfo_t info; |
1262 | 1262 | ||
1263 | static int notified; | 1263 | static int notified; |
@@ -1275,9 +1275,9 @@ static apm_event_t get_event(void) | |||
1275 | 1275 | ||
1276 | static void check_events(void) | 1276 | static void check_events(void) |
1277 | { | 1277 | { |
1278 | apm_event_t event; | 1278 | apm_event_t event; |
1279 | static unsigned long last_resume; | 1279 | static unsigned long last_resume; |
1280 | static int ignore_bounce; | 1280 | static int ignore_bounce; |
1281 | 1281 | ||
1282 | while ((event = get_event()) != 0) { | 1282 | while ((event = get_event()) != 0) { |
1283 | if (debug) { | 1283 | if (debug) { |
@@ -1289,7 +1289,7 @@ static void check_events(void) | |||
1289 | "event 0x%02x\n", event); | 1289 | "event 0x%02x\n", event); |
1290 | } | 1290 | } |
1291 | if (ignore_bounce | 1291 | if (ignore_bounce |
1292 | && ((jiffies - last_resume) > bounce_interval)) | 1292 | && (time_after(jiffies, last_resume + bounce_interval))) |
1293 | ignore_bounce = 0; | 1293 | ignore_bounce = 0; |
1294 | 1294 | ||
1295 | switch (event) { | 1295 | switch (event) { |
@@ -1357,7 +1357,7 @@ static void check_events(void) | |||
1357 | /* | 1357 | /* |
1358 | * We are not allowed to reject a critical suspend. | 1358 | * We are not allowed to reject a critical suspend. |
1359 | */ | 1359 | */ |
1360 | (void) suspend(0); | 1360 | (void)suspend(0); |
1361 | break; | 1361 | break; |
1362 | } | 1362 | } |
1363 | } | 1363 | } |
@@ -1365,12 +1365,12 @@ static void check_events(void) | |||
1365 | 1365 | ||
1366 | static void apm_event_handler(void) | 1366 | static void apm_event_handler(void) |
1367 | { | 1367 | { |
1368 | static int pending_count = 4; | 1368 | static int pending_count = 4; |
1369 | int err; | 1369 | int err; |
1370 | 1370 | ||
1371 | if ((standbys_pending > 0) || (suspends_pending > 0)) { | 1371 | if ((standbys_pending > 0) || (suspends_pending > 0)) { |
1372 | if ((apm_info.connection_version > 0x100) && | 1372 | if ((apm_info.connection_version > 0x100) && |
1373 | (pending_count-- <= 0)) { | 1373 | (pending_count-- <= 0)) { |
1374 | pending_count = 4; | 1374 | pending_count = 4; |
1375 | if (debug) | 1375 | if (debug) |
1376 | printk(KERN_DEBUG "apm: setting state busy\n"); | 1376 | printk(KERN_DEBUG "apm: setting state busy\n"); |
@@ -1418,9 +1418,9 @@ static int check_apm_user(struct apm_user *as, const char *func) | |||
1418 | 1418 | ||
1419 | static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) | 1419 | static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) |
1420 | { | 1420 | { |
1421 | struct apm_user * as; | 1421 | struct apm_user *as; |
1422 | int i; | 1422 | int i; |
1423 | apm_event_t event; | 1423 | apm_event_t event; |
1424 | 1424 | ||
1425 | as = fp->private_data; | 1425 | as = fp->private_data; |
1426 | if (check_apm_user(as, "read")) | 1426 | if (check_apm_user(as, "read")) |
@@ -1459,9 +1459,9 @@ static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t * | |||
1459 | return 0; | 1459 | return 0; |
1460 | } | 1460 | } |
1461 | 1461 | ||
1462 | static unsigned int do_poll(struct file *fp, poll_table * wait) | 1462 | static unsigned int do_poll(struct file *fp, poll_table *wait) |
1463 | { | 1463 | { |
1464 | struct apm_user * as; | 1464 | struct apm_user *as; |
1465 | 1465 | ||
1466 | as = fp->private_data; | 1466 | as = fp->private_data; |
1467 | if (check_apm_user(as, "poll")) | 1467 | if (check_apm_user(as, "poll")) |
@@ -1472,10 +1472,10 @@ static unsigned int do_poll(struct file *fp, poll_table * wait) | |||
1472 | return 0; | 1472 | return 0; |
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | static int do_ioctl(struct inode * inode, struct file *filp, | 1475 | static int do_ioctl(struct inode *inode, struct file *filp, |
1476 | u_int cmd, u_long arg) | 1476 | u_int cmd, u_long arg) |
1477 | { | 1477 | { |
1478 | struct apm_user * as; | 1478 | struct apm_user *as; |
1479 | 1479 | ||
1480 | as = filp->private_data; | 1480 | as = filp->private_data; |
1481 | if (check_apm_user(as, "ioctl")) | 1481 | if (check_apm_user(as, "ioctl")) |
@@ -1515,9 +1515,9 @@ static int do_ioctl(struct inode * inode, struct file *filp, | |||
1515 | return 0; | 1515 | return 0; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | static int do_release(struct inode * inode, struct file * filp) | 1518 | static int do_release(struct inode *inode, struct file *filp) |
1519 | { | 1519 | { |
1520 | struct apm_user * as; | 1520 | struct apm_user *as; |
1521 | 1521 | ||
1522 | as = filp->private_data; | 1522 | as = filp->private_data; |
1523 | if (check_apm_user(as, "release")) | 1523 | if (check_apm_user(as, "release")) |
@@ -1533,11 +1533,11 @@ static int do_release(struct inode * inode, struct file * filp) | |||
1533 | if (suspends_pending <= 0) | 1533 | if (suspends_pending <= 0) |
1534 | (void) suspend(1); | 1534 | (void) suspend(1); |
1535 | } | 1535 | } |
1536 | spin_lock(&user_list_lock); | 1536 | spin_lock(&user_list_lock); |
1537 | if (user_list == as) | 1537 | if (user_list == as) |
1538 | user_list = as->next; | 1538 | user_list = as->next; |
1539 | else { | 1539 | else { |
1540 | struct apm_user * as1; | 1540 | struct apm_user *as1; |
1541 | 1541 | ||
1542 | for (as1 = user_list; | 1542 | for (as1 = user_list; |
1543 | (as1 != NULL) && (as1->next != as); | 1543 | (as1 != NULL) && (as1->next != as); |
@@ -1553,9 +1553,9 @@ static int do_release(struct inode * inode, struct file * filp) | |||
1553 | return 0; | 1553 | return 0; |
1554 | } | 1554 | } |
1555 | 1555 | ||
1556 | static int do_open(struct inode * inode, struct file * filp) | 1556 | static int do_open(struct inode *inode, struct file *filp) |
1557 | { | 1557 | { |
1558 | struct apm_user * as; | 1558 | struct apm_user *as; |
1559 | 1559 | ||
1560 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1560 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1561 | if (as == NULL) { | 1561 | if (as == NULL) { |
@@ -1569,7 +1569,7 @@ static int do_open(struct inode * inode, struct file * filp) | |||
1569 | as->suspends_read = as->standbys_read = 0; | 1569 | as->suspends_read = as->standbys_read = 0; |
1570 | /* | 1570 | /* |
1571 | * XXX - this is a tiny bit broken, when we consider BSD | 1571 | * XXX - this is a tiny bit broken, when we consider BSD |
1572 | * process accounting. If the device is opened by root, we | 1572 | * process accounting. If the device is opened by root, we |
1573 | * instantly flag that we used superuser privs. Who knows, | 1573 | * instantly flag that we used superuser privs. Who knows, |
1574 | * we might close the device immediately without doing a | 1574 | * we might close the device immediately without doing a |
1575 | * privileged operation -- cevans | 1575 | * privileged operation -- cevans |
@@ -1652,16 +1652,16 @@ static int proc_apm_show(struct seq_file *m, void *v) | |||
1652 | 8) min = minutes; sec = seconds */ | 1652 | 8) min = minutes; sec = seconds */ |
1653 | 1653 | ||
1654 | seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", | 1654 | seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", |
1655 | driver_version, | 1655 | driver_version, |
1656 | (apm_info.bios.version >> 8) & 0xff, | 1656 | (apm_info.bios.version >> 8) & 0xff, |
1657 | apm_info.bios.version & 0xff, | 1657 | apm_info.bios.version & 0xff, |
1658 | apm_info.bios.flags, | 1658 | apm_info.bios.flags, |
1659 | ac_line_status, | 1659 | ac_line_status, |
1660 | battery_status, | 1660 | battery_status, |
1661 | battery_flag, | 1661 | battery_flag, |
1662 | percentage, | 1662 | percentage, |
1663 | time_units, | 1663 | time_units, |
1664 | units); | 1664 | units); |
1665 | return 0; | 1665 | return 0; |
1666 | } | 1666 | } |
1667 | 1667 | ||
@@ -1684,8 +1684,8 @@ static int apm(void *unused) | |||
1684 | unsigned short cx; | 1684 | unsigned short cx; |
1685 | unsigned short dx; | 1685 | unsigned short dx; |
1686 | int error; | 1686 | int error; |
1687 | char * power_stat; | 1687 | char *power_stat; |
1688 | char * bat_stat; | 1688 | char *bat_stat; |
1689 | 1689 | ||
1690 | #ifdef CONFIG_SMP | 1690 | #ifdef CONFIG_SMP |
1691 | /* 2002/08/01 - WT | 1691 | /* 2002/08/01 - WT |
@@ -1744,23 +1744,41 @@ static int apm(void *unused) | |||
1744 | } | 1744 | } |
1745 | } | 1745 | } |
1746 | 1746 | ||
1747 | if (debug && (num_online_cpus() == 1 || smp )) { | 1747 | if (debug && (num_online_cpus() == 1 || smp)) { |
1748 | error = apm_get_power_status(&bx, &cx, &dx); | 1748 | error = apm_get_power_status(&bx, &cx, &dx); |
1749 | if (error) | 1749 | if (error) |
1750 | printk(KERN_INFO "apm: power status not available\n"); | 1750 | printk(KERN_INFO "apm: power status not available\n"); |
1751 | else { | 1751 | else { |
1752 | switch ((bx >> 8) & 0xff) { | 1752 | switch ((bx >> 8) & 0xff) { |
1753 | case 0: power_stat = "off line"; break; | 1753 | case 0: |
1754 | case 1: power_stat = "on line"; break; | 1754 | power_stat = "off line"; |
1755 | case 2: power_stat = "on backup power"; break; | 1755 | break; |
1756 | default: power_stat = "unknown"; break; | 1756 | case 1: |
1757 | power_stat = "on line"; | ||
1758 | break; | ||
1759 | case 2: | ||
1760 | power_stat = "on backup power"; | ||
1761 | break; | ||
1762 | default: | ||
1763 | power_stat = "unknown"; | ||
1764 | break; | ||
1757 | } | 1765 | } |
1758 | switch (bx & 0xff) { | 1766 | switch (bx & 0xff) { |
1759 | case 0: bat_stat = "high"; break; | 1767 | case 0: |
1760 | case 1: bat_stat = "low"; break; | 1768 | bat_stat = "high"; |
1761 | case 2: bat_stat = "critical"; break; | 1769 | break; |
1762 | case 3: bat_stat = "charging"; break; | 1770 | case 1: |
1763 | default: bat_stat = "unknown"; break; | 1771 | bat_stat = "low"; |
1772 | break; | ||
1773 | case 2: | ||
1774 | bat_stat = "critical"; | ||
1775 | break; | ||
1776 | case 3: | ||
1777 | bat_stat = "charging"; | ||
1778 | break; | ||
1779 | default: | ||
1780 | bat_stat = "unknown"; | ||
1781 | break; | ||
1764 | } | 1782 | } |
1765 | printk(KERN_INFO | 1783 | printk(KERN_INFO |
1766 | "apm: AC %s, battery status %s, battery life ", | 1784 | "apm: AC %s, battery status %s, battery life ", |
@@ -1777,8 +1795,8 @@ static int apm(void *unused) | |||
1777 | printk("unknown\n"); | 1795 | printk("unknown\n"); |
1778 | else | 1796 | else |
1779 | printk("%d %s\n", dx & 0x7fff, | 1797 | printk("%d %s\n", dx & 0x7fff, |
1780 | (dx & 0x8000) ? | 1798 | (dx & 0x8000) ? |
1781 | "minutes" : "seconds"); | 1799 | "minutes" : "seconds"); |
1782 | } | 1800 | } |
1783 | } | 1801 | } |
1784 | } | 1802 | } |
@@ -1803,7 +1821,7 @@ static int apm(void *unused) | |||
1803 | #ifndef MODULE | 1821 | #ifndef MODULE |
1804 | static int __init apm_setup(char *str) | 1822 | static int __init apm_setup(char *str) |
1805 | { | 1823 | { |
1806 | int invert; | 1824 | int invert; |
1807 | 1825 | ||
1808 | while ((str != NULL) && (*str != '\0')) { | 1826 | while ((str != NULL) && (*str != '\0')) { |
1809 | if (strncmp(str, "off", 3) == 0) | 1827 | if (strncmp(str, "off", 3) == 0) |
@@ -1828,14 +1846,13 @@ static int __init apm_setup(char *str) | |||
1828 | if ((strncmp(str, "power-off", 9) == 0) || | 1846 | if ((strncmp(str, "power-off", 9) == 0) || |
1829 | (strncmp(str, "power_off", 9) == 0)) | 1847 | (strncmp(str, "power_off", 9) == 0)) |
1830 | power_off = !invert; | 1848 | power_off = !invert; |
1831 | if (strncmp(str, "smp", 3) == 0) | 1849 | if (strncmp(str, "smp", 3) == 0) { |
1832 | { | ||
1833 | smp = !invert; | 1850 | smp = !invert; |
1834 | idle_threshold = 100; | 1851 | idle_threshold = 100; |
1835 | } | 1852 | } |
1836 | if ((strncmp(str, "allow-ints", 10) == 0) || | 1853 | if ((strncmp(str, "allow-ints", 10) == 0) || |
1837 | (strncmp(str, "allow_ints", 10) == 0)) | 1854 | (strncmp(str, "allow_ints", 10) == 0)) |
1838 | apm_info.allow_ints = !invert; | 1855 | apm_info.allow_ints = !invert; |
1839 | if ((strncmp(str, "broken-psr", 10) == 0) || | 1856 | if ((strncmp(str, "broken-psr", 10) == 0) || |
1840 | (strncmp(str, "broken_psr", 10) == 0)) | 1857 | (strncmp(str, "broken_psr", 10) == 0)) |
1841 | apm_info.get_power_status_broken = !invert; | 1858 | apm_info.get_power_status_broken = !invert; |
@@ -1881,7 +1898,8 @@ static int __init print_if_true(const struct dmi_system_id *d) | |||
1881 | */ | 1898 | */ |
1882 | static int __init broken_ps2_resume(const struct dmi_system_id *d) | 1899 | static int __init broken_ps2_resume(const struct dmi_system_id *d) |
1883 | { | 1900 | { |
1884 | printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident); | 1901 | printk(KERN_INFO "%s machine detected. Mousepad Resume Bug " |
1902 | "workaround hopefully not needed.\n", d->ident); | ||
1885 | return 0; | 1903 | return 0; |
1886 | } | 1904 | } |
1887 | 1905 | ||
@@ -1890,7 +1908,8 @@ static int __init set_realmode_power_off(const struct dmi_system_id *d) | |||
1890 | { | 1908 | { |
1891 | if (apm_info.realmode_power_off == 0) { | 1909 | if (apm_info.realmode_power_off == 0) { |
1892 | apm_info.realmode_power_off = 1; | 1910 | apm_info.realmode_power_off = 1; |
1893 | printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident); | 1911 | printk(KERN_INFO "%s bios detected. " |
1912 | "Using realmode poweroff only.\n", d->ident); | ||
1894 | } | 1913 | } |
1895 | return 0; | 1914 | return 0; |
1896 | } | 1915 | } |
@@ -1900,7 +1919,8 @@ static int __init set_apm_ints(const struct dmi_system_id *d) | |||
1900 | { | 1919 | { |
1901 | if (apm_info.allow_ints == 0) { | 1920 | if (apm_info.allow_ints == 0) { |
1902 | apm_info.allow_ints = 1; | 1921 | apm_info.allow_ints = 1; |
1903 | printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident); | 1922 | printk(KERN_INFO "%s machine detected. " |
1923 | "Enabling interrupts during APM calls.\n", d->ident); | ||
1904 | } | 1924 | } |
1905 | return 0; | 1925 | return 0; |
1906 | } | 1926 | } |
@@ -1910,7 +1930,8 @@ static int __init apm_is_horked(const struct dmi_system_id *d) | |||
1910 | { | 1930 | { |
1911 | if (apm_info.disabled == 0) { | 1931 | if (apm_info.disabled == 0) { |
1912 | apm_info.disabled = 1; | 1932 | apm_info.disabled = 1; |
1913 | printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); | 1933 | printk(KERN_INFO "%s machine detected. " |
1934 | "Disabling APM.\n", d->ident); | ||
1914 | } | 1935 | } |
1915 | return 0; | 1936 | return 0; |
1916 | } | 1937 | } |
@@ -1919,7 +1940,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d) | |||
1919 | { | 1940 | { |
1920 | if (apm_info.disabled == 0) { | 1941 | if (apm_info.disabled == 0) { |
1921 | apm_info.disabled = 1; | 1942 | apm_info.disabled = 1; |
1922 | printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); | 1943 | printk(KERN_INFO "%s machine detected. " |
1944 | "Disabling APM.\n", d->ident); | ||
1923 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); | 1945 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); |
1924 | printk(KERN_INFO "download from support.intel.com \n"); | 1946 | printk(KERN_INFO "download from support.intel.com \n"); |
1925 | } | 1947 | } |
@@ -1931,7 +1953,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d) | |||
1931 | { | 1953 | { |
1932 | if (apm_info.forbid_idle == 0) { | 1954 | if (apm_info.forbid_idle == 0) { |
1933 | apm_info.forbid_idle = 1; | 1955 | apm_info.forbid_idle = 1; |
1934 | printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident); | 1956 | printk(KERN_INFO "%s machine detected. " |
1957 | "Disabling APM idle calls.\n", d->ident); | ||
1935 | } | 1958 | } |
1936 | return 0; | 1959 | return 0; |
1937 | } | 1960 | } |
@@ -1954,7 +1977,8 @@ static int __init apm_likes_to_melt(const struct dmi_system_id *d) | |||
1954 | static int __init broken_apm_power(const struct dmi_system_id *d) | 1977 | static int __init broken_apm_power(const struct dmi_system_id *d) |
1955 | { | 1978 | { |
1956 | apm_info.get_power_status_broken = 1; | 1979 | apm_info.get_power_status_broken = 1; |
1957 | printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n"); | 1980 | printk(KERN_WARNING "BIOS strings suggest APM bugs, " |
1981 | "disabling power status reporting.\n"); | ||
1958 | return 0; | 1982 | return 0; |
1959 | } | 1983 | } |
1960 | 1984 | ||
@@ -1965,7 +1989,8 @@ static int __init broken_apm_power(const struct dmi_system_id *d) | |||
1965 | static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d) | 1989 | static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d) |
1966 | { | 1990 | { |
1967 | apm_info.get_power_status_swabinminutes = 1; | 1991 | apm_info.get_power_status_swabinminutes = 1; |
1968 | printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n"); | 1992 | printk(KERN_WARNING "BIOS strings suggest APM reports battery life " |
1993 | "in minutes and wrong byte order.\n"); | ||
1969 | return 0; | 1994 | return 0; |
1970 | } | 1995 | } |
1971 | 1996 | ||
@@ -1990,8 +2015,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { | |||
1990 | apm_is_horked, "Dell Inspiron 2500", | 2015 | apm_is_horked, "Dell Inspiron 2500", |
1991 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | 2016 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), |
1992 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), | 2017 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), |
1993 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), | 2018 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), |
1994 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | 2019 | DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, |
1995 | }, | 2020 | }, |
1996 | { /* Allow interrupts during suspend on Dell Inspiron laptops*/ | 2021 | { /* Allow interrupts during suspend on Dell Inspiron laptops*/ |
1997 | set_apm_ints, "Dell Inspiron", { | 2022 | set_apm_ints, "Dell Inspiron", { |
@@ -2014,15 +2039,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { | |||
2014 | apm_is_horked, "Dell Dimension 4100", | 2039 | apm_is_horked, "Dell Dimension 4100", |
2015 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | 2040 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), |
2016 | DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), | 2041 | DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), |
2017 | DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."), | 2042 | DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), |
2018 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | 2043 | DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, |
2019 | }, | 2044 | }, |
2020 | { /* Allow interrupts during suspend on Compaq Laptops*/ | 2045 | { /* Allow interrupts during suspend on Compaq Laptops*/ |
2021 | set_apm_ints, "Compaq 12XL125", | 2046 | set_apm_ints, "Compaq 12XL125", |
2022 | { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), | 2047 | { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), |
2023 | DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), | 2048 | DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), |
2024 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | 2049 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), |
2025 | DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, | 2050 | DMI_MATCH(DMI_BIOS_VERSION, "4.06"), }, |
2026 | }, | 2051 | }, |
2027 | { /* Allow interrupts during APM or the clock goes slow */ | 2052 | { /* Allow interrupts during APM or the clock goes slow */ |
2028 | set_apm_ints, "ASUSTeK", | 2053 | set_apm_ints, "ASUSTeK", |
@@ -2064,15 +2089,15 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { | |||
2064 | apm_is_horked, "Sharp PC-PJ/AX", | 2089 | apm_is_horked, "Sharp PC-PJ/AX", |
2065 | { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), | 2090 | { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), |
2066 | DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), | 2091 | DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), |
2067 | DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"), | 2092 | DMI_MATCH(DMI_BIOS_VENDOR, "SystemSoft"), |
2068 | DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, | 2093 | DMI_MATCH(DMI_BIOS_VERSION, "Version R2.08"), }, |
2069 | }, | 2094 | }, |
2070 | { /* APM crashes */ | 2095 | { /* APM crashes */ |
2071 | apm_is_horked, "Dell Inspiron 2500", | 2096 | apm_is_horked, "Dell Inspiron 2500", |
2072 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | 2097 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), |
2073 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), | 2098 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), |
2074 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), | 2099 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), |
2075 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | 2100 | DMI_MATCH(DMI_BIOS_VERSION, "A11"), }, |
2076 | }, | 2101 | }, |
2077 | { /* APM idle hangs */ | 2102 | { /* APM idle hangs */ |
2078 | apm_likes_to_melt, "Jabil AMD", | 2103 | apm_likes_to_melt, "Jabil AMD", |
@@ -2203,11 +2228,11 @@ static int __init apm_init(void) | |||
2203 | return -ENODEV; | 2228 | return -ENODEV; |
2204 | } | 2229 | } |
2205 | printk(KERN_INFO | 2230 | printk(KERN_INFO |
2206 | "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", | 2231 | "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", |
2207 | ((apm_info.bios.version >> 8) & 0xff), | 2232 | ((apm_info.bios.version >> 8) & 0xff), |
2208 | (apm_info.bios.version & 0xff), | 2233 | (apm_info.bios.version & 0xff), |
2209 | apm_info.bios.flags, | 2234 | apm_info.bios.flags, |
2210 | driver_version); | 2235 | driver_version); |
2211 | if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { | 2236 | if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { |
2212 | printk(KERN_INFO "apm: no 32 bit BIOS support\n"); | 2237 | printk(KERN_INFO "apm: no 32 bit BIOS support\n"); |
2213 | return -ENODEV; | 2238 | return -ENODEV; |
@@ -2256,14 +2281,12 @@ static int __init apm_init(void) | |||
2256 | apm_info.disabled = 1; | 2281 | apm_info.disabled = 1; |
2257 | return -ENODEV; | 2282 | return -ENODEV; |
2258 | } | 2283 | } |
2259 | if (PM_IS_ACTIVE()) { | 2284 | if (pm_flags & PM_ACPI) { |
2260 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | 2285 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); |
2261 | apm_info.disabled = 1; | 2286 | apm_info.disabled = 1; |
2262 | return -ENODEV; | 2287 | return -ENODEV; |
2263 | } | 2288 | } |
2264 | #ifdef CONFIG_PM_LEGACY | 2289 | pm_flags |= PM_APM; |
2265 | pm_active = 1; | ||
2266 | #endif | ||
2267 | 2290 | ||
2268 | /* | 2291 | /* |
2269 | * Set up a segment that references the real mode segment 0x40 | 2292 | * Set up a segment that references the real mode segment 0x40 |
@@ -2314,9 +2337,9 @@ static int __init apm_init(void) | |||
2314 | } | 2337 | } |
2315 | wake_up_process(kapmd_task); | 2338 | wake_up_process(kapmd_task); |
2316 | 2339 | ||
2317 | if (num_online_cpus() > 1 && !smp ) { | 2340 | if (num_online_cpus() > 1 && !smp) { |
2318 | printk(KERN_NOTICE | 2341 | printk(KERN_NOTICE |
2319 | "apm: disabled - APM is not SMP safe (power off active).\n"); | 2342 | "apm: disabled - APM is not SMP safe (power off active).\n"); |
2320 | return 0; | 2343 | return 0; |
2321 | } | 2344 | } |
2322 | 2345 | ||
@@ -2341,7 +2364,7 @@ static int __init apm_init(void) | |||
2341 | 2364 | ||
2342 | static void __exit apm_exit(void) | 2365 | static void __exit apm_exit(void) |
2343 | { | 2366 | { |
2344 | int error; | 2367 | int error; |
2345 | 2368 | ||
2346 | if (set_pm_idle) { | 2369 | if (set_pm_idle) { |
2347 | pm_idle = original_pm_idle; | 2370 | pm_idle = original_pm_idle; |
@@ -2366,9 +2389,7 @@ static void __exit apm_exit(void) | |||
2366 | kthread_stop(kapmd_task); | 2389 | kthread_stop(kapmd_task); |
2367 | kapmd_task = NULL; | 2390 | kapmd_task = NULL; |
2368 | } | 2391 | } |
2369 | #ifdef CONFIG_PM_LEGACY | 2392 | pm_flags &= ~PM_APM; |
2370 | pm_active = 0; | ||
2371 | #endif | ||
2372 | } | 2393 | } |
2373 | 2394 | ||
2374 | module_init(apm_init); | 2395 | module_init(apm_init); |
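The apm_32.c hunks above also retire the old CONFIG_PM_LEGACY bookkeeping (the #ifdef'd pm_active assignments and the PM_IS_ACTIVE() test) in favour of the pm_flags bitmask: apm_init() refuses to load when ACPI already owns power management (pm_flags & PM_ACPI), claims the legacy slot with pm_flags |= PM_APM, and apm_exit() drops the claim again. Below is a minimal user-space sketch of that claim/release pattern; the flag values are placeholders, not the kernel's definitions.

#include <stdio.h>

#define PM_APM  (1u << 0)       /* placeholder values, not the kernel's */
#define PM_ACPI (1u << 1)

static unsigned int pm_flags;   /* stand-in for the kernel-wide flag word */

/* Mirrors the apm_init() check: refuse to load if ACPI got there first. */
static int apm_claim(void)
{
        if (pm_flags & PM_ACPI)
                return -1;
        pm_flags |= PM_APM;
        return 0;
}

/* Mirrors apm_exit(): release the claim unconditionally. */
static void apm_release(void)
{
        pm_flags &= ~PM_APM;
}

int main(void)
{
        pm_flags = PM_ACPI;     /* pretend ACPI initialized first */
        printf("claim with ACPI active: %d\n", apm_claim());
        pm_flags = 0;
        printf("claim on a clean system: %d\n", apm_claim());
        apm_release();
        return 0;
}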
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 0e45981b2dd7..afd84463b712 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -38,15 +38,15 @@ void foo(void); | |||
38 | 38 | ||
39 | void foo(void) | 39 | void foo(void) |
40 | { | 40 | { |
41 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); | 41 | OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); |
42 | OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); | 42 | OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); |
43 | OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); | 43 | OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); |
44 | OFFSET(SIGCONTEXT_edx, sigcontext, edx); | 44 | OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); |
45 | OFFSET(SIGCONTEXT_esi, sigcontext, esi); | 45 | OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); |
46 | OFFSET(SIGCONTEXT_edi, sigcontext, edi); | 46 | OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); |
47 | OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); | 47 | OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); |
48 | OFFSET(SIGCONTEXT_esp, sigcontext, esp); | 48 | OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); |
49 | OFFSET(SIGCONTEXT_eip, sigcontext, eip); | 49 | OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); |
50 | BLANK(); | 50 | BLANK(); |
51 | 51 | ||
52 | OFFSET(CPUINFO_x86, cpuinfo_x86, x86); | 52 | OFFSET(CPUINFO_x86, cpuinfo_x86, x86); |
@@ -70,39 +70,38 @@ void foo(void) | |||
70 | OFFSET(TI_cpu, thread_info, cpu); | 70 | OFFSET(TI_cpu, thread_info, cpu); |
71 | BLANK(); | 71 | BLANK(); |
72 | 72 | ||
73 | OFFSET(GDS_size, Xgt_desc_struct, size); | 73 | OFFSET(GDS_size, desc_ptr, size); |
74 | OFFSET(GDS_address, Xgt_desc_struct, address); | 74 | OFFSET(GDS_address, desc_ptr, address); |
75 | OFFSET(GDS_pad, Xgt_desc_struct, pad); | ||
76 | BLANK(); | 75 | BLANK(); |
77 | 76 | ||
78 | OFFSET(PT_EBX, pt_regs, ebx); | 77 | OFFSET(PT_EBX, pt_regs, bx); |
79 | OFFSET(PT_ECX, pt_regs, ecx); | 78 | OFFSET(PT_ECX, pt_regs, cx); |
80 | OFFSET(PT_EDX, pt_regs, edx); | 79 | OFFSET(PT_EDX, pt_regs, dx); |
81 | OFFSET(PT_ESI, pt_regs, esi); | 80 | OFFSET(PT_ESI, pt_regs, si); |
82 | OFFSET(PT_EDI, pt_regs, edi); | 81 | OFFSET(PT_EDI, pt_regs, di); |
83 | OFFSET(PT_EBP, pt_regs, ebp); | 82 | OFFSET(PT_EBP, pt_regs, bp); |
84 | OFFSET(PT_EAX, pt_regs, eax); | 83 | OFFSET(PT_EAX, pt_regs, ax); |
85 | OFFSET(PT_DS, pt_regs, xds); | 84 | OFFSET(PT_DS, pt_regs, ds); |
86 | OFFSET(PT_ES, pt_regs, xes); | 85 | OFFSET(PT_ES, pt_regs, es); |
87 | OFFSET(PT_FS, pt_regs, xfs); | 86 | OFFSET(PT_FS, pt_regs, fs); |
88 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); | 87 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); |
89 | OFFSET(PT_EIP, pt_regs, eip); | 88 | OFFSET(PT_EIP, pt_regs, ip); |
90 | OFFSET(PT_CS, pt_regs, xcs); | 89 | OFFSET(PT_CS, pt_regs, cs); |
91 | OFFSET(PT_EFLAGS, pt_regs, eflags); | 90 | OFFSET(PT_EFLAGS, pt_regs, flags); |
92 | OFFSET(PT_OLDESP, pt_regs, esp); | 91 | OFFSET(PT_OLDESP, pt_regs, sp); |
93 | OFFSET(PT_OLDSS, pt_regs, xss); | 92 | OFFSET(PT_OLDSS, pt_regs, ss); |
94 | BLANK(); | 93 | BLANK(); |
95 | 94 | ||
96 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | 95 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); |
97 | OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); | 96 | OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); |
98 | BLANK(); | 97 | BLANK(); |
99 | 98 | ||
100 | OFFSET(pbe_address, pbe, address); | 99 | OFFSET(pbe_address, pbe, address); |
101 | OFFSET(pbe_orig_address, pbe, orig_address); | 100 | OFFSET(pbe_orig_address, pbe, orig_address); |
102 | OFFSET(pbe_next, pbe, next); | 101 | OFFSET(pbe_next, pbe, next); |
103 | 102 | ||
104 | /* Offset from the sysenter stack to tss.esp0 */ | 103 | /* Offset from the sysenter stack to tss.sp0 */ |
105 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - | 104 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - |
106 | sizeof(struct tss_struct)); | 105 | sizeof(struct tss_struct)); |
107 | 106 | ||
108 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 107 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
@@ -111,8 +110,6 @@ void foo(void) | |||
111 | DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); | 110 | DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); |
112 | DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); | 111 | DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); |
113 | 112 | ||
114 | DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); | ||
115 | |||
116 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 113 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
117 | 114 | ||
118 | #ifdef CONFIG_PARAVIRT | 115 | #ifdef CONFIG_PARAVIRT |
@@ -123,7 +120,7 @@ void foo(void) | |||
123 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | 120 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); |
124 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); | 121 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); |
125 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); | 122 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); |
126 | OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); | 123 | OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); |
127 | OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); | 124 | OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); |
128 | #endif | 125 | #endif |
129 | 126 | ||
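The asm-offsets_32.c hunk is a rename pass (SIGCONTEXT_eax becomes IA32_SIGCONTEXT_ax, Xgt_desc_struct becomes desc_ptr, pt_regs members lose their e-prefix); the generator mechanism itself is untouched. As a rough sketch of how that mechanism works, assuming the usual OFFSET()/DEFINE() macro shapes of this era: the macros emit "->NAME value" marker lines into the compiler's assembly output, and the build scripts scrape those markers into asm-offsets.h so entry code can refer to fields as, for example, PT_EBX(%esp).

/* Minimal asm-offsets-style generator; compile with "gcc -S" and grep the
 * resulting .s file for "->" lines. The macro bodies are assumed, not
 * copied from the kernel. */
#include <stddef.h>

struct demo_regs { long bx, cx, dx, si, di, bp, ax; };

#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem))

void foo(void)
{
        OFFSET(DEMO_PT_AX, demo_regs, ax);      /* -> byte offset of ax */
        OFFSET(DEMO_PT_BX, demo_regs, bx);
        DEFINE(DEMO_PT_SIZE, sizeof(struct demo_regs));
}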
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d1b6ed98774e..494e1e096ee6 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -38,7 +38,6 @@ int main(void) | |||
38 | #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) | 38 | #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) |
39 | ENTRY(state); | 39 | ENTRY(state); |
40 | ENTRY(flags); | 40 | ENTRY(flags); |
41 | ENTRY(thread); | ||
42 | ENTRY(pid); | 41 | ENTRY(pid); |
43 | BLANK(); | 42 | BLANK(); |
44 | #undef ENTRY | 43 | #undef ENTRY |
@@ -47,6 +46,9 @@ int main(void) | |||
47 | ENTRY(addr_limit); | 46 | ENTRY(addr_limit); |
48 | ENTRY(preempt_count); | 47 | ENTRY(preempt_count); |
49 | ENTRY(status); | 48 | ENTRY(status); |
49 | #ifdef CONFIG_IA32_EMULATION | ||
50 | ENTRY(sysenter_return); | ||
51 | #endif | ||
50 | BLANK(); | 52 | BLANK(); |
51 | #undef ENTRY | 53 | #undef ENTRY |
52 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) | 54 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) |
@@ -59,17 +61,31 @@ int main(void) | |||
59 | ENTRY(data_offset); | 61 | ENTRY(data_offset); |
60 | BLANK(); | 62 | BLANK(); |
61 | #undef ENTRY | 63 | #undef ENTRY |
64 | #ifdef CONFIG_PARAVIRT | ||
65 | BLANK(); | ||
66 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
67 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); | ||
68 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); | ||
69 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | ||
70 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); | ||
71 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); | ||
72 | OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); | ||
73 | OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); | ||
74 | OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); | ||
75 | #endif | ||
76 | |||
77 | |||
62 | #ifdef CONFIG_IA32_EMULATION | 78 | #ifdef CONFIG_IA32_EMULATION |
63 | #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) | 79 | #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) |
64 | ENTRY(eax); | 80 | ENTRY(ax); |
65 | ENTRY(ebx); | 81 | ENTRY(bx); |
66 | ENTRY(ecx); | 82 | ENTRY(cx); |
67 | ENTRY(edx); | 83 | ENTRY(dx); |
68 | ENTRY(esi); | 84 | ENTRY(si); |
69 | ENTRY(edi); | 85 | ENTRY(di); |
70 | ENTRY(ebp); | 86 | ENTRY(bp); |
71 | ENTRY(esp); | 87 | ENTRY(sp); |
72 | ENTRY(eip); | 88 | ENTRY(ip); |
73 | BLANK(); | 89 | BLANK(); |
74 | #undef ENTRY | 90 | #undef ENTRY |
75 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 91 | DEFINE(IA32_RT_SIGFRAME_sigcontext, |
@@ -81,14 +97,14 @@ int main(void) | |||
81 | DEFINE(pbe_next, offsetof(struct pbe, next)); | 97 | DEFINE(pbe_next, offsetof(struct pbe, next)); |
82 | BLANK(); | 98 | BLANK(); |
83 | #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) | 99 | #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) |
84 | ENTRY(rbx); | 100 | ENTRY(bx); |
85 | ENTRY(rbx); | 101 | ENTRY(bx); |
86 | ENTRY(rcx); | 102 | ENTRY(cx); |
87 | ENTRY(rdx); | 103 | ENTRY(dx); |
88 | ENTRY(rsp); | 104 | ENTRY(sp); |
89 | ENTRY(rbp); | 105 | ENTRY(bp); |
90 | ENTRY(rsi); | 106 | ENTRY(si); |
91 | ENTRY(rdi); | 107 | ENTRY(di); |
92 | ENTRY(r8); | 108 | ENTRY(r8); |
93 | ENTRY(r9); | 109 | ENTRY(r9); |
94 | ENTRY(r10); | 110 | ENTRY(r10); |
@@ -97,7 +113,7 @@ int main(void) | |||
97 | ENTRY(r13); | 113 | ENTRY(r13); |
98 | ENTRY(r14); | 114 | ENTRY(r14); |
99 | ENTRY(r15); | 115 | ENTRY(r15); |
100 | ENTRY(eflags); | 116 | ENTRY(flags); |
101 | BLANK(); | 117 | BLANK(); |
102 | #undef ENTRY | 118 | #undef ENTRY |
103 | #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) | 119 | #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) |
@@ -108,7 +124,7 @@ int main(void) | |||
108 | ENTRY(cr8); | 124 | ENTRY(cr8); |
109 | BLANK(); | 125 | BLANK(); |
110 | #undef ENTRY | 126 | #undef ENTRY |
111 | DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); | 127 | DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); |
112 | BLANK(); | 128 | BLANK(); |
113 | DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); | 129 | DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); |
114 | BLANK(); | 130 | BLANK(); |
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 0b9860530a6b..30f25a75fe28 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c | |||
@@ -1,8 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Implement 'Simple Boot Flag Specification 2.0' | 2 | * Implement 'Simple Boot Flag Specification 2.0' |
3 | */ | 3 | */ |
4 | |||
5 | |||
6 | #include <linux/types.h> | 4 | #include <linux/types.h> |
7 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
8 | #include <linux/init.h> | 6 | #include <linux/init.h> |
@@ -14,40 +12,38 @@ | |||
14 | 12 | ||
15 | #include <linux/mc146818rtc.h> | 13 | #include <linux/mc146818rtc.h> |
16 | 14 | ||
17 | |||
18 | #define SBF_RESERVED (0x78) | 15 | #define SBF_RESERVED (0x78) |
19 | #define SBF_PNPOS (1<<0) | 16 | #define SBF_PNPOS (1<<0) |
20 | #define SBF_BOOTING (1<<1) | 17 | #define SBF_BOOTING (1<<1) |
21 | #define SBF_DIAG (1<<2) | 18 | #define SBF_DIAG (1<<2) |
22 | #define SBF_PARITY (1<<7) | 19 | #define SBF_PARITY (1<<7) |
23 | 20 | ||
24 | |||
25 | int sbf_port __initdata = -1; /* set via acpi_boot_init() */ | 21 | int sbf_port __initdata = -1; /* set via acpi_boot_init() */ |
26 | 22 | ||
27 | |||
28 | static int __init parity(u8 v) | 23 | static int __init parity(u8 v) |
29 | { | 24 | { |
30 | int x = 0; | 25 | int x = 0; |
31 | int i; | 26 | int i; |
32 | 27 | ||
33 | for(i=0;i<8;i++) | 28 | for (i = 0; i < 8; i++) { |
34 | { | 29 | x ^= (v & 1); |
35 | x^=(v&1); | 30 | v >>= 1; |
36 | v>>=1; | ||
37 | } | 31 | } |
32 | |||
38 | return x; | 33 | return x; |
39 | } | 34 | } |
40 | 35 | ||
41 | static void __init sbf_write(u8 v) | 36 | static void __init sbf_write(u8 v) |
42 | { | 37 | { |
43 | unsigned long flags; | 38 | unsigned long flags; |
44 | if(sbf_port != -1) | 39 | |
45 | { | 40 | if (sbf_port != -1) { |
46 | v &= ~SBF_PARITY; | 41 | v &= ~SBF_PARITY; |
47 | if(!parity(v)) | 42 | if (!parity(v)) |
48 | v|=SBF_PARITY; | 43 | v |= SBF_PARITY; |
49 | 44 | ||
50 | printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v); | 45 | printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", |
46 | sbf_port, v); | ||
51 | 47 | ||
52 | spin_lock_irqsave(&rtc_lock, flags); | 48 | spin_lock_irqsave(&rtc_lock, flags); |
53 | CMOS_WRITE(v, sbf_port); | 49 | CMOS_WRITE(v, sbf_port); |
@@ -57,33 +53,41 @@ static void __init sbf_write(u8 v) | |||
57 | 53 | ||
58 | static u8 __init sbf_read(void) | 54 | static u8 __init sbf_read(void) |
59 | { | 55 | { |
60 | u8 v; | ||
61 | unsigned long flags; | 56 | unsigned long flags; |
62 | if(sbf_port == -1) | 57 | u8 v; |
58 | |||
59 | if (sbf_port == -1) | ||
63 | return 0; | 60 | return 0; |
61 | |||
64 | spin_lock_irqsave(&rtc_lock, flags); | 62 | spin_lock_irqsave(&rtc_lock, flags); |
65 | v = CMOS_READ(sbf_port); | 63 | v = CMOS_READ(sbf_port); |
66 | spin_unlock_irqrestore(&rtc_lock, flags); | 64 | spin_unlock_irqrestore(&rtc_lock, flags); |
65 | |||
67 | return v; | 66 | return v; |
68 | } | 67 | } |
69 | 68 | ||
70 | static int __init sbf_value_valid(u8 v) | 69 | static int __init sbf_value_valid(u8 v) |
71 | { | 70 | { |
72 | if(v&SBF_RESERVED) /* Reserved bits */ | 71 | if (v & SBF_RESERVED) /* Reserved bits */ |
73 | return 0; | 72 | return 0; |
74 | if(!parity(v)) | 73 | if (!parity(v)) |
75 | return 0; | 74 | return 0; |
75 | |||
76 | return 1; | 76 | return 1; |
77 | } | 77 | } |
78 | 78 | ||
79 | static int __init sbf_init(void) | 79 | static int __init sbf_init(void) |
80 | { | 80 | { |
81 | u8 v; | 81 | u8 v; |
82 | if(sbf_port == -1) | 82 | |
83 | if (sbf_port == -1) | ||
83 | return 0; | 84 | return 0; |
85 | |||
84 | v = sbf_read(); | 86 | v = sbf_read(); |
85 | if(!sbf_value_valid(v)) | 87 | if (!sbf_value_valid(v)) { |
86 | printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v); | 88 | printk(KERN_WARNING "Simple Boot Flag value 0x%x read from " |
89 | "CMOS RAM was invalid\n", v); | ||
90 | } | ||
87 | 91 | ||
88 | v &= ~SBF_RESERVED; | 92 | v &= ~SBF_RESERVED; |
89 | v &= ~SBF_BOOTING; | 93 | v &= ~SBF_BOOTING; |
@@ -92,7 +96,7 @@ static int __init sbf_init(void) | |||
92 | v |= SBF_PNPOS; | 96 | v |= SBF_PNPOS; |
93 | #endif | 97 | #endif |
94 | sbf_write(v); | 98 | sbf_write(v); |
99 | |||
95 | return 0; | 100 | return 0; |
96 | } | 101 | } |
97 | |||
98 | module_init(sbf_init); | 102 | module_init(sbf_init); |
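The bootflag.c changes are whitespace and printk reflow only, so the Simple Boot Flag logic is unchanged: bits 3-6 of the CMOS byte are reserved (SBF_RESERVED is 0x78), bit 7 carries a parity bit, and sbf_write() forces the byte to odd parity before storing it. A small user-space sketch of the same composition and validity check, reusing the constants from the hunk above:

#include <stdio.h>
#include <stdint.h>

#define SBF_RESERVED 0x78
#define SBF_PNPOS    (1 << 0)
#define SBF_BOOTING  (1 << 1)
#define SBF_DIAG     (1 << 2)
#define SBF_PARITY   (1 << 7)

/* XOR of all eight bits: 1 means the byte already has odd parity. */
static int parity(uint8_t v)
{
        int x = 0, i;

        for (i = 0; i < 8; i++) {
                x ^= v & 1;
                v >>= 1;
        }
        return x;
}

/* Same fixup sbf_write() applies before writing the byte to CMOS. */
static uint8_t sbf_fixup(uint8_t v)
{
        v &= ~SBF_PARITY;
        if (!parity(v))
                v |= SBF_PARITY;        /* force odd parity */
        return v;
}

int main(void)
{
        uint8_t v = sbf_fixup(SBF_PNPOS | SBF_BOOTING);

        printf("flag byte 0x%02x, reserved clear: %d, parity ok: %d\n",
               v, !(v & SBF_RESERVED), parity(v));
        return 0;
}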
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c index 9a189cef6404..8f520f93ffd4 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/bugs_64.c | |||
@@ -13,7 +13,6 @@ | |||
13 | void __init check_bugs(void) | 13 | void __init check_bugs(void) |
14 | { | 14 | { |
15 | identify_cpu(&boot_cpu_data); | 15 | identify_cpu(&boot_cpu_data); |
16 | mtrr_bp_init(); | ||
17 | #if !defined(CONFIG_SMP) | 16 | #if !defined(CONFIG_SMP) |
18 | printk("CPU: "); | 17 | printk("CPU: "); |
19 | print_cpu_info(&boot_cpu_data); | 18 | print_cpu_info(&boot_cpu_data); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfdb2f3bd763..a0c4d7c5dbd7 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -3,6 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
6 | obj-y += feature_names.o | ||
6 | 7 | ||
7 | obj-$(CONFIG_X86_32) += common.o proc.o bugs.o | 8 | obj-$(CONFIG_X86_32) += common.o proc.o bugs.o |
8 | obj-$(CONFIG_X86_32) += amd.o | 9 | obj-$(CONFIG_X86_32) += amd.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 3e91d3ee26ec..238468ae1993 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
45 | &regs[CR_ECX], &regs[CR_EDX]); | 45 | &regs[CR_ECX], &regs[CR_EDX]); |
46 | 46 | ||
47 | if (regs[cb->reg] & (1 << cb->bit)) | 47 | if (regs[cb->reg] & (1 << cb->bit)) |
48 | set_bit(cb->feature, c->x86_capability); | 48 | set_cpu_cap(c, cb->feature); |
49 | } | 49 | } |
50 | } | 50 | } |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1ff88c7f45cf..693e353999cd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_broken(void) | |||
63 | 63 | ||
64 | int force_mwait __cpuinitdata; | 64 | int force_mwait __cpuinitdata; |
65 | 65 | ||
66 | void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
67 | { | ||
68 | if (cpuid_eax(0x80000000) >= 0x80000007) { | ||
69 | c->x86_power = cpuid_edx(0x80000007); | ||
70 | if (c->x86_power & (1<<8)) | ||
71 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
72 | } | ||
73 | } | ||
74 | |||
66 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 75 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
67 | { | 76 | { |
68 | u32 l, h; | 77 | u32 l, h; |
@@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
85 | } | 94 | } |
86 | #endif | 95 | #endif |
87 | 96 | ||
97 | early_init_amd(c); | ||
98 | |||
88 | /* | 99 | /* |
89 | * FIXME: We should handle the K5 here. Set up the write | 100 | * FIXME: We should handle the K5 here. Set up the write |
90 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | 101 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, |
@@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
257 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 268 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
258 | } | 269 | } |
259 | 270 | ||
260 | if (cpuid_eax(0x80000000) >= 0x80000007) { | ||
261 | c->x86_power = cpuid_edx(0x80000007); | ||
262 | if (c->x86_power & (1<<8)) | ||
263 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
264 | } | ||
265 | |||
266 | #ifdef CONFIG_X86_HT | 271 | #ifdef CONFIG_X86_HT |
267 | /* | 272 | /* |
268 | * On a AMD multi core setup the lower bits of the APIC id | 273 | * On a AMD multi core setup the lower bits of the APIC id |
@@ -295,12 +300,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
295 | local_apic_timer_disabled = 1; | 300 | local_apic_timer_disabled = 1; |
296 | #endif | 301 | #endif |
297 | 302 | ||
298 | if (c->x86 == 0x10 && !force_mwait) | ||
299 | clear_bit(X86_FEATURE_MWAIT, c->x86_capability); | ||
300 | |||
301 | /* K6s reports MCEs but don't actually have all the MSRs */ | 303 | /* K6s reports MCEs but don't actually have all the MSRs */ |
302 | if (c->x86 < 6) | 304 | if (c->x86 < 6) |
303 | clear_bit(X86_FEATURE_MCE, c->x86_capability); | 305 | clear_bit(X86_FEATURE_MCE, c->x86_capability); |
306 | |||
307 | if (cpu_has_xmm2) | ||
308 | set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); | ||
304 | } | 309 | } |
305 | 310 | ||
306 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 311 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
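The amd.c change splits out early_init_amd() so the constant-TSC check runs during early CPU detection (early_cpu_detect() calls it, per the common.c hunk further down) instead of only from init_amd(). The bit it tests, EDX bit 8 of CPUID leaf 0x80000007, advertises a TSC that ticks at a constant rate. A user-space probe of the same bit, assuming GCC's <cpuid.h> helper:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the requested leaf is unsupported. */
        if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
                printf("CPUID leaf 0x80000007 not available\n");
                return 1;
        }
        /* Bit 8 of EDX is what the kernel records as X86_FEATURE_CONSTANT_TSC. */
        printf("constant TSC: %s\n", (edx & (1u << 8)) ? "yes" : "no");
        return 0;
}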
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 205fd5ba57f7..9b95edcfc6ae 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/utsname.h> | 11 | #include <linux/utsname.h> |
12 | #include <asm/bugs.h> | 12 | #include <asm/bugs.h> |
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <asm/processor-flags.h> | ||
14 | #include <asm/i387.h> | 15 | #include <asm/i387.h> |
15 | #include <asm/msr.h> | 16 | #include <asm/msr.h> |
16 | #include <asm/paravirt.h> | 17 | #include <asm/paravirt.h> |
@@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium); | |||
35 | static int __init no_387(char *s) | 36 | static int __init no_387(char *s) |
36 | { | 37 | { |
37 | boot_cpu_data.hard_math = 0; | 38 | boot_cpu_data.hard_math = 0; |
38 | write_cr0(0xE | read_cr0()); | 39 | write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); |
39 | return 1; | 40 | return 1; |
40 | } | 41 | } |
41 | 42 | ||
@@ -153,7 +154,7 @@ static void __init check_config(void) | |||
153 | * If we configured ourselves for a TSC, we'd better have one! | 154 | * If we configured ourselves for a TSC, we'd better have one! |
154 | */ | 155 | */ |
155 | #ifdef CONFIG_X86_TSC | 156 | #ifdef CONFIG_X86_TSC |
156 | if (!cpu_has_tsc && !tsc_disable) | 157 | if (!cpu_has_tsc) |
157 | panic("Kernel compiled for Pentium+, requires TSC feature!"); | 158 | panic("Kernel compiled for Pentium+, requires TSC feature!"); |
158 | #endif | 159 | #endif |
159 | 160 | ||
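In the no_387() handler the magic 0xE is spelled out as X86_CR0_TS | X86_CR0_EM | X86_CR0_MP. With TS at bit 3, EM at bit 2 and MP at bit 1, that is 0x8 | 0x4 | 0x2 = 0xE, so the value passed to write_cr0() is identical and only the readability changes. A throwaway check of that arithmetic:

#include <stdio.h>

#define X86_CR0_MP (1UL << 1)   /* monitor coprocessor */
#define X86_CR0_EM (1UL << 2)   /* emulate FPU */
#define X86_CR0_TS (1UL << 3)   /* task switched */

int main(void)
{
        unsigned long mask = X86_CR0_TS | X86_CR0_EM | X86_CR0_MP;

        printf("mask = 0x%lx (old literal was 0xE)\n", mask);
        return 0;
}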
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2fcf2051bdb..f86a3c4a2669 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -22,43 +22,48 @@ | |||
22 | #include "cpu.h" | 22 | #include "cpu.h" |
23 | 23 | ||
24 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | 24 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { |
25 | [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, | 25 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, |
26 | [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, | 26 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
27 | [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, | 27 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, |
28 | [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, | 28 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, |
29 | /* | 29 | /* |
30 | * Segments used for calling PnP BIOS have byte granularity. | 30 | * Segments used for calling PnP BIOS have byte granularity. |
31 | * They code segments and data segments have fixed 64k limits, | 31 | * They code segments and data segments have fixed 64k limits, |
32 | * the transfer segment sizes are set at run time. | 32 | * the transfer segment sizes are set at run time. |
33 | */ | 33 | */ |
34 | [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | 34 | /* 32-bit code */ |
35 | [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ | 35 | [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, |
36 | [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ | 36 | /* 16-bit code */ |
37 | [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ | 37 | [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, |
38 | [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ | 38 | /* 16-bit data */ |
39 | [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, | ||
40 | /* 16-bit data */ | ||
41 | [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, | ||
42 | /* 16-bit data */ | ||
43 | [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, | ||
39 | /* | 44 | /* |
40 | * The APM segments have byte granularity and their bases | 45 | * The APM segments have byte granularity and their bases |
41 | * are set at run time. All have 64k limits. | 46 | * are set at run time. All have 64k limits. |
42 | */ | 47 | */ |
43 | [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | 48 | /* 32-bit code */ |
49 | [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, | ||
44 | /* 16-bit code */ | 50 | /* 16-bit code */ |
45 | [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, | 51 | [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, |
46 | [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ | 52 | /* data */ |
53 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | ||
47 | 54 | ||
48 | [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, | 55 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
49 | [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, | 56 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, |
50 | } }; | 57 | } }; |
51 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 58 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
52 | 59 | ||
60 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | ||
61 | |||
53 | static int cachesize_override __cpuinitdata = -1; | 62 | static int cachesize_override __cpuinitdata = -1; |
54 | static int disable_x86_fxsr __cpuinitdata; | ||
55 | static int disable_x86_serial_nr __cpuinitdata = 1; | 63 | static int disable_x86_serial_nr __cpuinitdata = 1; |
56 | static int disable_x86_sep __cpuinitdata; | ||
57 | 64 | ||
58 | struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; | 65 | struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; |
59 | 66 | ||
60 | extern int disable_pse; | ||
61 | |||
62 | static void __cpuinit default_init(struct cpuinfo_x86 * c) | 67 | static void __cpuinit default_init(struct cpuinfo_x86 * c) |
63 | { | 68 | { |
64 | /* Not much we can do here... */ | 69 | /* Not much we can do here... */ |
@@ -207,16 +212,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) | |||
207 | 212 | ||
208 | static int __init x86_fxsr_setup(char * s) | 213 | static int __init x86_fxsr_setup(char * s) |
209 | { | 214 | { |
210 | /* Tell all the other CPUs to not use it... */ | 215 | setup_clear_cpu_cap(X86_FEATURE_FXSR); |
211 | disable_x86_fxsr = 1; | 216 | setup_clear_cpu_cap(X86_FEATURE_XMM); |
212 | |||
213 | /* | ||
214 | * ... and clear the bits early in the boot_cpu_data | ||
215 | * so that the bootup process doesn't try to do this | ||
216 | * either. | ||
217 | */ | ||
218 | clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); | ||
219 | clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); | ||
220 | return 1; | 217 | return 1; |
221 | } | 218 | } |
222 | __setup("nofxsr", x86_fxsr_setup); | 219 | __setup("nofxsr", x86_fxsr_setup); |
@@ -224,7 +221,7 @@ __setup("nofxsr", x86_fxsr_setup); | |||
224 | 221 | ||
225 | static int __init x86_sep_setup(char * s) | 222 | static int __init x86_sep_setup(char * s) |
226 | { | 223 | { |
227 | disable_x86_sep = 1; | 224 | setup_clear_cpu_cap(X86_FEATURE_SEP); |
228 | return 1; | 225 | return 1; |
229 | } | 226 | } |
230 | __setup("nosep", x86_sep_setup); | 227 | __setup("nosep", x86_sep_setup); |
@@ -261,10 +258,10 @@ static int __cpuinit have_cpuid_p(void) | |||
261 | void __init cpu_detect(struct cpuinfo_x86 *c) | 258 | void __init cpu_detect(struct cpuinfo_x86 *c) |
262 | { | 259 | { |
263 | /* Get vendor name */ | 260 | /* Get vendor name */ |
264 | cpuid(0x00000000, &c->cpuid_level, | 261 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, |
265 | (int *)&c->x86_vendor_id[0], | 262 | (unsigned int *)&c->x86_vendor_id[0], |
266 | (int *)&c->x86_vendor_id[8], | 263 | (unsigned int *)&c->x86_vendor_id[8], |
267 | (int *)&c->x86_vendor_id[4]); | 264 | (unsigned int *)&c->x86_vendor_id[4]); |
268 | 265 | ||
269 | c->x86 = 4; | 266 | c->x86 = 4; |
270 | if (c->cpuid_level >= 0x00000001) { | 267 | if (c->cpuid_level >= 0x00000001) { |
@@ -277,10 +274,39 @@ void __init cpu_detect(struct cpuinfo_x86 *c) | |||
277 | if (c->x86 >= 0x6) | 274 | if (c->x86 >= 0x6) |
278 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 275 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
279 | c->x86_mask = tfms & 15; | 276 | c->x86_mask = tfms & 15; |
280 | if (cap0 & (1<<19)) | 277 | if (cap0 & (1<<19)) { |
281 | c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; | 278 | c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; |
279 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | ||
280 | } | ||
282 | } | 281 | } |
283 | } | 282 | } |
283 | static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) | ||
284 | { | ||
285 | u32 tfms, xlvl; | ||
286 | unsigned int ebx; | ||
287 | |||
288 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
289 | if (have_cpuid_p()) { | ||
290 | /* Intel-defined flags: level 0x00000001 */ | ||
291 | if (c->cpuid_level >= 0x00000001) { | ||
292 | u32 capability, excap; | ||
293 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | ||
294 | c->x86_capability[0] = capability; | ||
295 | c->x86_capability[4] = excap; | ||
296 | } | ||
297 | |||
298 | /* AMD-defined flags: level 0x80000001 */ | ||
299 | xlvl = cpuid_eax(0x80000000); | ||
300 | if ((xlvl & 0xffff0000) == 0x80000000) { | ||
301 | if (xlvl >= 0x80000001) { | ||
302 | c->x86_capability[1] = cpuid_edx(0x80000001); | ||
303 | c->x86_capability[6] = cpuid_ecx(0x80000001); | ||
304 | } | ||
305 | } | ||
306 | |||
307 | } | ||
308 | |||
309 | } | ||
284 | 310 | ||
285 | /* Do minimum CPU detection early. | 311 | /* Do minimum CPU detection early. |
286 | Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | 312 | Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. |
@@ -293,6 +319,7 @@ static void __init early_cpu_detect(void) | |||
293 | struct cpuinfo_x86 *c = &boot_cpu_data; | 319 | struct cpuinfo_x86 *c = &boot_cpu_data; |
294 | 320 | ||
295 | c->x86_cache_alignment = 32; | 321 | c->x86_cache_alignment = 32; |
322 | c->x86_clflush_size = 32; | ||
296 | 323 | ||
297 | if (!have_cpuid_p()) | 324 | if (!have_cpuid_p()) |
298 | return; | 325 | return; |
@@ -300,19 +327,30 @@ static void __init early_cpu_detect(void) | |||
300 | cpu_detect(c); | 327 | cpu_detect(c); |
301 | 328 | ||
302 | get_cpu_vendor(c, 1); | 329 | get_cpu_vendor(c, 1); |
330 | |||
331 | switch (c->x86_vendor) { | ||
332 | case X86_VENDOR_AMD: | ||
333 | early_init_amd(c); | ||
334 | break; | ||
335 | case X86_VENDOR_INTEL: | ||
336 | early_init_intel(c); | ||
337 | break; | ||
338 | } | ||
339 | |||
340 | early_get_cap(c); | ||
303 | } | 341 | } |
304 | 342 | ||
305 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) | 343 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
306 | { | 344 | { |
307 | u32 tfms, xlvl; | 345 | u32 tfms, xlvl; |
308 | int ebx; | 346 | unsigned int ebx; |
309 | 347 | ||
310 | if (have_cpuid_p()) { | 348 | if (have_cpuid_p()) { |
311 | /* Get vendor name */ | 349 | /* Get vendor name */ |
312 | cpuid(0x00000000, &c->cpuid_level, | 350 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, |
313 | (int *)&c->x86_vendor_id[0], | 351 | (unsigned int *)&c->x86_vendor_id[0], |
314 | (int *)&c->x86_vendor_id[8], | 352 | (unsigned int *)&c->x86_vendor_id[8], |
315 | (int *)&c->x86_vendor_id[4]); | 353 | (unsigned int *)&c->x86_vendor_id[4]); |
316 | 354 | ||
317 | get_cpu_vendor(c, 0); | 355 | get_cpu_vendor(c, 0); |
318 | /* Initialize the standard set of capabilities */ | 356 | /* Initialize the standard set of capabilities */ |
@@ -357,8 +395,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
357 | init_scattered_cpuid_features(c); | 395 | init_scattered_cpuid_features(c); |
358 | } | 396 | } |
359 | 397 | ||
360 | early_intel_workaround(c); | ||
361 | |||
362 | #ifdef CONFIG_X86_HT | 398 | #ifdef CONFIG_X86_HT |
363 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; | 399 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; |
364 | #endif | 400 | #endif |
@@ -392,7 +428,7 @@ __setup("serialnumber", x86_serial_nr_setup); | |||
392 | /* | 428 | /* |
393 | * This does the hard work of actually picking apart the CPU stuff... | 429 | * This does the hard work of actually picking apart the CPU stuff... |
394 | */ | 430 | */ |
395 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | 431 | void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
396 | { | 432 | { |
397 | int i; | 433 | int i; |
398 | 434 | ||
@@ -418,20 +454,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
418 | 454 | ||
419 | generic_identify(c); | 455 | generic_identify(c); |
420 | 456 | ||
421 | printk(KERN_DEBUG "CPU: After generic identify, caps:"); | 457 | if (this_cpu->c_identify) |
422 | for (i = 0; i < NCAPINTS; i++) | ||
423 | printk(" %08lx", c->x86_capability[i]); | ||
424 | printk("\n"); | ||
425 | |||
426 | if (this_cpu->c_identify) { | ||
427 | this_cpu->c_identify(c); | 458 | this_cpu->c_identify(c); |
428 | 459 | ||
429 | printk(KERN_DEBUG "CPU: After vendor identify, caps:"); | ||
430 | for (i = 0; i < NCAPINTS; i++) | ||
431 | printk(" %08lx", c->x86_capability[i]); | ||
432 | printk("\n"); | ||
433 | } | ||
434 | |||
435 | /* | 460 | /* |
436 | * Vendor-specific initialization. In this section we | 461 | * Vendor-specific initialization. In this section we |
437 | * canonicalize the feature flags, meaning if there are | 462 | * canonicalize the feature flags, meaning if there are |
@@ -453,23 +478,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
453 | * we do "generic changes." | 478 | * we do "generic changes." |
454 | */ | 479 | */ |
455 | 480 | ||
456 | /* TSC disabled? */ | ||
457 | if ( tsc_disable ) | ||
458 | clear_bit(X86_FEATURE_TSC, c->x86_capability); | ||
459 | |||
460 | /* FXSR disabled? */ | ||
461 | if (disable_x86_fxsr) { | ||
462 | clear_bit(X86_FEATURE_FXSR, c->x86_capability); | ||
463 | clear_bit(X86_FEATURE_XMM, c->x86_capability); | ||
464 | } | ||
465 | |||
466 | /* SEP disabled? */ | ||
467 | if (disable_x86_sep) | ||
468 | clear_bit(X86_FEATURE_SEP, c->x86_capability); | ||
469 | |||
470 | if (disable_pse) | ||
471 | clear_bit(X86_FEATURE_PSE, c->x86_capability); | ||
472 | |||
473 | /* If the model name is still unset, do table lookup. */ | 481 | /* If the model name is still unset, do table lookup. */ |
474 | if ( !c->x86_model_id[0] ) { | 482 | if ( !c->x86_model_id[0] ) { |
475 | char *p; | 483 | char *p; |
@@ -482,13 +490,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
482 | c->x86, c->x86_model); | 490 | c->x86, c->x86_model); |
483 | } | 491 | } |
484 | 492 | ||
485 | /* Now the feature flags better reflect actual CPU features! */ | ||
486 | |||
487 | printk(KERN_DEBUG "CPU: After all inits, caps:"); | ||
488 | for (i = 0; i < NCAPINTS; i++) | ||
489 | printk(" %08lx", c->x86_capability[i]); | ||
490 | printk("\n"); | ||
491 | |||
492 | /* | 493 | /* |
493 | * On SMP, boot_cpu_data holds the common feature set between | 494 | * On SMP, boot_cpu_data holds the common feature set between |
494 | * all CPUs; so make sure that we indicate which features are | 495 | * all CPUs; so make sure that we indicate which features are |
@@ -501,8 +502,14 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
501 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 502 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
502 | } | 503 | } |
503 | 504 | ||
505 | /* Clear all flags overriden by options */ | ||
506 | for (i = 0; i < NCAPINTS; i++) | ||
507 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | ||
508 | |||
504 | /* Init Machine Check Exception if available. */ | 509 | /* Init Machine Check Exception if available. */ |
505 | mcheck_init(c); | 510 | mcheck_init(c); |
511 | |||
512 | select_idle_routine(c); | ||
506 | } | 513 | } |
507 | 514 | ||
508 | void __init identify_boot_cpu(void) | 515 | void __init identify_boot_cpu(void) |
@@ -510,7 +517,6 @@ void __init identify_boot_cpu(void) | |||
510 | identify_cpu(&boot_cpu_data); | 517 | identify_cpu(&boot_cpu_data); |
511 | sysenter_setup(); | 518 | sysenter_setup(); |
512 | enable_sep_cpu(); | 519 | enable_sep_cpu(); |
513 | mtrr_bp_init(); | ||
514 | } | 520 | } |
515 | 521 | ||
516 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 522 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
@@ -567,6 +573,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
567 | } | 573 | } |
568 | #endif | 574 | #endif |
569 | 575 | ||
576 | static __init int setup_noclflush(char *arg) | ||
577 | { | ||
578 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | ||
579 | return 1; | ||
580 | } | ||
581 | __setup("noclflush", setup_noclflush); | ||
582 | |||
570 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | 583 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) |
571 | { | 584 | { |
572 | char *vendor = NULL; | 585 | char *vendor = NULL; |
@@ -590,6 +603,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
590 | printk("\n"); | 603 | printk("\n"); |
591 | } | 604 | } |
592 | 605 | ||
606 | static __init int setup_disablecpuid(char *arg) | ||
607 | { | ||
608 | int bit; | ||
609 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | ||
610 | setup_clear_cpu_cap(bit); | ||
611 | else | ||
612 | return 0; | ||
613 | return 1; | ||
614 | } | ||
615 | __setup("clearcpuid=", setup_disablecpuid); | ||
616 | |||
593 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 617 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
594 | 618 | ||
595 | /* This is hacky. :) | 619 | /* This is hacky. :) |
@@ -599,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | |||
599 | * They will insert themselves into the cpu_devs structure. | 623 | * They will insert themselves into the cpu_devs structure. |
600 | * Then, when cpu_init() is called, we can just iterate over that array. | 624 | * Then, when cpu_init() is called, we can just iterate over that array. |
601 | */ | 625 | */ |
602 | |||
603 | extern int intel_cpu_init(void); | ||
604 | extern int cyrix_init_cpu(void); | ||
605 | extern int nsc_init_cpu(void); | ||
606 | extern int amd_init_cpu(void); | ||
607 | extern int centaur_init_cpu(void); | ||
608 | extern int transmeta_init_cpu(void); | ||
609 | extern int nexgen_init_cpu(void); | ||
610 | extern int umc_init_cpu(void); | ||
611 | |||
612 | void __init early_cpu_init(void) | 626 | void __init early_cpu_init(void) |
613 | { | 627 | { |
614 | intel_cpu_init(); | 628 | intel_cpu_init(); |
@@ -620,21 +634,13 @@ void __init early_cpu_init(void) | |||
620 | nexgen_init_cpu(); | 634 | nexgen_init_cpu(); |
621 | umc_init_cpu(); | 635 | umc_init_cpu(); |
622 | early_cpu_detect(); | 636 | early_cpu_detect(); |
623 | |||
624 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
625 | /* pse is not compatible with on-the-fly unmapping, | ||
626 | * disable it even if the cpus claim to support it. | ||
627 | */ | ||
628 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
629 | disable_pse = 1; | ||
630 | #endif | ||
631 | } | 637 | } |
632 | 638 | ||
633 | /* Make sure %fs is initialized properly in idle threads */ | 639 | /* Make sure %fs is initialized properly in idle threads */ |
634 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 640 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
635 | { | 641 | { |
636 | memset(regs, 0, sizeof(struct pt_regs)); | 642 | memset(regs, 0, sizeof(struct pt_regs)); |
637 | regs->xfs = __KERNEL_PERCPU; | 643 | regs->fs = __KERNEL_PERCPU; |
638 | return regs; | 644 | return regs; |
639 | } | 645 | } |
640 | 646 | ||
@@ -642,7 +648,7 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | |||
642 | * it's on the real one. */ | 648 | * it's on the real one. */ |
643 | void switch_to_new_gdt(void) | 649 | void switch_to_new_gdt(void) |
644 | { | 650 | { |
645 | struct Xgt_desc_struct gdt_descr; | 651 | struct desc_ptr gdt_descr; |
646 | 652 | ||
647 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | 653 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); |
648 | gdt_descr.size = GDT_SIZE - 1; | 654 | gdt_descr.size = GDT_SIZE - 1; |
@@ -672,12 +678,6 @@ void __cpuinit cpu_init(void) | |||
672 | 678 | ||
673 | if (cpu_has_vme || cpu_has_tsc || cpu_has_de) | 679 | if (cpu_has_vme || cpu_has_tsc || cpu_has_de) |
674 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 680 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
675 | if (tsc_disable && cpu_has_tsc) { | ||
676 | printk(KERN_NOTICE "Disabling TSC...\n"); | ||
677 | /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | ||
678 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
679 | set_in_cr4(X86_CR4_TSD); | ||
680 | } | ||
681 | 681 | ||
682 | load_idt(&idt_descr); | 682 | load_idt(&idt_descr); |
683 | switch_to_new_gdt(); | 683 | switch_to_new_gdt(); |
@@ -691,7 +691,7 @@ void __cpuinit cpu_init(void) | |||
691 | BUG(); | 691 | BUG(); |
692 | enter_lazy_tlb(&init_mm, curr); | 692 | enter_lazy_tlb(&init_mm, curr); |
693 | 693 | ||
694 | load_esp0(t, thread); | 694 | load_sp0(t, thread); |
695 | set_tss_desc(cpu,t); | 695 | set_tss_desc(cpu,t); |
696 | load_TR_desc(); | 696 | load_TR_desc(); |
697 | load_LDT(&init_mm.context); | 697 | load_LDT(&init_mm.context); |
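The common.c rework replaces the scattered disable_x86_fxsr/disable_x86_sep variables and the tsc_disable/disable_pse checks with a single cleared_cpu_caps[] array: boot options such as nofxsr, nosep and the new noclflush and clearcpuid= record the bits they want gone via setup_clear_cpu_cap(), and identify_cpu() later strips those bits from every CPU's capability words (the hunk does this with ^=, which clears a bit provided it was set). A stand-alone sketch of that record-then-apply pattern, using plain arrays and &= ~ rather than the kernel's bitmap helpers:

#include <stdio.h>

#define NCAPINTS 8

static unsigned int cleared_cpu_caps[NCAPINTS];   /* filled from boot options */

/* Analogue of setup_clear_cpu_cap(): remember that a feature was disabled. */
static void setup_clear_cap(int feature)
{
        cleared_cpu_caps[feature / 32] |= 1u << (feature % 32);
}

/* Analogue of the identify_cpu() loop: drop every overridden bit. */
static void apply_cleared_caps(unsigned int caps[NCAPINTS])
{
        int i;

        for (i = 0; i < NCAPINTS; i++)
                caps[i] &= ~cleared_cpu_caps[i];
}

int main(void)
{
        unsigned int caps[NCAPINTS] = { 0xffffffff };   /* pretend CPUID output */

        setup_clear_cap(24);            /* e.g. the FXSR bit in word 0 */
        apply_cleared_caps(caps);
        printf("word0 = 0x%08x\n", caps[0]);
        return 0;
}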
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2f6432cef6ff..e0b38c33d842 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -24,5 +24,15 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; | |||
24 | extern int get_model_name(struct cpuinfo_x86 *c); | 24 | extern int get_model_name(struct cpuinfo_x86 *c); |
25 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 25 | extern void display_cacheinfo(struct cpuinfo_x86 *c); |
26 | 26 | ||
27 | extern void early_intel_workaround(struct cpuinfo_x86 *c); | 27 | extern void early_init_intel(struct cpuinfo_x86 *c); |
28 | 28 | extern void early_init_amd(struct cpuinfo_x86 *c); | |
29 | |||
30 | /* Specific CPU type init functions */ | ||
31 | int intel_cpu_init(void); | ||
32 | int amd_init_cpu(void); | ||
33 | int cyrix_init_cpu(void); | ||
34 | int nsc_init_cpu(void); | ||
35 | int centaur_init_cpu(void); | ||
36 | int transmeta_init_cpu(void); | ||
37 | int nexgen_init_cpu(void); | ||
38 | int umc_init_cpu(void); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index fea0af0476b9..a962dcb9c408 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -67,7 +67,8 @@ struct acpi_cpufreq_data { | |||
67 | unsigned int cpu_feature; | 67 | unsigned int cpu_feature; |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static struct acpi_cpufreq_data *drv_data[NR_CPUS]; | 70 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); |
71 | |||
71 | /* acpi_perf_data is a pointer to percpu data. */ | 72 | /* acpi_perf_data is a pointer to percpu data. */ |
72 | static struct acpi_processor_performance *acpi_perf_data; | 73 | static struct acpi_processor_performance *acpi_perf_data; |
73 | 74 | ||
@@ -218,14 +219,14 @@ static u32 get_cur_val(cpumask_t mask) | |||
218 | if (unlikely(cpus_empty(mask))) | 219 | if (unlikely(cpus_empty(mask))) |
219 | return 0; | 220 | return 0; |
220 | 221 | ||
221 | switch (drv_data[first_cpu(mask)]->cpu_feature) { | 222 | switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) { |
222 | case SYSTEM_INTEL_MSR_CAPABLE: | 223 | case SYSTEM_INTEL_MSR_CAPABLE: |
223 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 224 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
224 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; | 225 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; |
225 | break; | 226 | break; |
226 | case SYSTEM_IO_CAPABLE: | 227 | case SYSTEM_IO_CAPABLE: |
227 | cmd.type = SYSTEM_IO_CAPABLE; | 228 | cmd.type = SYSTEM_IO_CAPABLE; |
228 | perf = drv_data[first_cpu(mask)]->acpi_data; | 229 | perf = per_cpu(drv_data, first_cpu(mask))->acpi_data; |
229 | cmd.addr.io.port = perf->control_register.address; | 230 | cmd.addr.io.port = perf->control_register.address; |
230 | cmd.addr.io.bit_width = perf->control_register.bit_width; | 231 | cmd.addr.io.bit_width = perf->control_register.bit_width; |
231 | break; | 232 | break; |
@@ -325,7 +326,7 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
325 | 326 | ||
326 | #endif | 327 | #endif |
327 | 328 | ||
328 | retval = drv_data[cpu]->max_freq * perf_percent / 100; | 329 | retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; |
329 | 330 | ||
330 | put_cpu(); | 331 | put_cpu(); |
331 | set_cpus_allowed(current, saved_mask); | 332 | set_cpus_allowed(current, saved_mask); |
@@ -336,7 +337,7 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
336 | 337 | ||
337 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 338 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
338 | { | 339 | { |
339 | struct acpi_cpufreq_data *data = drv_data[cpu]; | 340 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); |
340 | unsigned int freq; | 341 | unsigned int freq; |
341 | 342 | ||
342 | dprintk("get_cur_freq_on_cpu (%d)\n", cpu); | 343 | dprintk("get_cur_freq_on_cpu (%d)\n", cpu); |
@@ -370,7 +371,7 @@ static unsigned int check_freqs(cpumask_t mask, unsigned int freq, | |||
370 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, | 371 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, |
371 | unsigned int target_freq, unsigned int relation) | 372 | unsigned int target_freq, unsigned int relation) |
372 | { | 373 | { |
373 | struct acpi_cpufreq_data *data = drv_data[policy->cpu]; | 374 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); |
374 | struct acpi_processor_performance *perf; | 375 | struct acpi_processor_performance *perf; |
375 | struct cpufreq_freqs freqs; | 376 | struct cpufreq_freqs freqs; |
376 | cpumask_t online_policy_cpus; | 377 | cpumask_t online_policy_cpus; |
@@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
466 | 467 | ||
467 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) | 468 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) |
468 | { | 469 | { |
469 | struct acpi_cpufreq_data *data = drv_data[policy->cpu]; | 470 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); |
470 | 471 | ||
471 | dprintk("acpi_cpufreq_verify\n"); | 472 | dprintk("acpi_cpufreq_verify\n"); |
472 | 473 | ||
@@ -570,7 +571,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
570 | return -ENOMEM; | 571 | return -ENOMEM; |
571 | 572 | ||
572 | data->acpi_data = percpu_ptr(acpi_perf_data, cpu); | 573 | data->acpi_data = percpu_ptr(acpi_perf_data, cpu); |
573 | drv_data[cpu] = data; | 574 | per_cpu(drv_data, cpu) = data; |
574 | 575 | ||
575 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) | 576 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) |
576 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; | 577 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; |
@@ -714,20 +715,20 @@ err_unreg: | |||
714 | acpi_processor_unregister_performance(perf, cpu); | 715 | acpi_processor_unregister_performance(perf, cpu); |
715 | err_free: | 716 | err_free: |
716 | kfree(data); | 717 | kfree(data); |
717 | drv_data[cpu] = NULL; | 718 | per_cpu(drv_data, cpu) = NULL; |
718 | 719 | ||
719 | return result; | 720 | return result; |
720 | } | 721 | } |
721 | 722 | ||
722 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | 723 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) |
723 | { | 724 | { |
724 | struct acpi_cpufreq_data *data = drv_data[policy->cpu]; | 725 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); |
725 | 726 | ||
726 | dprintk("acpi_cpufreq_cpu_exit\n"); | 727 | dprintk("acpi_cpufreq_cpu_exit\n"); |
727 | 728 | ||
728 | if (data) { | 729 | if (data) { |
729 | cpufreq_frequency_table_put_attr(policy->cpu); | 730 | cpufreq_frequency_table_put_attr(policy->cpu); |
730 | drv_data[policy->cpu] = NULL; | 731 | per_cpu(drv_data, policy->cpu) = NULL; |
731 | acpi_processor_unregister_performance(data->acpi_data, | 732 | acpi_processor_unregister_performance(data->acpi_data, |
732 | policy->cpu); | 733 | policy->cpu); |
733 | kfree(data); | 734 | kfree(data); |
@@ -738,7 +739,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | |||
738 | 739 | ||
739 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) | 740 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) |
740 | { | 741 | { |
741 | struct acpi_cpufreq_data *data = drv_data[policy->cpu]; | 742 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); |
742 | 743 | ||
743 | dprintk("acpi_cpufreq_resume\n"); | 744 | dprintk("acpi_cpufreq_resume\n"); |
744 | 745 | ||
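The acpi-cpufreq hunks above (and the powernow-k8 hunks further down) convert a static NR_CPUS-sized pointer array into per-CPU data, so storage follows the set of possible CPUs and every access becomes per_cpu(var, cpu). A module-style sketch of the same conversion; the demo_ names are made up, and error-path cleanup is omitted for brevity:

#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>

struct demo_data {
        unsigned int max_freq;
};

/* Before: static struct demo_data *demo_drv_data[NR_CPUS]; */
static DEFINE_PER_CPU(struct demo_data *, demo_drv_data);

static int __init demo_init(void)
{
        int cpu;

        for_each_online_cpu(cpu) {
                struct demo_data *d = kzalloc(sizeof(*d), GFP_KERNEL);

                if (!d)
                        return -ENOMEM; /* cleanup of earlier CPUs omitted */
                d->max_freq = 1000000;
                per_cpu(demo_drv_data, cpu) = d;  /* was demo_drv_data[cpu] = d */
        }
        return 0;
}

static void __exit demo_exit(void)
{
        int cpu;

        for_each_online_cpu(cpu) {
                kfree(per_cpu(demo_drv_data, cpu));
                per_cpu(demo_drv_data, cpu) = NULL;
        }
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");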
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 749d00cb2ebd..06fcce516d51 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -694,7 +694,7 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, | |||
694 | if ( acpi_bus_get_device(obj_handle, &d) ) { | 694 | if ( acpi_bus_get_device(obj_handle, &d) ) { |
695 | return 0; | 695 | return 0; |
696 | } | 696 | } |
697 | *return_value = (void *)acpi_driver_data(d); | 697 | *return_value = acpi_driver_data(d); |
698 | return 1; | 698 | return 1; |
699 | } | 699 | } |
700 | 700 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 99e1ef9939be..5affe91ca1e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -52,7 +52,7 @@ | |||
52 | /* serialize freq changes */ | 52 | /* serialize freq changes */ |
53 | static DEFINE_MUTEX(fidvid_mutex); | 53 | static DEFINE_MUTEX(fidvid_mutex); |
54 | 54 | ||
55 | static struct powernow_k8_data *powernow_data[NR_CPUS]; | 55 | static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); |
56 | 56 | ||
57 | static int cpu_family = CPU_OPTERON; | 57 | static int cpu_family = CPU_OPTERON; |
58 | 58 | ||
@@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf | |||
827 | 827 | ||
828 | for (i = 0; i < data->acpi_data.state_count; i++) { | 828 | for (i = 0; i < data->acpi_data.state_count; i++) { |
829 | u32 index; | 829 | u32 index; |
830 | u32 hi = 0, lo = 0; | ||
831 | 830 | ||
832 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; | 831 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; |
833 | if (index > data->max_hw_pstate) { | 832 | if (index > data->max_hw_pstate) { |
@@ -1018,7 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
1018 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) | 1017 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) |
1019 | { | 1018 | { |
1020 | cpumask_t oldmask = CPU_MASK_ALL; | 1019 | cpumask_t oldmask = CPU_MASK_ALL; |
1021 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | 1020 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1022 | u32 checkfid; | 1021 | u32 checkfid; |
1023 | u32 checkvid; | 1022 | u32 checkvid; |
1024 | unsigned int newstate; | 1023 | unsigned int newstate; |
@@ -1094,7 +1093,7 @@ err_out: | |||
1094 | /* Driver entry point to verify the policy and range of frequencies */ | 1093 | /* Driver entry point to verify the policy and range of frequencies */ |
1095 | static int powernowk8_verify(struct cpufreq_policy *pol) | 1094 | static int powernowk8_verify(struct cpufreq_policy *pol) |
1096 | { | 1095 | { |
1097 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | 1096 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1098 | 1097 | ||
1099 | if (!data) | 1098 | if (!data) |
1100 | return -EINVAL; | 1099 | return -EINVAL; |
@@ -1202,7 +1201,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1202 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", | 1201 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", |
1203 | data->currfid, data->currvid); | 1202 | data->currfid, data->currvid); |
1204 | 1203 | ||
1205 | powernow_data[pol->cpu] = data; | 1204 | per_cpu(powernow_data, pol->cpu) = data; |
1206 | 1205 | ||
1207 | return 0; | 1206 | return 0; |
1208 | 1207 | ||
@@ -1216,7 +1215,7 @@ err_out: | |||
1216 | 1215 | ||
1217 | static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) | 1216 | static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) |
1218 | { | 1217 | { |
1219 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | 1218 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1220 | 1219 | ||
1221 | if (!data) | 1220 | if (!data) |
1222 | return -EINVAL; | 1221 | return -EINVAL; |
@@ -1237,7 +1236,7 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1237 | cpumask_t oldmask = current->cpus_allowed; | 1236 | cpumask_t oldmask = current->cpus_allowed; |
1238 | unsigned int khz = 0; | 1237 | unsigned int khz = 0; |
1239 | 1238 | ||
1240 | data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; | 1239 | data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu))); |
1241 | 1240 | ||
1242 | if (!data) | 1241 | if (!data) |
1243 | return -EINVAL; | 1242 | return -EINVAL; |
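powernow-k8 gets the same per-CPU treatment, including the lookup through the core map in powernowk8_get(). A hedged sketch of that lookup, reusing the identifiers visible in the hunks (the helper function itself is illustrative):

/* Sketch: fetch the per-CPU data registered for the first sibling of a
 * core, as powernowk8_get() does above. 'data_for_core' is hypothetical. */
static struct powernow_k8_data *data_for_core(unsigned int cpu)
{
	unsigned int first = first_cpu(per_cpu(cpu_core_map, cpu));

	return per_cpu(powernow_data, first);
}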
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 76c3ab0da468..98d4fdb7dc04 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -189,10 +189,7 @@ static unsigned int pentium4_get_frequency(void) | |||
189 | printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); | 189 | printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); |
190 | 190 | ||
191 | /* Multiplier. */ | 191 | /* Multiplier. */ |
192 | if (c->x86_model < 2) | 192 | mult = msr_lo >> 24; |
193 | mult = msr_lo >> 27; | ||
194 | else | ||
195 | mult = msr_lo >> 24; | ||
196 | 193 | ||
197 | dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); | 194 | dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); |
198 | 195 | ||
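The speedstep-lib hunk drops the model-dependent shift and always takes the multiplier from the top byte of the MSR's low word. A sketch of the resulting computation (the helper and its bit-field interpretation are assumptions, not verified against the CPU documentation here):

/* Sketch: P4 core speed = FSB (kHz) * multiplier from bits 31:24. */
static unsigned int p4_freq_khz(unsigned int fsb_khz, u32 msr_lo)
{
	unsigned int mult = msr_lo >> 24;	/* top byte of the low word */

	return fsb_khz * mult;
}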
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 88d66fb8411d..7139b0262703 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <asm/dma.h> | 5 | #include <asm/dma.h> |
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | #include <asm/processor-cyrix.h> | 7 | #include <asm/processor-cyrix.h> |
8 | #include <asm/processor-flags.h> | ||
8 | #include <asm/timer.h> | 9 | #include <asm/timer.h> |
9 | #include <asm/pci-direct.h> | 10 | #include <asm/pci-direct.h> |
10 | #include <asm/tsc.h> | 11 | #include <asm/tsc.h> |
@@ -82,8 +83,6 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445"; | |||
82 | * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP | 83 | * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP |
83 | */ | 84 | */ |
84 | 85 | ||
85 | extern void calibrate_delay(void) __init; | ||
86 | |||
87 | static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) | 86 | static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) |
88 | { | 87 | { |
89 | unsigned long flags; | 88 | unsigned long flags; |
@@ -126,15 +125,12 @@ static void __cpuinit set_cx86_reorder(void) | |||
126 | 125 | ||
127 | static void __cpuinit set_cx86_memwb(void) | 126 | static void __cpuinit set_cx86_memwb(void) |
128 | { | 127 | { |
129 | u32 cr0; | ||
130 | |||
131 | printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); | 128 | printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); |
132 | 129 | ||
133 | /* CCR2 bit 2: unlock NW bit */ | 130 | /* CCR2 bit 2: unlock NW bit */ |
134 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); | 131 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); |
135 | /* set 'Not Write-through' */ | 132 | /* set 'Not Write-through' */ |
136 | cr0 = 0x20000000; | 133 | write_cr0(read_cr0() | X86_CR0_NW); |
137 | write_cr0(read_cr0() | cr0); | ||
138 | /* CCR2 bit 2: lock NW bit and set WT1 */ | 134 | /* CCR2 bit 2: lock NW bit and set WT1 */ |
139 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); | 135 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); |
140 | } | 136 | } |
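In set_cx86_memwb() the magic 0x20000000 becomes the named X86_CR0_NW flag from the newly included <asm/processor-flags.h>. A small sketch of the same read-modify-write, wrapped in an illustrative helper (not a kernel API):

#include <asm/processor-flags.h>

/* Sketch: set a named CR0 bit instead of an open-coded mask. */
static inline void cr0_set_bits(unsigned long bits)
{
	write_cr0(read_cr0() | bits);
}

static void set_not_write_through(void)
{
	cr0_set_bits(X86_CR0_NW);	/* bit 29, i.e. the old 0x20000000 */
}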
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c new file mode 100644 index 000000000000..ee975ac6bbcb --- /dev/null +++ b/arch/x86/kernel/cpu/feature_names.c | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * Strings for the various x86 capability flags. | ||
3 | * | ||
4 | * This file must not contain any executable code. | ||
5 | */ | ||
6 | |||
7 | #include "asm/cpufeature.h" | ||
8 | |||
9 | /* | ||
10 | * These flag bits must match the definitions in <asm/cpufeature.h>. | ||
11 | * NULL means this bit is undefined or reserved; either way it doesn't | ||
12 | * have meaning as far as Linux is concerned. Note that it's important | ||
13 | * to realize there is a difference between this table and CPUID -- if | ||
14 | * applications want to get the raw CPUID data, they should access | ||
15 | * /dev/cpu/<cpu_nr>/cpuid instead. | ||
16 | */ | ||
17 | const char * const x86_cap_flags[NCAPINTS*32] = { | ||
18 | /* Intel-defined */ | ||
19 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | ||
20 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | ||
21 | "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | ||
22 | "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | ||
23 | |||
24 | /* AMD-defined */ | ||
25 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
26 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | ||
27 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | ||
28 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", | ||
29 | "3dnowext", "3dnow", | ||
30 | |||
31 | /* Transmeta-defined */ | ||
32 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | ||
33 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
34 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
35 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
36 | |||
37 | /* Other (Linux-defined) */ | ||
38 | "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | ||
39 | NULL, NULL, NULL, NULL, | ||
40 | "constant_tsc", "up", NULL, "arch_perfmon", | ||
41 | "pebs", "bts", NULL, NULL, | ||
42 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
43 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
44 | |||
45 | /* Intel-defined (#2) */ | ||
46 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | ||
47 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | ||
48 | NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", | ||
49 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
50 | |||
51 | /* VIA/Cyrix/Centaur-defined */ | ||
52 | NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | ||
53 | "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, | ||
54 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
55 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
56 | |||
57 | /* AMD-defined (#2) */ | ||
58 | "lahf_lm", "cmp_legacy", "svm", "extapic", | ||
59 | "cr8_legacy", "abm", "sse4a", "misalignsse", | ||
60 | "3dnowprefetch", "osvw", "ibs", "sse5", | ||
61 | "skinit", "wdt", NULL, NULL, | ||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
64 | |||
65 | /* Auxiliary (Linux-defined) */ | ||
66 | "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
67 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
68 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
69 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
70 | }; | ||
71 | |||
72 | const char *const x86_power_flags[32] = { | ||
73 | "ts", /* temperature sensor */ | ||
74 | "fid", /* frequency id control */ | ||
75 | "vid", /* voltage id control */ | ||
76 | "ttp", /* thermal trip */ | ||
77 | "tm", | ||
78 | "stc", | ||
79 | "100mhzsteps", | ||
80 | "hwpstate", | ||
81 | "", /* tsc invariant mapped to constant_tsc */ | ||
82 | /* nothing */ | ||
83 | }; | ||
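The new feature_names.c file is a pure data table indexed by capability word and bit, presumably so the 32-bit and 64-bit /proc/cpuinfo code can share a single copy. A hedged sketch of how a bit number maps into it (the lookup helper is illustrative, not part of the file):

#include <asm/cpufeature.h>

/* Sketch: flat index = 32 * capability word + bit; NULL means the bit
 * is reserved or has no Linux-visible name. */
static const char *cap_name(unsigned int word, unsigned int bit)
{
	unsigned int idx = word * 32 + bit;

	if (idx >= NCAPINTS * 32)
		return NULL;
	return x86_cap_flags[idx];
}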
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cc8c501b9f39..fae31ce747bd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -11,6 +11,9 @@ | |||
11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/ds.h> | ||
16 | #include <asm/bugs.h> | ||
14 | 17 | ||
15 | #include "cpu.h" | 18 | #include "cpu.h" |
16 | 19 | ||
@@ -27,13 +30,14 @@ | |||
27 | struct movsl_mask movsl_mask __read_mostly; | 30 | struct movsl_mask movsl_mask __read_mostly; |
28 | #endif | 31 | #endif |
29 | 32 | ||
30 | void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) | 33 | void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
31 | { | 34 | { |
32 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
33 | return; | ||
34 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ | 35 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ |
35 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | 36 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
36 | c->x86_cache_alignment = 128; | 37 | c->x86_cache_alignment = 128; |
38 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
39 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
40 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
37 | } | 41 | } |
38 | 42 | ||
39 | /* | 43 | /* |
@@ -113,6 +117,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
113 | unsigned int l2 = 0; | 117 | unsigned int l2 = 0; |
114 | char *p = NULL; | 118 | char *p = NULL; |
115 | 119 | ||
120 | early_init_intel(c); | ||
121 | |||
116 | #ifdef CONFIG_X86_F00F_BUG | 122 | #ifdef CONFIG_X86_F00F_BUG |
117 | /* | 123 | /* |
118 | * All current models of Pentium and Pentium with MMX technology CPUs | 124 | * All current models of Pentium and Pentium with MMX technology CPUs |
@@ -132,7 +138,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
132 | } | 138 | } |
133 | #endif | 139 | #endif |
134 | 140 | ||
135 | select_idle_routine(c); | ||
136 | l2 = init_intel_cacheinfo(c); | 141 | l2 = init_intel_cacheinfo(c); |
137 | if (c->cpuid_level > 9 ) { | 142 | if (c->cpuid_level > 9 ) { |
138 | unsigned eax = cpuid_eax(10); | 143 | unsigned eax = cpuid_eax(10); |
@@ -201,16 +206,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
201 | } | 206 | } |
202 | #endif | 207 | #endif |
203 | 208 | ||
209 | if (cpu_has_xmm2) | ||
210 | set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability); | ||
204 | if (c->x86 == 15) { | 211 | if (c->x86 == 15) { |
205 | set_bit(X86_FEATURE_P4, c->x86_capability); | 212 | set_bit(X86_FEATURE_P4, c->x86_capability); |
206 | set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); | ||
207 | } | 213 | } |
208 | if (c->x86 == 6) | 214 | if (c->x86 == 6) |
209 | set_bit(X86_FEATURE_P3, c->x86_capability); | 215 | set_bit(X86_FEATURE_P3, c->x86_capability); |
210 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
211 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
212 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
213 | |||
214 | if (cpu_has_ds) { | 216 | if (cpu_has_ds) { |
215 | unsigned int l1; | 217 | unsigned int l1; |
216 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | 218 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); |
@@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
219 | if (!(l1 & (1<<12))) | 221 | if (!(l1 & (1<<12))) |
220 | set_bit(X86_FEATURE_PEBS, c->x86_capability); | 222 | set_bit(X86_FEATURE_PEBS, c->x86_capability); |
221 | } | 223 | } |
224 | |||
225 | if (cpu_has_bts) | ||
226 | ds_init_intel(c); | ||
222 | } | 227 | } |
223 | 228 | ||
224 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 229 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
@@ -342,5 +347,22 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | |||
342 | EXPORT_SYMBOL(cmpxchg_386_u32); | 347 | EXPORT_SYMBOL(cmpxchg_386_u32); |
343 | #endif | 348 | #endif |
344 | 349 | ||
350 | #ifndef CONFIG_X86_CMPXCHG64 | ||
351 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
352 | { | ||
353 | u64 prev; | ||
354 | unsigned long flags; | ||
355 | |||
356 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
357 | local_irq_save(flags); | ||
358 | prev = *(u64 *)ptr; | ||
359 | if (prev == old) | ||
360 | *(u64 *)ptr = new; | ||
361 | local_irq_restore(flags); | ||
362 | return prev; | ||
363 | } | ||
364 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
365 | #endif | ||
366 | |||
345 | // arch_initcall(intel_cpu_init); | 367 | // arch_initcall(intel_cpu_init); |
346 | 368 | ||
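intel.c also gains a cmpxchg_486_u64() fallback for CPUs without the cmpxchg8b instruction; it emulates a 64-bit compare-and-swap with interrupts disabled and, as its comment notes, is not SMP-safe. A hedged usage sketch (variable and function names are illustrative):

/* Sketch: 64-bit compare-and-swap via the fallback added above. */
static u64 seen;

static void mark_first(u64 value)
{
	/* store 'value' only if nothing has been stored yet */
	cmpxchg_486_u64(&seen, 0, value);
}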
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9f530ff43c21..1b889860eb73 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -352,8 +352,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
352 | */ | 352 | */ |
353 | if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { | 353 | if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { |
354 | /* supports eax=2 call */ | 354 | /* supports eax=2 call */ |
355 | int i, j, n; | 355 | int j, n; |
356 | int regs[4]; | 356 | unsigned int regs[4]; |
357 | unsigned char *dp = (unsigned char *)regs; | 357 | unsigned char *dp = (unsigned char *)regs; |
358 | int only_trace = 0; | 358 | int only_trace = 0; |
359 | 359 | ||
@@ -368,7 +368,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
368 | 368 | ||
369 | /* If bit 31 is set, this is an unknown format */ | 369 | /* If bit 31 is set, this is an unknown format */ |
370 | for ( j = 0 ; j < 3 ; j++ ) { | 370 | for ( j = 0 ; j < 3 ; j++ ) { |
371 | if ( regs[j] < 0 ) regs[j] = 0; | 371 | if (regs[j] & (1 << 31)) regs[j] = 0; |
372 | } | 372 | } |
373 | 373 | ||
374 | /* Byte 0 is level count, not a descriptor */ | 374 | /* Byte 0 is level count, not a descriptor */ |
@@ -733,10 +733,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
733 | if (unlikely(retval < 0)) | 733 | if (unlikely(retval < 0)) |
734 | return retval; | 734 | return retval; |
735 | 735 | ||
736 | cache_kobject[cpu]->parent = &sys_dev->kobj; | 736 | retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry, |
737 | kobject_set_name(cache_kobject[cpu], "%s", "cache"); | 737 | &sys_dev->kobj, "%s", "cache"); |
738 | cache_kobject[cpu]->ktype = &ktype_percpu_entry; | ||
739 | retval = kobject_register(cache_kobject[cpu]); | ||
740 | if (retval < 0) { | 738 | if (retval < 0) { |
741 | cpuid4_cache_sysfs_exit(cpu); | 739 | cpuid4_cache_sysfs_exit(cpu); |
742 | return retval; | 740 | return retval; |
@@ -746,23 +744,23 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
746 | this_object = INDEX_KOBJECT_PTR(cpu,i); | 744 | this_object = INDEX_KOBJECT_PTR(cpu,i); |
747 | this_object->cpu = cpu; | 745 | this_object->cpu = cpu; |
748 | this_object->index = i; | 746 | this_object->index = i; |
749 | this_object->kobj.parent = cache_kobject[cpu]; | 747 | retval = kobject_init_and_add(&(this_object->kobj), |
750 | kobject_set_name(&(this_object->kobj), "index%1lu", i); | 748 | &ktype_cache, cache_kobject[cpu], |
751 | this_object->kobj.ktype = &ktype_cache; | 749 | "index%1lu", i); |
752 | retval = kobject_register(&(this_object->kobj)); | ||
753 | if (unlikely(retval)) { | 750 | if (unlikely(retval)) { |
754 | for (j = 0; j < i; j++) { | 751 | for (j = 0; j < i; j++) { |
755 | kobject_unregister( | 752 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); |
756 | &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); | ||
757 | } | 753 | } |
758 | kobject_unregister(cache_kobject[cpu]); | 754 | kobject_put(cache_kobject[cpu]); |
759 | cpuid4_cache_sysfs_exit(cpu); | 755 | cpuid4_cache_sysfs_exit(cpu); |
760 | break; | 756 | break; |
761 | } | 757 | } |
758 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | ||
762 | } | 759 | } |
763 | if (!retval) | 760 | if (!retval) |
764 | cpu_set(cpu, cache_dev_map); | 761 | cpu_set(cpu, cache_dev_map); |
765 | 762 | ||
763 | kobject_uevent(cache_kobject[cpu], KOBJ_ADD); | ||
766 | return retval; | 764 | return retval; |
767 | } | 765 | } |
768 | 766 | ||
@@ -778,8 +776,8 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
778 | cpu_clear(cpu, cache_dev_map); | 776 | cpu_clear(cpu, cache_dev_map); |
779 | 777 | ||
780 | for (i = 0; i < num_cache_leaves; i++) | 778 | for (i = 0; i < num_cache_leaves; i++) |
781 | kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | 779 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); |
782 | kobject_unregister(cache_kobject[cpu]); | 780 | kobject_put(cache_kobject[cpu]); |
783 | cpuid4_cache_sysfs_exit(cpu); | 781 | cpuid4_cache_sysfs_exit(cpu); |
784 | } | 782 | } |
785 | 783 | ||
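intel_cacheinfo.c switches from the old kobject_register()/kobject_unregister() calls to kobject_init_and_add() plus kobject_put(), and announces each object with an explicit uevent once setup succeeds. A minimal sketch of that lifecycle (the ktype, parent and names are illustrative):

#include <linux/kobject.h>

static struct kobj_type my_ktype;	/* placeholder; a real ktype needs a release() */

static int add_child(struct kobject *child, struct kobject *parent, int i)
{
	int err;

	err = kobject_init_and_add(child, &my_ktype, parent, "index%d", i);
	if (err) {
		kobject_put(child);		/* drop the reference taken by init */
		return err;
	}
	kobject_uevent(child, KOBJ_ADD);	/* announce only after success */
	return 0;
}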
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index eef63e3630c2..e633c9c2b764 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include "mce.h" | 16 | #include "mce.h" |
17 | 17 | ||
18 | /* Machine Check Handler For AMD Athlon/Duron */ | 18 | /* Machine Check Handler For AMD Athlon/Duron */ |
19 | static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) | 19 | static void k7_machine_check(struct pt_regs * regs, long error_code) |
20 | { | 20 | { |
21 | int recover=1; | 21 | int recover=1; |
22 | u32 alow, ahigh, high, low; | 22 | u32 alow, ahigh, high, low; |
@@ -27,29 +27,32 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) | |||
27 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 27 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
28 | recover=0; | 28 | recover=0; |
29 | 29 | ||
30 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 30 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
31 | smp_processor_id(), mcgsth, mcgstl); | 31 | smp_processor_id(), mcgsth, mcgstl); |
32 | 32 | ||
33 | for (i=1; i<nr_mce_banks; i++) { | 33 | for (i = 1; i < nr_mce_banks; i++) { |
34 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | 34 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
35 | if (high&(1<<31)) { | 35 | if (high&(1<<31)) { |
36 | char misc[20]; | ||
37 | char addr[24]; | ||
38 | misc[0] = addr[0] = '\0'; | ||
36 | if (high & (1<<29)) | 39 | if (high & (1<<29)) |
37 | recover |= 1; | 40 | recover |= 1; |
38 | if (high & (1<<25)) | 41 | if (high & (1<<25)) |
39 | recover |= 2; | 42 | recover |= 2; |
40 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
41 | high &= ~(1<<31); | 43 | high &= ~(1<<31); |
42 | if (high & (1<<27)) { | 44 | if (high & (1<<27)) { |
43 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
44 | printk ("[%08x%08x]", ahigh, alow); | 46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
45 | } | 47 | } |
46 | if (high & (1<<26)) { | 48 | if (high & (1<<26)) { |
47 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
48 | printk (" at %08x%08x", ahigh, alow); | 50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
49 | } | 51 | } |
50 | printk ("\n"); | 52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
53 | smp_processor_id(), i, high, low, misc, addr); | ||
51 | /* Clear it */ | 54 | /* Clear it */ |
52 | wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | 55 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); |
53 | /* Serialize */ | 56 | /* Serialize */ |
54 | wmb(); | 57 | wmb(); |
55 | add_taint(TAINT_MACHINE_CHECK); | 58 | add_taint(TAINT_MACHINE_CHECK); |
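The k7 machine-check handler (and, below, the p4 and p6 ones) now formats the optional MISC and ADDR words into small local buffers so each bank is reported with a single printk() carrying its own log level and CPU number. A sketch of that pattern as a stand-alone helper (the helper itself is illustrative):

/* Sketch: emit one complete MCE bank line instead of several fragments. */
static void report_bank(int cpu, int bank, u32 high, u32 low,
			u32 mhigh, u32 mlow, u32 ahigh, u32 alow)
{
	char misc[20], addr[24];

	misc[0] = addr[0] = '\0';
	if (high & (1 << 27))
		snprintf(misc, sizeof(misc), "[%08x%08x]", mhigh, mlow);
	if (high & (1 << 26))
		snprintf(addr, sizeof(addr), " at %08x%08x", ahigh, alow);
	printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
	       cpu, bank, high, low, misc, addr);
}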
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index 81fb6e2d35f3..ae9f628838f1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h | |||
@@ -8,7 +8,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | |||
8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | 8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); |
9 | 9 | ||
10 | /* Call the installed machine check handler for this CPU setup. */ | 10 | /* Call the installed machine check handler for this CPU setup. */ |
11 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | 11 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); |
12 | 12 | ||
13 | extern int nr_mce_banks; | 13 | extern int nr_mce_banks; |
14 | 14 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 34c781eddee4..a5182dcd94ae 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c | |||
@@ -22,13 +22,13 @@ int nr_mce_banks; | |||
22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | 22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ |
23 | 23 | ||
24 | /* Handle unconfigured int18 (should never happen) */ | 24 | /* Handle unconfigured int18 (should never happen) */ |
25 | static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) | 25 | static void unexpected_machine_check(struct pt_regs * regs, long error_code) |
26 | { | 26 | { |
27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | 27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); |
28 | } | 28 | } |
29 | 29 | ||
30 | /* Call the installed machine check handler for this CPU setup. */ | 30 | /* Call the installed machine check handler for this CPU setup. */ |
31 | void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | 31 | void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; |
32 | 32 | ||
33 | /* This has to be run for each processor */ | 33 | /* This has to be run for each processor */ |
34 | void mcheck_init(struct cpuinfo_x86 *c) | 34 | void mcheck_init(struct cpuinfo_x86 *c) |
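mce.h and mce_32.c drop the obsolete fastcall annotation from the machine-check vector and its default handler, and the remaining mcheck hunks below mirror that for each vendor handler. A hedged sketch of the declaration pattern that is kept, with illustrative names:

/* Sketch: default handler plus a settable vector with matching prototypes. */
static void default_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_ERR "CPU#%d: unexpected machine check\n", smp_processor_id());
}

static void (*check_vector)(struct pt_regs *, long) = default_check;

/* a vendor init routine then simply does: check_vector = vendor_handler; */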
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b21d29fb5aa..9a699ed03598 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
63 | * separate MCEs from kernel messages to avoid bogus bug reports. | 63 | * separate MCEs from kernel messages to avoid bogus bug reports. |
64 | */ | 64 | */ |
65 | 65 | ||
66 | struct mce_log mcelog = { | 66 | static struct mce_log mcelog = { |
67 | MCE_LOG_SIGNATURE, | 67 | MCE_LOG_SIGNATURE, |
68 | MCE_LOG_LEN, | 68 | MCE_LOG_LEN, |
69 | }; | 69 | }; |
@@ -80,7 +80,7 @@ void mce_log(struct mce *mce) | |||
80 | /* When the buffer fills up discard new entries. Assume | 80 | /* When the buffer fills up discard new entries. Assume |
81 | that the earlier errors are the more interesting. */ | 81 | that the earlier errors are the more interesting. */ |
82 | if (entry >= MCE_LOG_LEN) { | 82 | if (entry >= MCE_LOG_LEN) { |
83 | set_bit(MCE_OVERFLOW, &mcelog.flags); | 83 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); |
84 | return; | 84 | return; |
85 | } | 85 | } |
86 | /* Old left over entry. Skip. */ | 86 | /* Old left over entry. Skip. */ |
@@ -110,12 +110,12 @@ static void print_mce(struct mce *m) | |||
110 | KERN_EMERG | 110 | KERN_EMERG |
111 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 111 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
112 | m->cpu, m->mcgstatus, m->bank, m->status); | 112 | m->cpu, m->mcgstatus, m->bank, m->status); |
113 | if (m->rip) { | 113 | if (m->ip) { |
114 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | 114 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", |
115 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 115 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
116 | m->cs, m->rip); | 116 | m->cs, m->ip); |
117 | if (m->cs == __KERNEL_CS) | 117 | if (m->cs == __KERNEL_CS) |
118 | print_symbol("{%s}", m->rip); | 118 | print_symbol("{%s}", m->ip); |
119 | printk("\n"); | 119 | printk("\n"); |
120 | } | 120 | } |
121 | printk(KERN_EMERG "TSC %Lx ", m->tsc); | 121 | printk(KERN_EMERG "TSC %Lx ", m->tsc); |
@@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_x86 *c) | |||
156 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | 156 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) |
157 | { | 157 | { |
158 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { | 158 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { |
159 | m->rip = regs->rip; | 159 | m->ip = regs->ip; |
160 | m->cs = regs->cs; | 160 | m->cs = regs->cs; |
161 | } else { | 161 | } else { |
162 | m->rip = 0; | 162 | m->ip = 0; |
163 | m->cs = 0; | 163 | m->cs = 0; |
164 | } | 164 | } |
165 | if (rip_msr) { | 165 | if (rip_msr) { |
166 | /* Assume the RIP in the MSR is exact. Is this true? */ | 166 | /* Assume the RIP in the MSR is exact. Is this true? */ |
167 | m->mcgstatus |= MCG_STATUS_EIPV; | 167 | m->mcgstatus |= MCG_STATUS_EIPV; |
168 | rdmsrl(rip_msr, m->rip); | 168 | rdmsrl(rip_msr, m->ip); |
169 | m->cs = 0; | 169 | m->cs = 0; |
170 | } | 170 | } |
171 | } | 171 | } |
@@ -192,10 +192,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
192 | 192 | ||
193 | atomic_inc(&mce_entry); | 193 | atomic_inc(&mce_entry); |
194 | 194 | ||
195 | if (regs) | 195 | if ((regs |
196 | notify_die(DIE_NMI, "machine check", regs, error_code, 18, | 196 | && notify_die(DIE_NMI, "machine check", regs, error_code, |
197 | SIGKILL); | 197 | 18, SIGKILL) == NOTIFY_STOP) |
198 | if (!banks) | 198 | || !banks) |
199 | goto out2; | 199 | goto out2; |
200 | 200 | ||
201 | memset(&m, 0, sizeof(struct mce)); | 201 | memset(&m, 0, sizeof(struct mce)); |
@@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
288 | * instruction which caused the MCE. | 288 | * instruction which caused the MCE. |
289 | */ | 289 | */ |
290 | if (m.mcgstatus & MCG_STATUS_EIPV) | 290 | if (m.mcgstatus & MCG_STATUS_EIPV) |
291 | user_space = panicm.rip && (panicm.cs & 3); | 291 | user_space = panicm.ip && (panicm.cs & 3); |
292 | 292 | ||
293 | /* | 293 | /* |
294 | * If we know that the error was in user space, send a | 294 | * If we know that the error was in user space, send a |
@@ -564,7 +564,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
564 | loff_t *off) | 564 | loff_t *off) |
565 | { | 565 | { |
566 | unsigned long *cpu_tsc; | 566 | unsigned long *cpu_tsc; |
567 | static DECLARE_MUTEX(mce_read_sem); | 567 | static DEFINE_MUTEX(mce_read_mutex); |
568 | unsigned next; | 568 | unsigned next; |
569 | char __user *buf = ubuf; | 569 | char __user *buf = ubuf; |
570 | int i, err; | 570 | int i, err; |
@@ -573,12 +573,12 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
573 | if (!cpu_tsc) | 573 | if (!cpu_tsc) |
574 | return -ENOMEM; | 574 | return -ENOMEM; |
575 | 575 | ||
576 | down(&mce_read_sem); | 576 | mutex_lock(&mce_read_mutex); |
577 | next = rcu_dereference(mcelog.next); | 577 | next = rcu_dereference(mcelog.next); |
578 | 578 | ||
579 | /* Only supports full reads right now */ | 579 | /* Only supports full reads right now */ |
580 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 580 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { |
581 | up(&mce_read_sem); | 581 | mutex_unlock(&mce_read_mutex); |
582 | kfree(cpu_tsc); | 582 | kfree(cpu_tsc); |
583 | return -EINVAL; | 583 | return -EINVAL; |
584 | } | 584 | } |
@@ -621,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
621 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 621 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
622 | } | 622 | } |
623 | } | 623 | } |
624 | up(&mce_read_sem); | 624 | mutex_unlock(&mce_read_mutex); |
625 | kfree(cpu_tsc); | 625 | kfree(cpu_tsc); |
626 | return err ? -EFAULT : buf - ubuf; | 626 | return err ? -EFAULT : buf - ubuf; |
627 | } | 627 | } |
@@ -634,8 +634,7 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
634 | return 0; | 634 | return 0; |
635 | } | 635 | } |
636 | 636 | ||
637 | static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, | 637 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) |
638 | unsigned long arg) | ||
639 | { | 638 | { |
640 | int __user *p = (int __user *)arg; | 639 | int __user *p = (int __user *)arg; |
641 | 640 | ||
@@ -664,7 +663,7 @@ static const struct file_operations mce_chrdev_ops = { | |||
664 | .release = mce_release, | 663 | .release = mce_release, |
665 | .read = mce_read, | 664 | .read = mce_read, |
666 | .poll = mce_poll, | 665 | .poll = mce_poll, |
667 | .ioctl = mce_ioctl, | 666 | .unlocked_ioctl = mce_ioctl, |
668 | }; | 667 | }; |
669 | 668 | ||
670 | static struct miscdevice mce_log_device = { | 669 | static struct miscdevice mce_log_device = { |
@@ -745,7 +744,7 @@ static void mce_restart(void) | |||
745 | 744 | ||
746 | static struct sysdev_class mce_sysclass = { | 745 | static struct sysdev_class mce_sysclass = { |
747 | .resume = mce_resume, | 746 | .resume = mce_resume, |
748 | set_kset_name("machinecheck"), | 747 | .name = "machinecheck", |
749 | }; | 748 | }; |
750 | 749 | ||
751 | DEFINE_PER_CPU(struct sys_device, device_mce); | 750 | DEFINE_PER_CPU(struct sys_device, device_mce); |
@@ -855,8 +854,8 @@ static void mce_remove_device(unsigned int cpu) | |||
855 | } | 854 | } |
856 | 855 | ||
857 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 856 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
858 | static int | 857 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, |
859 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 858 | unsigned long action, void *hcpu) |
860 | { | 859 | { |
861 | unsigned int cpu = (unsigned long)hcpu; | 860 | unsigned int cpu = (unsigned long)hcpu; |
862 | 861 | ||
@@ -873,7 +872,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
873 | return NOTIFY_OK; | 872 | return NOTIFY_OK; |
874 | } | 873 | } |
875 | 874 | ||
876 | static struct notifier_block mce_cpu_notifier = { | 875 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { |
877 | .notifier_call = mce_cpu_callback, | 876 | .notifier_call = mce_cpu_callback, |
878 | }; | 877 | }; |
879 | 878 | ||
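mce_64.c converts the reader serialization from a declared semaphore to a mutex and moves the character device to unlocked_ioctl, dropping the old BKL-era ioctl prototype. A minimal sketch of the locking half (names are illustrative):

#include <linux/mutex.h>

static DEFINE_MUTEX(log_mutex);

static void drain_log(void)
{
	mutex_lock(&log_mutex);		/* was: down(&mce_read_sem) */
	/* ... copy entries out and clear them ... */
	mutex_unlock(&log_mutex);	/* was: up(&mce_read_sem) */
}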
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 752fb16a817d..32671da8184e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -65,7 +65,7 @@ static struct threshold_block threshold_defaults = { | |||
65 | }; | 65 | }; |
66 | 66 | ||
67 | struct threshold_bank { | 67 | struct threshold_bank { |
68 | struct kobject kobj; | 68 | struct kobject *kobj; |
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_t cpus; | 70 | cpumask_t cpus; |
71 | }; | 71 | }; |
@@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
118 | { | 118 | { |
119 | unsigned int bank, block; | 119 | unsigned int bank, block; |
120 | unsigned int cpu = smp_processor_id(); | 120 | unsigned int cpu = smp_processor_id(); |
121 | u8 lvt_off; | ||
121 | u32 low = 0, high = 0, address = 0; | 122 | u32 low = 0, high = 0, address = 0; |
122 | 123 | ||
123 | for (bank = 0; bank < NR_BANKS; ++bank) { | 124 | for (bank = 0; bank < NR_BANKS; ++bank) { |
@@ -153,14 +154,13 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
153 | if (shared_bank[bank] && c->cpu_core_id) | 154 | if (shared_bank[bank] && c->cpu_core_id) |
154 | break; | 155 | break; |
155 | #endif | 156 | #endif |
157 | lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, | ||
158 | APIC_EILVT_MSG_FIX, 0); | ||
159 | |||
156 | high &= ~MASK_LVTOFF_HI; | 160 | high &= ~MASK_LVTOFF_HI; |
157 | high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; | 161 | high |= lvt_off << 20; |
158 | wrmsr(address, low, high); | 162 | wrmsr(address, low, high); |
159 | 163 | ||
160 | setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, | ||
161 | THRESHOLD_APIC_VECTOR, | ||
162 | K8_APIC_EXT_INT_MSG_FIX, 0); | ||
163 | |||
164 | threshold_defaults.address = address; | 164 | threshold_defaults.address = address; |
165 | threshold_restart_bank(&threshold_defaults, 0, 0); | 165 | threshold_restart_bank(&threshold_defaults, 0, 0); |
166 | } | 166 | } |
@@ -432,10 +432,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
432 | else | 432 | else |
433 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; | 433 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; |
434 | 434 | ||
435 | kobject_set_name(&b->kobj, "misc%i", block); | 435 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
436 | b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; | 436 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
437 | b->kobj.ktype = &threshold_ktype; | 437 | "misc%i", block); |
438 | err = kobject_register(&b->kobj); | ||
439 | if (err) | 438 | if (err) |
440 | goto out_free; | 439 | goto out_free; |
441 | recurse: | 440 | recurse: |
@@ -451,11 +450,14 @@ recurse: | |||
451 | if (err) | 450 | if (err) |
452 | goto out_free; | 451 | goto out_free; |
453 | 452 | ||
453 | if (b) | ||
454 | kobject_uevent(&b->kobj, KOBJ_ADD); | ||
455 | |||
454 | return err; | 456 | return err; |
455 | 457 | ||
456 | out_free: | 458 | out_free: |
457 | if (b) { | 459 | if (b) { |
458 | kobject_unregister(&b->kobj); | 460 | kobject_put(&b->kobj); |
459 | kfree(b); | 461 | kfree(b); |
460 | } | 462 | } |
461 | return err; | 463 | return err; |
@@ -489,7 +491,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
489 | goto out; | 491 | goto out; |
490 | 492 | ||
491 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, | 493 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, |
492 | &b->kobj, name); | 494 | b->kobj, name); |
493 | if (err) | 495 | if (err) |
494 | goto out; | 496 | goto out; |
495 | 497 | ||
@@ -505,16 +507,15 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
505 | goto out; | 507 | goto out; |
506 | } | 508 | } |
507 | 509 | ||
508 | kobject_set_name(&b->kobj, "threshold_bank%i", bank); | 510 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); |
509 | b->kobj.parent = &per_cpu(device_mce, cpu).kobj; | 511 | if (!b->kobj) |
512 | goto out_free; | ||
513 | |||
510 | #ifndef CONFIG_SMP | 514 | #ifndef CONFIG_SMP |
511 | b->cpus = CPU_MASK_ALL; | 515 | b->cpus = CPU_MASK_ALL; |
512 | #else | 516 | #else |
513 | b->cpus = per_cpu(cpu_core_map, cpu); | 517 | b->cpus = per_cpu(cpu_core_map, cpu); |
514 | #endif | 518 | #endif |
515 | err = kobject_register(&b->kobj); | ||
516 | if (err) | ||
517 | goto out_free; | ||
518 | 519 | ||
519 | per_cpu(threshold_banks, cpu)[bank] = b; | 520 | per_cpu(threshold_banks, cpu)[bank] = b; |
520 | 521 | ||
@@ -531,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
531 | continue; | 532 | continue; |
532 | 533 | ||
533 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, | 534 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, |
534 | &b->kobj, name); | 535 | b->kobj, name); |
535 | if (err) | 536 | if (err) |
536 | goto out; | 537 | goto out; |
537 | 538 | ||
@@ -554,7 +555,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
554 | int err = 0; | 555 | int err = 0; |
555 | 556 | ||
556 | for (bank = 0; bank < NR_BANKS; ++bank) { | 557 | for (bank = 0; bank < NR_BANKS; ++bank) { |
557 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | 558 | if (!(per_cpu(bank_map, cpu) & (1 << bank))) |
558 | continue; | 559 | continue; |
559 | err = threshold_create_bank(cpu, bank); | 560 | err = threshold_create_bank(cpu, bank); |
560 | if (err) | 561 | if (err) |
@@ -581,7 +582,7 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
581 | return; | 582 | return; |
582 | 583 | ||
583 | list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { | 584 | list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { |
584 | kobject_unregister(&pos->kobj); | 585 | kobject_put(&pos->kobj); |
585 | list_del(&pos->miscj); | 586 | list_del(&pos->miscj); |
586 | kfree(pos); | 587 | kfree(pos); |
587 | } | 588 | } |
@@ -627,7 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
627 | deallocate_threshold_block(cpu, bank); | 628 | deallocate_threshold_block(cpu, bank); |
628 | 629 | ||
629 | free_out: | 630 | free_out: |
630 | kobject_unregister(&b->kobj); | 631 | kobject_put(b->kobj); |
631 | kfree(b); | 632 | kfree(b); |
632 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 633 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
633 | } | 634 | } |
@@ -637,14 +638,14 @@ static void threshold_remove_device(unsigned int cpu) | |||
637 | unsigned int bank; | 638 | unsigned int bank; |
638 | 639 | ||
639 | for (bank = 0; bank < NR_BANKS; ++bank) { | 640 | for (bank = 0; bank < NR_BANKS; ++bank) { |
640 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | 641 | if (!(per_cpu(bank_map, cpu) & (1 << bank))) |
641 | continue; | 642 | continue; |
642 | threshold_remove_bank(cpu, bank); | 643 | threshold_remove_bank(cpu, bank); |
643 | } | 644 | } |
644 | } | 645 | } |
645 | 646 | ||
646 | /* get notified when a cpu comes on/off */ | 647 | /* get notified when a cpu comes on/off */ |
647 | static int threshold_cpu_callback(struct notifier_block *nfb, | 648 | static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, |
648 | unsigned long action, void *hcpu) | 649 | unsigned long action, void *hcpu) |
649 | { | 650 | { |
650 | /* cpu was unsigned int to begin with */ | 651 | /* cpu was unsigned int to begin with */ |
@@ -669,7 +670,7 @@ static int threshold_cpu_callback(struct notifier_block *nfb, | |||
669 | return NOTIFY_OK; | 670 | return NOTIFY_OK; |
670 | } | 671 | } |
671 | 672 | ||
672 | static struct notifier_block threshold_cpu_notifier = { | 673 | static struct notifier_block threshold_cpu_notifier __cpuinitdata = { |
673 | .notifier_call = threshold_cpu_callback, | 674 | .notifier_call = threshold_cpu_callback, |
674 | }; | 675 | }; |
675 | 676 | ||
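mce_amd_64.c turns the threshold bank's embedded kobject into a pointer created with kobject_create_and_add(), which allocates, initialises and registers the object in one call; teardown then becomes a single kobject_put(). A minimal sketch of that pairing (struct and function names are illustrative):

#include <linux/kobject.h>

struct bank {
	struct kobject *kobj;
};

static int bank_setup(struct bank *b, struct kobject *parent, const char *name)
{
	b->kobj = kobject_create_and_add(name, parent);
	return b->kobj ? 0 : -ENOMEM;
}

static void bank_teardown(struct bank *b)
{
	kobject_put(b->kobj);
}

The added parentheses around (1 << bank) in the bank_map tests are a readability change only; operator precedence already grouped the expression that way.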
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index be4dabfee1f5..cb03345554a5 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -57,7 +57,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs) | |||
57 | /* Thermal interrupt handler for this CPU setup */ | 57 | /* Thermal interrupt handler for this CPU setup */ |
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | 58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; |
59 | 59 | ||
60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | 60 | void smp_thermal_interrupt(struct pt_regs *regs) |
61 | { | 61 | { |
62 | irq_enter(); | 62 | irq_enter(); |
63 | vendor_thermal_interrupt(regs); | 63 | vendor_thermal_interrupt(regs); |
@@ -141,7 +141,7 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
141 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | 141 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); |
142 | } | 142 | } |
143 | 143 | ||
144 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | 144 | static void intel_machine_check(struct pt_regs * regs, long error_code) |
145 | { | 145 | { |
146 | int recover=1; | 146 | int recover=1; |
147 | u32 alow, ahigh, high, low; | 147 | u32 alow, ahigh, high, low; |
@@ -152,38 +152,41 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
152 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 152 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
153 | recover=0; | 153 | recover=0; |
154 | 154 | ||
155 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 155 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
156 | smp_processor_id(), mcgsth, mcgstl); | 156 | smp_processor_id(), mcgsth, mcgstl); |
157 | 157 | ||
158 | if (mce_num_extended_msrs > 0) { | 158 | if (mce_num_extended_msrs > 0) { |
159 | struct intel_mce_extended_msrs dbg; | 159 | struct intel_mce_extended_msrs dbg; |
160 | intel_get_extended_msrs(&dbg); | 160 | intel_get_extended_msrs(&dbg); |
161 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | 161 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" |
162 | smp_processor_id(), dbg.eip, dbg.eflags); | 162 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" |
163 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | 163 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", |
164 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); | 164 | smp_processor_id(), dbg.eip, dbg.eflags, |
165 | printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | 165 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, |
166 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | 166 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); |
167 | } | 167 | } |
168 | 168 | ||
169 | for (i=0; i<nr_mce_banks; i++) { | 169 | for (i = 0; i < nr_mce_banks; i++) { |
170 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | 170 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
171 | if (high & (1<<31)) { | 171 | if (high & (1<<31)) { |
172 | char misc[20]; | ||
173 | char addr[24]; | ||
174 | misc[0] = addr[0] = '\0'; | ||
172 | if (high & (1<<29)) | 175 | if (high & (1<<29)) |
173 | recover |= 1; | 176 | recover |= 1; |
174 | if (high & (1<<25)) | 177 | if (high & (1<<25)) |
175 | recover |= 2; | 178 | recover |= 2; |
176 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
177 | high &= ~(1<<31); | 179 | high &= ~(1<<31); |
178 | if (high & (1<<27)) { | 180 | if (high & (1<<27)) { |
179 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 181 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
180 | printk ("[%08x%08x]", ahigh, alow); | 182 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
181 | } | 183 | } |
182 | if (high & (1<<26)) { | 184 | if (high & (1<<26)) { |
183 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 185 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
184 | printk (" at %08x%08x", ahigh, alow); | 186 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
185 | } | 187 | } |
186 | printk ("\n"); | 188 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
189 | smp_processor_id(), i, high, low, misc, addr); | ||
187 | } | 190 | } |
188 | } | 191 | } |
189 | 192 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 94bc43d950cf..a18310aaae0c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include "mce.h" | 16 | #include "mce.h" |
17 | 17 | ||
18 | /* Machine check handler for Pentium class Intel */ | 18 | /* Machine check handler for Pentium class Intel */ |
19 | static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) | 19 | static void pentium_machine_check(struct pt_regs * regs, long error_code) |
20 | { | 20 | { |
21 | u32 loaddr, hi, lotype; | 21 | u32 loaddr, hi, lotype; |
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index deeae42ce199..74342604d30e 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include "mce.h" | 16 | #include "mce.h" |
17 | 17 | ||
18 | /* Machine Check Handler For PII/PIII */ | 18 | /* Machine Check Handler For PII/PIII */ |
19 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | 19 | static void intel_machine_check(struct pt_regs * regs, long error_code) |
20 | { | 20 | { |
21 | int recover=1; | 21 | int recover=1; |
22 | u32 alow, ahigh, high, low; | 22 | u32 alow, ahigh, high, low; |
@@ -27,27 +27,30 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
27 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 27 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
28 | recover=0; | 28 | recover=0; |
29 | 29 | ||
30 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 30 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
31 | smp_processor_id(), mcgsth, mcgstl); | 31 | smp_processor_id(), mcgsth, mcgstl); |
32 | 32 | ||
33 | for (i=0; i<nr_mce_banks; i++) { | 33 | for (i = 0; i < nr_mce_banks; i++) { |
34 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | 34 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
35 | if (high & (1<<31)) { | 35 | if (high & (1<<31)) { |
36 | char misc[20]; | ||
37 | char addr[24]; | ||
38 | misc[0] = addr[0] = '\0'; | ||
36 | if (high & (1<<29)) | 39 | if (high & (1<<29)) |
37 | recover |= 1; | 40 | recover |= 1; |
38 | if (high & (1<<25)) | 41 | if (high & (1<<25)) |
39 | recover |= 2; | 42 | recover |= 2; |
40 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
41 | high &= ~(1<<31); | 43 | high &= ~(1<<31); |
42 | if (high & (1<<27)) { | 44 | if (high & (1<<27)) { |
43 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
44 | printk ("[%08x%08x]", ahigh, alow); | 46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
45 | } | 47 | } |
46 | if (high & (1<<26)) { | 48 | if (high & (1<<26)) { |
47 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
48 | printk (" at %08x%08x", ahigh, alow); | 50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
49 | } | 51 | } |
50 | printk ("\n"); | 52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
53 | smp_processor_id(), i, high, low, misc, addr); | ||
51 | } | 54 | } |
52 | } | 55 | } |
53 | 56 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 9e424b6c293d..3d428d5afc52 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include "mce.h" | 15 | #include "mce.h" |
16 | 16 | ||
17 | /* Machine check handler for WinChip C6 */ | 17 | /* Machine check handler for WinChip C6 */ |
18 | static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) | 18 | static void winchip_machine_check(struct pt_regs * regs, long error_code) |
19 | { | 19 | { |
20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
21 | add_taint(TAINT_MACHINE_CHECK); | 21 | add_taint(TAINT_MACHINE_CHECK); |
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 0949cdbf848a..ee2331b0e58f 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int reg, unsigned long base, | |||
53 | <base> The base address of the region. | 53 | <base> The base address of the region. |
54 | <size> The size of the region. If this is 0 the region is disabled. | 54 | <size> The size of the region. If this is 0 the region is disabled. |
55 | <type> The type of the region. | 55 | <type> The type of the region. |
56 | <do_safe> If TRUE, do the change safely. If FALSE, safety measures should | ||
57 | be done externally. | ||
58 | [RETURNS] Nothing. | 56 | [RETURNS] Nothing. |
59 | */ | 57 | */ |
60 | { | 58 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 9964be3de2b7..ff14c320040c 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -4,10 +4,9 @@ | |||
4 | #include <asm/msr.h> | 4 | #include <asm/msr.h> |
5 | #include <asm/io.h> | 5 | #include <asm/io.h> |
6 | #include <asm/processor-cyrix.h> | 6 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | ||
7 | #include "mtrr.h" | 8 | #include "mtrr.h" |
8 | 9 | ||
9 | int arr3_protected; | ||
10 | |||
11 | static void | 10 | static void |
12 | cyrix_get_arr(unsigned int reg, unsigned long *base, | 11 | cyrix_get_arr(unsigned int reg, unsigned long *base, |
13 | unsigned long *size, mtrr_type * type) | 12 | unsigned long *size, mtrr_type * type) |
@@ -98,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
98 | case 4: | 97 | case 4: |
99 | return replace_reg; | 98 | return replace_reg; |
100 | case 3: | 99 | case 3: |
101 | if (arr3_protected) | ||
102 | break; | ||
103 | case 2: | 100 | case 2: |
104 | case 1: | 101 | case 1: |
105 | case 0: | 102 | case 0: |
@@ -114,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
114 | } else { | 111 | } else { |
115 | for (i = 0; i < 7; i++) { | 112 | for (i = 0; i < 7; i++) { |
116 | cyrix_get_arr(i, &lbase, &lsize, &ltype); | 113 | cyrix_get_arr(i, &lbase, &lsize, &ltype); |
117 | if ((i == 3) && arr3_protected) | ||
118 | continue; | ||
119 | if (lsize == 0) | 114 | if (lsize == 0) |
120 | return i; | 115 | return i; |
121 | } | 116 | } |
@@ -142,7 +137,7 @@ static void prepare_set(void) | |||
142 | 137 | ||
143 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | 138 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as |
144 | a side-effect */ | 139 | a side-effect */ |
145 | cr0 = read_cr0() | 0x40000000; | 140 | cr0 = read_cr0() | X86_CR0_CD; |
146 | wbinvd(); | 141 | wbinvd(); |
147 | write_cr0(cr0); | 142 | write_cr0(cr0); |
148 | wbinvd(); | 143 | wbinvd(); |
@@ -259,107 +254,6 @@ static void cyrix_set_all(void) | |||
259 | post_set(); | 254 | post_set(); |
260 | } | 255 | } |
261 | 256 | ||
262 | #if 0 | ||
263 | /* | ||
264 | * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection | ||
265 | * with the SMM (System Management Mode) mode. So we need the following: | ||
266 | * Check whether SMI_LOCK (CCR3 bit 0) is set | ||
267 | * if it is set, write a warning message: ARR3 cannot be changed! | ||
268 | * (it cannot be changed until the next processor reset) | ||
269 | * if it is reset, then we can change it, set all the needed bits: | ||
270 | * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) | ||
271 | * - disable access to SMM memory (CCR1 bit 2 reset) | ||
272 | * - disable SMM mode (CCR1 bit 1 reset) | ||
273 | * - disable write protection of ARR3 (CCR6 bit 1 reset) | ||
274 | * - (maybe) disable ARR3 | ||
275 | * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) | ||
276 | */ | ||
277 | static void __init | ||
278 | cyrix_arr_init(void) | ||
279 | { | ||
280 | struct set_mtrr_context ctxt; | ||
281 | unsigned char ccr[7]; | ||
282 | int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; | ||
283 | #ifdef CONFIG_SMP | ||
284 | int i; | ||
285 | #endif | ||
286 | |||
287 | /* flush cache and enable MAPEN */ | ||
288 | set_mtrr_prepare_save(&ctxt); | ||
289 | set_mtrr_cache_disable(&ctxt); | ||
290 | |||
291 | /* Save all CCRs locally */ | ||
292 | ccr[0] = getCx86(CX86_CCR0); | ||
293 | ccr[1] = getCx86(CX86_CCR1); | ||
294 | ccr[2] = getCx86(CX86_CCR2); | ||
295 | ccr[3] = ctxt.ccr3; | ||
296 | ccr[4] = getCx86(CX86_CCR4); | ||
297 | ccr[5] = getCx86(CX86_CCR5); | ||
298 | ccr[6] = getCx86(CX86_CCR6); | ||
299 | |||
300 | if (ccr[3] & 1) { | ||
301 | ccrc[3] = 1; | ||
302 | arr3_protected = 1; | ||
303 | } else { | ||
304 | /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and | ||
305 | * access to SMM memory through ARR3 (bit 7). | ||
306 | */ | ||
307 | if (ccr[1] & 0x80) { | ||
308 | ccr[1] &= 0x7f; | ||
309 | ccrc[1] |= 0x80; | ||
310 | } | ||
311 | if (ccr[1] & 0x04) { | ||
312 | ccr[1] &= 0xfb; | ||
313 | ccrc[1] |= 0x04; | ||
314 | } | ||
315 | if (ccr[1] & 0x02) { | ||
316 | ccr[1] &= 0xfd; | ||
317 | ccrc[1] |= 0x02; | ||
318 | } | ||
319 | arr3_protected = 0; | ||
320 | if (ccr[6] & 0x02) { | ||
321 | ccr[6] &= 0xfd; | ||
322 | ccrc[6] = 1; /* Disable write protection of ARR3 */ | ||
323 | setCx86(CX86_CCR6, ccr[6]); | ||
324 | } | ||
325 | /* Disable ARR3. This is safe now that we disabled SMM. */ | ||
326 | /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ | ||
327 | } | ||
328 | /* If we changed CCR1 in memory, change it in the processor, too. */ | ||
329 | if (ccrc[1]) | ||
330 | setCx86(CX86_CCR1, ccr[1]); | ||
331 | |||
332 | /* Enable ARR usage by the processor */ | ||
333 | if (!(ccr[5] & 0x20)) { | ||
334 | ccr[5] |= 0x20; | ||
335 | ccrc[5] = 1; | ||
336 | setCx86(CX86_CCR5, ccr[5]); | ||
337 | } | ||
338 | #ifdef CONFIG_SMP | ||
339 | for (i = 0; i < 7; i++) | ||
340 | ccr_state[i] = ccr[i]; | ||
341 | for (i = 0; i < 8; i++) | ||
342 | cyrix_get_arr(i, | ||
343 | &arr_state[i].base, &arr_state[i].size, | ||
344 | &arr_state[i].type); | ||
345 | #endif | ||
346 | |||
347 | set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ | ||
348 | |||
349 | if (ccrc[5]) | ||
350 | printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); | ||
351 | if (ccrc[3]) | ||
352 | printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); | ||
353 | /* | ||
354 | if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); | ||
355 | if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); | ||
356 | if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); | ||
357 | */ | ||
358 | if (ccrc[6]) | ||
359 | printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); | ||
360 | } | ||
361 | #endif | ||
362 | |||
363 | static struct mtrr_ops cyrix_mtrr_ops = { | 257 | static struct mtrr_ops cyrix_mtrr_ops = { |
364 | .vendor = X86_VENDOR_CYRIX, | 258 | .vendor = X86_VENDOR_CYRIX, |
365 | // .init = cyrix_arr_init, | 259 | // .init = cyrix_arr_init, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 992f08dfbb6c..103d61a59b19 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -9,11 +9,12 @@ | |||
9 | #include <asm/msr.h> | 9 | #include <asm/msr.h> |
10 | #include <asm/system.h> | 10 | #include <asm/system.h> |
11 | #include <asm/cpufeature.h> | 11 | #include <asm/cpufeature.h> |
12 | #include <asm/processor-flags.h> | ||
12 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
13 | #include "mtrr.h" | 14 | #include "mtrr.h" |
14 | 15 | ||
15 | struct mtrr_state { | 16 | struct mtrr_state { |
16 | struct mtrr_var_range *var_ranges; | 17 | struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; |
17 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; | 18 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; |
18 | unsigned char enabled; | 19 | unsigned char enabled; |
19 | unsigned char have_fixed; | 20 | unsigned char have_fixed; |
@@ -85,12 +86,6 @@ void __init get_mtrr_state(void) | |||
85 | struct mtrr_var_range *vrs; | 86 | struct mtrr_var_range *vrs; |
86 | unsigned lo, dummy; | 87 | unsigned lo, dummy; |
87 | 88 | ||
88 | if (!mtrr_state.var_ranges) { | ||
89 | mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), | ||
90 | GFP_KERNEL); | ||
91 | if (!mtrr_state.var_ranges) | ||
92 | return; | ||
93 | } | ||
94 | vrs = mtrr_state.var_ranges; | 89 | vrs = mtrr_state.var_ranges; |
95 | 90 | ||
96 | rdmsr(MTRRcap_MSR, lo, dummy); | 91 | rdmsr(MTRRcap_MSR, lo, dummy); |
@@ -188,7 +183,7 @@ static inline void k8_enable_fixed_iorrs(void) | |||
188 | * \param changed pointer which indicates whether the MTRR needed to be changed | 183 | * \param changed pointer which indicates whether the MTRR needed to be changed |
189 | * \param msrwords pointer to the MSR values which the MSR should have | 184 | * \param msrwords pointer to the MSR values which the MSR should have |
190 | */ | 185 | */ |
191 | static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) | 186 | static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) |
192 | { | 187 | { |
193 | unsigned lo, hi; | 188 | unsigned lo, hi; |
194 | 189 | ||
@@ -200,7 +195,7 @@ static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) | |||
200 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) | 195 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) |
201 | k8_enable_fixed_iorrs(); | 196 | k8_enable_fixed_iorrs(); |
202 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); | 197 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); |
203 | *changed = TRUE; | 198 | *changed = true; |
204 | } | 199 | } |
205 | } | 200 | } |
206 | 201 | ||
@@ -260,7 +255,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
260 | static int set_fixed_ranges(mtrr_type * frs) | 255 | static int set_fixed_ranges(mtrr_type * frs) |
261 | { | 256 | { |
262 | unsigned long long *saved = (unsigned long long *) frs; | 257 | unsigned long long *saved = (unsigned long long *) frs; |
263 | int changed = FALSE; | 258 | bool changed = false; |
264 | int block=-1, range; | 259 | int block=-1, range; |
265 | 260 | ||
266 | while (fixed_range_blocks[++block].ranges) | 261 | while (fixed_range_blocks[++block].ranges) |
@@ -273,17 +268,17 @@ static int set_fixed_ranges(mtrr_type * frs) | |||
273 | 268 | ||
274 | /* Set the MSR pair relating to a var range. Returns TRUE if | 269 | /* Set the MSR pair relating to a var range. Returns TRUE if |
275 | changes are made */ | 270 | changes are made */ |
276 | static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | 271 | static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) |
277 | { | 272 | { |
278 | unsigned int lo, hi; | 273 | unsigned int lo, hi; |
279 | int changed = FALSE; | 274 | bool changed = false; |
280 | 275 | ||
281 | rdmsr(MTRRphysBase_MSR(index), lo, hi); | 276 | rdmsr(MTRRphysBase_MSR(index), lo, hi); |
282 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) | 277 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) |
283 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != | 278 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != |
284 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { | 279 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { |
285 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); | 280 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); |
286 | changed = TRUE; | 281 | changed = true; |
287 | } | 282 | } |
288 | 283 | ||
289 | rdmsr(MTRRphysMask_MSR(index), lo, hi); | 284 | rdmsr(MTRRphysMask_MSR(index), lo, hi); |
@@ -292,7 +287,7 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | |||
292 | || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != | 287 | || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != |
293 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { | 288 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { |
294 | mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 289 | mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
295 | changed = TRUE; | 290 | changed = true; |
296 | } | 291 | } |
297 | return changed; | 292 | return changed; |
298 | } | 293 | } |
@@ -350,7 +345,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
350 | spin_lock(&set_atomicity_lock); | 345 | spin_lock(&set_atomicity_lock); |
351 | 346 | ||
352 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 347 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
353 | cr0 = read_cr0() | 0x40000000; /* set CD flag */ | 348 | cr0 = read_cr0() | X86_CR0_CD; |
354 | write_cr0(cr0); | 349 | write_cr0(cr0); |
355 | wbinvd(); | 350 | wbinvd(); |
356 | 351 | ||
@@ -417,8 +412,6 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, | |||
417 | <base> The base address of the region. | 412 | <base> The base address of the region. |
418 | <size> The size of the region. If this is 0 the region is disabled. | 413 | <size> The size of the region. If this is 0 the region is disabled. |
419 | <type> The type of the region. | 414 | <type> The type of the region. |
420 | <do_safe> If TRUE, do the change safely. If FALSE, safety measures should | ||
421 | be done externally. | ||
422 | [RETURNS] Nothing. | 415 | [RETURNS] Nothing. |
423 | */ | 416 | */ |
424 | { | 417 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index c7d8f1756745..91e150acb46c 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -11,10 +11,6 @@ | |||
11 | #include <asm/mtrr.h> | 11 | #include <asm/mtrr.h> |
12 | #include "mtrr.h" | 12 | #include "mtrr.h" |
13 | 13 | ||
14 | /* RED-PEN: this is accessed without any locking */ | ||
15 | extern unsigned int *usage_table; | ||
16 | |||
17 | |||
18 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) | 14 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) |
19 | 15 | ||
20 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = | 16 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = |
@@ -37,7 +33,7 @@ const char *mtrr_attrib_to_str(int x) | |||
37 | 33 | ||
38 | static int | 34 | static int |
39 | mtrr_file_add(unsigned long base, unsigned long size, | 35 | mtrr_file_add(unsigned long base, unsigned long size, |
40 | unsigned int type, char increment, struct file *file, int page) | 36 | unsigned int type, bool increment, struct file *file, int page) |
41 | { | 37 | { |
42 | int reg, max; | 38 | int reg, max; |
43 | unsigned int *fcount = FILE_FCOUNT(file); | 39 | unsigned int *fcount = FILE_FCOUNT(file); |
@@ -55,7 +51,7 @@ mtrr_file_add(unsigned long base, unsigned long size, | |||
55 | base >>= PAGE_SHIFT; | 51 | base >>= PAGE_SHIFT; |
56 | size >>= PAGE_SHIFT; | 52 | size >>= PAGE_SHIFT; |
57 | } | 53 | } |
58 | reg = mtrr_add_page(base, size, type, 1); | 54 | reg = mtrr_add_page(base, size, type, true); |
59 | if (reg >= 0) | 55 | if (reg >= 0) |
60 | ++fcount[reg]; | 56 | ++fcount[reg]; |
61 | return reg; | 57 | return reg; |
@@ -141,7 +137,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
141 | size >>= PAGE_SHIFT; | 137 | size >>= PAGE_SHIFT; |
142 | err = | 138 | err = |
143 | mtrr_add_page((unsigned long) base, (unsigned long) size, i, | 139 | mtrr_add_page((unsigned long) base, (unsigned long) size, i, |
144 | 1); | 140 | true); |
145 | if (err < 0) | 141 | if (err < 0) |
146 | return err; | 142 | return err; |
147 | return len; | 143 | return len; |
@@ -217,7 +213,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
217 | if (!capable(CAP_SYS_ADMIN)) | 213 | if (!capable(CAP_SYS_ADMIN)) |
218 | return -EPERM; | 214 | return -EPERM; |
219 | err = | 215 | err = |
220 | mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, | 216 | mtrr_file_add(sentry.base, sentry.size, sentry.type, true, |
221 | file, 0); | 217 | file, 0); |
222 | break; | 218 | break; |
223 | case MTRRIOC_SET_ENTRY: | 219 | case MTRRIOC_SET_ENTRY: |
@@ -226,7 +222,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
226 | #endif | 222 | #endif |
227 | if (!capable(CAP_SYS_ADMIN)) | 223 | if (!capable(CAP_SYS_ADMIN)) |
228 | return -EPERM; | 224 | return -EPERM; |
229 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); | 225 | err = mtrr_add(sentry.base, sentry.size, sentry.type, false); |
230 | break; | 226 | break; |
231 | case MTRRIOC_DEL_ENTRY: | 227 | case MTRRIOC_DEL_ENTRY: |
232 | #ifdef CONFIG_COMPAT | 228 | #ifdef CONFIG_COMPAT |
@@ -270,7 +266,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
270 | if (!capable(CAP_SYS_ADMIN)) | 266 | if (!capable(CAP_SYS_ADMIN)) |
271 | return -EPERM; | 267 | return -EPERM; |
272 | err = | 268 | err = |
273 | mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, | 269 | mtrr_file_add(sentry.base, sentry.size, sentry.type, true, |
274 | file, 1); | 270 | file, 1); |
275 | break; | 271 | break; |
276 | case MTRRIOC_SET_PAGE_ENTRY: | 272 | case MTRRIOC_SET_PAGE_ENTRY: |
@@ -279,7 +275,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
279 | #endif | 275 | #endif |
280 | if (!capable(CAP_SYS_ADMIN)) | 276 | if (!capable(CAP_SYS_ADMIN)) |
281 | return -EPERM; | 277 | return -EPERM; |
282 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); | 278 | err = |
279 | mtrr_add_page(sentry.base, sentry.size, sentry.type, false); | ||
283 | break; | 280 | break; |
284 | case MTRRIOC_DEL_PAGE_ENTRY: | 281 | case MTRRIOC_DEL_PAGE_ENTRY: |
285 | #ifdef CONFIG_COMPAT | 282 | #ifdef CONFIG_COMPAT |
@@ -396,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
396 | for (i = 0; i < max; i++) { | 393 | for (i = 0; i < max; i++) { |
397 | mtrr_if->get(i, &base, &size, &type); | 394 | mtrr_if->get(i, &base, &size, &type); |
398 | if (size == 0) | 395 | if (size == 0) |
399 | usage_table[i] = 0; | 396 | mtrr_usage_table[i] = 0; |
400 | else { | 397 | else { |
401 | if (size < (0x100000 >> PAGE_SHIFT)) { | 398 | if (size < (0x100000 >> PAGE_SHIFT)) { |
402 | /* less than 1MB */ | 399 | /* less than 1MB */ |
@@ -410,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
410 | len += seq_printf(seq, | 407 | len += seq_printf(seq, |
411 | "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", | 408 | "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", |
412 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, |
413 | mtrr_attrib_to_str(type), usage_table[i]); | 410 | mtrr_attrib_to_str(type), mtrr_usage_table[i]); |
414 | } | 411 | } |
415 | } | 412 | } |
416 | return 0; | 413 | return 0; |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 3b20613325dc..b6e136f23d3d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -38,8 +38,8 @@ | |||
38 | #include <linux/cpu.h> | 38 | #include <linux/cpu.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | 40 | ||
41 | #include <asm/e820.h> | ||
41 | #include <asm/mtrr.h> | 42 | #include <asm/mtrr.h> |
42 | |||
43 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
44 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
45 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
@@ -47,7 +47,7 @@ | |||
47 | 47 | ||
48 | u32 num_var_ranges = 0; | 48 | u32 num_var_ranges = 0; |
49 | 49 | ||
50 | unsigned int *usage_table; | 50 | unsigned int mtrr_usage_table[MAX_VAR_RANGES]; |
51 | static DEFINE_MUTEX(mtrr_mutex); | 51 | static DEFINE_MUTEX(mtrr_mutex); |
52 | 52 | ||
53 | u64 size_or_mask, size_and_mask; | 53 | u64 size_or_mask, size_and_mask; |
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL; | |||
59 | static void set_mtrr(unsigned int reg, unsigned long base, | 59 | static void set_mtrr(unsigned int reg, unsigned long base, |
60 | unsigned long size, mtrr_type type); | 60 | unsigned long size, mtrr_type type); |
61 | 61 | ||
62 | #ifndef CONFIG_X86_64 | ||
63 | extern int arr3_protected; | ||
64 | #else | ||
65 | #define arr3_protected 0 | ||
66 | #endif | ||
67 | |||
68 | void set_mtrr_ops(struct mtrr_ops * ops) | 62 | void set_mtrr_ops(struct mtrr_ops * ops) |
69 | { | 63 | { |
70 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 64 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
@@ -121,13 +115,8 @@ static void __init init_table(void) | |||
121 | int i, max; | 115 | int i, max; |
122 | 116 | ||
123 | max = num_var_ranges; | 117 | max = num_var_ranges; |
124 | if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) | ||
125 | == NULL) { | ||
126 | printk(KERN_ERR "mtrr: could not allocate\n"); | ||
127 | return; | ||
128 | } | ||
129 | for (i = 0; i < max; i++) | 118 | for (i = 0; i < max; i++) |
130 | usage_table[i] = 1; | 119 | mtrr_usage_table[i] = 1; |
131 | } | 120 | } |
132 | 121 | ||
133 | struct set_mtrr_data { | 122 | struct set_mtrr_data { |
@@ -311,7 +300,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, | |||
311 | */ | 300 | */ |
312 | 301 | ||
313 | int mtrr_add_page(unsigned long base, unsigned long size, | 302 | int mtrr_add_page(unsigned long base, unsigned long size, |
314 | unsigned int type, char increment) | 303 | unsigned int type, bool increment) |
315 | { | 304 | { |
316 | int i, replace, error; | 305 | int i, replace, error; |
317 | mtrr_type ltype; | 306 | mtrr_type ltype; |
@@ -349,7 +338,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
349 | replace = -1; | 338 | replace = -1; |
350 | 339 | ||
351 | /* No CPU hotplug when we change MTRR entries */ | 340 | /* No CPU hotplug when we change MTRR entries */ |
352 | lock_cpu_hotplug(); | 341 | get_online_cpus(); |
353 | /* Search for existing MTRR */ | 342 | /* Search for existing MTRR */ |
354 | mutex_lock(&mtrr_mutex); | 343 | mutex_lock(&mtrr_mutex); |
355 | for (i = 0; i < num_var_ranges; ++i) { | 344 | for (i = 0; i < num_var_ranges; ++i) { |
@@ -383,7 +372,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
383 | goto out; | 372 | goto out; |
384 | } | 373 | } |
385 | if (increment) | 374 | if (increment) |
386 | ++usage_table[i]; | 375 | ++mtrr_usage_table[i]; |
387 | error = i; | 376 | error = i; |
388 | goto out; | 377 | goto out; |
389 | } | 378 | } |
@@ -391,13 +380,15 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
391 | i = mtrr_if->get_free_region(base, size, replace); | 380 | i = mtrr_if->get_free_region(base, size, replace); |
392 | if (i >= 0) { | 381 | if (i >= 0) { |
393 | set_mtrr(i, base, size, type); | 382 | set_mtrr(i, base, size, type); |
394 | if (likely(replace < 0)) | 383 | if (likely(replace < 0)) { |
395 | usage_table[i] = 1; | 384 | mtrr_usage_table[i] = 1; |
396 | else { | 385 | } else { |
397 | usage_table[i] = usage_table[replace] + !!increment; | 386 | mtrr_usage_table[i] = mtrr_usage_table[replace]; |
387 | if (increment) | ||
388 | mtrr_usage_table[i]++; | ||
398 | if (unlikely(replace != i)) { | 389 | if (unlikely(replace != i)) { |
399 | set_mtrr(replace, 0, 0, 0); | 390 | set_mtrr(replace, 0, 0, 0); |
400 | usage_table[replace] = 0; | 391 | mtrr_usage_table[replace] = 0; |
401 | } | 392 | } |
402 | } | 393 | } |
403 | } else | 394 | } else |
@@ -405,7 +396,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
405 | error = i; | 396 | error = i; |
406 | out: | 397 | out: |
407 | mutex_unlock(&mtrr_mutex); | 398 | mutex_unlock(&mtrr_mutex); |
408 | unlock_cpu_hotplug(); | 399 | put_online_cpus(); |
409 | return error; | 400 | return error; |
410 | } | 401 | } |
411 | 402 | ||
@@ -460,7 +451,7 @@ static int mtrr_check(unsigned long base, unsigned long size) | |||
460 | 451 | ||
461 | int | 452 | int |
462 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, | 453 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, |
463 | char increment) | 454 | bool increment) |
464 | { | 455 | { |
465 | if (mtrr_check(base, size)) | 456 | if (mtrr_check(base, size)) |
466 | return -EINVAL; | 457 | return -EINVAL; |
@@ -495,7 +486,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
495 | 486 | ||
496 | max = num_var_ranges; | 487 | max = num_var_ranges; |
497 | /* No CPU hotplug when we change MTRR entries */ | 488 | /* No CPU hotplug when we change MTRR entries */ |
498 | lock_cpu_hotplug(); | 489 | get_online_cpus(); |
499 | mutex_lock(&mtrr_mutex); | 490 | mutex_lock(&mtrr_mutex); |
500 | if (reg < 0) { | 491 | if (reg < 0) { |
501 | /* Search for existing MTRR */ | 492 | /* Search for existing MTRR */ |
@@ -516,27 +507,21 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
516 | printk(KERN_WARNING "mtrr: register: %d too big\n", reg); | 507 | printk(KERN_WARNING "mtrr: register: %d too big\n", reg); |
517 | goto out; | 508 | goto out; |
518 | } | 509 | } |
519 | if (is_cpu(CYRIX) && !use_intel()) { | ||
520 | if ((reg == 3) && arr3_protected) { | ||
521 | printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); | ||
522 | goto out; | ||
523 | } | ||
524 | } | ||
525 | mtrr_if->get(reg, &lbase, &lsize, <ype); | 510 | mtrr_if->get(reg, &lbase, &lsize, <ype); |
526 | if (lsize < 1) { | 511 | if (lsize < 1) { |
527 | printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); | 512 | printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); |
528 | goto out; | 513 | goto out; |
529 | } | 514 | } |
530 | if (usage_table[reg] < 1) { | 515 | if (mtrr_usage_table[reg] < 1) { |
531 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); | 516 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); |
532 | goto out; | 517 | goto out; |
533 | } | 518 | } |
534 | if (--usage_table[reg] < 1) | 519 | if (--mtrr_usage_table[reg] < 1) |
535 | set_mtrr(reg, 0, 0, 0); | 520 | set_mtrr(reg, 0, 0, 0); |
536 | error = reg; | 521 | error = reg; |
537 | out: | 522 | out: |
538 | mutex_unlock(&mtrr_mutex); | 523 | mutex_unlock(&mtrr_mutex); |
539 | unlock_cpu_hotplug(); | 524 | put_online_cpus(); |
540 | return error; | 525 | return error; |
541 | } | 526 | } |
542 | /** | 527 | /** |
@@ -569,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del); | |||
569 | * These should be called implicitly, but we can't yet until all the initcall | 554 | * These should be called implicitly, but we can't yet until all the initcall |
570 | * stuff is done... | 555 | * stuff is done... |
571 | */ | 556 | */ |
572 | extern void amd_init_mtrr(void); | ||
573 | extern void cyrix_init_mtrr(void); | ||
574 | extern void centaur_init_mtrr(void); | ||
575 | |||
576 | static void __init init_ifs(void) | 557 | static void __init init_ifs(void) |
577 | { | 558 | { |
578 | #ifndef CONFIG_X86_64 | 559 | #ifndef CONFIG_X86_64 |
@@ -591,16 +572,11 @@ struct mtrr_value { | |||
591 | unsigned long lsize; | 572 | unsigned long lsize; |
592 | }; | 573 | }; |
593 | 574 | ||
594 | static struct mtrr_value * mtrr_state; | 575 | static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; |
595 | 576 | ||
596 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 577 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) |
597 | { | 578 | { |
598 | int i; | 579 | int i; |
599 | int size = num_var_ranges * sizeof(struct mtrr_value); | ||
600 | |||
601 | mtrr_state = kzalloc(size,GFP_ATOMIC); | ||
602 | if (!mtrr_state) | ||
603 | return -ENOMEM; | ||
604 | 580 | ||
605 | for (i = 0; i < num_var_ranges; i++) { | 581 | for (i = 0; i < num_var_ranges; i++) { |
606 | mtrr_if->get(i, | 582 | mtrr_if->get(i, |
@@ -622,7 +598,6 @@ static int mtrr_restore(struct sys_device * sysdev) | |||
622 | mtrr_state[i].lsize, | 598 | mtrr_state[i].lsize, |
623 | mtrr_state[i].ltype); | 599 | mtrr_state[i].ltype); |
624 | } | 600 | } |
625 | kfree(mtrr_state); | ||
626 | return 0; | 601 | return 0; |
627 | } | 602 | } |
628 | 603 | ||
@@ -633,6 +608,111 @@ static struct sysdev_driver mtrr_sysdev_driver = { | |||
633 | .resume = mtrr_restore, | 608 | .resume = mtrr_restore, |
634 | }; | 609 | }; |
635 | 610 | ||
611 | static int disable_mtrr_trim; | ||
612 | |||
613 | static int __init disable_mtrr_trim_setup(char *str) | ||
614 | { | ||
615 | disable_mtrr_trim = 1; | ||
616 | return 0; | ||
617 | } | ||
618 | early_param("disable_mtrr_trim", disable_mtrr_trim_setup); | ||
619 | |||
620 | /* | ||
621 | * AMD K8 and later CPUs have a special magic MSR bit to force WB | ||
622 | * for memory above 4GB. Check for that here. | ||
623 | * Note this won't check whether the MTRRs below 4GB (where the magic bit | ||
624 | * doesn't apply) are wrong, but so far we don't know of any such case in the wild. | ||
625 | */ | ||
626 | #define Tom2Enabled (1U << 21) | ||
627 | #define Tom2ForceMemTypeWB (1U << 22) | ||
628 | |||
629 | static __init int amd_special_default_mtrr(void) | ||
630 | { | ||
631 | u32 l, h; | ||
632 | |||
633 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
634 | return 0; | ||
635 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | ||
636 | return 0; | ||
637 | /* In case some hypervisor doesn't pass SYSCFG through */ | ||
638 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | ||
639 | return 0; | ||
640 | /* | ||
641 | * Memory between 4GB and top of mem is forced WB by this magic bit. | ||
642 | * Reserved before K8RevF, but should be zero there. | ||
643 | */ | ||
644 | if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == | ||
645 | (Tom2Enabled | Tom2ForceMemTypeWB)) | ||
646 | return 1; | ||
647 | return 0; | ||
648 | } | ||
649 | |||
650 | /** | ||
651 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | ||
652 | * | ||
653 | * Some buggy BIOSes don't set up the MTRRs properly for systems with certain | ||
654 | * memory configurations. This routine checks that the highest MTRR matches | ||
655 | * the end of memory, to make sure the MTRRs of write-back type cover | ||
656 | * all of the memory the kernel intends to use. If not, it trims any | ||
657 | * memory off the end by adjusting end_pfn, removing it from the kernel's | ||
658 | * allocation pools and warning the user with an obnoxious message. | ||
659 | */ | ||
660 | int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | ||
661 | { | ||
662 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | ||
663 | mtrr_type type; | ||
664 | u64 trim_start, trim_size; | ||
665 | |||
666 | /* | ||
667 | * Make sure we only trim uncachable memory on machines that | ||
668 | * support the Intel MTRR architecture: | ||
669 | */ | ||
670 | if (!is_cpu(INTEL) || disable_mtrr_trim) | ||
671 | return 0; | ||
672 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
673 | def &= 0xff; | ||
674 | if (def != MTRR_TYPE_UNCACHABLE) | ||
675 | return 0; | ||
676 | |||
677 | if (amd_special_default_mtrr()) | ||
678 | return 0; | ||
679 | |||
680 | /* Find highest cached pfn */ | ||
681 | for (i = 0; i < num_var_ranges; i++) { | ||
682 | mtrr_if->get(i, &base, &size, &type); | ||
683 | if (type != MTRR_TYPE_WRBACK) | ||
684 | continue; | ||
685 | if (highest_pfn < base + size) | ||
686 | highest_pfn = base + size; | ||
687 | } | ||
688 | |||
689 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | ||
690 | if (!highest_pfn) { | ||
691 | printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); | ||
692 | WARN_ON(1); | ||
693 | return 0; | ||
694 | } | ||
695 | |||
696 | if (highest_pfn < end_pfn) { | ||
697 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | ||
698 | " all of memory, losing %luMB of RAM.\n", | ||
699 | (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); | ||
700 | |||
701 | WARN_ON(1); | ||
702 | |||
703 | printk(KERN_INFO "update e820 for mtrr\n"); | ||
704 | trim_start = highest_pfn; | ||
705 | trim_start <<= PAGE_SHIFT; | ||
706 | trim_size = end_pfn; | ||
707 | trim_size <<= PAGE_SHIFT; | ||
708 | trim_size -= trim_start; | ||
709 | add_memory_region(trim_start, trim_size, E820_RESERVED); | ||
710 | update_e820(); | ||
711 | return 1; | ||
712 | } | ||
713 | |||
714 | return 0; | ||
715 | } | ||
636 | 716 | ||
637 | /** | 717 | /** |
638 | * mtrr_bp_init - initialize mtrrs on the boot CPU | 718 | * mtrr_bp_init - initialize mtrrs on the boot CPU |
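To make the trim arithmetic in mtrr_trim_uncached_memory() above easier to follow, here is a minimal standalone sketch of the same pfn-to-byte computation. The PAGE_SHIFT define and the sample pfn figures are illustrative assumptions, not taken from the patch:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12   /* 4 KiB pages, as on x86 */

int main(void)
{
	uint64_t highest_pfn = 0x200000; /* highest pfn covered by a WB MTRR: 8 GiB */
	uint64_t end_pfn     = 0x220000; /* what the kernel thinks it has: 8.5 GiB */

	if (highest_pfn < end_pfn) {
		uint64_t trim_start = highest_pfn << PAGE_SHIFT;
		uint64_t trim_size  = (end_pfn << PAGE_SHIFT) - trim_start;

		printf("would reserve %llu MiB at 0x%llx as E820_RESERVED\n",
		       (unsigned long long)(trim_size >> 20),
		       (unsigned long long)trim_start);
	}
	return 0;
}

With these sample numbers the routine would reserve the top 512 MiB of RAM, since no write-back MTRR covers it.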
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 289dfe6030e3..2cc77eb6fea3 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -2,10 +2,8 @@ | |||
2 | * local mtrr defines. | 2 | * local mtrr defines. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #ifndef TRUE | 5 | #include <linux/types.h> |
6 | #define TRUE 1 | 6 | #include <linux/stddef.h> |
7 | #define FALSE 0 | ||
8 | #endif | ||
9 | 7 | ||
10 | #define MTRRcap_MSR 0x0fe | 8 | #define MTRRcap_MSR 0x0fe |
11 | #define MTRRdefType_MSR 0x2ff | 9 | #define MTRRdefType_MSR 0x2ff |
@@ -14,6 +12,7 @@ | |||
14 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | 12 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) |
15 | 13 | ||
16 | #define NUM_FIXED_RANGES 88 | 14 | #define NUM_FIXED_RANGES 88 |
15 | #define MAX_VAR_RANGES 256 | ||
17 | #define MTRRfix64K_00000_MSR 0x250 | 16 | #define MTRRfix64K_00000_MSR 0x250 |
18 | #define MTRRfix16K_80000_MSR 0x258 | 17 | #define MTRRfix16K_80000_MSR 0x258 |
19 | #define MTRRfix16K_A0000_MSR 0x259 | 18 | #define MTRRfix16K_A0000_MSR 0x259 |
@@ -34,6 +33,8 @@ | |||
34 | an 8 bit field: */ | 33 | an 8 bit field: */ |
35 | typedef u8 mtrr_type; | 34 | typedef u8 mtrr_type; |
36 | 35 | ||
36 | extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | ||
37 | |||
37 | struct mtrr_ops { | 38 | struct mtrr_ops { |
38 | u32 vendor; | 39 | u32 vendor; |
39 | u32 use_intel_if; | 40 | u32 use_intel_if; |
@@ -96,3 +97,7 @@ void mtrr_state_warn(void); | |||
96 | const char *mtrr_attrib_to_str(int x); | 97 | const char *mtrr_attrib_to_str(int x); |
97 | void mtrr_wrmsr(unsigned, unsigned, unsigned); | 98 | void mtrr_wrmsr(unsigned, unsigned, unsigned); |
98 | 99 | ||
100 | /* CPU specific mtrr init functions */ | ||
101 | int amd_init_mtrr(void); | ||
102 | int cyrix_init_mtrr(void); | ||
103 | int centaur_init_mtrr(void); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 49e20c2afcdf..9f8ba923d1c9 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <asm/mtrr.h> | 4 | #include <asm/mtrr.h> |
5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | #include <asm/processor-cyrix.h> | 6 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | ||
7 | #include "mtrr.h" | 8 | #include "mtrr.h" |
8 | 9 | ||
9 | 10 | ||
@@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | |||
25 | 26 | ||
26 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | 27 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as |
27 | a side-effect */ | 28 | a side-effect */ |
28 | cr0 = read_cr0() | 0x40000000; | 29 | cr0 = read_cr0() | X86_CR0_CD; |
29 | wbinvd(); | 30 | wbinvd(); |
30 | write_cr0(cr0); | 31 | write_cr0(cr0); |
31 | wbinvd(); | 32 | wbinvd(); |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index c02541e6e653..9b838324b818 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int msr) | |||
167 | clear_bit(counter, evntsel_nmi_owner); | 167 | clear_bit(counter, evntsel_nmi_owner); |
168 | } | 168 | } |
169 | 169 | ||
170 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
171 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 170 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
172 | EXPORT_SYMBOL(reserve_perfctr_nmi); | 171 | EXPORT_SYMBOL(reserve_perfctr_nmi); |
173 | EXPORT_SYMBOL(release_perfctr_nmi); | 172 | EXPORT_SYMBOL(release_perfctr_nmi); |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 3900e46d66db..af11d31dce0a 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -10,80 +10,6 @@ | |||
10 | */ | 10 | */ |
11 | static int show_cpuinfo(struct seq_file *m, void *v) | 11 | static int show_cpuinfo(struct seq_file *m, void *v) |
12 | { | 12 | { |
13 | /* | ||
14 | * These flag bits must match the definitions in <asm/cpufeature.h>. | ||
15 | * NULL means this bit is undefined or reserved; either way it doesn't | ||
16 | * have meaning as far as Linux is concerned. Note that it's important | ||
17 | * to realize there is a difference between this table and CPUID -- if | ||
18 | * applications want to get the raw CPUID data, they should access | ||
19 | * /dev/cpu/<cpu_nr>/cpuid instead. | ||
20 | */ | ||
21 | static const char * const x86_cap_flags[] = { | ||
22 | /* Intel-defined */ | ||
23 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | ||
24 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | ||
25 | "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | ||
26 | "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | ||
27 | |||
28 | /* AMD-defined */ | ||
29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | ||
31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | ||
32 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", | ||
33 | "3dnowext", "3dnow", | ||
34 | |||
35 | /* Transmeta-defined */ | ||
36 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | ||
37 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
38 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
39 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
40 | |||
41 | /* Other (Linux-defined) */ | ||
42 | "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | ||
43 | NULL, NULL, NULL, NULL, | ||
44 | "constant_tsc", "up", NULL, "arch_perfmon", | ||
45 | "pebs", "bts", NULL, "sync_rdtsc", | ||
46 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
47 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
48 | |||
49 | /* Intel-defined (#2) */ | ||
50 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | ||
51 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | ||
52 | NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", | ||
53 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
54 | |||
55 | /* VIA/Cyrix/Centaur-defined */ | ||
56 | NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | ||
57 | "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, | ||
58 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
59 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
60 | |||
61 | /* AMD-defined (#2) */ | ||
62 | "lahf_lm", "cmp_legacy", "svm", "extapic", | ||
63 | "cr8_legacy", "abm", "sse4a", "misalignsse", | ||
64 | "3dnowprefetch", "osvw", "ibs", "sse5", | ||
65 | "skinit", "wdt", NULL, NULL, | ||
66 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
67 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
68 | |||
69 | /* Auxiliary (Linux-defined) */ | ||
70 | "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
71 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
72 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
73 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
74 | }; | ||
75 | static const char * const x86_power_flags[] = { | ||
76 | "ts", /* temperature sensor */ | ||
77 | "fid", /* frequency id control */ | ||
78 | "vid", /* voltage id control */ | ||
79 | "ttp", /* thermal trip */ | ||
80 | "tm", | ||
81 | "stc", | ||
82 | "100mhzsteps", | ||
83 | "hwpstate", | ||
84 | "", /* constant_tsc - moved to flags */ | ||
85 | /* nothing */ | ||
86 | }; | ||
87 | struct cpuinfo_x86 *c = v; | 13 | struct cpuinfo_x86 *c = v; |
88 | int i, n = 0; | 14 | int i, n = 0; |
89 | int fpu_exception; | 15 | int fpu_exception; |
@@ -188,7 +114,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) | |||
188 | static void c_stop(struct seq_file *m, void *v) | 114 | static void c_stop(struct seq_file *m, void *v) |
189 | { | 115 | { |
190 | } | 116 | } |
191 | struct seq_operations cpuinfo_op = { | 117 | const struct seq_operations cpuinfo_op = { |
192 | .start = c_start, | 118 | .start = c_start, |
193 | .next = c_next, | 119 | .next = c_next, |
194 | .stop = c_stop, | 120 | .stop = c_stop, |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 05c9936a16cc..288e7a6598ac 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* ----------------------------------------------------------------------- * | 1 | /* ----------------------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2000 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -17,6 +17,10 @@ | |||
17 | * and then read in chunks of 16 bytes. A larger size means multiple | 17 | * and then read in chunks of 16 bytes. A larger size means multiple |
18 | * reads of consecutive levels. | 18 | * reads of consecutive levels. |
19 | * | 19 | * |
20 | * The lower 32 bits of the file position are used as the incoming %eax, | ||
21 | * and the upper 32 bits of the file position as the incoming %ecx, | ||
22 | * the latter intended for "counting" eax levels like eax=4. | ||
23 | * | ||
20 | * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on | 24 | * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on |
21 | * an SMP box will direct the access to CPU %d. | 25 | * an SMP box will direct the access to CPU %d. |
22 | */ | 26 | */ |
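A minimal userspace sketch of the position encoding described in the new comment above: the low 32 bits of the file offset select %eax, the high 32 bits select %ecx, and reads must be multiples of 16 bytes. The device path, the leaf values, and the use of pread() here are illustrative assumptions (run as root with the cpuid driver loaded), not part of the patch:

#define _FILE_OFFSET_BITS 64
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t regs[4];                        /* eax, ebx, ecx, edx as returned */
	uint64_t pos = 4 | ((uint64_t)1 << 32);  /* eax = 4, ecx = 1 */
	int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

	if (fd < 0)
		return 1;
	if (pread(fd, regs, sizeof(regs), pos) != sizeof(regs))
		return 1;
	printf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
	       regs[0], regs[1], regs[2], regs[3]);
	close(fd);
	return 0;
}

This mirrors what cpuid_read() does per 16-byte chunk: it loads %eax/%ecx from the file position, runs cpuid_count() on the target CPU, and copies the four registers back.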
@@ -43,35 +47,24 @@ | |||
43 | 47 | ||
44 | static struct class *cpuid_class; | 48 | static struct class *cpuid_class; |
45 | 49 | ||
46 | struct cpuid_command { | 50 | struct cpuid_regs { |
47 | u32 reg; | 51 | u32 eax, ebx, ecx, edx; |
48 | u32 *data; | ||
49 | }; | 52 | }; |
50 | 53 | ||
51 | static void cpuid_smp_cpuid(void *cmd_block) | 54 | static void cpuid_smp_cpuid(void *cmd_block) |
52 | { | 55 | { |
53 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; | 56 | struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; |
54 | |||
55 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], | ||
56 | &cmd->data[3]); | ||
57 | } | ||
58 | |||
59 | static inline void do_cpuid(int cpu, u32 reg, u32 * data) | ||
60 | { | ||
61 | struct cpuid_command cmd; | ||
62 | |||
63 | cmd.reg = reg; | ||
64 | cmd.data = data; | ||
65 | 57 | ||
66 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); | 58 | cpuid_count(cmd->eax, cmd->ecx, |
59 | &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); | ||
67 | } | 60 | } |
68 | 61 | ||
69 | static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) | 62 | static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) |
70 | { | 63 | { |
71 | loff_t ret; | 64 | loff_t ret; |
65 | struct inode *inode = file->f_mapping->host; | ||
72 | 66 | ||
73 | lock_kernel(); | 67 | mutex_lock(&inode->i_mutex); |
74 | |||
75 | switch (orig) { | 68 | switch (orig) { |
76 | case 0: | 69 | case 0: |
77 | file->f_pos = offset; | 70 | file->f_pos = offset; |
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) | |||
84 | default: | 77 | default: |
85 | ret = -EINVAL; | 78 | ret = -EINVAL; |
86 | } | 79 | } |
87 | 80 | mutex_unlock(&inode->i_mutex); | |
88 | unlock_kernel(); | ||
89 | return ret; | 81 | return ret; |
90 | } | 82 | } |
91 | 83 | ||
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
93 | size_t count, loff_t * ppos) | 85 | size_t count, loff_t * ppos) |
94 | { | 86 | { |
95 | char __user *tmp = buf; | 87 | char __user *tmp = buf; |
96 | u32 data[4]; | 88 | struct cpuid_regs cmd; |
97 | u32 reg = *ppos; | ||
98 | int cpu = iminor(file->f_path.dentry->d_inode); | 89 | int cpu = iminor(file->f_path.dentry->d_inode); |
90 | u64 pos = *ppos; | ||
99 | 91 | ||
100 | if (count % 16) | 92 | if (count % 16) |
101 | return -EINVAL; /* Invalid chunk size */ | 93 | return -EINVAL; /* Invalid chunk size */ |
102 | 94 | ||
103 | for (; count; count -= 16) { | 95 | for (; count; count -= 16) { |
104 | do_cpuid(cpu, reg, data); | 96 | cmd.eax = pos; |
105 | if (copy_to_user(tmp, &data, 16)) | 97 | cmd.ecx = pos >> 32; |
98 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); | ||
99 | if (copy_to_user(tmp, &cmd, 16)) | ||
106 | return -EFAULT; | 100 | return -EFAULT; |
107 | tmp += 16; | 101 | tmp += 16; |
108 | *ppos = reg++; | 102 | *ppos = ++pos; |
109 | } | 103 | } |
110 | 104 | ||
111 | return tmp - buf; | 105 | return tmp - buf; |
@@ -157,20 +151,20 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, | |||
157 | 151 | ||
158 | switch (action) { | 152 | switch (action) { |
159 | case CPU_UP_PREPARE: | 153 | case CPU_UP_PREPARE: |
160 | case CPU_UP_PREPARE_FROZEN: | ||
161 | err = cpuid_device_create(cpu); | 154 | err = cpuid_device_create(cpu); |
162 | break; | 155 | break; |
163 | case CPU_UP_CANCELED: | 156 | case CPU_UP_CANCELED: |
164 | case CPU_UP_CANCELED_FROZEN: | ||
165 | case CPU_DEAD: | 157 | case CPU_DEAD: |
166 | case CPU_DEAD_FROZEN: | ||
167 | cpuid_device_destroy(cpu); | 158 | cpuid_device_destroy(cpu); |
168 | break; | 159 | break; |
160 | case CPU_UP_CANCELED_FROZEN: | ||
161 | destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); | ||
162 | break; | ||
169 | } | 163 | } |
170 | return err ? NOTIFY_BAD : NOTIFY_OK; | 164 | return err ? NOTIFY_BAD : NOTIFY_OK; |
171 | } | 165 | } |
172 | 166 | ||
173 | static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = | 167 | static struct notifier_block __refdata cpuid_class_cpu_notifier = |
174 | { | 168 | { |
175 | .notifier_call = cpuid_class_cpu_callback, | 169 | .notifier_call = cpuid_class_cpu_callback, |
176 | }; | 170 | }; |
@@ -193,7 +187,7 @@ static int __init cpuid_init(void) | |||
193 | } | 187 | } |
194 | for_each_online_cpu(i) { | 188 | for_each_online_cpu(i) { |
195 | err = cpuid_device_create(i); | 189 | err = cpuid_device_create(i); |
196 | if (err != 0) | 190 | if (err != 0) |
197 | goto out_class; | 191 | goto out_class; |
198 | } | 192 | } |
199 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); | 193 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); |
@@ -208,7 +202,7 @@ out_class: | |||
208 | } | 202 | } |
209 | class_destroy(cpuid_class); | 203 | class_destroy(cpuid_class); |
210 | out_chrdev: | 204 | out_chrdev: |
211 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | 205 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); |
212 | out: | 206 | out: |
213 | return err; | 207 | return err; |
214 | } | 208 | } |
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 40978af630e7..a47798b59f07 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c | |||
@@ -17,7 +17,7 @@ static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; | |||
17 | 17 | ||
18 | static void doublefault_fn(void) | 18 | static void doublefault_fn(void) |
19 | { | 19 | { |
20 | struct Xgt_desc_struct gdt_desc = {0, 0}; | 20 | struct desc_ptr gdt_desc = {0, 0}; |
21 | unsigned long gdt, tss; | 21 | unsigned long gdt, tss; |
22 | 22 | ||
23 | store_gdt(&gdt_desc); | 23 | store_gdt(&gdt_desc); |
@@ -33,14 +33,15 @@ static void doublefault_fn(void) | |||
33 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); | 33 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); |
34 | 34 | ||
35 | if (ptr_ok(tss)) { | 35 | if (ptr_ok(tss)) { |
36 | struct i386_hw_tss *t = (struct i386_hw_tss *)tss; | 36 | struct x86_hw_tss *t = (struct x86_hw_tss *)tss; |
37 | 37 | ||
38 | printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp); | 38 | printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", |
39 | t->ip, t->sp); | ||
39 | 40 | ||
40 | printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", | 41 | printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", |
41 | t->eax, t->ebx, t->ecx, t->edx); | 42 | t->ax, t->bx, t->cx, t->dx); |
42 | printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", | 43 | printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", |
43 | t->esi, t->edi); | 44 | t->si, t->di); |
44 | } | 45 | } |
45 | } | 46 | } |
46 | 47 | ||
@@ -50,15 +51,15 @@ static void doublefault_fn(void) | |||
50 | 51 | ||
51 | struct tss_struct doublefault_tss __cacheline_aligned = { | 52 | struct tss_struct doublefault_tss __cacheline_aligned = { |
52 | .x86_tss = { | 53 | .x86_tss = { |
53 | .esp0 = STACK_START, | 54 | .sp0 = STACK_START, |
54 | .ss0 = __KERNEL_DS, | 55 | .ss0 = __KERNEL_DS, |
55 | .ldt = 0, | 56 | .ldt = 0, |
56 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | 57 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, |
57 | 58 | ||
58 | .eip = (unsigned long) doublefault_fn, | 59 | .ip = (unsigned long) doublefault_fn, |
59 | /* 0x2 bit is always set */ | 60 | /* 0x2 bit is always set */ |
60 | .eflags = X86_EFLAGS_SF | 0x2, | 61 | .flags = X86_EFLAGS_SF | 0x2, |
61 | .esp = STACK_START, | 62 | .sp = STACK_START, |
62 | .es = __USER_DS, | 63 | .es = __USER_DS, |
63 | .cs = __KERNEL_CS, | 64 | .cs = __KERNEL_CS, |
64 | .ss = __KERNEL_DS, | 65 | .ss = __KERNEL_DS, |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c new file mode 100644 index 000000000000..dcd918c1580d --- /dev/null +++ b/arch/x86/kernel/ds.c | |||
@@ -0,0 +1,464 @@ | |||
1 | /* | ||
2 | * Debug Store support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for last branch recording (LBR) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * Different architectures use a different DS layout/pointer size. | ||
9 | * The below functions therefore work on a void*. | ||
10 | * | ||
11 | * | ||
12 | * Since there is no user for PEBS yet, only LBR (or branch | ||
13 | * trace store, BTS) is supported. | ||
14 | * | ||
15 | * | ||
16 | * Copyright (C) 2007 Intel Corporation. | ||
17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
18 | */ | ||
19 | |||
20 | #include <asm/ds.h> | ||
21 | |||
22 | #include <linux/errno.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/slab.h> | ||
25 | |||
26 | |||
27 | /* | ||
28 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
29 | * Architectures Software Developer's Manual, section 18.5) | ||
30 | * | ||
31 | * The DS configuration consists of the following fields; different | ||
32 | * architectures vary in the size of those fields. | ||
33 | * - double-word aligned base linear address of the BTS buffer | ||
34 | * - write pointer into the BTS buffer | ||
35 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
36 | * the buffer) | ||
37 | * - interrupt pointer into BTS buffer | ||
38 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
39 | * - double-word aligned base linear address of the PEBS buffer | ||
40 | * - write pointer into the PEBS buffer | ||
41 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
42 | * the buffer) | ||
43 | * - interrupt pointer into PEBS buffer | ||
44 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
45 | * - value to which counter is reset following counter overflow | ||
46 | * | ||
47 | * On later architectures, the last branch recording hardware uses | ||
48 | * 64bit pointers even in 32bit mode. | ||
49 | * | ||
50 | * | ||
51 | * Branch Trace Store (BTS) records store information about control | ||
52 | * flow changes. They at least provide the following information: | ||
53 | * - source linear address | ||
54 | * - destination linear address | ||
55 | * | ||
56 | * Netburst supported a predicated bit that had been dropped in later | ||
57 | * architectures. We do not support it. | ||
58 | * | ||
59 | * | ||
60 | * In order to abstract from the actual DS and BTS layout, we describe | ||
61 | * the access to the relevant fields. | ||
62 | * Thanks to Andi Kleen for proposing this design. | ||
63 | * | ||
64 | * The implementation, however, is not as general as it might seem. In | ||
65 | * order to stay somewhat simple and efficient, we assume an | ||
66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
67 | * field to be at least as big as that type. | ||
68 | */ | ||
69 | |||
70 | /* | ||
71 | * A special from_ip address to indicate that the BTS record is an | ||
72 | * info record that needs to be interpreted or skipped. | ||
73 | */ | ||
74 | #define BTS_ESCAPE_ADDRESS (-1) | ||
75 | |||
76 | /* | ||
77 | * A field access descriptor | ||
78 | */ | ||
79 | struct access_desc { | ||
80 | unsigned char offset; | ||
81 | unsigned char size; | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * The configuration for a particular DS/BTS hardware implementation. | ||
86 | */ | ||
87 | struct ds_configuration { | ||
88 | /* the DS configuration */ | ||
89 | unsigned char sizeof_ds; | ||
90 | struct access_desc bts_buffer_base; | ||
91 | struct access_desc bts_index; | ||
92 | struct access_desc bts_absolute_maximum; | ||
93 | struct access_desc bts_interrupt_threshold; | ||
94 | /* the BTS configuration */ | ||
95 | unsigned char sizeof_bts; | ||
96 | struct access_desc from_ip; | ||
97 | struct access_desc to_ip; | ||
98 | /* BTS variants used to store additional information like | ||
99 | timestamps */ | ||
100 | struct access_desc info_type; | ||
101 | struct access_desc info_data; | ||
102 | unsigned long debugctl_mask; | ||
103 | }; | ||
104 | |||
105 | /* | ||
106 | * The global configuration used by the below accessor functions | ||
107 | */ | ||
108 | static struct ds_configuration ds_cfg; | ||
109 | |||
110 | /* | ||
111 | * Accessor functions for some DS and BTS fields using the above | ||
112 | * global ds_cfg. | ||
113 | */ | ||
114 | static inline unsigned long get_bts_buffer_base(char *base) | ||
115 | { | ||
116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | ||
117 | } | ||
118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | ||
119 | { | ||
120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | ||
121 | } | ||
122 | static inline unsigned long get_bts_index(char *base) | ||
123 | { | ||
124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | ||
125 | } | ||
126 | static inline void set_bts_index(char *base, unsigned long value) | ||
127 | { | ||
128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | ||
129 | } | ||
130 | static inline unsigned long get_bts_absolute_maximum(char *base) | ||
131 | { | ||
132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | ||
133 | } | ||
134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | ||
135 | { | ||
136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | ||
137 | } | ||
138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | ||
139 | { | ||
140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | ||
141 | } | ||
142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | ||
143 | { | ||
144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | ||
145 | } | ||
146 | static inline unsigned long get_from_ip(char *base) | ||
147 | { | ||
148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | ||
149 | } | ||
150 | static inline void set_from_ip(char *base, unsigned long value) | ||
151 | { | ||
152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | ||
153 | } | ||
154 | static inline unsigned long get_to_ip(char *base) | ||
155 | { | ||
156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | ||
157 | } | ||
158 | static inline void set_to_ip(char *base, unsigned long value) | ||
159 | { | ||
160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | ||
161 | } | ||
162 | static inline unsigned char get_info_type(char *base) | ||
163 | { | ||
164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | ||
165 | } | ||
166 | static inline void set_info_type(char *base, unsigned char value) | ||
167 | { | ||
168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | ||
169 | } | ||
170 | static inline unsigned long get_info_data(char *base) | ||
171 | { | ||
172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | ||
173 | } | ||
174 | static inline void set_info_data(char *base, unsigned long value) | ||
175 | { | ||
176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | ||
177 | } | ||
178 | |||
179 | |||
180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | ||
181 | { | ||
182 | size_t bts_size_in_records; | ||
183 | unsigned long bts; | ||
184 | void *ds; | ||
185 | |||
186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
187 | return -EOPNOTSUPP; | ||
188 | |||
189 | if (bts_size_in_bytes < 0) | ||
190 | return -EINVAL; | ||
191 | |||
192 | bts_size_in_records = | ||
193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | ||
194 | bts_size_in_bytes = | ||
195 | bts_size_in_records * ds_cfg.sizeof_bts; | ||
196 | |||
197 | if (bts_size_in_bytes <= 0) | ||
198 | return -EINVAL; | ||
199 | |||
200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | ||
201 | |||
202 | if (!bts) | ||
203 | return -ENOMEM; | ||
204 | |||
205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
206 | |||
207 | if (!ds) { | ||
208 | kfree((void *)bts); | ||
209 | return -ENOMEM; | ||
210 | } | ||
211 | |||
212 | set_bts_buffer_base(ds, bts); | ||
213 | set_bts_index(ds, bts); | ||
214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
216 | |||
217 | *dsp = ds; | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | int ds_free(void **dsp) | ||
222 | { | ||
223 | if (*dsp) | ||
224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
225 | kfree(*dsp); | ||
226 | *dsp = NULL; | ||
227 | |||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | int ds_get_bts_size(void *ds) | ||
232 | { | ||
233 | int size_in_bytes; | ||
234 | |||
235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
236 | return -EOPNOTSUPP; | ||
237 | |||
238 | if (!ds) | ||
239 | return 0; | ||
240 | |||
241 | size_in_bytes = | ||
242 | get_bts_absolute_maximum(ds) - | ||
243 | get_bts_buffer_base(ds); | ||
244 | return size_in_bytes; | ||
245 | } | ||
246 | |||
247 | int ds_get_bts_end(void *ds) | ||
248 | { | ||
249 | int size_in_bytes = ds_get_bts_size(ds); | ||
250 | |||
251 | if (size_in_bytes <= 0) | ||
252 | return size_in_bytes; | ||
253 | |||
254 | return size_in_bytes / ds_cfg.sizeof_bts; | ||
255 | } | ||
256 | |||
257 | int ds_get_bts_index(void *ds) | ||
258 | { | ||
259 | int index_offset_in_bytes; | ||
260 | |||
261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
262 | return -EOPNOTSUPP; | ||
263 | |||
264 | index_offset_in_bytes = | ||
265 | get_bts_index(ds) - | ||
266 | get_bts_buffer_base(ds); | ||
267 | |||
268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | ||
269 | } | ||
270 | |||
271 | int ds_set_overflow(void *ds, int method) | ||
272 | { | ||
273 | switch (method) { | ||
274 | case DS_O_SIGNAL: | ||
275 | return -EOPNOTSUPP; | ||
276 | case DS_O_WRAP: | ||
277 | return 0; | ||
278 | default: | ||
279 | return -EINVAL; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | int ds_get_overflow(void *ds) | ||
284 | { | ||
285 | return DS_O_WRAP; | ||
286 | } | ||
287 | |||
288 | int ds_clear(void *ds) | ||
289 | { | ||
290 | int bts_size = ds_get_bts_size(ds); | ||
291 | unsigned long bts_base; | ||
292 | |||
293 | if (bts_size <= 0) | ||
294 | return bts_size; | ||
295 | |||
296 | bts_base = get_bts_buffer_base(ds); | ||
297 | memset((void *)bts_base, 0, bts_size); | ||
298 | |||
299 | set_bts_index(ds, bts_base); | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | ||
304 | { | ||
305 | void *bts; | ||
306 | |||
307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
308 | return -EOPNOTSUPP; | ||
309 | |||
310 | if (index < 0) | ||
311 | return -EINVAL; | ||
312 | |||
313 | if (index >= ds_get_bts_size(ds)) | ||
314 | return -EINVAL; | ||
315 | |||
316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | ||
317 | |||
318 | memset(out, 0, sizeof(*out)); | ||
319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
320 | out->qualifier = get_info_type(bts); | ||
321 | out->variant.jiffies = get_info_data(bts); | ||
322 | } else { | ||
323 | out->qualifier = BTS_BRANCH; | ||
324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
326 | } | ||
327 | |||
328 | return sizeof(*out); | ||
329 | } | ||
330 | |||
331 | int ds_write_bts(void *ds, const struct bts_struct *in) | ||
332 | { | ||
333 | unsigned long bts; | ||
334 | |||
335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
336 | return -EOPNOTSUPP; | ||
337 | |||
338 | if (ds_get_bts_size(ds) <= 0) | ||
339 | return -ENXIO; | ||
340 | |||
341 | bts = get_bts_index(ds); | ||
342 | |||
343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | ||
344 | switch (in->qualifier) { | ||
345 | case BTS_INVALID: | ||
346 | break; | ||
347 | |||
348 | case BTS_BRANCH: | ||
349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | ||
350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | ||
351 | break; | ||
352 | |||
353 | case BTS_TASK_ARRIVES: | ||
354 | case BTS_TASK_DEPARTS: | ||
355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | ||
356 | set_info_type((void *)bts, in->qualifier); | ||
357 | set_info_data((void *)bts, in->variant.jiffies); | ||
358 | break; | ||
359 | |||
360 | default: | ||
361 | return -EINVAL; | ||
362 | } | ||
363 | |||
364 | bts = bts + ds_cfg.sizeof_bts; | ||
365 | if (bts >= get_bts_absolute_maximum(ds)) | ||
366 | bts = get_bts_buffer_base(ds); | ||
367 | set_bts_index(ds, bts); | ||
368 | |||
369 | return ds_cfg.sizeof_bts; | ||
370 | } | ||
371 | |||
372 | unsigned long ds_debugctl_mask(void) | ||
373 | { | ||
374 | return ds_cfg.debugctl_mask; | ||
375 | } | ||
376 | |||
377 | #ifdef __i386__ | ||
378 | static const struct ds_configuration ds_cfg_netburst = { | ||
379 | .sizeof_ds = 9 * 4, | ||
380 | .bts_buffer_base = { 0, 4 }, | ||
381 | .bts_index = { 4, 4 }, | ||
382 | .bts_absolute_maximum = { 8, 4 }, | ||
383 | .bts_interrupt_threshold = { 12, 4 }, | ||
384 | .sizeof_bts = 3 * 4, | ||
385 | .from_ip = { 0, 4 }, | ||
386 | .to_ip = { 4, 4 }, | ||
387 | .info_type = { 4, 1 }, | ||
388 | .info_data = { 8, 4 }, | ||
389 | .debugctl_mask = (1<<2)|(1<<3) | ||
390 | }; | ||
391 | |||
392 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
393 | .sizeof_ds = 9 * 4, | ||
394 | .bts_buffer_base = { 0, 4 }, | ||
395 | .bts_index = { 4, 4 }, | ||
396 | .bts_absolute_maximum = { 8, 4 }, | ||
397 | .bts_interrupt_threshold = { 12, 4 }, | ||
398 | .sizeof_bts = 3 * 4, | ||
399 | .from_ip = { 0, 4 }, | ||
400 | .to_ip = { 4, 4 }, | ||
401 | .info_type = { 4, 1 }, | ||
402 | .info_data = { 8, 4 }, | ||
403 | .debugctl_mask = (1<<6)|(1<<7) | ||
404 | }; | ||
405 | #endif /* __i386__ */ | ||
406 | |||
407 | static const struct ds_configuration ds_cfg_core2 = { | ||
408 | .sizeof_ds = 9 * 8, | ||
409 | .bts_buffer_base = { 0, 8 }, | ||
410 | .bts_index = { 8, 8 }, | ||
411 | .bts_absolute_maximum = { 16, 8 }, | ||
412 | .bts_interrupt_threshold = { 24, 8 }, | ||
413 | .sizeof_bts = 3 * 8, | ||
414 | .from_ip = { 0, 8 }, | ||
415 | .to_ip = { 8, 8 }, | ||
416 | .info_type = { 8, 1 }, | ||
417 | .info_data = { 16, 8 }, | ||
418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
419 | }; | ||
420 | |||
421 | static inline void | ||
422 | ds_configure(const struct ds_configuration *cfg) | ||
423 | { | ||
424 | ds_cfg = *cfg; | ||
425 | } | ||
426 | |||
427 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
428 | { | ||
429 | switch (c->x86) { | ||
430 | case 0x6: | ||
431 | switch (c->x86_model) { | ||
432 | #ifdef __i386__ | ||
433 | case 0xD: | ||
434 | case 0xE: /* Pentium M */ | ||
435 | ds_configure(&ds_cfg_pentium_m); | ||
436 | break; | ||
437 | #endif /* __i386__ */ | ||
438 | case 0xF: /* Core2 */ | ||
439 | ds_configure(&ds_cfg_core2); | ||
440 | break; | ||
441 | default: | ||
442 | /* sorry, don't know about them */ | ||
443 | break; | ||
444 | } | ||
445 | break; | ||
446 | case 0xF: | ||
447 | switch (c->x86_model) { | ||
448 | #ifdef __i386__ | ||
449 | case 0x0: | ||
450 | case 0x1: | ||
451 | case 0x2: /* Netburst */ | ||
452 | ds_configure(&ds_cfg_netburst); | ||
453 | break; | ||
454 | #endif /* __i386__ */ | ||
455 | default: | ||
456 | /* sorry, don't know about them */ | ||
457 | break; | ||
458 | } | ||
459 | break; | ||
460 | default: | ||
461 | /* sorry, don't know about them */ | ||
462 | break; | ||
463 | } | ||
464 | } | ||
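
The ds_configuration tables above describe every Debug Store field as an { offset, size } pair, which is how one copy of the BTS read/write code can serve both the 32-bit and 64-bit record layouts. The get_*/set_* helpers used in ds_read_bts()/ds_write_bts() are not part of this hunk, so the sketch below only illustrates the { offset, size } accessor idea with made-up names, not the kernel's actual implementation:

/* Illustrative sketch only -- the field struct and helpers are hypothetical. */
#include <string.h>	/* in-kernel code would use <linux/string.h> */

struct ds_field { unsigned char offset, size; };

static unsigned long field_get(const void *base, struct ds_field f)
{
	unsigned long val = 0;

	/* x86 is little-endian, so copying 'size' low bytes is enough */
	memcpy(&val, (const char *)base + f.offset, f.size);
	return val;
}

static void field_set(void *base, struct ds_field f, unsigned long val)
{
	memcpy((char *)base + f.offset, &val, f.size);
}

With ds_cfg_core2, for example, field_get(ds, (struct ds_field){ 8, 8 }) would read the 64-bit bts_index slot of the DS area.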
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 18f500d185a2..4e16ef4a2659 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <linux/kexec.h> | 7 | #include <linux/kexec.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/efi.h> | ||
11 | #include <linux/pfn.h> | 10 | #include <linux/pfn.h> |
12 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
13 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
@@ -17,11 +16,6 @@ | |||
17 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
18 | #include <asm/setup.h> | 17 | #include <asm/setup.h> |
19 | 18 | ||
20 | #ifdef CONFIG_EFI | ||
21 | int efi_enabled = 0; | ||
22 | EXPORT_SYMBOL(efi_enabled); | ||
23 | #endif | ||
24 | |||
25 | struct e820map e820; | 19 | struct e820map e820; |
26 | struct change_member { | 20 | struct change_member { |
27 | struct e820entry *pbios; /* pointer to original bios entry */ | 21 | struct e820entry *pbios; /* pointer to original bios entry */ |
@@ -37,26 +31,6 @@ unsigned long pci_mem_start = 0x10000000; | |||
37 | EXPORT_SYMBOL(pci_mem_start); | 31 | EXPORT_SYMBOL(pci_mem_start); |
38 | #endif | 32 | #endif |
39 | extern int user_defined_memmap; | 33 | extern int user_defined_memmap; |
40 | struct resource data_resource = { | ||
41 | .name = "Kernel data", | ||
42 | .start = 0, | ||
43 | .end = 0, | ||
44 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
45 | }; | ||
46 | |||
47 | struct resource code_resource = { | ||
48 | .name = "Kernel code", | ||
49 | .start = 0, | ||
50 | .end = 0, | ||
51 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
52 | }; | ||
53 | |||
54 | struct resource bss_resource = { | ||
55 | .name = "Kernel bss", | ||
56 | .start = 0, | ||
57 | .end = 0, | ||
58 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
59 | }; | ||
60 | 34 | ||
61 | static struct resource system_rom_resource = { | 35 | static struct resource system_rom_resource = { |
62 | .name = "System ROM", | 36 | .name = "System ROM", |
@@ -111,60 +85,6 @@ static struct resource video_rom_resource = { | |||
111 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | 85 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
112 | }; | 86 | }; |
113 | 87 | ||
114 | static struct resource video_ram_resource = { | ||
115 | .name = "Video RAM area", | ||
116 | .start = 0xa0000, | ||
117 | .end = 0xbffff, | ||
118 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
119 | }; | ||
120 | |||
121 | static struct resource standard_io_resources[] = { { | ||
122 | .name = "dma1", | ||
123 | .start = 0x0000, | ||
124 | .end = 0x001f, | ||
125 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
126 | }, { | ||
127 | .name = "pic1", | ||
128 | .start = 0x0020, | ||
129 | .end = 0x0021, | ||
130 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
131 | }, { | ||
132 | .name = "timer0", | ||
133 | .start = 0x0040, | ||
134 | .end = 0x0043, | ||
135 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
136 | }, { | ||
137 | .name = "timer1", | ||
138 | .start = 0x0050, | ||
139 | .end = 0x0053, | ||
140 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
141 | }, { | ||
142 | .name = "keyboard", | ||
143 | .start = 0x0060, | ||
144 | .end = 0x006f, | ||
145 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
146 | }, { | ||
147 | .name = "dma page reg", | ||
148 | .start = 0x0080, | ||
149 | .end = 0x008f, | ||
150 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
151 | }, { | ||
152 | .name = "pic2", | ||
153 | .start = 0x00a0, | ||
154 | .end = 0x00a1, | ||
155 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
156 | }, { | ||
157 | .name = "dma2", | ||
158 | .start = 0x00c0, | ||
159 | .end = 0x00df, | ||
160 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
161 | }, { | ||
162 | .name = "fpu", | ||
163 | .start = 0x00f0, | ||
164 | .end = 0x00ff, | ||
165 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
166 | } }; | ||
167 | |||
168 | #define ROMSIGNATURE 0xaa55 | 88 | #define ROMSIGNATURE 0xaa55 |
169 | 89 | ||
170 | static int __init romsignature(const unsigned char *rom) | 90 | static int __init romsignature(const unsigned char *rom) |
@@ -260,10 +180,9 @@ static void __init probe_roms(void) | |||
260 | * Request address space for all standard RAM and ROM resources | 180 | * Request address space for all standard RAM and ROM resources |
261 | * and also for regions reported as reserved by the e820. | 181 | * and also for regions reported as reserved by the e820. |
262 | */ | 182 | */ |
263 | static void __init | 183 | void __init init_iomem_resources(struct resource *code_resource, |
264 | legacy_init_iomem_resources(struct resource *code_resource, | 184 | struct resource *data_resource, |
265 | struct resource *data_resource, | 185 | struct resource *bss_resource) |
266 | struct resource *bss_resource) | ||
267 | { | 186 | { |
268 | int i; | 187 | int i; |
269 | 188 | ||
@@ -305,35 +224,6 @@ legacy_init_iomem_resources(struct resource *code_resource, | |||
305 | } | 224 | } |
306 | } | 225 | } |
307 | 226 | ||
308 | /* | ||
309 | * Request address space for all standard resources | ||
310 | * | ||
311 | * This is called just before pcibios_init(), which is also a | ||
312 | * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | ||
313 | */ | ||
314 | static int __init request_standard_resources(void) | ||
315 | { | ||
316 | int i; | ||
317 | |||
318 | printk("Setting up standard PCI resources\n"); | ||
319 | if (efi_enabled) | ||
320 | efi_initialize_iomem_resources(&code_resource, | ||
321 | &data_resource, &bss_resource); | ||
322 | else | ||
323 | legacy_init_iomem_resources(&code_resource, | ||
324 | &data_resource, &bss_resource); | ||
325 | |||
326 | /* EFI systems may still have VGA */ | ||
327 | request_resource(&iomem_resource, &video_ram_resource); | ||
328 | |||
329 | /* request I/O space for devices used on all i[345]86 PCs */ | ||
330 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | ||
331 | request_resource(&ioport_resource, &standard_io_resources[i]); | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | subsys_initcall(request_standard_resources); | ||
336 | |||
337 | #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) | 227 | #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) |
338 | /** | 228 | /** |
339 | * e820_mark_nosave_regions - Find the ranges of physical addresses that do not | 229 | * e820_mark_nosave_regions - Find the ranges of physical addresses that do not |
@@ -370,19 +260,17 @@ void __init add_memory_region(unsigned long long start, | |||
370 | { | 260 | { |
371 | int x; | 261 | int x; |
372 | 262 | ||
373 | if (!efi_enabled) { | 263 | x = e820.nr_map; |
374 | x = e820.nr_map; | ||
375 | |||
376 | if (x == E820MAX) { | ||
377 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
378 | return; | ||
379 | } | ||
380 | 264 | ||
381 | e820.map[x].addr = start; | 265 | if (x == E820MAX) { |
382 | e820.map[x].size = size; | 266 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
383 | e820.map[x].type = type; | 267 | return; |
384 | e820.nr_map++; | ||
385 | } | 268 | } |
269 | |||
270 | e820.map[x].addr = start; | ||
271 | e820.map[x].size = size; | ||
272 | e820.map[x].type = type; | ||
273 | e820.nr_map++; | ||
386 | } /* add_memory_region */ | 274 | } /* add_memory_region */ |
387 | 275 | ||
388 | /* | 276 | /* |
@@ -598,29 +486,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
598 | } | 486 | } |
599 | 487 | ||
600 | /* | 488 | /* |
601 | * Callback for efi_memory_walk. | ||
602 | */ | ||
603 | static int __init | ||
604 | efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | ||
605 | { | ||
606 | unsigned long *max_pfn = arg, pfn; | ||
607 | |||
608 | if (start < end) { | ||
609 | pfn = PFN_UP(end -1); | ||
610 | if (pfn > *max_pfn) | ||
611 | *max_pfn = pfn; | ||
612 | } | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | static int __init | ||
617 | efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | ||
618 | { | ||
619 | memory_present(0, PFN_UP(start), PFN_DOWN(end)); | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | /* | ||
624 | * Find the highest page frame number we have available | 489 | * Find the highest page frame number we have available |
625 | */ | 490 | */ |
626 | void __init find_max_pfn(void) | 491 | void __init find_max_pfn(void) |
@@ -628,11 +493,6 @@ void __init find_max_pfn(void) | |||
628 | int i; | 493 | int i; |
629 | 494 | ||
630 | max_pfn = 0; | 495 | max_pfn = 0; |
631 | if (efi_enabled) { | ||
632 | efi_memmap_walk(efi_find_max_pfn, &max_pfn); | ||
633 | efi_memmap_walk(efi_memory_present_wrapper, NULL); | ||
634 | return; | ||
635 | } | ||
636 | 496 | ||
637 | for (i = 0; i < e820.nr_map; i++) { | 497 | for (i = 0; i < e820.nr_map; i++) { |
638 | unsigned long start, end; | 498 | unsigned long start, end; |
@@ -650,34 +510,12 @@ void __init find_max_pfn(void) | |||
650 | } | 510 | } |
651 | 511 | ||
652 | /* | 512 | /* |
653 | * Free all available memory for boot time allocation. Used | ||
654 | * as a callback function by efi_memory_walk() | ||
655 | */ | ||
656 | |||
657 | static int __init | ||
658 | free_available_memory(unsigned long start, unsigned long end, void *arg) | ||
659 | { | ||
660 | /* check max_low_pfn */ | ||
661 | if (start >= (max_low_pfn << PAGE_SHIFT)) | ||
662 | return 0; | ||
663 | if (end >= (max_low_pfn << PAGE_SHIFT)) | ||
664 | end = max_low_pfn << PAGE_SHIFT; | ||
665 | if (start < end) | ||
666 | free_bootmem(start, end - start); | ||
667 | |||
668 | return 0; | ||
669 | } | ||
670 | /* | ||
671 | * Register fully available low RAM pages with the bootmem allocator. | 513 | * Register fully available low RAM pages with the bootmem allocator. |
672 | */ | 514 | */ |
673 | void __init register_bootmem_low_pages(unsigned long max_low_pfn) | 515 | void __init register_bootmem_low_pages(unsigned long max_low_pfn) |
674 | { | 516 | { |
675 | int i; | 517 | int i; |
676 | 518 | ||
677 | if (efi_enabled) { | ||
678 | efi_memmap_walk(free_available_memory, NULL); | ||
679 | return; | ||
680 | } | ||
681 | for (i = 0; i < e820.nr_map; i++) { | 519 | for (i = 0; i < e820.nr_map; i++) { |
682 | unsigned long curr_pfn, last_pfn, size; | 520 | unsigned long curr_pfn, last_pfn, size; |
683 | /* | 521 | /* |
@@ -785,56 +623,12 @@ void __init print_memory_map(char *who) | |||
785 | } | 623 | } |
786 | } | 624 | } |
787 | 625 | ||
788 | static __init __always_inline void efi_limit_regions(unsigned long long size) | ||
789 | { | ||
790 | unsigned long long current_addr = 0; | ||
791 | efi_memory_desc_t *md, *next_md; | ||
792 | void *p, *p1; | ||
793 | int i, j; | ||
794 | |||
795 | j = 0; | ||
796 | p1 = memmap.map; | ||
797 | for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { | ||
798 | md = p; | ||
799 | next_md = p1; | ||
800 | current_addr = md->phys_addr + | ||
801 | PFN_PHYS(md->num_pages); | ||
802 | if (is_available_memory(md)) { | ||
803 | if (md->phys_addr >= size) continue; | ||
804 | memcpy(next_md, md, memmap.desc_size); | ||
805 | if (current_addr >= size) { | ||
806 | next_md->num_pages -= | ||
807 | PFN_UP(current_addr-size); | ||
808 | } | ||
809 | p1 += memmap.desc_size; | ||
810 | next_md = p1; | ||
811 | j++; | ||
812 | } else if ((md->attribute & EFI_MEMORY_RUNTIME) == | ||
813 | EFI_MEMORY_RUNTIME) { | ||
814 | /* In order to make runtime services | ||
815 | * available we have to include runtime | ||
816 | * memory regions in memory map */ | ||
817 | memcpy(next_md, md, memmap.desc_size); | ||
818 | p1 += memmap.desc_size; | ||
819 | next_md = p1; | ||
820 | j++; | ||
821 | } | ||
822 | } | ||
823 | memmap.nr_map = j; | ||
824 | memmap.map_end = memmap.map + | ||
825 | (memmap.nr_map * memmap.desc_size); | ||
826 | } | ||
827 | |||
828 | void __init limit_regions(unsigned long long size) | 626 | void __init limit_regions(unsigned long long size) |
829 | { | 627 | { |
830 | unsigned long long current_addr; | 628 | unsigned long long current_addr; |
831 | int i; | 629 | int i; |
832 | 630 | ||
833 | print_memory_map("limit_regions start"); | 631 | print_memory_map("limit_regions start"); |
834 | if (efi_enabled) { | ||
835 | efi_limit_regions(size); | ||
836 | return; | ||
837 | } | ||
838 | for (i = 0; i < e820.nr_map; i++) { | 632 | for (i = 0; i < e820.nr_map; i++) { |
839 | current_addr = e820.map[i].addr + e820.map[i].size; | 633 | current_addr = e820.map[i].addr + e820.map[i].size; |
840 | if (current_addr < size) | 634 | if (current_addr < size) |
@@ -955,3 +749,14 @@ static int __init parse_memmap(char *arg) | |||
955 | return 0; | 749 | return 0; |
956 | } | 750 | } |
957 | early_param("memmap", parse_memmap); | 751 | early_param("memmap", parse_memmap); |
752 | void __init update_e820(void) | ||
753 | { | ||
754 | u8 nr_map; | ||
755 | |||
756 | nr_map = e820.nr_map; | ||
757 | if (sanitize_e820_map(e820.map, &nr_map)) | ||
758 | return; | ||
759 | e820.nr_map = nr_map; | ||
760 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
761 | print_memory_map("modified"); | ||
762 | } | ||
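
With the file-local kernel code/data/bss resources and the request_standard_resources() initcall gone from e820_32.c, the renamed init_iomem_resources() now expects its caller to own those resources. A minimal sketch of the caller side, assuming the definitions removed above simply moved into the 32-bit setup code:

/* Sketch of the new calling convention; the real resource definitions now
 * live in the setup code rather than in e820_32.c. */
static struct resource code_resource = {
	.name	= "Kernel code",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
};
static struct resource data_resource = {
	.name	= "Kernel data",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
};
static struct resource bss_resource = {
	.name	= "Kernel bss",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
};

	/* ... later, while requesting standard resources: */
	init_iomem_resources(&code_resource, &data_resource, &bss_resource);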
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 04698e0b056c..9f65b4cc323c 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * Handle the memory map. | 2 | * Handle the memory map. |
3 | * The functions here do the job until bootmem takes over. | 3 | * The functions here do the job until bootmem takes over. |
4 | * | 4 | * |
@@ -26,80 +26,92 @@ | |||
26 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
27 | #include <asm/setup.h> | 27 | #include <asm/setup.h> |
28 | #include <asm/sections.h> | 28 | #include <asm/sections.h> |
29 | #include <asm/kdebug.h> | ||
29 | 30 | ||
30 | struct e820map e820; | 31 | struct e820map e820; |
31 | 32 | ||
32 | /* | 33 | /* |
33 | * PFN of last memory page. | 34 | * PFN of last memory page. |
34 | */ | 35 | */ |
35 | unsigned long end_pfn; | 36 | unsigned long end_pfn; |
36 | EXPORT_SYMBOL(end_pfn); | ||
37 | 37 | ||
38 | /* | 38 | /* |
39 | * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. | 39 | * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. |
40 | * The direct mapping extends to end_pfn_map, so that we can directly access | 40 | * The direct mapping extends to end_pfn_map, so that we can directly access |
41 | * apertures, ACPI and other tables without having to play with fixmaps. | 41 | * apertures, ACPI and other tables without having to play with fixmaps. |
42 | */ | 42 | */ |
43 | unsigned long end_pfn_map; | 43 | unsigned long end_pfn_map; |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Last pfn which the user wants to use. | 46 | * Last pfn which the user wants to use. |
47 | */ | 47 | */ |
48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | 48 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; |
49 | 49 | ||
50 | extern struct resource code_resource, data_resource, bss_resource; | 50 | /* |
51 | 51 | * Early reserved memory areas. | |
52 | /* Check for some hardcoded bad areas that early boot is not allowed to touch */ | 52 | */ |
53 | static inline int bad_addr(unsigned long *addrp, unsigned long size) | 53 | #define MAX_EARLY_RES 20 |
54 | { | 54 | |
55 | unsigned long addr = *addrp, last = addr + size; | 55 | struct early_res { |
56 | 56 | unsigned long start, end; | |
57 | /* various gunk below that needed for SMP startup */ | 57 | char name[16]; |
58 | if (addr < 0x8000) { | 58 | }; |
59 | *addrp = PAGE_ALIGN(0x8000); | 59 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
60 | return 1; | 60 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ |
61 | } | 61 | #ifdef CONFIG_SMP |
62 | 62 | { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" }, | |
63 | /* direct mapping tables of the kernel */ | ||
64 | if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { | ||
65 | *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT); | ||
66 | return 1; | ||
67 | } | ||
68 | |||
69 | /* initrd */ | ||
70 | #ifdef CONFIG_BLK_DEV_INITRD | ||
71 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
72 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
73 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
74 | unsigned long ramdisk_end = ramdisk_image+ramdisk_size; | ||
75 | |||
76 | if (last >= ramdisk_image && addr < ramdisk_end) { | ||
77 | *addrp = PAGE_ALIGN(ramdisk_end); | ||
78 | return 1; | ||
79 | } | ||
80 | } | ||
81 | #endif | 63 | #endif |
82 | /* kernel code */ | 64 | {} |
83 | if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { | 65 | }; |
84 | *addrp = PAGE_ALIGN(__pa_symbol(&_end)); | 66 | |
85 | return 1; | 67 | void __init reserve_early(unsigned long start, unsigned long end, char *name) |
68 | { | ||
69 | int i; | ||
70 | struct early_res *r; | ||
71 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
72 | r = &early_res[i]; | ||
73 | if (end > r->start && start < r->end) | ||
74 | panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", | ||
75 | start, end - 1, name?name:"", r->start, r->end - 1, r->name); | ||
86 | } | 76 | } |
77 | if (i >= MAX_EARLY_RES) | ||
78 | panic("Too many early reservations"); | ||
79 | r = &early_res[i]; | ||
80 | r->start = start; | ||
81 | r->end = end; | ||
82 | if (name) | ||
83 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
84 | } | ||
87 | 85 | ||
88 | if (last >= ebda_addr && addr < ebda_addr + ebda_size) { | 86 | void __init early_res_to_bootmem(void) |
89 | *addrp = PAGE_ALIGN(ebda_addr + ebda_size); | 87 | { |
90 | return 1; | 88 | int i; |
89 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
90 | struct early_res *r = &early_res[i]; | ||
91 | printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, | ||
92 | r->start, r->end - 1, r->name); | ||
93 | reserve_bootmem_generic(r->start, r->end - r->start); | ||
91 | } | 94 | } |
95 | } | ||
92 | 96 | ||
93 | #ifdef CONFIG_NUMA | 97 | /* Check for already reserved areas */ |
94 | /* NUMA memory to node map */ | 98 | static inline int bad_addr(unsigned long *addrp, unsigned long size) |
95 | if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { | 99 | { |
96 | *addrp = nodemap_addr + nodemap_size; | 100 | int i; |
97 | return 1; | 101 | unsigned long addr = *addrp, last; |
102 | int changed = 0; | ||
103 | again: | ||
104 | last = addr + size; | ||
105 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
106 | struct early_res *r = &early_res[i]; | ||
107 | if (last >= r->start && addr < r->end) { | ||
108 | *addrp = addr = r->end; | ||
109 | changed = 1; | ||
110 | goto again; | ||
111 | } | ||
98 | } | 112 | } |
99 | #endif | 113 | return changed; |
100 | /* XXX ramdisk image here? */ | 114 | } |
101 | return 0; | ||
102 | } | ||
103 | 115 | ||
104 | /* | 116 | /* |
105 | * This function checks if any part of the range <start,end> is mapped | 117 | * This function checks if any part of the range <start,end> is mapped |
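
The new early_res[] table gives the 64-bit boot code one place to record memory it must not hand out: reserve_early() appends a named range and panics on overlap or overflow, bad_addr() makes find_e820_area() skip those ranges, and early_res_to_bootmem() later replays the table into reserve_bootmem_generic(). A hedged usage sketch, reusing symbols that appear in the old bad_addr() above; the real call sites live elsewhere in the tree:

	/* as soon as the kernel image and direct-mapping page tables are known: */
	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
	reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE");

	/* ... and once the bootmem allocator is up, hand the whole table over: */
	early_res_to_bootmem();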
@@ -107,16 +119,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
107 | */ | 119 | */ |
108 | int | 120 | int |
109 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | 121 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) |
110 | { | 122 | { |
111 | int i; | 123 | int i; |
112 | for (i = 0; i < e820.nr_map; i++) { | 124 | |
113 | struct e820entry *ei = &e820.map[i]; | 125 | for (i = 0; i < e820.nr_map; i++) { |
114 | if (type && ei->type != type) | 126 | struct e820entry *ei = &e820.map[i]; |
127 | |||
128 | if (type && ei->type != type) | ||
115 | continue; | 129 | continue; |
116 | if (ei->addr >= end || ei->addr + ei->size <= start) | 130 | if (ei->addr >= end || ei->addr + ei->size <= start) |
117 | continue; | 131 | continue; |
118 | return 1; | 132 | return 1; |
119 | } | 133 | } |
120 | return 0; | 134 | return 0; |
121 | } | 135 | } |
122 | EXPORT_SYMBOL_GPL(e820_any_mapped); | 136 | EXPORT_SYMBOL_GPL(e820_any_mapped); |
@@ -127,11 +141,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); | |||
127 | * Note: this function only works correctly if the e820 table is sorted and | 141 | * Note: this function only works correctly if the e820 table is sorted and |
128 | * not-overlapping, which is the case | 142 | * not-overlapping, which is the case |
129 | */ | 143 | */ |
130 | int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) | 144 | int __init e820_all_mapped(unsigned long start, unsigned long end, |
145 | unsigned type) | ||
131 | { | 146 | { |
132 | int i; | 147 | int i; |
148 | |||
133 | for (i = 0; i < e820.nr_map; i++) { | 149 | for (i = 0; i < e820.nr_map; i++) { |
134 | struct e820entry *ei = &e820.map[i]; | 150 | struct e820entry *ei = &e820.map[i]; |
151 | |||
135 | if (type && ei->type != type) | 152 | if (type && ei->type != type) |
136 | continue; | 153 | continue; |
137 | /* is the region (part) in overlap with the current region ?*/ | 154 | /* is the region (part) in overlap with the current region ?*/ |
@@ -143,65 +160,75 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type | |||
143 | */ | 160 | */ |
144 | if (ei->addr <= start) | 161 | if (ei->addr <= start) |
145 | start = ei->addr + ei->size; | 162 | start = ei->addr + ei->size; |
146 | /* if start is now at or beyond end, we're done, full coverage */ | 163 | /* |
164 | * if start is now at or beyond end, we're done, full | ||
165 | * coverage | ||
166 | */ | ||
147 | if (start >= end) | 167 | if (start >= end) |
148 | return 1; /* we're done */ | 168 | return 1; |
149 | } | 169 | } |
150 | return 0; | 170 | return 0; |
151 | } | 171 | } |
152 | 172 | ||
153 | /* | 173 | /* |
154 | * Find a free area in a specific range. | 174 | * Find a free area with specified alignment in a specific range. |
155 | */ | 175 | */ |
156 | unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) | 176 | unsigned long __init find_e820_area(unsigned long start, unsigned long end, |
157 | { | 177 | unsigned size, unsigned long align) |
158 | int i; | 178 | { |
159 | for (i = 0; i < e820.nr_map; i++) { | 179 | int i; |
160 | struct e820entry *ei = &e820.map[i]; | 180 | unsigned long mask = ~(align - 1); |
161 | unsigned long addr = ei->addr, last; | 181 | |
162 | if (ei->type != E820_RAM) | 182 | for (i = 0; i < e820.nr_map; i++) { |
163 | continue; | 183 | struct e820entry *ei = &e820.map[i]; |
164 | if (addr < start) | 184 | unsigned long addr = ei->addr, last; |
185 | |||
186 | if (ei->type != E820_RAM) | ||
187 | continue; | ||
188 | if (addr < start) | ||
165 | addr = start; | 189 | addr = start; |
166 | if (addr > ei->addr + ei->size) | 190 | if (addr > ei->addr + ei->size) |
167 | continue; | 191 | continue; |
168 | while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) | 192 | while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) |
169 | ; | 193 | ; |
170 | last = PAGE_ALIGN(addr) + size; | 194 | addr = (addr + align - 1) & mask; |
195 | last = addr + size; | ||
171 | if (last > ei->addr + ei->size) | 196 | if (last > ei->addr + ei->size) |
172 | continue; | 197 | continue; |
173 | if (last > end) | 198 | if (last > end) |
174 | continue; | 199 | continue; |
175 | return addr; | 200 | return addr; |
176 | } | 201 | } |
177 | return -1UL; | 202 | return -1UL; |
178 | } | 203 | } |
179 | 204 | ||
180 | /* | 205 | /* |
181 | * Find the highest page frame number we have available | 206 | * Find the highest page frame number we have available |
182 | */ | 207 | */ |
183 | unsigned long __init e820_end_of_ram(void) | 208 | unsigned long __init e820_end_of_ram(void) |
184 | { | 209 | { |
185 | unsigned long end_pfn = 0; | 210 | unsigned long end_pfn; |
211 | |||
186 | end_pfn = find_max_pfn_with_active_regions(); | 212 | end_pfn = find_max_pfn_with_active_regions(); |
187 | 213 | ||
188 | if (end_pfn > end_pfn_map) | 214 | if (end_pfn > end_pfn_map) |
189 | end_pfn_map = end_pfn; | 215 | end_pfn_map = end_pfn; |
190 | if (end_pfn_map > MAXMEM>>PAGE_SHIFT) | 216 | if (end_pfn_map > MAXMEM>>PAGE_SHIFT) |
191 | end_pfn_map = MAXMEM>>PAGE_SHIFT; | 217 | end_pfn_map = MAXMEM>>PAGE_SHIFT; |
192 | if (end_pfn > end_user_pfn) | 218 | if (end_pfn > end_user_pfn) |
193 | end_pfn = end_user_pfn; | 219 | end_pfn = end_user_pfn; |
194 | if (end_pfn > end_pfn_map) | 220 | if (end_pfn > end_pfn_map) |
195 | end_pfn = end_pfn_map; | 221 | end_pfn = end_pfn_map; |
196 | 222 | ||
197 | printk("end_pfn_map = %lu\n", end_pfn_map); | 223 | printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); |
198 | return end_pfn; | 224 | return end_pfn; |
199 | } | 225 | } |
200 | 226 | ||
201 | /* | 227 | /* |
202 | * Mark e820 reserved areas as busy for the resource manager. | 228 | * Mark e820 reserved areas as busy for the resource manager. |
203 | */ | 229 | */ |
204 | void __init e820_reserve_resources(void) | 230 | void __init e820_reserve_resources(struct resource *code_resource, |
231 | struct resource *data_resource, struct resource *bss_resource) | ||
205 | { | 232 | { |
206 | int i; | 233 | int i; |
207 | for (i = 0; i < e820.nr_map; i++) { | 234 | for (i = 0; i < e820.nr_map; i++) { |
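
Note the alignment step added to find_e820_area() above: addr = (addr + align - 1) & ~(align - 1) rounds the candidate address up to the next multiple of align, which only works when align is a power of two. A tiny stand-alone illustration of that arithmetic, with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned long addr = 0x12345, align = 0x1000;
	unsigned long mask = ~(align - 1);

	/* same round-up used by find_e820_area(); prints 0x13000 */
	printf("%#lx\n", (addr + align - 1) & mask);
	return 0;
}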
@@ -219,13 +246,13 @@ void __init e820_reserve_resources(void) | |||
219 | request_resource(&iomem_resource, res); | 246 | request_resource(&iomem_resource, res); |
220 | if (e820.map[i].type == E820_RAM) { | 247 | if (e820.map[i].type == E820_RAM) { |
221 | /* | 248 | /* |
222 | * We don't know which RAM region contains kernel data, | 249 | * We don't know which RAM region contains kernel data, |
223 | * so we try it repeatedly and let the resource manager | 250 | * so we try it repeatedly and let the resource manager |
224 | * test it. | 251 | * test it. |
225 | */ | 252 | */ |
226 | request_resource(res, &code_resource); | 253 | request_resource(res, code_resource); |
227 | request_resource(res, &data_resource); | 254 | request_resource(res, data_resource); |
228 | request_resource(res, &bss_resource); | 255 | request_resource(res, bss_resource); |
229 | #ifdef CONFIG_KEXEC | 256 | #ifdef CONFIG_KEXEC |
230 | if (crashk_res.start != crashk_res.end) | 257 | if (crashk_res.start != crashk_res.end) |
231 | request_resource(res, &crashk_res); | 258 | request_resource(res, &crashk_res); |
@@ -322,9 +349,9 @@ e820_register_active_regions(int nid, unsigned long start_pfn, | |||
322 | add_active_range(nid, ei_startpfn, ei_endpfn); | 349 | add_active_range(nid, ei_startpfn, ei_endpfn); |
323 | } | 350 | } |
324 | 351 | ||
325 | /* | 352 | /* |
326 | * Add a memory region to the kernel e820 map. | 353 | * Add a memory region to the kernel e820 map. |
327 | */ | 354 | */ |
328 | void __init add_memory_region(unsigned long start, unsigned long size, int type) | 355 | void __init add_memory_region(unsigned long start, unsigned long size, int type) |
329 | { | 356 | { |
330 | int x = e820.nr_map; | 357 | int x = e820.nr_map; |
@@ -349,9 +376,7 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | |||
349 | { | 376 | { |
350 | unsigned long start_pfn = start >> PAGE_SHIFT; | 377 | unsigned long start_pfn = start >> PAGE_SHIFT; |
351 | unsigned long end_pfn = end >> PAGE_SHIFT; | 378 | unsigned long end_pfn = end >> PAGE_SHIFT; |
352 | unsigned long ei_startpfn; | 379 | unsigned long ei_startpfn, ei_endpfn, ram = 0; |
353 | unsigned long ei_endpfn; | ||
354 | unsigned long ram = 0; | ||
355 | int i; | 380 | int i; |
356 | 381 | ||
357 | for (i = 0; i < e820.nr_map; i++) { | 382 | for (i = 0; i < e820.nr_map; i++) { |
@@ -363,28 +388,31 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | |||
363 | return end - start - (ram << PAGE_SHIFT); | 388 | return end - start - (ram << PAGE_SHIFT); |
364 | } | 389 | } |
365 | 390 | ||
366 | void __init e820_print_map(char *who) | 391 | static void __init e820_print_map(char *who) |
367 | { | 392 | { |
368 | int i; | 393 | int i; |
369 | 394 | ||
370 | for (i = 0; i < e820.nr_map; i++) { | 395 | for (i = 0; i < e820.nr_map; i++) { |
371 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | 396 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, |
372 | (unsigned long long) e820.map[i].addr, | 397 | (unsigned long long) e820.map[i].addr, |
373 | (unsigned long long) (e820.map[i].addr + e820.map[i].size)); | 398 | (unsigned long long) |
399 | (e820.map[i].addr + e820.map[i].size)); | ||
374 | switch (e820.map[i].type) { | 400 | switch (e820.map[i].type) { |
375 | case E820_RAM: printk("(usable)\n"); | 401 | case E820_RAM: |
376 | break; | 402 | printk(KERN_CONT "(usable)\n"); |
403 | break; | ||
377 | case E820_RESERVED: | 404 | case E820_RESERVED: |
378 | printk("(reserved)\n"); | 405 | printk(KERN_CONT "(reserved)\n"); |
379 | break; | 406 | break; |
380 | case E820_ACPI: | 407 | case E820_ACPI: |
381 | printk("(ACPI data)\n"); | 408 | printk(KERN_CONT "(ACPI data)\n"); |
382 | break; | 409 | break; |
383 | case E820_NVS: | 410 | case E820_NVS: |
384 | printk("(ACPI NVS)\n"); | 411 | printk(KERN_CONT "(ACPI NVS)\n"); |
385 | break; | 412 | break; |
386 | default: printk("type %u\n", e820.map[i].type); | 413 | default: |
387 | break; | 414 | printk(KERN_CONT "type %u\n", e820.map[i].type); |
415 | break; | ||
388 | } | 416 | } |
389 | } | 417 | } |
390 | } | 418 | } |
@@ -392,11 +420,11 @@ void __init e820_print_map(char *who) | |||
392 | /* | 420 | /* |
393 | * Sanitize the BIOS e820 map. | 421 | * Sanitize the BIOS e820 map. |
394 | * | 422 | * |
395 | * Some e820 responses include overlapping entries. The following | 423 | * Some e820 responses include overlapping entries. The following |
396 | * replaces the original e820 map with a new one, removing overlaps. | 424 | * replaces the original e820 map with a new one, removing overlaps. |
397 | * | 425 | * |
398 | */ | 426 | */ |
399 | static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | 427 | static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) |
400 | { | 428 | { |
401 | struct change_member { | 429 | struct change_member { |
402 | struct e820entry *pbios; /* pointer to original bios entry */ | 430 | struct e820entry *pbios; /* pointer to original bios entry */ |
@@ -416,7 +444,8 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
416 | int i; | 444 | int i; |
417 | 445 | ||
418 | /* | 446 | /* |
419 | Visually we're performing the following (1,2,3,4 = memory types)... | 447 | Visually we're performing the following |
448 | (1,2,3,4 = memory types)... | ||
420 | 449 | ||
421 | Sample memory map (w/overlaps): | 450 | Sample memory map (w/overlaps): |
422 | ____22__________________ | 451 | ____22__________________ |
@@ -458,22 +487,23 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
458 | old_nr = *pnr_map; | 487 | old_nr = *pnr_map; |
459 | 488 | ||
460 | /* bail out if we find any unreasonable addresses in bios map */ | 489 | /* bail out if we find any unreasonable addresses in bios map */ |
461 | for (i=0; i<old_nr; i++) | 490 | for (i = 0; i < old_nr; i++) |
462 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | 491 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) |
463 | return -1; | 492 | return -1; |
464 | 493 | ||
465 | /* create pointers for initial change-point information (for sorting) */ | 494 | /* create pointers for initial change-point information (for sorting) */ |
466 | for (i=0; i < 2*old_nr; i++) | 495 | for (i = 0; i < 2 * old_nr; i++) |
467 | change_point[i] = &change_point_list[i]; | 496 | change_point[i] = &change_point_list[i]; |
468 | 497 | ||
469 | /* record all known change-points (starting and ending addresses), | 498 | /* record all known change-points (starting and ending addresses), |
470 | omitting those that are for empty memory regions */ | 499 | omitting those that are for empty memory regions */ |
471 | chgidx = 0; | 500 | chgidx = 0; |
472 | for (i=0; i < old_nr; i++) { | 501 | for (i = 0; i < old_nr; i++) { |
473 | if (biosmap[i].size != 0) { | 502 | if (biosmap[i].size != 0) { |
474 | change_point[chgidx]->addr = biosmap[i].addr; | 503 | change_point[chgidx]->addr = biosmap[i].addr; |
475 | change_point[chgidx++]->pbios = &biosmap[i]; | 504 | change_point[chgidx++]->pbios = &biosmap[i]; |
476 | change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | 505 | change_point[chgidx]->addr = biosmap[i].addr + |
506 | biosmap[i].size; | ||
477 | change_point[chgidx++]->pbios = &biosmap[i]; | 507 | change_point[chgidx++]->pbios = &biosmap[i]; |
478 | } | 508 | } |
479 | } | 509 | } |
@@ -483,75 +513,106 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
483 | still_changing = 1; | 513 | still_changing = 1; |
484 | while (still_changing) { | 514 | while (still_changing) { |
485 | still_changing = 0; | 515 | still_changing = 0; |
486 | for (i=1; i < chg_nr; i++) { | 516 | for (i = 1; i < chg_nr; i++) { |
487 | /* if <current_addr> > <last_addr>, swap */ | 517 | unsigned long long curaddr, lastaddr; |
488 | /* or, if current=<start_addr> & last=<end_addr>, swap */ | 518 | unsigned long long curpbaddr, lastpbaddr; |
489 | if ((change_point[i]->addr < change_point[i-1]->addr) || | 519 | |
490 | ((change_point[i]->addr == change_point[i-1]->addr) && | 520 | curaddr = change_point[i]->addr; |
491 | (change_point[i]->addr == change_point[i]->pbios->addr) && | 521 | lastaddr = change_point[i - 1]->addr; |
492 | (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | 522 | curpbaddr = change_point[i]->pbios->addr; |
493 | ) | 523 | lastpbaddr = change_point[i - 1]->pbios->addr; |
494 | { | 524 | |
525 | /* | ||
526 | * swap entries, when: | ||
527 | * | ||
528 | * curaddr > lastaddr or | ||
529 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
530 | * lastaddr != lastpbaddr | ||
531 | */ | ||
532 | if (curaddr < lastaddr || | ||
533 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
534 | lastaddr != lastpbaddr)) { | ||
495 | change_tmp = change_point[i]; | 535 | change_tmp = change_point[i]; |
496 | change_point[i] = change_point[i-1]; | 536 | change_point[i] = change_point[i-1]; |
497 | change_point[i-1] = change_tmp; | 537 | change_point[i-1] = change_tmp; |
498 | still_changing=1; | 538 | still_changing = 1; |
499 | } | 539 | } |
500 | } | 540 | } |
501 | } | 541 | } |
502 | 542 | ||
503 | /* create a new bios memory map, removing overlaps */ | 543 | /* create a new bios memory map, removing overlaps */ |
504 | overlap_entries=0; /* number of entries in the overlap table */ | 544 | overlap_entries = 0; /* number of entries in the overlap table */ |
505 | new_bios_entry=0; /* index for creating new bios map entries */ | 545 | new_bios_entry = 0; /* index for creating new bios map entries */ |
506 | last_type = 0; /* start with undefined memory type */ | 546 | last_type = 0; /* start with undefined memory type */ |
507 | last_addr = 0; /* start with 0 as last starting address */ | 547 | last_addr = 0; /* start with 0 as last starting address */ |
548 | |||
508 | /* loop through change-points, determining effect on the new bios map */ | 549 | /* loop through change-points, determining effect on the new bios map */ |
509 | for (chgidx=0; chgidx < chg_nr; chgidx++) | 550 | for (chgidx = 0; chgidx < chg_nr; chgidx++) { |
510 | { | ||
511 | /* keep track of all overlapping bios entries */ | 551 | /* keep track of all overlapping bios entries */ |
512 | if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | 552 | if (change_point[chgidx]->addr == |
513 | { | 553 | change_point[chgidx]->pbios->addr) { |
514 | /* add map entry to overlap list (> 1 entry implies an overlap) */ | 554 | /* |
515 | overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | 555 | * add map entry to overlap list (> 1 entry |
516 | } | 556 | * implies an overlap) |
517 | else | 557 | */ |
518 | { | 558 | overlap_list[overlap_entries++] = |
519 | /* remove entry from list (order independent, so swap with last) */ | 559 | change_point[chgidx]->pbios; |
520 | for (i=0; i<overlap_entries; i++) | 560 | } else { |
521 | { | 561 | /* |
522 | if (overlap_list[i] == change_point[chgidx]->pbios) | 562 | * remove entry from list (order independent, |
523 | overlap_list[i] = overlap_list[overlap_entries-1]; | 563 | * so swap with last) |
564 | */ | ||
565 | for (i = 0; i < overlap_entries; i++) { | ||
566 | if (overlap_list[i] == | ||
567 | change_point[chgidx]->pbios) | ||
568 | overlap_list[i] = | ||
569 | overlap_list[overlap_entries-1]; | ||
524 | } | 570 | } |
525 | overlap_entries--; | 571 | overlap_entries--; |
526 | } | 572 | } |
527 | /* if there are overlapping entries, decide which "type" to use */ | 573 | /* |
528 | /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | 574 | * if there are overlapping entries, decide which |
575 | * "type" to use (larger value takes precedence -- | ||
576 | * 1=usable, 2,3,4,4+=unusable) | ||
577 | */ | ||
529 | current_type = 0; | 578 | current_type = 0; |
530 | for (i=0; i<overlap_entries; i++) | 579 | for (i = 0; i < overlap_entries; i++) |
531 | if (overlap_list[i]->type > current_type) | 580 | if (overlap_list[i]->type > current_type) |
532 | current_type = overlap_list[i]->type; | 581 | current_type = overlap_list[i]->type; |
533 | /* continue building up new bios map based on this information */ | 582 | /* |
583 | * continue building up new bios map based on this | ||
584 | * information | ||
585 | */ | ||
534 | if (current_type != last_type) { | 586 | if (current_type != last_type) { |
535 | if (last_type != 0) { | 587 | if (last_type != 0) { |
536 | new_bios[new_bios_entry].size = | 588 | new_bios[new_bios_entry].size = |
537 | change_point[chgidx]->addr - last_addr; | 589 | change_point[chgidx]->addr - last_addr; |
538 | /* move forward only if the new size was non-zero */ | 590 | /* |
591 | * move forward only if the new size | ||
592 | * was non-zero | ||
593 | */ | ||
539 | if (new_bios[new_bios_entry].size != 0) | 594 | if (new_bios[new_bios_entry].size != 0) |
595 | /* | ||
596 | * no more space left for new | ||
597 | * bios entries ? | ||
598 | */ | ||
540 | if (++new_bios_entry >= E820MAX) | 599 | if (++new_bios_entry >= E820MAX) |
541 | break; /* no more space left for new bios entries */ | 600 | break; |
542 | } | 601 | } |
543 | if (current_type != 0) { | 602 | if (current_type != 0) { |
544 | new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | 603 | new_bios[new_bios_entry].addr = |
604 | change_point[chgidx]->addr; | ||
545 | new_bios[new_bios_entry].type = current_type; | 605 | new_bios[new_bios_entry].type = current_type; |
546 | last_addr=change_point[chgidx]->addr; | 606 | last_addr = change_point[chgidx]->addr; |
547 | } | 607 | } |
548 | last_type = current_type; | 608 | last_type = current_type; |
549 | } | 609 | } |
550 | } | 610 | } |
551 | new_nr = new_bios_entry; /* retain count for new bios entries */ | 611 | /* retain count for new bios entries */ |
612 | new_nr = new_bios_entry; | ||
552 | 613 | ||
553 | /* copy new bios mapping into original location */ | 614 | /* copy new bios mapping into original location */ |
554 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | 615 | memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); |
555 | *pnr_map = new_nr; | 616 | *pnr_map = new_nr; |
556 | 617 | ||
557 | return 0; | 618 | return 0; |
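
A worked example of the overlap rule in sanitize_e820_map(), with made-up addresses: wherever regions overlap, the numerically larger type wins, exactly as the "larger value takes precedence" comment says.

/*
 *  input:   { .addr = 0x0000, .size = 0x9000, .type = E820_RAM      }
 *           { .addr = 0x8000, .size = 0x1000, .type = E820_RESERVED }
 *
 *  output:  { .addr = 0x0000, .size = 0x8000, .type = E820_RAM      }
 *           { .addr = 0x8000, .size = 0x1000, .type = E820_RESERVED }
 *
 * The change-points are 0x0000, 0x8000 and 0x9000 (twice); between 0x8000
 * and 0x9000 both entries are live, so the reserved type (2) beats RAM (1).
 */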
@@ -566,7 +627,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
566 | * will have given us a memory map that we can use to properly | 627 | * will have given us a memory map that we can use to properly |
567 | * set up memory. If we aren't, we'll fake a memory map. | 628 | * set up memory. If we aren't, we'll fake a memory map. |
568 | */ | 629 | */ |
569 | static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | 630 | static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) |
570 | { | 631 | { |
571 | /* Only one memory region (or negative)? Ignore it */ | 632 | /* Only one memory region (or negative)? Ignore it */ |
572 | if (nr_map < 2) | 633 | if (nr_map < 2) |
@@ -583,18 +644,20 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
583 | return -1; | 644 | return -1; |
584 | 645 | ||
585 | add_memory_region(start, size, type); | 646 | add_memory_region(start, size, type); |
586 | } while (biosmap++,--nr_map); | 647 | } while (biosmap++, --nr_map); |
587 | return 0; | 648 | return 0; |
588 | } | 649 | } |
589 | 650 | ||
590 | void early_panic(char *msg) | 651 | static void early_panic(char *msg) |
591 | { | 652 | { |
592 | early_printk(msg); | 653 | early_printk(msg); |
593 | panic(msg); | 654 | panic(msg); |
594 | } | 655 | } |
595 | 656 | ||
596 | void __init setup_memory_region(void) | 657 | /* We're not void only for x86 32-bit compat */ |
658 | char * __init machine_specific_memory_setup(void) | ||
597 | { | 659 | { |
660 | char *who = "BIOS-e820"; | ||
598 | /* | 661 | /* |
599 | * Try to copy the BIOS-supplied E820-map. | 662 | * Try to copy the BIOS-supplied E820-map. |
600 | * | 663 | * |
@@ -605,7 +668,10 @@ void __init setup_memory_region(void) | |||
605 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) | 668 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) |
606 | early_panic("Cannot find a valid memory map"); | 669 | early_panic("Cannot find a valid memory map"); |
607 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 670 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
608 | e820_print_map("BIOS-e820"); | 671 | e820_print_map(who); |
672 | |||
673 | /* In case someone cares... */ | ||
674 | return who; | ||
609 | } | 675 | } |
610 | 676 | ||
611 | static int __init parse_memopt(char *p) | 677 | static int __init parse_memopt(char *p) |
@@ -613,9 +679,9 @@ static int __init parse_memopt(char *p) | |||
613 | if (!p) | 679 | if (!p) |
614 | return -EINVAL; | 680 | return -EINVAL; |
615 | end_user_pfn = memparse(p, &p); | 681 | end_user_pfn = memparse(p, &p); |
616 | end_user_pfn >>= PAGE_SHIFT; | 682 | end_user_pfn >>= PAGE_SHIFT; |
617 | return 0; | 683 | return 0; |
618 | } | 684 | } |
619 | early_param("mem", parse_memopt); | 685 | early_param("mem", parse_memopt); |
620 | 686 | ||
621 | static int userdef __initdata; | 687 | static int userdef __initdata; |
@@ -627,9 +693,9 @@ static int __init parse_memmap_opt(char *p) | |||
627 | 693 | ||
628 | if (!strcmp(p, "exactmap")) { | 694 | if (!strcmp(p, "exactmap")) { |
629 | #ifdef CONFIG_CRASH_DUMP | 695 | #ifdef CONFIG_CRASH_DUMP |
630 | /* If we are doing a crash dump, we | 696 | /* |
631 | * still need to know the real mem | 697 | * If we are doing a crash dump, we still need to know |
632 | * size before original memory map is | 698 | * the real mem size before original memory map is |
633 | * reset. | 699 | * reset. |
634 | */ | 700 | */ |
635 | e820_register_active_regions(0, 0, -1UL); | 701 | e820_register_active_regions(0, 0, -1UL); |
@@ -646,6 +712,8 @@ static int __init parse_memmap_opt(char *p) | |||
646 | mem_size = memparse(p, &p); | 712 | mem_size = memparse(p, &p); |
647 | if (p == oldp) | 713 | if (p == oldp) |
648 | return -EINVAL; | 714 | return -EINVAL; |
715 | |||
716 | userdef = 1; | ||
649 | if (*p == '@') { | 717 | if (*p == '@') { |
650 | start_at = memparse(p+1, &p); | 718 | start_at = memparse(p+1, &p); |
651 | add_memory_region(start_at, mem_size, E820_RAM); | 719 | add_memory_region(start_at, mem_size, E820_RAM); |
@@ -665,11 +733,29 @@ early_param("memmap", parse_memmap_opt); | |||
665 | void __init finish_e820_parsing(void) | 733 | void __init finish_e820_parsing(void) |
666 | { | 734 | { |
667 | if (userdef) { | 735 | if (userdef) { |
736 | char nr = e820.nr_map; | ||
737 | |||
738 | if (sanitize_e820_map(e820.map, &nr) < 0) | ||
739 | early_panic("Invalid user supplied memory map"); | ||
740 | e820.nr_map = nr; | ||
741 | |||
668 | printk(KERN_INFO "user-defined physical RAM map:\n"); | 742 | printk(KERN_INFO "user-defined physical RAM map:\n"); |
669 | e820_print_map("user"); | 743 | e820_print_map("user"); |
670 | } | 744 | } |
671 | } | 745 | } |
672 | 746 | ||
747 | void __init update_e820(void) | ||
748 | { | ||
749 | u8 nr_map; | ||
750 | |||
751 | nr_map = e820.nr_map; | ||
752 | if (sanitize_e820_map(e820.map, &nr_map)) | ||
753 | return; | ||
754 | e820.nr_map = nr_map; | ||
755 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
756 | e820_print_map("modified"); | ||
757 | } | ||
758 | |||
673 | unsigned long pci_mem_start = 0xaeedbabe; | 759 | unsigned long pci_mem_start = 0xaeedbabe; |
674 | EXPORT_SYMBOL(pci_mem_start); | 760 | EXPORT_SYMBOL(pci_mem_start); |
675 | 761 | ||
@@ -713,8 +799,10 @@ __init void e820_setup_gap(void) | |||
713 | 799 | ||
714 | if (!found) { | 800 | if (!found) { |
715 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | 801 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; |
716 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" | 802 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " |
717 | KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); | 803 | "address range\n" |
804 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | ||
805 | "registers may break!\n"); | ||
718 | } | 806 | } |
719 | 807 | ||
720 | /* | 808 | /* |
@@ -727,8 +815,9 @@ __init void e820_setup_gap(void) | |||
727 | /* Fun with two's complement */ | 815 | /* Fun with two's complement */ |
728 | pci_mem_start = (gapstart + round) & -round; | 816 | pci_mem_start = (gapstart + round) & -round; |
729 | 817 | ||
730 | printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 818 | printk(KERN_INFO |
731 | pci_mem_start, gapstart, gapsize); | 819 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", |
820 | pci_mem_start, gapstart, gapsize); | ||
732 | } | 821 | } |
733 | 822 | ||
734 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | 823 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) |
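
The reformatted printk above sits next to e820_setup_gap()'s existing "fun with two's complement" trick: because round is a power of two, -round equals ~(round - 1), so (gapstart + round) & -round masks the sum down to a round-aligned address above gapstart. A small stand-alone illustration with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned long gapstart = 0xe0123456;
	unsigned long round = 0x100000;		/* 1 MB, a power of two */

	/* -round == ~(round - 1); prints 0xe0200000 */
	printf("%#lx\n", (gapstart + round) & -round);
	return 0;
}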
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 88bb83ec895f..9f51e1ea9e82 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -21,7 +21,33 @@ | |||
21 | #include <asm/gart.h> | 21 | #include <asm/gart.h> |
22 | #endif | 22 | #endif |
23 | 23 | ||
24 | static void __init via_bugs(void) | 24 | static void __init fix_hypertransport_config(int num, int slot, int func) |
25 | { | ||
26 | u32 htcfg; | ||
27 | /* | ||
28 | * we found a hypertransport bus | ||
29 | * make sure that we are broadcasting | ||
30 | * interrupts to all cpus on the ht bus | ||
31 | * if we're using extended apic ids | ||
32 | */ | ||
33 | htcfg = read_pci_config(num, slot, func, 0x68); | ||
34 | if (htcfg & (1 << 18)) { | ||
35 | printk(KERN_INFO "Detected use of extended apic ids " | ||
36 | "on hypertransport bus\n"); | ||
37 | if ((htcfg & (1 << 17)) == 0) { | ||
38 | printk(KERN_INFO "Enabling hypertransport extended " | ||
39 | "apic interrupt broadcast\n"); | ||
40 | printk(KERN_INFO "Note this is a bios bug, " | ||
41 | "please contact your hw vendor\n"); | ||
42 | htcfg |= (1 << 17); | ||
43 | write_pci_config(num, slot, func, 0x68, htcfg); | ||
44 | } | ||
45 | } | ||
46 | |||
47 | |||
48 | } | ||
49 | |||
50 | static void __init via_bugs(int num, int slot, int func) | ||
25 | { | 51 | { |
26 | #ifdef CONFIG_GART_IOMMU | 52 | #ifdef CONFIG_GART_IOMMU |
27 | if ((end_pfn > MAX_DMA32_PFN || force_iommu) && | 53 | if ((end_pfn > MAX_DMA32_PFN || force_iommu) && |
@@ -44,7 +70,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header) | |||
44 | #endif /* CONFIG_X86_IO_APIC */ | 70 | #endif /* CONFIG_X86_IO_APIC */ |
45 | #endif /* CONFIG_ACPI */ | 71 | #endif /* CONFIG_ACPI */ |
46 | 72 | ||
47 | static void __init nvidia_bugs(void) | 73 | static void __init nvidia_bugs(int num, int slot, int func) |
48 | { | 74 | { |
49 | #ifdef CONFIG_ACPI | 75 | #ifdef CONFIG_ACPI |
50 | #ifdef CONFIG_X86_IO_APIC | 76 | #ifdef CONFIG_X86_IO_APIC |
@@ -72,7 +98,7 @@ static void __init nvidia_bugs(void) | |||
72 | 98 | ||
73 | } | 99 | } |
74 | 100 | ||
75 | static void __init ati_bugs(void) | 101 | static void __init ati_bugs(int num, int slot, int func) |
76 | { | 102 | { |
77 | #ifdef CONFIG_X86_IO_APIC | 103 | #ifdef CONFIG_X86_IO_APIC |
78 | if (timer_over_8254 == 1) { | 104 | if (timer_over_8254 == 1) { |
@@ -83,18 +109,67 @@ static void __init ati_bugs(void) | |||
83 | #endif | 109 | #endif |
84 | } | 110 | } |
85 | 111 | ||
112 | #define QFLAG_APPLY_ONCE 0x1 | ||
113 | #define QFLAG_APPLIED 0x2 | ||
114 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | ||
86 | struct chipset { | 115 | struct chipset { |
87 | u16 vendor; | 116 | u32 vendor; |
88 | void (*f)(void); | 117 | u32 device; |
118 | u32 class; | ||
119 | u32 class_mask; | ||
120 | u32 flags; | ||
121 | void (*f)(int num, int slot, int func); | ||
89 | }; | 122 | }; |
90 | 123 | ||
91 | static struct chipset early_qrk[] __initdata = { | 124 | static struct chipset early_qrk[] __initdata = { |
92 | { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, | 125 | { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, |
93 | { PCI_VENDOR_ID_VIA, via_bugs }, | 126 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, |
94 | { PCI_VENDOR_ID_ATI, ati_bugs }, | 127 | { PCI_VENDOR_ID_VIA, PCI_ANY_ID, |
128 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | ||
129 | { PCI_VENDOR_ID_ATI, PCI_ANY_ID, | ||
130 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs }, | ||
131 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | ||
132 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | ||
95 | {} | 133 | {} |
96 | }; | 134 | }; |
97 | 135 | ||
136 | static void __init check_dev_quirk(int num, int slot, int func) | ||
137 | { | ||
138 | u16 class; | ||
139 | u16 vendor; | ||
140 | u16 device; | ||
141 | u8 type; | ||
142 | int i; | ||
143 | |||
144 | class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); | ||
145 | |||
146 | if (class == 0xffff) | ||
147 | return; | ||
148 | |||
149 | vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); | ||
150 | |||
151 | device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); | ||
152 | |||
153 | for (i = 0; early_qrk[i].f != NULL; i++) { | ||
154 | if (((early_qrk[i].vendor == PCI_ANY_ID) || | ||
155 | (early_qrk[i].vendor == vendor)) && | ||
156 | ((early_qrk[i].device == PCI_ANY_ID) || | ||
157 | (early_qrk[i].device == device)) && | ||
158 | (!((early_qrk[i].class ^ class) & | ||
159 | early_qrk[i].class_mask))) { | ||
160 | if ((early_qrk[i].flags & | ||
161 | QFLAG_DONE) != QFLAG_DONE) | ||
162 | early_qrk[i].f(num, slot, func); | ||
163 | early_qrk[i].flags |= QFLAG_APPLIED; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | type = read_pci_config_byte(num, slot, func, | ||
168 | PCI_HEADER_TYPE); | ||
169 | if (!(type & 0x80)) | ||
170 | return; | ||
171 | } | ||
172 | |||
98 | void __init early_quirks(void) | 173 | void __init early_quirks(void) |
99 | { | 174 | { |
100 | int num, slot, func; | 175 | int num, slot, func; |
@@ -103,36 +178,8 @@ void __init early_quirks(void) | |||
103 | return; | 178 | return; |
104 | 179 | ||
105 | /* Poor man's PCI discovery */ | 180 | /* Poor man's PCI discovery */ |
106 | for (num = 0; num < 32; num++) { | 181 | for (num = 0; num < 32; num++) |
107 | for (slot = 0; slot < 32; slot++) { | 182 | for (slot = 0; slot < 32; slot++) |
108 | for (func = 0; func < 8; func++) { | 183 | for (func = 0; func < 8; func++) |
109 | u32 class; | 184 | check_dev_quirk(num, slot, func); |
110 | u32 vendor; | ||
111 | u8 type; | ||
112 | int i; | ||
113 | class = read_pci_config(num,slot,func, | ||
114 | PCI_CLASS_REVISION); | ||
115 | if (class == 0xffffffff) | ||
116 | break; | ||
117 | |||
118 | if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) | ||
119 | continue; | ||
120 | |||
121 | vendor = read_pci_config(num, slot, func, | ||
122 | PCI_VENDOR_ID); | ||
123 | vendor &= 0xffff; | ||
124 | |||
125 | for (i = 0; early_qrk[i].f; i++) | ||
126 | if (early_qrk[i].vendor == vendor) { | ||
127 | early_qrk[i].f(); | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | type = read_pci_config_byte(num, slot, func, | ||
132 | PCI_HEADER_TYPE); | ||
133 | if (!(type & 0x80)) | ||
134 | break; | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | } | 185 | } |
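
Each early_qrk[] entry now matches on vendor, device and class (through class_mask), and the QFLAG_APPLY_ONCE/QFLAG_APPLIED pair keeps check_dev_quirk() from running a quirk more than once. A hypothetical entry, purely to show the shape; the device ID and handler below are placeholders, not real hardware:

/* Placeholder quirk: vendor/device/handler are invented for illustration. */
static void __init example_bugs(int num, int slot, int func)
{
	/* would poke config space via read_pci_config()/write_pci_config() */
}

	/* added to early_qrk[], before the terminating {} entry: */
	{ PCI_VENDOR_ID_INTEL, 0x1234,
	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, QFLAG_APPLY_ONCE, example_bugs },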
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index b7d6c23f2871..cff84cd9987f 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -193,7 +193,7 @@ static struct console simnow_console = { | |||
193 | }; | 193 | }; |
194 | 194 | ||
195 | /* Direct interface for emergencies */ | 195 | /* Direct interface for emergencies */ |
196 | struct console *early_console = &early_vga_console; | 196 | static struct console *early_console = &early_vga_console; |
197 | static int early_console_initialized = 0; | 197 | static int early_console_initialized = 0; |
198 | 198 | ||
199 | void early_printk(const char *fmt, ...) | 199 | void early_printk(const char *fmt, ...) |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c new file mode 100644 index 000000000000..32dd62b36ff7 --- /dev/null +++ b/arch/x86/kernel/efi.c | |||
@@ -0,0 +1,515 @@ | |||
1 | /* | ||
2 | * Common EFI (Extensible Firmware Interface) support functions | ||
3 | * Based on Extensible Firmware Interface Specification version 1.0 | ||
4 | * | ||
5 | * Copyright (C) 1999 VA Linux Systems | ||
6 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
7 | * Copyright (C) 1999-2002 Hewlett-Packard Co. | ||
8 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
9 | * Stephane Eranian <eranian@hpl.hp.com> | ||
10 | * Copyright (C) 2005-2008 Intel Co. | ||
11 | * Fenghua Yu <fenghua.yu@intel.com> | ||
12 | * Bibo Mao <bibo.mao@intel.com> | ||
13 | * Chandramouli Narayanan <mouli@linux.intel.com> | ||
14 | * Huang Ying <ying.huang@intel.com> | ||
15 | * | ||
16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | ||
17 | * 32/64 support code. --ying 2007-10-26 | ||
18 | * | ||
19 | * Not all EFI Runtime Services are implemented yet, as EFI only | ||
20 | * supports physical mode addressing on SoftSDV. This is to be fixed | ||
21 | * in a future version. --drummond 1999-07-20 | ||
22 | * | ||
23 | * Implemented EFI runtime services and virtual mode calls. --davidm | ||
24 | * | ||
25 | * Goutham Rao: <goutham.rao@intel.com> | ||
26 | * Skip non-WB memory and ignore empty memory ranges. | ||
27 | */ | ||
28 | |||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/efi.h> | ||
32 | #include <linux/bootmem.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/uaccess.h> | ||
35 | #include <linux/time.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/reboot.h> | ||
38 | #include <linux/bcd.h> | ||
39 | |||
40 | #include <asm/setup.h> | ||
41 | #include <asm/efi.h> | ||
42 | #include <asm/time.h> | ||
43 | #include <asm/cacheflush.h> | ||
44 | #include <asm/tlbflush.h> | ||
45 | |||
46 | #define EFI_DEBUG 1 | ||
47 | #define PFX "EFI: " | ||
48 | |||
49 | int efi_enabled; | ||
50 | EXPORT_SYMBOL(efi_enabled); | ||
51 | |||
52 | struct efi efi; | ||
53 | EXPORT_SYMBOL(efi); | ||
54 | |||
55 | struct efi_memory_map memmap; | ||
56 | |||
57 | struct efi efi_phys __initdata; | ||
58 | static efi_system_table_t efi_systab __initdata; | ||
59 | |||
60 | static int __init setup_noefi(char *arg) | ||
61 | { | ||
62 | efi_enabled = 0; | ||
63 | return 0; | ||
64 | } | ||
65 | early_param("noefi", setup_noefi); | ||
66 | |||
67 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | ||
68 | { | ||
69 | return efi_call_virt2(get_time, tm, tc); | ||
70 | } | ||
71 | |||
72 | static efi_status_t virt_efi_set_time(efi_time_t *tm) | ||
73 | { | ||
74 | return efi_call_virt1(set_time, tm); | ||
75 | } | ||
76 | |||
77 | static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, | ||
78 | efi_bool_t *pending, | ||
79 | efi_time_t *tm) | ||
80 | { | ||
81 | return efi_call_virt3(get_wakeup_time, | ||
82 | enabled, pending, tm); | ||
83 | } | ||
84 | |||
85 | static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) | ||
86 | { | ||
87 | return efi_call_virt2(set_wakeup_time, | ||
88 | enabled, tm); | ||
89 | } | ||
90 | |||
91 | static efi_status_t virt_efi_get_variable(efi_char16_t *name, | ||
92 | efi_guid_t *vendor, | ||
93 | u32 *attr, | ||
94 | unsigned long *data_size, | ||
95 | void *data) | ||
96 | { | ||
97 | return efi_call_virt5(get_variable, | ||
98 | name, vendor, attr, | ||
99 | data_size, data); | ||
100 | } | ||
101 | |||
102 | static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, | ||
103 | efi_char16_t *name, | ||
104 | efi_guid_t *vendor) | ||
105 | { | ||
106 | return efi_call_virt3(get_next_variable, | ||
107 | name_size, name, vendor); | ||
108 | } | ||
109 | |||
110 | static efi_status_t virt_efi_set_variable(efi_char16_t *name, | ||
111 | efi_guid_t *vendor, | ||
112 | unsigned long attr, | ||
113 | unsigned long data_size, | ||
114 | void *data) | ||
115 | { | ||
116 | return efi_call_virt5(set_variable, | ||
117 | name, vendor, attr, | ||
118 | data_size, data); | ||
119 | } | ||
120 | |||
121 | static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) | ||
122 | { | ||
123 | return efi_call_virt1(get_next_high_mono_count, count); | ||
124 | } | ||
125 | |||
126 | static void virt_efi_reset_system(int reset_type, | ||
127 | efi_status_t status, | ||
128 | unsigned long data_size, | ||
129 | efi_char16_t *data) | ||
130 | { | ||
131 | efi_call_virt4(reset_system, reset_type, status, | ||
132 | data_size, data); | ||
133 | } | ||
134 | |||
135 | static efi_status_t virt_efi_set_virtual_address_map( | ||
136 | unsigned long memory_map_size, | ||
137 | unsigned long descriptor_size, | ||
138 | u32 descriptor_version, | ||
139 | efi_memory_desc_t *virtual_map) | ||
140 | { | ||
141 | return efi_call_virt4(set_virtual_address_map, | ||
142 | memory_map_size, descriptor_size, | ||
143 | descriptor_version, virtual_map); | ||
144 | } | ||
145 | |||
146 | static efi_status_t __init phys_efi_set_virtual_address_map( | ||
147 | unsigned long memory_map_size, | ||
148 | unsigned long descriptor_size, | ||
149 | u32 descriptor_version, | ||
150 | efi_memory_desc_t *virtual_map) | ||
151 | { | ||
152 | efi_status_t status; | ||
153 | |||
154 | efi_call_phys_prelog(); | ||
155 | status = efi_call_phys4(efi_phys.set_virtual_address_map, | ||
156 | memory_map_size, descriptor_size, | ||
157 | descriptor_version, virtual_map); | ||
158 | efi_call_phys_epilog(); | ||
159 | return status; | ||
160 | } | ||
161 | |||
162 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | ||
163 | efi_time_cap_t *tc) | ||
164 | { | ||
165 | efi_status_t status; | ||
166 | |||
167 | efi_call_phys_prelog(); | ||
168 | status = efi_call_phys2(efi_phys.get_time, tm, tc); | ||
169 | efi_call_phys_epilog(); | ||
170 | return status; | ||
171 | } | ||
172 | |||
173 | int efi_set_rtc_mmss(unsigned long nowtime) | ||
174 | { | ||
175 | int real_seconds, real_minutes; | ||
176 | efi_status_t status; | ||
177 | efi_time_t eft; | ||
178 | efi_time_cap_t cap; | ||
179 | |||
180 | status = efi.get_time(&eft, &cap); | ||
181 | if (status != EFI_SUCCESS) { | ||
182 | printk(KERN_ERR "Oops: efitime: can't read time!\n"); | ||
183 | return -1; | ||
184 | } | ||
185 | |||
186 | real_seconds = nowtime % 60; | ||
187 | real_minutes = nowtime / 60; | ||
188 | if (((abs(real_minutes - eft.minute) + 15)/30) & 1) | ||
189 | real_minutes += 30; | ||
190 | real_minutes %= 60; | ||
191 | eft.minute = real_minutes; | ||
192 | eft.second = real_seconds; | ||
193 | |||
194 | status = efi.set_time(&eft); | ||
195 | if (status != EFI_SUCCESS) { | ||
196 | printk(KERN_ERR "Oops: efitime: can't write time!\n"); | ||
197 | return -1; | ||
198 | } | ||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | unsigned long efi_get_time(void) | ||
203 | { | ||
204 | efi_status_t status; | ||
205 | efi_time_t eft; | ||
206 | efi_time_cap_t cap; | ||
207 | |||
208 | status = efi.get_time(&eft, &cap); | ||
209 | if (status != EFI_SUCCESS) | ||
210 | printk(KERN_ERR "Oops: efitime: can't read time!\n"); | ||
211 | |||
212 | return mktime(eft.year, eft.month, eft.day, eft.hour, | ||
213 | eft.minute, eft.second); | ||
214 | } | ||
215 | |||
216 | #if EFI_DEBUG | ||
217 | static void __init print_efi_memmap(void) | ||
218 | { | ||
219 | efi_memory_desc_t *md; | ||
220 | void *p; | ||
221 | int i; | ||
222 | |||
223 | for (p = memmap.map, i = 0; | ||
224 | p < memmap.map_end; | ||
225 | p += memmap.desc_size, i++) { | ||
226 | md = p; | ||
227 | printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, " | ||
228 | "range=[0x%016llx-0x%016llx) (%lluMB)\n", | ||
229 | i, md->type, md->attribute, md->phys_addr, | ||
230 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | ||
231 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | ||
232 | } | ||
233 | } | ||
234 | #endif /* EFI_DEBUG */ | ||
235 | |||
236 | void __init efi_init(void) | ||
237 | { | ||
238 | efi_config_table_t *config_tables; | ||
239 | efi_runtime_services_t *runtime; | ||
240 | efi_char16_t *c16; | ||
241 | char vendor[100] = "unknown"; | ||
242 | int i = 0; | ||
243 | void *tmp; | ||
244 | |||
245 | #ifdef CONFIG_X86_32 | ||
246 | efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; | ||
247 | memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; | ||
248 | #else | ||
249 | efi_phys.systab = (efi_system_table_t *) | ||
250 | (boot_params.efi_info.efi_systab | | ||
251 | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); | ||
252 | memmap.phys_map = (void *) | ||
253 | (boot_params.efi_info.efi_memmap | | ||
254 | ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); | ||
255 | #endif | ||
256 | memmap.nr_map = boot_params.efi_info.efi_memmap_size / | ||
257 | boot_params.efi_info.efi_memdesc_size; | ||
258 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
259 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
260 | |||
261 | efi.systab = early_ioremap((unsigned long)efi_phys.systab, | ||
262 | sizeof(efi_system_table_t)); | ||
263 | if (efi.systab == NULL) | ||
264 | printk(KERN_ERR "Couldn't map the EFI system table!\n"); | ||
265 | memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t)); | ||
266 | early_iounmap(efi.systab, sizeof(efi_system_table_t)); | ||
267 | efi.systab = &efi_systab; | ||
268 | |||
269 | /* | ||
270 | * Verify the EFI Table | ||
271 | */ | ||
272 | if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | ||
273 | printk(KERN_ERR "EFI system table signature incorrect!\n"); | ||
274 | if ((efi.systab->hdr.revision >> 16) == 0) | ||
275 | printk(KERN_ERR "Warning: EFI system table version " | ||
276 | "%d.%02d, expected 1.00 or greater!\n", | ||
277 | efi.systab->hdr.revision >> 16, | ||
278 | efi.systab->hdr.revision & 0xffff); | ||
279 | |||
280 | /* | ||
281 | * Show what we know for posterity | ||
282 | */ | ||
283 | c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); | ||
284 | if (c16) { | ||
285 | for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) | ||
286 | vendor[i] = *c16++; | ||
287 | vendor[i] = '\0'; | ||
288 | } else | ||
289 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); | ||
290 | early_iounmap(tmp, 2); | ||
291 | |||
292 | printk(KERN_INFO "EFI v%u.%.02u by %s \n", | ||
293 | efi.systab->hdr.revision >> 16, | ||
294 | efi.systab->hdr.revision & 0xffff, vendor); | ||
295 | |||
296 | /* | ||
297 | * Let's see what config tables the firmware passed to us. | ||
298 | */ | ||
299 | config_tables = early_ioremap( | ||
300 | efi.systab->tables, | ||
301 | efi.systab->nr_tables * sizeof(efi_config_table_t)); | ||
302 | if (config_tables == NULL) | ||
303 | printk(KERN_ERR "Could not map EFI Configuration Table!\n"); | ||
304 | |||
305 | printk(KERN_INFO); | ||
306 | for (i = 0; i < efi.systab->nr_tables; i++) { | ||
307 | if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) { | ||
308 | efi.mps = config_tables[i].table; | ||
309 | printk(" MPS=0x%lx ", config_tables[i].table); | ||
310 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
311 | ACPI_20_TABLE_GUID)) { | ||
312 | efi.acpi20 = config_tables[i].table; | ||
313 | printk(" ACPI 2.0=0x%lx ", config_tables[i].table); | ||
314 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
315 | ACPI_TABLE_GUID)) { | ||
316 | efi.acpi = config_tables[i].table; | ||
317 | printk(" ACPI=0x%lx ", config_tables[i].table); | ||
318 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
319 | SMBIOS_TABLE_GUID)) { | ||
320 | efi.smbios = config_tables[i].table; | ||
321 | printk(" SMBIOS=0x%lx ", config_tables[i].table); | ||
322 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
323 | HCDP_TABLE_GUID)) { | ||
324 | efi.hcdp = config_tables[i].table; | ||
325 | printk(" HCDP=0x%lx ", config_tables[i].table); | ||
326 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
327 | UGA_IO_PROTOCOL_GUID)) { | ||
328 | efi.uga = config_tables[i].table; | ||
329 | printk(" UGA=0x%lx ", config_tables[i].table); | ||
330 | } | ||
331 | } | ||
332 | printk("\n"); | ||
333 | early_iounmap(config_tables, | ||
334 | efi.systab->nr_tables * sizeof(efi_config_table_t)); | ||
335 | |||
336 | /* | ||
337 | * Check out the runtime services table. We need to map | ||
338 | * the runtime services table so that we can grab the physical | ||
339 | * address of several of the EFI runtime functions, needed to | ||
340 | * set the firmware into virtual mode. | ||
341 | */ | ||
342 | runtime = early_ioremap((unsigned long)efi.systab->runtime, | ||
343 | sizeof(efi_runtime_services_t)); | ||
344 | if (runtime != NULL) { | ||
345 | /* | ||
346 | * We will only need *early* access to the following | ||
347 | * two EFI runtime services before set_virtual_address_map | ||
348 | * is invoked. | ||
349 | */ | ||
350 | efi_phys.get_time = (efi_get_time_t *)runtime->get_time; | ||
351 | efi_phys.set_virtual_address_map = | ||
352 | (efi_set_virtual_address_map_t *) | ||
353 | runtime->set_virtual_address_map; | ||
354 | /* | ||
355 | * Make efi_get_time() callable before entering | ||
356 | * virtual mode. | ||
357 | */ | ||
358 | efi.get_time = phys_efi_get_time; | ||
359 | } else | ||
360 | printk(KERN_ERR "Could not map the EFI runtime service " | ||
361 | "table!\n"); | ||
362 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); | ||
363 | |||
364 | /* Map the EFI memory map */ | ||
365 | memmap.map = early_ioremap((unsigned long)memmap.phys_map, | ||
366 | memmap.nr_map * memmap.desc_size); | ||
367 | if (memmap.map == NULL) | ||
368 | printk(KERN_ERR "Could not map the EFI memory map!\n"); | ||
369 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | ||
370 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | ||
371 | printk(KERN_WARNING "Kernel-defined memdesc " | ||
372 | "doesn't match the one from EFI!\n"); | ||
373 | |||
374 | /* Setup for EFI runtime service */ | ||
375 | reboot_type = BOOT_EFI; | ||
376 | |||
377 | #if EFI_DEBUG | ||
378 | print_efi_memmap(); | ||
379 | #endif | ||
380 | } | ||
381 | |||
382 | static void __init runtime_code_page_mkexec(void) | ||
383 | { | ||
384 | efi_memory_desc_t *md; | ||
385 | void *p; | ||
386 | |||
387 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
388 | return; | ||
389 | |||
390 | /* Make EFI runtime service code area executable */ | ||
391 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
392 | md = p; | ||
393 | |||
394 | if (md->type != EFI_RUNTIME_SERVICES_CODE) | ||
395 | continue; | ||
396 | |||
397 | set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * This function will switch the EFI runtime services to virtual mode. | ||
403 | * Essentially, look through the EFI memmap and map every region that | ||
404 | * has the runtime attribute bit set in its memory descriptor and update | ||
405 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
406 | * This enables the runtime services to be called without having to | ||
407 | * thunk back into physical mode for every invocation. | ||
408 | */ | ||
409 | void __init efi_enter_virtual_mode(void) | ||
410 | { | ||
411 | efi_memory_desc_t *md; | ||
412 | efi_status_t status; | ||
413 | unsigned long size; | ||
414 | u64 end, systab; | ||
415 | void *p, *va; | ||
416 | |||
417 | efi.systab = NULL; | ||
418 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
419 | md = p; | ||
420 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) | ||
421 | continue; | ||
422 | |||
423 | size = md->num_pages << EFI_PAGE_SHIFT; | ||
424 | end = md->phys_addr + size; | ||
425 | |||
426 | if ((end >> PAGE_SHIFT) <= max_pfn_mapped) | ||
427 | va = __va(md->phys_addr); | ||
428 | else | ||
429 | va = efi_ioremap(md->phys_addr, size); | ||
430 | |||
431 | if (md->attribute & EFI_MEMORY_WB) | ||
432 | set_memory_uc(md->virt_addr, size); | ||
433 | |||
434 | md->virt_addr = (u64) (unsigned long) va; | ||
435 | |||
436 | if (!va) { | ||
437 | printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", | ||
438 | (unsigned long long)md->phys_addr); | ||
439 | continue; | ||
440 | } | ||
441 | |||
442 | systab = (u64) (unsigned long) efi_phys.systab; | ||
443 | if (md->phys_addr <= systab && systab < end) { | ||
444 | systab += md->virt_addr - md->phys_addr; | ||
445 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | ||
446 | } | ||
447 | } | ||
448 | |||
449 | BUG_ON(!efi.systab); | ||
450 | |||
451 | status = phys_efi_set_virtual_address_map( | ||
452 | memmap.desc_size * memmap.nr_map, | ||
453 | memmap.desc_size, | ||
454 | memmap.desc_version, | ||
455 | memmap.phys_map); | ||
456 | |||
457 | if (status != EFI_SUCCESS) { | ||
458 | printk(KERN_ALERT "Unable to switch EFI into virtual mode " | ||
459 | "(status=%lx)!\n", status); | ||
460 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
461 | } | ||
462 | |||
463 | /* | ||
464 | * Now that EFI is in virtual mode, update the function | ||
465 | * pointers in the runtime service table to the new virtual addresses. | ||
466 | * | ||
467 | * Call EFI services through wrapper functions. | ||
468 | */ | ||
469 | efi.get_time = virt_efi_get_time; | ||
470 | efi.set_time = virt_efi_set_time; | ||
471 | efi.get_wakeup_time = virt_efi_get_wakeup_time; | ||
472 | efi.set_wakeup_time = virt_efi_set_wakeup_time; | ||
473 | efi.get_variable = virt_efi_get_variable; | ||
474 | efi.get_next_variable = virt_efi_get_next_variable; | ||
475 | efi.set_variable = virt_efi_set_variable; | ||
476 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | ||
477 | efi.reset_system = virt_efi_reset_system; | ||
478 | efi.set_virtual_address_map = virt_efi_set_virtual_address_map; | ||
479 | runtime_code_page_mkexec(); | ||
480 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); | ||
481 | memmap.map = NULL; | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * Convenience functions to obtain memory types and attributes | ||
486 | */ | ||
487 | u32 efi_mem_type(unsigned long phys_addr) | ||
488 | { | ||
489 | efi_memory_desc_t *md; | ||
490 | void *p; | ||
491 | |||
492 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
493 | md = p; | ||
494 | if ((md->phys_addr <= phys_addr) && | ||
495 | (phys_addr < (md->phys_addr + | ||
496 | (md->num_pages << EFI_PAGE_SHIFT)))) | ||
497 | return md->type; | ||
498 | } | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | u64 efi_mem_attributes(unsigned long phys_addr) | ||
503 | { | ||
504 | efi_memory_desc_t *md; | ||
505 | void *p; | ||
506 | |||
507 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
508 | md = p; | ||
509 | if ((md->phys_addr <= phys_addr) && | ||
510 | (phys_addr < (md->phys_addr + | ||
511 | (md->num_pages << EFI_PAGE_SHIFT)))) | ||
512 | return md->attribute; | ||
513 | } | ||
514 | return 0; | ||
515 | } | ||
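efi_mem_type() and efi_mem_attributes() step through the map by memmap.desc_size rather than indexing an array of efi_memory_desc_t, because the firmware's descriptor may be larger than the kernel's structure (the KERN_WARNING in efi_init() above covers exactly that mismatch). A small usage sketch; the wrapper below is hypothetical, only the two exported lookups come from this file:

static bool __init phys_addr_is_wb_ram(unsigned long phys_addr)
{
	/* Both lookups return 0 when no descriptor covers the address. */
	return efi_mem_type(phys_addr) == EFI_CONVENTIONAL_MEMORY &&
	       (efi_mem_attributes(phys_addr) & EFI_MEMORY_WB);
}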
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index e2be78f49399..cb91f985b4a1 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
@@ -20,40 +20,15 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | #include <linux/init.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/types.h> | 23 | #include <linux/types.h> |
26 | #include <linux/time.h> | ||
27 | #include <linux/spinlock.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/ioport.h> | 24 | #include <linux/ioport.h> |
30 | #include <linux/module.h> | ||
31 | #include <linux/efi.h> | 25 | #include <linux/efi.h> |
32 | #include <linux/kexec.h> | ||
33 | 26 | ||
34 | #include <asm/setup.h> | ||
35 | #include <asm/io.h> | 27 | #include <asm/io.h> |
36 | #include <asm/page.h> | 28 | #include <asm/page.h> |
37 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
38 | #include <asm/processor.h> | ||
39 | #include <asm/desc.h> | ||
40 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
41 | 31 | ||
42 | #define EFI_DEBUG 0 | ||
43 | #define PFX "EFI: " | ||
44 | |||
45 | extern efi_status_t asmlinkage efi_call_phys(void *, ...); | ||
46 | |||
47 | struct efi efi; | ||
48 | EXPORT_SYMBOL(efi); | ||
49 | static struct efi efi_phys; | ||
50 | struct efi_memory_map memmap; | ||
51 | |||
52 | /* | ||
53 | * We require an early boot_ioremap mapping mechanism initially | ||
54 | */ | ||
55 | extern void * boot_ioremap(unsigned long, unsigned long); | ||
56 | |||
57 | /* | 32 | /* |
58 | * To make EFI call EFI runtime service in physical addressing mode we need | 33 | * To make EFI call EFI runtime service in physical addressing mode we need |
59 | * prelog/epilog before/after the invocation to disable interrupt, to | 34 | * prelog/epilog before/after the invocation to disable interrupt, to |
@@ -62,16 +37,14 @@ extern void * boot_ioremap(unsigned long, unsigned long); | |||
62 | */ | 37 | */ |
63 | 38 | ||
64 | static unsigned long efi_rt_eflags; | 39 | static unsigned long efi_rt_eflags; |
65 | static DEFINE_SPINLOCK(efi_rt_lock); | ||
66 | static pgd_t efi_bak_pg_dir_pointer[2]; | 40 | static pgd_t efi_bak_pg_dir_pointer[2]; |
67 | 41 | ||
68 | static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | 42 | void efi_call_phys_prelog(void) |
69 | { | 43 | { |
70 | unsigned long cr4; | 44 | unsigned long cr4; |
71 | unsigned long temp; | 45 | unsigned long temp; |
72 | struct Xgt_desc_struct gdt_descr; | 46 | struct desc_ptr gdt_descr; |
73 | 47 | ||
74 | spin_lock(&efi_rt_lock); | ||
75 | local_irq_save(efi_rt_eflags); | 48 | local_irq_save(efi_rt_eflags); |
76 | 49 | ||
77 | /* | 50 | /* |
@@ -101,17 +74,17 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | |||
101 | /* | 74 | /* |
102 | * After the lock is released, the original page table is restored. | 75 | * After the lock is released, the original page table is restored. |
103 | */ | 76 | */ |
104 | local_flush_tlb(); | 77 | __flush_tlb_all(); |
105 | 78 | ||
106 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); | 79 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); |
107 | gdt_descr.size = GDT_SIZE - 1; | 80 | gdt_descr.size = GDT_SIZE - 1; |
108 | load_gdt(&gdt_descr); | 81 | load_gdt(&gdt_descr); |
109 | } | 82 | } |
110 | 83 | ||
111 | static void efi_call_phys_epilog(void) __releases(efi_rt_lock) | 84 | void efi_call_phys_epilog(void) |
112 | { | 85 | { |
113 | unsigned long cr4; | 86 | unsigned long cr4; |
114 | struct Xgt_desc_struct gdt_descr; | 87 | struct desc_ptr gdt_descr; |
115 | 88 | ||
116 | gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); | 89 | gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); |
117 | gdt_descr.size = GDT_SIZE - 1; | 90 | gdt_descr.size = GDT_SIZE - 1; |
@@ -132,586 +105,7 @@ static void efi_call_phys_epilog(void) __releases(efi_rt_lock) | |||
132 | /* | 105 | /* |
133 | * After the lock is released, the original page table is restored. | 106 | * After the lock is released, the original page table is restored. |
134 | */ | 107 | */ |
135 | local_flush_tlb(); | 108 | __flush_tlb_all(); |
136 | 109 | ||
137 | local_irq_restore(efi_rt_eflags); | 110 | local_irq_restore(efi_rt_eflags); |
138 | spin_unlock(&efi_rt_lock); | ||
139 | } | ||
140 | |||
141 | static efi_status_t | ||
142 | phys_efi_set_virtual_address_map(unsigned long memory_map_size, | ||
143 | unsigned long descriptor_size, | ||
144 | u32 descriptor_version, | ||
145 | efi_memory_desc_t *virtual_map) | ||
146 | { | ||
147 | efi_status_t status; | ||
148 | |||
149 | efi_call_phys_prelog(); | ||
150 | status = efi_call_phys(efi_phys.set_virtual_address_map, | ||
151 | memory_map_size, descriptor_size, | ||
152 | descriptor_version, virtual_map); | ||
153 | efi_call_phys_epilog(); | ||
154 | return status; | ||
155 | } | ||
156 | |||
157 | static efi_status_t | ||
158 | phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | ||
159 | { | ||
160 | efi_status_t status; | ||
161 | |||
162 | efi_call_phys_prelog(); | ||
163 | status = efi_call_phys(efi_phys.get_time, tm, tc); | ||
164 | efi_call_phys_epilog(); | ||
165 | return status; | ||
166 | } | ||
167 | |||
168 | inline int efi_set_rtc_mmss(unsigned long nowtime) | ||
169 | { | ||
170 | int real_seconds, real_minutes; | ||
171 | efi_status_t status; | ||
172 | efi_time_t eft; | ||
173 | efi_time_cap_t cap; | ||
174 | |||
175 | spin_lock(&efi_rt_lock); | ||
176 | status = efi.get_time(&eft, &cap); | ||
177 | spin_unlock(&efi_rt_lock); | ||
178 | if (status != EFI_SUCCESS) | ||
179 | panic("Ooops, efitime: can't read time!\n"); | ||
180 | real_seconds = nowtime % 60; | ||
181 | real_minutes = nowtime / 60; | ||
182 | |||
183 | if (((abs(real_minutes - eft.minute) + 15)/30) & 1) | ||
184 | real_minutes += 30; | ||
185 | real_minutes %= 60; | ||
186 | |||
187 | eft.minute = real_minutes; | ||
188 | eft.second = real_seconds; | ||
189 | |||
190 | if (status != EFI_SUCCESS) { | ||
191 | printk("Ooops: efitime: can't read time!\n"); | ||
192 | return -1; | ||
193 | } | ||
194 | return 0; | ||
195 | } | ||
196 | /* | ||
197 | * This is used during kernel init before runtime | ||
198 | * services have been remapped and also during suspend, therefore, | ||
199 | * we'll need to call both in physical and virtual modes. | ||
200 | */ | ||
201 | inline unsigned long efi_get_time(void) | ||
202 | { | ||
203 | efi_status_t status; | ||
204 | efi_time_t eft; | ||
205 | efi_time_cap_t cap; | ||
206 | |||
207 | if (efi.get_time) { | ||
208 | /* if we are in virtual mode use remapped function */ | ||
209 | status = efi.get_time(&eft, &cap); | ||
210 | } else { | ||
211 | /* we are in physical mode */ | ||
212 | status = phys_efi_get_time(&eft, &cap); | ||
213 | } | ||
214 | |||
215 | if (status != EFI_SUCCESS) | ||
216 | printk("Oops: efitime: can't read time status: 0x%lx\n",status); | ||
217 | |||
218 | return mktime(eft.year, eft.month, eft.day, eft.hour, | ||
219 | eft.minute, eft.second); | ||
220 | } | ||
221 | |||
222 | int is_available_memory(efi_memory_desc_t * md) | ||
223 | { | ||
224 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
225 | return 0; | ||
226 | |||
227 | switch (md->type) { | ||
228 | case EFI_LOADER_CODE: | ||
229 | case EFI_LOADER_DATA: | ||
230 | case EFI_BOOT_SERVICES_CODE: | ||
231 | case EFI_BOOT_SERVICES_DATA: | ||
232 | case EFI_CONVENTIONAL_MEMORY: | ||
233 | return 1; | ||
234 | } | ||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * We need to map the EFI memory map again after paging_init(). | ||
240 | */ | ||
241 | void __init efi_map_memmap(void) | ||
242 | { | ||
243 | memmap.map = NULL; | ||
244 | |||
245 | memmap.map = bt_ioremap((unsigned long) memmap.phys_map, | ||
246 | (memmap.nr_map * memmap.desc_size)); | ||
247 | if (memmap.map == NULL) | ||
248 | printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); | ||
249 | |||
250 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | ||
251 | } | ||
252 | |||
253 | #if EFI_DEBUG | ||
254 | static void __init print_efi_memmap(void) | ||
255 | { | ||
256 | efi_memory_desc_t *md; | ||
257 | void *p; | ||
258 | int i; | ||
259 | |||
260 | for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { | ||
261 | md = p; | ||
262 | printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " | ||
263 | "range=[0x%016llx-0x%016llx) (%lluMB)\n", | ||
264 | i, md->type, md->attribute, md->phys_addr, | ||
265 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | ||
266 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | ||
267 | } | ||
268 | } | ||
269 | #endif /* EFI_DEBUG */ | ||
270 | |||
271 | /* | ||
272 | * Walks the EFI memory map and calls CALLBACK once for each EFI | ||
273 | * memory descriptor that has memory that is available for kernel use. | ||
274 | */ | ||
275 | void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) | ||
276 | { | ||
277 | int prev_valid = 0; | ||
278 | struct range { | ||
279 | unsigned long start; | ||
280 | unsigned long end; | ||
281 | } uninitialized_var(prev), curr; | ||
282 | efi_memory_desc_t *md; | ||
283 | unsigned long start, end; | ||
284 | void *p; | ||
285 | |||
286 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
287 | md = p; | ||
288 | |||
289 | if ((md->num_pages == 0) || (!is_available_memory(md))) | ||
290 | continue; | ||
291 | |||
292 | curr.start = md->phys_addr; | ||
293 | curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); | ||
294 | |||
295 | if (!prev_valid) { | ||
296 | prev = curr; | ||
297 | prev_valid = 1; | ||
298 | } else { | ||
299 | if (curr.start < prev.start) | ||
300 | printk(KERN_INFO PFX "Unordered memory map\n"); | ||
301 | if (prev.end == curr.start) | ||
302 | prev.end = curr.end; | ||
303 | else { | ||
304 | start = | ||
305 | (unsigned long) (PAGE_ALIGN(prev.start)); | ||
306 | end = (unsigned long) (prev.end & PAGE_MASK); | ||
307 | if ((end > start) | ||
308 | && (*callback) (start, end, arg) < 0) | ||
309 | return; | ||
310 | prev = curr; | ||
311 | } | ||
312 | } | ||
313 | } | ||
314 | if (prev_valid) { | ||
315 | start = (unsigned long) PAGE_ALIGN(prev.start); | ||
316 | end = (unsigned long) (prev.end & PAGE_MASK); | ||
317 | if (end > start) | ||
318 | (*callback) (start, end, arg); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | void __init efi_init(void) | ||
323 | { | ||
324 | efi_config_table_t *config_tables; | ||
325 | efi_runtime_services_t *runtime; | ||
326 | efi_char16_t *c16; | ||
327 | char vendor[100] = "unknown"; | ||
328 | unsigned long num_config_tables; | ||
329 | int i = 0; | ||
330 | |||
331 | memset(&efi, 0, sizeof(efi) ); | ||
332 | memset(&efi_phys, 0, sizeof(efi_phys)); | ||
333 | |||
334 | efi_phys.systab = | ||
335 | (efi_system_table_t *)boot_params.efi_info.efi_systab; | ||
336 | memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; | ||
337 | memmap.nr_map = boot_params.efi_info.efi_memmap_size/ | ||
338 | boot_params.efi_info.efi_memdesc_size; | ||
339 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
340 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
341 | |||
342 | efi.systab = (efi_system_table_t *) | ||
343 | boot_ioremap((unsigned long) efi_phys.systab, | ||
344 | sizeof(efi_system_table_t)); | ||
345 | /* | ||
346 | * Verify the EFI Table | ||
347 | */ | ||
348 | if (efi.systab == NULL) | ||
349 | printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); | ||
350 | if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | ||
351 | printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); | ||
352 | if ((efi.systab->hdr.revision >> 16) == 0) | ||
353 | printk(KERN_ERR PFX "Warning: EFI system table version " | ||
354 | "%d.%02d, expected 1.00 or greater\n", | ||
355 | efi.systab->hdr.revision >> 16, | ||
356 | efi.systab->hdr.revision & 0xffff); | ||
357 | |||
358 | /* | ||
359 | * Grab some details from the system table | ||
360 | */ | ||
361 | num_config_tables = efi.systab->nr_tables; | ||
362 | config_tables = (efi_config_table_t *)efi.systab->tables; | ||
363 | runtime = efi.systab->runtime; | ||
364 | |||
365 | /* | ||
366 | * Show what we know for posterity | ||
367 | */ | ||
368 | c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); | ||
369 | if (c16) { | ||
370 | for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) | ||
371 | vendor[i] = *c16++; | ||
372 | vendor[i] = '\0'; | ||
373 | } else | ||
374 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); | ||
375 | |||
376 | printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", | ||
377 | efi.systab->hdr.revision >> 16, | ||
378 | efi.systab->hdr.revision & 0xffff, vendor); | ||
379 | |||
380 | /* | ||
381 | * Let's see what config tables the firmware passed to us. | ||
382 | */ | ||
383 | config_tables = (efi_config_table_t *) | ||
384 | boot_ioremap((unsigned long) config_tables, | ||
385 | num_config_tables * sizeof(efi_config_table_t)); | ||
386 | |||
387 | if (config_tables == NULL) | ||
388 | printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); | ||
389 | |||
390 | efi.mps = EFI_INVALID_TABLE_ADDR; | ||
391 | efi.acpi = EFI_INVALID_TABLE_ADDR; | ||
392 | efi.acpi20 = EFI_INVALID_TABLE_ADDR; | ||
393 | efi.smbios = EFI_INVALID_TABLE_ADDR; | ||
394 | efi.sal_systab = EFI_INVALID_TABLE_ADDR; | ||
395 | efi.boot_info = EFI_INVALID_TABLE_ADDR; | ||
396 | efi.hcdp = EFI_INVALID_TABLE_ADDR; | ||
397 | efi.uga = EFI_INVALID_TABLE_ADDR; | ||
398 | |||
399 | for (i = 0; i < num_config_tables; i++) { | ||
400 | if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { | ||
401 | efi.mps = config_tables[i].table; | ||
402 | printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); | ||
403 | } else | ||
404 | if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { | ||
405 | efi.acpi20 = config_tables[i].table; | ||
406 | printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); | ||
407 | } else | ||
408 | if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { | ||
409 | efi.acpi = config_tables[i].table; | ||
410 | printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); | ||
411 | } else | ||
412 | if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { | ||
413 | efi.smbios = config_tables[i].table; | ||
414 | printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); | ||
415 | } else | ||
416 | if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { | ||
417 | efi.hcdp = config_tables[i].table; | ||
418 | printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); | ||
419 | } else | ||
420 | if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { | ||
421 | efi.uga = config_tables[i].table; | ||
422 | printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); | ||
423 | } | ||
424 | } | ||
425 | printk("\n"); | ||
426 | |||
427 | /* | ||
428 | * Check out the runtime services table. We need to map | ||
429 | * the runtime services table so that we can grab the physical | ||
430 | * address of several of the EFI runtime functions, needed to | ||
431 | * set the firmware into virtual mode. | ||
432 | */ | ||
433 | |||
434 | runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) | ||
435 | runtime, | ||
436 | sizeof(efi_runtime_services_t)); | ||
437 | if (runtime != NULL) { | ||
438 | /* | ||
439 | * We will only need *early* access to the following | ||
440 | * two EFI runtime services before set_virtual_address_map | ||
441 | * is invoked. | ||
442 | */ | ||
443 | efi_phys.get_time = (efi_get_time_t *) runtime->get_time; | ||
444 | efi_phys.set_virtual_address_map = | ||
445 | (efi_set_virtual_address_map_t *) | ||
446 | runtime->set_virtual_address_map; | ||
447 | } else | ||
448 | printk(KERN_ERR PFX "Could not map the runtime service table!\n"); | ||
449 | |||
450 | /* Map the EFI memory map for use until paging_init() */ | ||
451 | memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap, | ||
452 | boot_params.efi_info.efi_memmap_size); | ||
453 | if (memmap.map == NULL) | ||
454 | printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); | ||
455 | |||
456 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | ||
457 | |||
458 | #if EFI_DEBUG | ||
459 | print_efi_memmap(); | ||
460 | #endif | ||
461 | } | ||
462 | |||
463 | static inline void __init check_range_for_systab(efi_memory_desc_t *md) | ||
464 | { | ||
465 | if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && | ||
466 | ((unsigned long)efi_phys.systab < md->phys_addr + | ||
467 | ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { | ||
468 | unsigned long addr; | ||
469 | |||
470 | addr = md->virt_addr - md->phys_addr + | ||
471 | (unsigned long)efi_phys.systab; | ||
472 | efi.systab = (efi_system_table_t *)addr; | ||
473 | } | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Wrap all the virtual calls in a way that forces the parameters on the stack. | ||
478 | */ | ||
479 | |||
480 | #define efi_call_virt(f, args...) \ | ||
481 | ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) | ||
482 | |||
483 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | ||
484 | { | ||
485 | return efi_call_virt(get_time, tm, tc); | ||
486 | } | ||
487 | |||
488 | static efi_status_t virt_efi_set_time (efi_time_t *tm) | ||
489 | { | ||
490 | return efi_call_virt(set_time, tm); | ||
491 | } | ||
492 | |||
493 | static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled, | ||
494 | efi_bool_t *pending, | ||
495 | efi_time_t *tm) | ||
496 | { | ||
497 | return efi_call_virt(get_wakeup_time, enabled, pending, tm); | ||
498 | } | ||
499 | |||
500 | static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled, | ||
501 | efi_time_t *tm) | ||
502 | { | ||
503 | return efi_call_virt(set_wakeup_time, enabled, tm); | ||
504 | } | ||
505 | |||
506 | static efi_status_t virt_efi_get_variable (efi_char16_t *name, | ||
507 | efi_guid_t *vendor, u32 *attr, | ||
508 | unsigned long *data_size, void *data) | ||
509 | { | ||
510 | return efi_call_virt(get_variable, name, vendor, attr, data_size, data); | ||
511 | } | ||
512 | |||
513 | static efi_status_t virt_efi_get_next_variable (unsigned long *name_size, | ||
514 | efi_char16_t *name, | ||
515 | efi_guid_t *vendor) | ||
516 | { | ||
517 | return efi_call_virt(get_next_variable, name_size, name, vendor); | ||
518 | } | ||
519 | |||
520 | static efi_status_t virt_efi_set_variable (efi_char16_t *name, | ||
521 | efi_guid_t *vendor, | ||
522 | unsigned long attr, | ||
523 | unsigned long data_size, void *data) | ||
524 | { | ||
525 | return efi_call_virt(set_variable, name, vendor, attr, data_size, data); | ||
526 | } | ||
527 | |||
528 | static efi_status_t virt_efi_get_next_high_mono_count (u32 *count) | ||
529 | { | ||
530 | return efi_call_virt(get_next_high_mono_count, count); | ||
531 | } | ||
532 | |||
533 | static void virt_efi_reset_system (int reset_type, efi_status_t status, | ||
534 | unsigned long data_size, | ||
535 | efi_char16_t *data) | ||
536 | { | ||
537 | efi_call_virt(reset_system, reset_type, status, data_size, data); | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * This function will switch the EFI runtime services to virtual mode. | ||
542 | * Essentially, look through the EFI memmap and map every region that | ||
543 | * has the runtime attribute bit set in its memory descriptor and update | ||
544 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
545 | * This enables the runtime services to be called without having to | ||
546 | * thunk back into physical mode for every invocation. | ||
547 | */ | ||
548 | |||
549 | void __init efi_enter_virtual_mode(void) | ||
550 | { | ||
551 | efi_memory_desc_t *md; | ||
552 | efi_status_t status; | ||
553 | void *p; | ||
554 | |||
555 | efi.systab = NULL; | ||
556 | |||
557 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
558 | md = p; | ||
559 | |||
560 | if (!(md->attribute & EFI_MEMORY_RUNTIME)) | ||
561 | continue; | ||
562 | |||
563 | md->virt_addr = (unsigned long)ioremap(md->phys_addr, | ||
564 | md->num_pages << EFI_PAGE_SHIFT); | ||
565 | if (!(unsigned long)md->virt_addr) { | ||
566 | printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", | ||
567 | (unsigned long)md->phys_addr); | ||
568 | } | ||
569 | /* update the virtual address of the EFI system table */ | ||
570 | check_range_for_systab(md); | ||
571 | } | ||
572 | |||
573 | BUG_ON(!efi.systab); | ||
574 | |||
575 | status = phys_efi_set_virtual_address_map( | ||
576 | memmap.desc_size * memmap.nr_map, | ||
577 | memmap.desc_size, | ||
578 | memmap.desc_version, | ||
579 | memmap.phys_map); | ||
580 | |||
581 | if (status != EFI_SUCCESS) { | ||
582 | printk (KERN_ALERT "You are screwed! " | ||
583 | "Unable to switch EFI into virtual mode " | ||
584 | "(status=%lx)\n", status); | ||
585 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Now that EFI is in virtual mode, update the function | ||
590 | * pointers in the runtime service table to the new virtual addresses. | ||
591 | */ | ||
592 | |||
593 | efi.get_time = virt_efi_get_time; | ||
594 | efi.set_time = virt_efi_set_time; | ||
595 | efi.get_wakeup_time = virt_efi_get_wakeup_time; | ||
596 | efi.set_wakeup_time = virt_efi_set_wakeup_time; | ||
597 | efi.get_variable = virt_efi_get_variable; | ||
598 | efi.get_next_variable = virt_efi_get_next_variable; | ||
599 | efi.set_variable = virt_efi_set_variable; | ||
600 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | ||
601 | efi.reset_system = virt_efi_reset_system; | ||
602 | } | ||
603 | |||
604 | void __init | ||
605 | efi_initialize_iomem_resources(struct resource *code_resource, | ||
606 | struct resource *data_resource, | ||
607 | struct resource *bss_resource) | ||
608 | { | ||
609 | struct resource *res; | ||
610 | efi_memory_desc_t *md; | ||
611 | void *p; | ||
612 | |||
613 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
614 | md = p; | ||
615 | |||
616 | if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > | ||
617 | 0x100000000ULL) | ||
618 | continue; | ||
619 | res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | ||
620 | switch (md->type) { | ||
621 | case EFI_RESERVED_TYPE: | ||
622 | res->name = "Reserved Memory"; | ||
623 | break; | ||
624 | case EFI_LOADER_CODE: | ||
625 | res->name = "Loader Code"; | ||
626 | break; | ||
627 | case EFI_LOADER_DATA: | ||
628 | res->name = "Loader Data"; | ||
629 | break; | ||
630 | case EFI_BOOT_SERVICES_DATA: | ||
631 | res->name = "BootServices Data"; | ||
632 | break; | ||
633 | case EFI_BOOT_SERVICES_CODE: | ||
634 | res->name = "BootServices Code"; | ||
635 | break; | ||
636 | case EFI_RUNTIME_SERVICES_CODE: | ||
637 | res->name = "Runtime Service Code"; | ||
638 | break; | ||
639 | case EFI_RUNTIME_SERVICES_DATA: | ||
640 | res->name = "Runtime Service Data"; | ||
641 | break; | ||
642 | case EFI_CONVENTIONAL_MEMORY: | ||
643 | res->name = "Conventional Memory"; | ||
644 | break; | ||
645 | case EFI_UNUSABLE_MEMORY: | ||
646 | res->name = "Unusable Memory"; | ||
647 | break; | ||
648 | case EFI_ACPI_RECLAIM_MEMORY: | ||
649 | res->name = "ACPI Reclaim"; | ||
650 | break; | ||
651 | case EFI_ACPI_MEMORY_NVS: | ||
652 | res->name = "ACPI NVS"; | ||
653 | break; | ||
654 | case EFI_MEMORY_MAPPED_IO: | ||
655 | res->name = "Memory Mapped IO"; | ||
656 | break; | ||
657 | case EFI_MEMORY_MAPPED_IO_PORT_SPACE: | ||
658 | res->name = "Memory Mapped IO Port Space"; | ||
659 | break; | ||
660 | default: | ||
661 | res->name = "Reserved"; | ||
662 | break; | ||
663 | } | ||
664 | res->start = md->phys_addr; | ||
665 | res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); | ||
666 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
667 | if (request_resource(&iomem_resource, res) < 0) | ||
668 | printk(KERN_ERR PFX "Failed to allocate res %s : " | ||
669 | "0x%llx-0x%llx\n", res->name, | ||
670 | (unsigned long long)res->start, | ||
671 | (unsigned long long)res->end); | ||
672 | /* | ||
673 | * We don't know which region contains kernel data so we try | ||
674 | * it repeatedly and let the resource manager test it. | ||
675 | */ | ||
676 | if (md->type == EFI_CONVENTIONAL_MEMORY) { | ||
677 | request_resource(res, code_resource); | ||
678 | request_resource(res, data_resource); | ||
679 | request_resource(res, bss_resource); | ||
680 | #ifdef CONFIG_KEXEC | ||
681 | request_resource(res, &crashk_res); | ||
682 | #endif | ||
683 | } | ||
684 | } | ||
685 | } | ||
686 | |||
687 | /* | ||
688 | * Convenience functions to obtain memory types and attributes | ||
689 | */ | ||
690 | |||
691 | u32 efi_mem_type(unsigned long phys_addr) | ||
692 | { | ||
693 | efi_memory_desc_t *md; | ||
694 | void *p; | ||
695 | |||
696 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
697 | md = p; | ||
698 | if ((md->phys_addr <= phys_addr) && (phys_addr < | ||
699 | (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) | ||
700 | return md->type; | ||
701 | } | ||
702 | return 0; | ||
703 | } | ||
704 | |||
705 | u64 efi_mem_attributes(unsigned long phys_addr) | ||
706 | { | ||
707 | efi_memory_desc_t *md; | ||
708 | void *p; | ||
709 | |||
710 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
711 | md = p; | ||
712 | if ((md->phys_addr <= phys_addr) && (phys_addr < | ||
713 | (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) | ||
714 | return md->attribute; | ||
715 | } | ||
716 | return 0; | ||
717 | } | 111 | } |
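With the duplicated 32/64-bit logic moved into efi.c, the 32-bit file is reduced to the two arch hooks, efi_call_phys_prelog() and efi_call_phys_epilog(), now global instead of static so the shared code can call them. Every physical-mode firmware call is bracketed by the pair, roughly as below; this mirrors phys_efi_get_time() in efi.c above, and efi_call_phys2 is assumed to be the two-argument call stub from efi_stub_32.S, which is not part of this hunk:

efi_status_t status;

efi_call_phys_prelog();     /* irqs off, page tables and GDT switched for physical addressing */
status = efi_call_phys2(efi_phys.get_time, tm, tc);
efi_call_phys_epilog();     /* original page table and GDT restored, irqs back on */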
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c new file mode 100644 index 000000000000..09d5c2330934 --- /dev/null +++ b/arch/x86/kernel/efi_64.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * x86_64 specific EFI support functions | ||
3 | * Based on Extensible Firmware Interface Specification version 1.0 | ||
4 | * | ||
5 | * Copyright (C) 2005-2008 Intel Co. | ||
6 | * Fenghua Yu <fenghua.yu@intel.com> | ||
7 | * Bibo Mao <bibo.mao@intel.com> | ||
8 | * Chandramouli Narayanan <mouli@linux.intel.com> | ||
9 | * Huang Ying <ying.huang@intel.com> | ||
10 | * | ||
11 | * Code to convert EFI to E820 map has been implemented in elilo bootloader | ||
12 | * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table | ||
13 | * is setup appropriately for EFI runtime code. | ||
14 | * - mouli 06/14/2007. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/bootmem.h> | ||
24 | #include <linux/ioport.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/efi.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include <linux/io.h> | ||
29 | #include <linux/reboot.h> | ||
30 | |||
31 | #include <asm/setup.h> | ||
32 | #include <asm/page.h> | ||
33 | #include <asm/e820.h> | ||
34 | #include <asm/pgtable.h> | ||
35 | #include <asm/tlbflush.h> | ||
36 | #include <asm/proto.h> | ||
37 | #include <asm/efi.h> | ||
38 | |||
39 | static pgd_t save_pgd __initdata; | ||
40 | static unsigned long efi_flags __initdata; | ||
41 | |||
42 | static void __init early_mapping_set_exec(unsigned long start, | ||
43 | unsigned long end, | ||
44 | int executable) | ||
45 | { | ||
46 | pte_t *kpte; | ||
47 | unsigned int level; | ||
48 | |||
49 | while (start < end) { | ||
50 | kpte = lookup_address((unsigned long)__va(start), &level); | ||
51 | BUG_ON(!kpte); | ||
52 | if (executable) | ||
53 | set_pte(kpte, pte_mkexec(*kpte)); | ||
54 | else | ||
55 | set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \ | ||
56 | __supported_pte_mask)); | ||
57 | if (level == PG_LEVEL_4K) | ||
58 | start = (start + PAGE_SIZE) & PAGE_MASK; | ||
59 | else | ||
60 | start = (start + PMD_SIZE) & PMD_MASK; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static void __init early_runtime_code_mapping_set_exec(int executable) | ||
65 | { | ||
66 | efi_memory_desc_t *md; | ||
67 | void *p; | ||
68 | |||
69 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
70 | return; | ||
71 | |||
72 | /* Make EFI runtime service code area executable */ | ||
73 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
74 | md = p; | ||
75 | if (md->type == EFI_RUNTIME_SERVICES_CODE) { | ||
76 | unsigned long end; | ||
77 | end = md->phys_addr + (md->num_pages << PAGE_SHIFT); | ||
78 | early_mapping_set_exec(md->phys_addr, end, executable); | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | |||
83 | void __init efi_call_phys_prelog(void) | ||
84 | { | ||
85 | unsigned long vaddress; | ||
86 | |||
87 | local_irq_save(efi_flags); | ||
88 | early_runtime_code_mapping_set_exec(1); | ||
89 | vaddress = (unsigned long)__va(0x0UL); | ||
90 | save_pgd = *pgd_offset_k(0x0UL); | ||
91 | set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); | ||
92 | __flush_tlb_all(); | ||
93 | } | ||
94 | |||
95 | void __init efi_call_phys_epilog(void) | ||
96 | { | ||
97 | /* | ||
98 | * Restore the original page table. | ||
99 | */ | ||
100 | set_pgd(pgd_offset_k(0x0UL), save_pgd); | ||
101 | early_runtime_code_mapping_set_exec(0); | ||
102 | __flush_tlb_all(); | ||
103 | local_irq_restore(efi_flags); | ||
104 | } | ||
105 | |||
106 | void __init efi_reserve_bootmem(void) | ||
107 | { | ||
108 | reserve_bootmem_generic((unsigned long)memmap.phys_map, | ||
109 | memmap.nr_map * memmap.desc_size); | ||
110 | } | ||
111 | |||
112 | void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) | ||
113 | { | ||
114 | static unsigned pages_mapped; | ||
115 | unsigned i, pages; | ||
116 | |||
117 | /* phys_addr and size must be page aligned */ | ||
118 | if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) | ||
119 | return NULL; | ||
120 | |||
121 | pages = size >> PAGE_SHIFT; | ||
122 | if (pages_mapped + pages > MAX_EFI_IO_PAGES) | ||
123 | return NULL; | ||
124 | |||
125 | for (i = 0; i < pages; i++) { | ||
126 | __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, | ||
127 | phys_addr, PAGE_KERNEL); | ||
128 | phys_addr += PAGE_SIZE; | ||
129 | pages_mapped++; | ||
130 | } | ||
131 | |||
132 | return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ | ||
133 | (pages_mapped - pages)); | ||
134 | } | ||
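efi_ioremap() hands out pages from a fixed fixmap window, so it works before the normal ioremap() machinery is available; both arguments must be page aligned and the total handed out across all calls is capped at MAX_EFI_IO_PAGES. Its one caller is efi_enter_virtual_mode() in efi.c above, which only needs it for runtime regions beyond the already direct-mapped range. A trimmed sketch of that call pattern (error handling simplified):

unsigned long size = md->num_pages << EFI_PAGE_SHIFT;
void *va;

if (((md->phys_addr + size) >> PAGE_SHIFT) <= max_pfn_mapped)
	va = __va(md->phys_addr);                    /* already covered by the direct mapping */
else
	va = efi_ioremap(md->phys_addr, size);       /* NULL if unaligned or over budget */

if (!va)
	printk(KERN_ERR "EFI: could not map region at 0x%llx\n",
	       (unsigned long long)md->phys_addr);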
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S new file mode 100644 index 000000000000..99b47d48c9f4 --- /dev/null +++ b/arch/x86/kernel/efi_stub_64.S | |||
@@ -0,0 +1,109 @@ | |||
1 | /* | ||
2 | * Function calling ABI conversion from Linux to EFI for x86_64 | ||
3 | * | ||
4 | * Copyright (C) 2007 Intel Corp | ||
5 | * Bibo Mao <bibo.mao@intel.com> | ||
6 | * Huang Ying <ying.huang@intel.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | |||
11 | #define SAVE_XMM \ | ||
12 | mov %rsp, %rax; \ | ||
13 | subq $0x70, %rsp; \ | ||
14 | and $~0xf, %rsp; \ | ||
15 | mov %rax, (%rsp); \ | ||
16 | mov %cr0, %rax; \ | ||
17 | clts; \ | ||
18 | mov %rax, 0x8(%rsp); \ | ||
19 | movaps %xmm0, 0x60(%rsp); \ | ||
20 | movaps %xmm1, 0x50(%rsp); \ | ||
21 | movaps %xmm2, 0x40(%rsp); \ | ||
22 | movaps %xmm3, 0x30(%rsp); \ | ||
23 | movaps %xmm4, 0x20(%rsp); \ | ||
24 | movaps %xmm5, 0x10(%rsp) | ||
25 | |||
26 | #define RESTORE_XMM \ | ||
27 | movaps 0x60(%rsp), %xmm0; \ | ||
28 | movaps 0x50(%rsp), %xmm1; \ | ||
29 | movaps 0x40(%rsp), %xmm2; \ | ||
30 | movaps 0x30(%rsp), %xmm3; \ | ||
31 | movaps 0x20(%rsp), %xmm4; \ | ||
32 | movaps 0x10(%rsp), %xmm5; \ | ||
33 | mov 0x8(%rsp), %rsi; \ | ||
34 | mov %rsi, %cr0; \ | ||
35 | mov (%rsp), %rsp | ||
36 | |||
37 | ENTRY(efi_call0) | ||
38 | SAVE_XMM | ||
39 | subq $32, %rsp | ||
40 | call *%rdi | ||
41 | addq $32, %rsp | ||
42 | RESTORE_XMM | ||
43 | ret | ||
44 | |||
45 | ENTRY(efi_call1) | ||
46 | SAVE_XMM | ||
47 | subq $32, %rsp | ||
48 | mov %rsi, %rcx | ||
49 | call *%rdi | ||
50 | addq $32, %rsp | ||
51 | RESTORE_XMM | ||
52 | ret | ||
53 | |||
54 | ENTRY(efi_call2) | ||
55 | SAVE_XMM | ||
56 | subq $32, %rsp | ||
57 | mov %rsi, %rcx | ||
58 | call *%rdi | ||
59 | addq $32, %rsp | ||
60 | RESTORE_XMM | ||
61 | ret | ||
62 | |||
63 | ENTRY(efi_call3) | ||
64 | SAVE_XMM | ||
65 | subq $32, %rsp | ||
66 | mov %rcx, %r8 | ||
67 | mov %rsi, %rcx | ||
68 | call *%rdi | ||
69 | addq $32, %rsp | ||
70 | RESTORE_XMM | ||
71 | ret | ||
72 | |||
73 | ENTRY(efi_call4) | ||
74 | SAVE_XMM | ||
75 | subq $32, %rsp | ||
76 | mov %r8, %r9 | ||
77 | mov %rcx, %r8 | ||
78 | mov %rsi, %rcx | ||
79 | call *%rdi | ||
80 | addq $32, %rsp | ||
81 | RESTORE_XMM | ||
82 | ret | ||
83 | |||
84 | ENTRY(efi_call5) | ||
85 | SAVE_XMM | ||
86 | subq $48, %rsp | ||
87 | mov %r9, 32(%rsp) | ||
88 | mov %r8, %r9 | ||
89 | mov %rcx, %r8 | ||
90 | mov %rsi, %rcx | ||
91 | call *%rdi | ||
92 | addq $48, %rsp | ||
93 | RESTORE_XMM | ||
94 | ret | ||
95 | |||
96 | ENTRY(efi_call6) | ||
97 | SAVE_XMM | ||
98 | mov (%rsp), %rax | ||
99 | mov 8(%rax), %rax | ||
100 | subq $48, %rsp | ||
101 | mov %r9, 32(%rsp) | ||
102 | mov %rax, 40(%rsp) | ||
103 | mov %r8, %r9 | ||
104 | mov %rcx, %r8 | ||
105 | mov %rsi, %rcx | ||
106 | call *%rdi | ||
107 | addq $48, %rsp | ||
108 | RESTORE_XMM | ||
109 | ret | ||
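Seen from C, each stub above behaves like an ordinary function that takes the EFI entry point first and up to six 64-bit arguments after it. The stubs translate the Linux (System V) argument registers rdi/rsi/rdx/rcx/r8/r9 into the EFI (Microsoft x64) order rcx/rdx/r8/r9/stack, reserve the 32-byte shadow space that convention requires, and via SAVE_XMM also clear CR0.TS and preserve xmm0-xmm5 on a 16-byte-aligned stack so the firmware may use SSE registers. The matching prototypes are assumed to live in asm/efi.h, which is not part of this section; roughly:

extern u64 efi_call0(void *fp);
extern u64 efi_call1(void *fp, u64 arg1);
extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3);
extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4);
extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5);
extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6);

The efi_call_physN()/efi_call_virtN() wrappers used in efi.c earlier in this patch presumably expand to these, casting the runtime-service pointer appropriately.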
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dc7f938e5015..be5c31d04884 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -58,7 +58,7 @@ | |||
58 | * for paravirtualization. The following will never clobber any registers: | 58 | * for paravirtualization. The following will never clobber any registers: |
59 | * INTERRUPT_RETURN (aka. "iret") | 59 | * INTERRUPT_RETURN (aka. "iret") |
60 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | 60 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") |
61 | * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | 61 | * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). |
62 | * | 62 | * |
63 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | 63 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must |
64 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | 64 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). |
@@ -283,12 +283,12 @@ END(resume_kernel) | |||
283 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 283 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
284 | 284 | ||
285 | # sysenter call handler stub | 285 | # sysenter call handler stub |
286 | ENTRY(sysenter_entry) | 286 | ENTRY(ia32_sysenter_target) |
287 | CFI_STARTPROC simple | 287 | CFI_STARTPROC simple |
288 | CFI_SIGNAL_FRAME | 288 | CFI_SIGNAL_FRAME |
289 | CFI_DEF_CFA esp, 0 | 289 | CFI_DEF_CFA esp, 0 |
290 | CFI_REGISTER esp, ebp | 290 | CFI_REGISTER esp, ebp |
291 | movl TSS_sysenter_esp0(%esp),%esp | 291 | movl TSS_sysenter_sp0(%esp),%esp |
292 | sysenter_past_esp: | 292 | sysenter_past_esp: |
293 | /* | 293 | /* |
294 | * No need to follow this irqs on/off section: the syscall | 294 | * No need to follow this irqs on/off section: the syscall |
@@ -351,7 +351,7 @@ sysenter_past_esp: | |||
351 | xorl %ebp,%ebp | 351 | xorl %ebp,%ebp |
352 | TRACE_IRQS_ON | 352 | TRACE_IRQS_ON |
353 | 1: mov PT_FS(%esp), %fs | 353 | 1: mov PT_FS(%esp), %fs |
354 | ENABLE_INTERRUPTS_SYSEXIT | 354 | ENABLE_INTERRUPTS_SYSCALL_RET |
355 | CFI_ENDPROC | 355 | CFI_ENDPROC |
356 | .pushsection .fixup,"ax" | 356 | .pushsection .fixup,"ax" |
357 | 2: movl $0,PT_FS(%esp) | 357 | 2: movl $0,PT_FS(%esp) |
@@ -360,7 +360,7 @@ sysenter_past_esp: | |||
360 | .align 4 | 360 | .align 4 |
361 | .long 1b,2b | 361 | .long 1b,2b |
362 | .popsection | 362 | .popsection |
363 | ENDPROC(sysenter_entry) | 363 | ENDPROC(ia32_sysenter_target) |
364 | 364 | ||
365 | # system call handler stub | 365 | # system call handler stub |
366 | ENTRY(system_call) | 366 | ENTRY(system_call) |
@@ -583,7 +583,7 @@ END(syscall_badsys) | |||
583 | * Build the entry stubs and pointer table with | 583 | * Build the entry stubs and pointer table with |
584 | * some assembler magic. | 584 | * some assembler magic. |
585 | */ | 585 | */ |
586 | .data | 586 | .section .rodata,"a" |
587 | ENTRY(interrupt) | 587 | ENTRY(interrupt) |
588 | .text | 588 | .text |
589 | 589 | ||
@@ -743,7 +743,7 @@ END(device_not_available) | |||
743 | * that sets up the real kernel stack. Check here, since we can't | 743 | * that sets up the real kernel stack. Check here, since we can't |
744 | * allow the wrong stack to be used. | 744 | * allow the wrong stack to be used. |
745 | * | 745 | * |
746 | * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have | 746 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have |
747 | * already pushed 3 words if it hits on the sysenter instruction: | 747 | * already pushed 3 words if it hits on the sysenter instruction: |
748 | * eflags, cs and eip. | 748 | * eflags, cs and eip. |
749 | * | 749 | * |
@@ -755,7 +755,7 @@ END(device_not_available) | |||
755 | cmpw $__KERNEL_CS,4(%esp); \ | 755 | cmpw $__KERNEL_CS,4(%esp); \ |
756 | jne ok; \ | 756 | jne ok; \ |
757 | label: \ | 757 | label: \ |
758 | movl TSS_sysenter_esp0+offset(%esp),%esp; \ | 758 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ |
759 | CFI_DEF_CFA esp, 0; \ | 759 | CFI_DEF_CFA esp, 0; \ |
760 | CFI_UNDEFINED eip; \ | 760 | CFI_UNDEFINED eip; \ |
761 | pushfl; \ | 761 | pushfl; \ |
@@ -768,7 +768,7 @@ label: \ | |||
768 | 768 | ||
769 | KPROBE_ENTRY(debug) | 769 | KPROBE_ENTRY(debug) |
770 | RING0_INT_FRAME | 770 | RING0_INT_FRAME |
771 | cmpl $sysenter_entry,(%esp) | 771 | cmpl $ia32_sysenter_target,(%esp) |
772 | jne debug_stack_correct | 772 | jne debug_stack_correct |
773 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | 773 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) |
774 | debug_stack_correct: | 774 | debug_stack_correct: |
@@ -799,7 +799,7 @@ KPROBE_ENTRY(nmi) | |||
799 | popl %eax | 799 | popl %eax |
800 | CFI_ADJUST_CFA_OFFSET -4 | 800 | CFI_ADJUST_CFA_OFFSET -4 |
801 | je nmi_espfix_stack | 801 | je nmi_espfix_stack |
802 | cmpl $sysenter_entry,(%esp) | 802 | cmpl $ia32_sysenter_target,(%esp) |
803 | je nmi_stack_fixup | 803 | je nmi_stack_fixup |
804 | pushl %eax | 804 | pushl %eax |
805 | CFI_ADJUST_CFA_OFFSET 4 | 805 | CFI_ADJUST_CFA_OFFSET 4 |
@@ -812,7 +812,7 @@ KPROBE_ENTRY(nmi) | |||
812 | popl %eax | 812 | popl %eax |
813 | CFI_ADJUST_CFA_OFFSET -4 | 813 | CFI_ADJUST_CFA_OFFSET -4 |
814 | jae nmi_stack_correct | 814 | jae nmi_stack_correct |
815 | cmpl $sysenter_entry,12(%esp) | 815 | cmpl $ia32_sysenter_target,12(%esp) |
816 | je nmi_debug_stack_check | 816 | je nmi_debug_stack_check |
817 | nmi_stack_correct: | 817 | nmi_stack_correct: |
818 | /* We have a RING0_INT_FRAME here */ | 818 | /* We have a RING0_INT_FRAME here */ |
@@ -882,10 +882,10 @@ ENTRY(native_iret) | |||
882 | .previous | 882 | .previous |
883 | END(native_iret) | 883 | END(native_iret) |
884 | 884 | ||
885 | ENTRY(native_irq_enable_sysexit) | 885 | ENTRY(native_irq_enable_syscall_ret) |
886 | sti | 886 | sti |
887 | sysexit | 887 | sysexit |
888 | END(native_irq_enable_sysexit) | 888 | END(native_irq_enable_syscall_ret) |
889 | #endif | 889 | #endif |
890 | 890 | ||
891 | KPROBE_ENTRY(int3) | 891 | KPROBE_ENTRY(int3) |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3a058bb16409..c7341e81941c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/hw_irq.h> | 50 | #include <asm/hw_irq.h> |
51 | #include <asm/page.h> | 51 | #include <asm/page.h> |
52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
53 | #include <asm/paravirt.h> | ||
53 | 54 | ||
54 | .code64 | 55 | .code64 |
55 | 56 | ||
@@ -57,6 +58,13 @@ | |||
57 | #define retint_kernel retint_restore_args | 58 | #define retint_kernel retint_restore_args |
58 | #endif | 59 | #endif |
59 | 60 | ||
61 | #ifdef CONFIG_PARAVIRT | ||
62 | ENTRY(native_irq_enable_syscall_ret) | ||
63 | movq %gs:pda_oldrsp,%rsp | ||
64 | swapgs | ||
65 | sysretq | ||
66 | #endif /* CONFIG_PARAVIRT */ | ||
67 | |||
60 | 68 | ||
61 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 69 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET |
62 | #ifdef CONFIG_TRACE_IRQFLAGS | 70 | #ifdef CONFIG_TRACE_IRQFLAGS |
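Note on the hunk above: the new CONFIG_PARAVIRT block provides native_irq_enable_syscall_ret as the bare-metal implementation of the syscall-return hook, and the ENABLE_INTERRUPTS / DISABLE_INTERRUPTS / SWAPGS / INTERRUPT_RETURN macros used throughout the rest of this file expand either to the raw sti/cli/swapgs/iretq instructions or to a paravirt hook a hypervisor can substitute. The sketch below is only a simplified userspace model of that indirection, using plain function pointers with hypothetical names; the real kernel additionally patches the call sites in place.

/* Minimal model of paravirtualized IRQ/return ops; all names are
 * illustrative, not the kernel's actual paravirt interface. */
#include <stdio.h>

struct pv_ops_model {
        void (*irq_enable)(void);   /* ENABLE_INTERRUPTS  -> sti on bare metal */
        void (*irq_disable)(void);  /* DISABLE_INTERRUPTS -> cli on bare metal */
        void (*syscall_ret)(void);  /* ENABLE_INTERRUPTS_SYSCALL_RET           */
};

static void native_irq_enable(void)  { puts("sti"); }
static void native_irq_disable(void) { puts("cli"); }
static void native_syscall_ret(void) { puts("mov %gs:oldrsp,%rsp; swapgs; sysretq"); }

static void hv_irq_enable(void)  { puts("hypercall: unmask vcpu interrupts"); }
static void hv_irq_disable(void) { puts("hypercall: mask vcpu interrupts"); }
static void hv_syscall_ret(void) { puts("hypercall: return to guest user mode"); }

static struct pv_ops_model pv = {
        native_irq_enable, native_irq_disable, native_syscall_ret
};

int main(void)
{
        pv.irq_disable();           /* roughly what DISABLE_INTERRUPTS(CLBR_NONE) does */
        pv.irq_enable();

        /* A hypervisor would install its own table during early boot. */
        pv = (struct pv_ops_model){ hv_irq_enable, hv_irq_disable, hv_syscall_ret };
        pv.syscall_ret();
        return 0;
}

The CLBR_NONE/CLBR_ANY arguments seen at each call site in this diff record which registers the replacement code is allowed to clobber, which matters only when a hypervisor's implementation actually runs there.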
@@ -216,14 +224,21 @@ ENTRY(system_call) | |||
216 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 224 | CFI_DEF_CFA rsp,PDA_STACKOFFSET |
217 | CFI_REGISTER rip,rcx | 225 | CFI_REGISTER rip,rcx |
218 | /*CFI_REGISTER rflags,r11*/ | 226 | /*CFI_REGISTER rflags,r11*/ |
219 | swapgs | 227 | SWAPGS_UNSAFE_STACK |
228 | /* | ||
229 | * A hypervisor implementation might want to use a label | ||
230 | * after the swapgs, so that it can do the swapgs | ||
231 | * for the guest and jump here on syscall. | ||
232 | */ | ||
233 | ENTRY(system_call_after_swapgs) | ||
234 | |||
220 | movq %rsp,%gs:pda_oldrsp | 235 | movq %rsp,%gs:pda_oldrsp |
221 | movq %gs:pda_kernelstack,%rsp | 236 | movq %gs:pda_kernelstack,%rsp |
222 | /* | 237 | /* |
223 | * No need to follow this irqs off/on section - it's straight | 238 | * No need to follow this irqs off/on section - it's straight |
224 | * and short: | 239 | * and short: |
225 | */ | 240 | */ |
226 | sti | 241 | ENABLE_INTERRUPTS(CLBR_NONE) |
227 | SAVE_ARGS 8,1 | 242 | SAVE_ARGS 8,1 |
228 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 243 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
229 | movq %rcx,RIP-ARGOFFSET(%rsp) | 244 | movq %rcx,RIP-ARGOFFSET(%rsp) |
@@ -246,7 +261,7 @@ ret_from_sys_call: | |||
246 | sysret_check: | 261 | sysret_check: |
247 | LOCKDEP_SYS_EXIT | 262 | LOCKDEP_SYS_EXIT |
248 | GET_THREAD_INFO(%rcx) | 263 | GET_THREAD_INFO(%rcx) |
249 | cli | 264 | DISABLE_INTERRUPTS(CLBR_NONE) |
250 | TRACE_IRQS_OFF | 265 | TRACE_IRQS_OFF |
251 | movl threadinfo_flags(%rcx),%edx | 266 | movl threadinfo_flags(%rcx),%edx |
252 | andl %edi,%edx | 267 | andl %edi,%edx |
@@ -260,9 +275,7 @@ sysret_check: | |||
260 | CFI_REGISTER rip,rcx | 275 | CFI_REGISTER rip,rcx |
261 | RESTORE_ARGS 0,-ARG_SKIP,1 | 276 | RESTORE_ARGS 0,-ARG_SKIP,1 |
262 | /*CFI_REGISTER rflags,r11*/ | 277 | /*CFI_REGISTER rflags,r11*/ |
263 | movq %gs:pda_oldrsp,%rsp | 278 | ENABLE_INTERRUPTS_SYSCALL_RET |
264 | swapgs | ||
265 | sysretq | ||
266 | 279 | ||
267 | CFI_RESTORE_STATE | 280 | CFI_RESTORE_STATE |
268 | /* Handle reschedules */ | 281 | /* Handle reschedules */ |
@@ -271,7 +284,7 @@ sysret_careful: | |||
271 | bt $TIF_NEED_RESCHED,%edx | 284 | bt $TIF_NEED_RESCHED,%edx |
272 | jnc sysret_signal | 285 | jnc sysret_signal |
273 | TRACE_IRQS_ON | 286 | TRACE_IRQS_ON |
274 | sti | 287 | ENABLE_INTERRUPTS(CLBR_NONE) |
275 | pushq %rdi | 288 | pushq %rdi |
276 | CFI_ADJUST_CFA_OFFSET 8 | 289 | CFI_ADJUST_CFA_OFFSET 8 |
277 | call schedule | 290 | call schedule |
@@ -282,8 +295,8 @@ sysret_careful: | |||
282 | /* Handle a signal */ | 295 | /* Handle a signal */ |
283 | sysret_signal: | 296 | sysret_signal: |
284 | TRACE_IRQS_ON | 297 | TRACE_IRQS_ON |
285 | sti | 298 | ENABLE_INTERRUPTS(CLBR_NONE) |
286 | testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | 299 | testl $_TIF_DO_NOTIFY_MASK,%edx |
287 | jz 1f | 300 | jz 1f |
288 | 301 | ||
289 | /* Really a signal */ | 302 | /* Really a signal */ |
@@ -295,7 +308,7 @@ sysret_signal: | |||
295 | 1: movl $_TIF_NEED_RESCHED,%edi | 308 | 1: movl $_TIF_NEED_RESCHED,%edi |
296 | /* Use IRET because user could have changed frame. This | 309 | /* Use IRET because user could have changed frame. This |
297 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 310 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
298 | cli | 311 | DISABLE_INTERRUPTS(CLBR_NONE) |
299 | TRACE_IRQS_OFF | 312 | TRACE_IRQS_OFF |
300 | jmp int_with_check | 313 | jmp int_with_check |
301 | 314 | ||
@@ -327,7 +340,7 @@ tracesys: | |||
327 | */ | 340 | */ |
328 | .globl int_ret_from_sys_call | 341 | .globl int_ret_from_sys_call |
329 | int_ret_from_sys_call: | 342 | int_ret_from_sys_call: |
330 | cli | 343 | DISABLE_INTERRUPTS(CLBR_NONE) |
331 | TRACE_IRQS_OFF | 344 | TRACE_IRQS_OFF |
332 | testl $3,CS-ARGOFFSET(%rsp) | 345 | testl $3,CS-ARGOFFSET(%rsp) |
333 | je retint_restore_args | 346 | je retint_restore_args |
@@ -349,20 +362,20 @@ int_careful: | |||
349 | bt $TIF_NEED_RESCHED,%edx | 362 | bt $TIF_NEED_RESCHED,%edx |
350 | jnc int_very_careful | 363 | jnc int_very_careful |
351 | TRACE_IRQS_ON | 364 | TRACE_IRQS_ON |
352 | sti | 365 | ENABLE_INTERRUPTS(CLBR_NONE) |
353 | pushq %rdi | 366 | pushq %rdi |
354 | CFI_ADJUST_CFA_OFFSET 8 | 367 | CFI_ADJUST_CFA_OFFSET 8 |
355 | call schedule | 368 | call schedule |
356 | popq %rdi | 369 | popq %rdi |
357 | CFI_ADJUST_CFA_OFFSET -8 | 370 | CFI_ADJUST_CFA_OFFSET -8 |
358 | cli | 371 | DISABLE_INTERRUPTS(CLBR_NONE) |
359 | TRACE_IRQS_OFF | 372 | TRACE_IRQS_OFF |
360 | jmp int_with_check | 373 | jmp int_with_check |
361 | 374 | ||
362 | /* handle signals and tracing -- both require a full stack frame */ | 375 | /* handle signals and tracing -- both require a full stack frame */ |
363 | int_very_careful: | 376 | int_very_careful: |
364 | TRACE_IRQS_ON | 377 | TRACE_IRQS_ON |
365 | sti | 378 | ENABLE_INTERRUPTS(CLBR_NONE) |
366 | SAVE_REST | 379 | SAVE_REST |
367 | /* Check for syscall exit trace */ | 380 | /* Check for syscall exit trace */ |
368 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 381 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx |
@@ -377,7 +390,7 @@ int_very_careful: | |||
377 | jmp int_restore_rest | 390 | jmp int_restore_rest |
378 | 391 | ||
379 | int_signal: | 392 | int_signal: |
380 | testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | 393 | testl $_TIF_DO_NOTIFY_MASK,%edx |
381 | jz 1f | 394 | jz 1f |
382 | movq %rsp,%rdi # &ptregs -> arg1 | 395 | movq %rsp,%rdi # &ptregs -> arg1 |
383 | xorl %esi,%esi # oldset -> arg2 | 396 | xorl %esi,%esi # oldset -> arg2 |
@@ -385,7 +398,7 @@ int_signal: | |||
385 | 1: movl $_TIF_NEED_RESCHED,%edi | 398 | 1: movl $_TIF_NEED_RESCHED,%edi |
386 | int_restore_rest: | 399 | int_restore_rest: |
387 | RESTORE_REST | 400 | RESTORE_REST |
388 | cli | 401 | DISABLE_INTERRUPTS(CLBR_NONE) |
389 | TRACE_IRQS_OFF | 402 | TRACE_IRQS_OFF |
390 | jmp int_with_check | 403 | jmp int_with_check |
391 | CFI_ENDPROC | 404 | CFI_ENDPROC |
@@ -506,7 +519,7 @@ END(stub_rt_sigreturn) | |||
506 | CFI_DEF_CFA_REGISTER rbp | 519 | CFI_DEF_CFA_REGISTER rbp |
507 | testl $3,CS(%rdi) | 520 | testl $3,CS(%rdi) |
508 | je 1f | 521 | je 1f |
509 | swapgs | 522 | SWAPGS |
510 | /* irqcount is used to check if a CPU is already on an interrupt | 523 | /* irqcount is used to check if a CPU is already on an interrupt |
511 | stack or not. While this is essentially redundant with preempt_count | 524 | stack or not. While this is essentially redundant with preempt_count |
512 | it is a little cheaper to use a separate counter in the PDA | 525 | it is a little cheaper to use a separate counter in the PDA |
@@ -527,7 +540,7 @@ ENTRY(common_interrupt) | |||
527 | interrupt do_IRQ | 540 | interrupt do_IRQ |
528 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 541 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
529 | ret_from_intr: | 542 | ret_from_intr: |
530 | cli | 543 | DISABLE_INTERRUPTS(CLBR_NONE) |
531 | TRACE_IRQS_OFF | 544 | TRACE_IRQS_OFF |
532 | decl %gs:pda_irqcount | 545 | decl %gs:pda_irqcount |
533 | leaveq | 546 | leaveq |
@@ -556,64 +569,76 @@ retint_swapgs: /* return to user-space */ | |||
556 | /* | 569 | /* |
557 | * The iretq could re-enable interrupts: | 570 | * The iretq could re-enable interrupts: |
558 | */ | 571 | */ |
559 | cli | 572 | DISABLE_INTERRUPTS(CLBR_ANY) |
560 | TRACE_IRQS_IRETQ | 573 | TRACE_IRQS_IRETQ |
561 | swapgs | 574 | SWAPGS |
562 | jmp restore_args | 575 | jmp restore_args |
563 | 576 | ||
564 | retint_restore_args: /* return to kernel space */ | 577 | retint_restore_args: /* return to kernel space */ |
565 | cli | 578 | DISABLE_INTERRUPTS(CLBR_ANY) |
566 | /* | 579 | /* |
567 | * The iretq could re-enable interrupts: | 580 | * The iretq could re-enable interrupts: |
568 | */ | 581 | */ |
569 | TRACE_IRQS_IRETQ | 582 | TRACE_IRQS_IRETQ |
570 | restore_args: | 583 | restore_args: |
571 | RESTORE_ARGS 0,8,0 | 584 | RESTORE_ARGS 0,8,0 |
572 | iret_label: | 585 | #ifdef CONFIG_PARAVIRT |
586 | INTERRUPT_RETURN | ||
587 | #endif | ||
588 | ENTRY(native_iret) | ||
573 | iretq | 589 | iretq |
574 | 590 | ||
575 | .section __ex_table,"a" | 591 | .section __ex_table,"a" |
576 | .quad iret_label,bad_iret | 592 | .quad native_iret, bad_iret |
577 | .previous | 593 | .previous |
578 | .section .fixup,"ax" | 594 | .section .fixup,"ax" |
579 | /* force a signal here? this matches i386 behaviour */ | ||
580 | /* running with kernel gs */ | ||
581 | bad_iret: | 595 | bad_iret: |
582 | movq $11,%rdi /* SIGSEGV */ | 596 | /* |
583 | TRACE_IRQS_ON | 597 | * The iret traps when the %cs or %ss being restored is bogus. |
584 | sti | 598 | * We've lost the original trap vector and error code. |
585 | jmp do_exit | 599 | * #GPF is the most likely one to get for an invalid selector. |
586 | .previous | 600 | * So pretend we completed the iret and took the #GPF in user mode. |
587 | 601 | * | |
602 | * We are now running with the kernel GS after exception recovery. | ||
603 | * But error_entry expects us to have user GS to match the user %cs, | ||
604 | * so swap back. | ||
605 | */ | ||
606 | pushq $0 | ||
607 | |||
608 | SWAPGS | ||
609 | jmp general_protection | ||
610 | |||
611 | .previous | ||
612 | |||
588 | /* edi: workmask, edx: work */ | 613 | /* edi: workmask, edx: work */ |
589 | retint_careful: | 614 | retint_careful: |
590 | CFI_RESTORE_STATE | 615 | CFI_RESTORE_STATE |
591 | bt $TIF_NEED_RESCHED,%edx | 616 | bt $TIF_NEED_RESCHED,%edx |
592 | jnc retint_signal | 617 | jnc retint_signal |
593 | TRACE_IRQS_ON | 618 | TRACE_IRQS_ON |
594 | sti | 619 | ENABLE_INTERRUPTS(CLBR_NONE) |
595 | pushq %rdi | 620 | pushq %rdi |
596 | CFI_ADJUST_CFA_OFFSET 8 | 621 | CFI_ADJUST_CFA_OFFSET 8 |
597 | call schedule | 622 | call schedule |
598 | popq %rdi | 623 | popq %rdi |
599 | CFI_ADJUST_CFA_OFFSET -8 | 624 | CFI_ADJUST_CFA_OFFSET -8 |
600 | GET_THREAD_INFO(%rcx) | 625 | GET_THREAD_INFO(%rcx) |
601 | cli | 626 | DISABLE_INTERRUPTS(CLBR_NONE) |
602 | TRACE_IRQS_OFF | 627 | TRACE_IRQS_OFF |
603 | jmp retint_check | 628 | jmp retint_check |
604 | 629 | ||
605 | retint_signal: | 630 | retint_signal: |
606 | testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx | 631 | testl $_TIF_DO_NOTIFY_MASK,%edx |
607 | jz retint_swapgs | 632 | jz retint_swapgs |
608 | TRACE_IRQS_ON | 633 | TRACE_IRQS_ON |
609 | sti | 634 | ENABLE_INTERRUPTS(CLBR_NONE) |
610 | SAVE_REST | 635 | SAVE_REST |
611 | movq $-1,ORIG_RAX(%rsp) | 636 | movq $-1,ORIG_RAX(%rsp) |
612 | xorl %esi,%esi # oldset | 637 | xorl %esi,%esi # oldset |
613 | movq %rsp,%rdi # &pt_regs | 638 | movq %rsp,%rdi # &pt_regs |
614 | call do_notify_resume | 639 | call do_notify_resume |
615 | RESTORE_REST | 640 | RESTORE_REST |
616 | cli | 641 | DISABLE_INTERRUPTS(CLBR_NONE) |
617 | TRACE_IRQS_OFF | 642 | TRACE_IRQS_OFF |
618 | movl $_TIF_NEED_RESCHED,%edi | 643 | movl $_TIF_NEED_RESCHED,%edi |
619 | GET_THREAD_INFO(%rcx) | 644 | GET_THREAD_INFO(%rcx) |
@@ -731,7 +756,7 @@ END(spurious_interrupt) | |||
731 | rdmsr | 756 | rdmsr |
732 | testl %edx,%edx | 757 | testl %edx,%edx |
733 | js 1f | 758 | js 1f |
734 | swapgs | 759 | SWAPGS |
735 | xorl %ebx,%ebx | 760 | xorl %ebx,%ebx |
736 | 1: | 761 | 1: |
737 | .if \ist | 762 | .if \ist |
@@ -747,7 +772,7 @@ END(spurious_interrupt) | |||
747 | .if \ist | 772 | .if \ist |
748 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 773 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
749 | .endif | 774 | .endif |
750 | cli | 775 | DISABLE_INTERRUPTS(CLBR_NONE) |
751 | .if \irqtrace | 776 | .if \irqtrace |
752 | TRACE_IRQS_OFF | 777 | TRACE_IRQS_OFF |
753 | .endif | 778 | .endif |
@@ -776,10 +801,10 @@ paranoid_swapgs\trace: | |||
776 | .if \trace | 801 | .if \trace |
777 | TRACE_IRQS_IRETQ 0 | 802 | TRACE_IRQS_IRETQ 0 |
778 | .endif | 803 | .endif |
779 | swapgs | 804 | SWAPGS_UNSAFE_STACK |
780 | paranoid_restore\trace: | 805 | paranoid_restore\trace: |
781 | RESTORE_ALL 8 | 806 | RESTORE_ALL 8 |
782 | iretq | 807 | INTERRUPT_RETURN |
783 | paranoid_userspace\trace: | 808 | paranoid_userspace\trace: |
784 | GET_THREAD_INFO(%rcx) | 809 | GET_THREAD_INFO(%rcx) |
785 | movl threadinfo_flags(%rcx),%ebx | 810 | movl threadinfo_flags(%rcx),%ebx |
@@ -794,11 +819,11 @@ paranoid_userspace\trace: | |||
794 | .if \trace | 819 | .if \trace |
795 | TRACE_IRQS_ON | 820 | TRACE_IRQS_ON |
796 | .endif | 821 | .endif |
797 | sti | 822 | ENABLE_INTERRUPTS(CLBR_NONE) |
798 | xorl %esi,%esi /* arg2: oldset */ | 823 | xorl %esi,%esi /* arg2: oldset */ |
799 | movq %rsp,%rdi /* arg1: &pt_regs */ | 824 | movq %rsp,%rdi /* arg1: &pt_regs */ |
800 | call do_notify_resume | 825 | call do_notify_resume |
801 | cli | 826 | DISABLE_INTERRUPTS(CLBR_NONE) |
802 | .if \trace | 827 | .if \trace |
803 | TRACE_IRQS_OFF | 828 | TRACE_IRQS_OFF |
804 | .endif | 829 | .endif |
@@ -807,9 +832,9 @@ paranoid_schedule\trace: | |||
807 | .if \trace | 832 | .if \trace |
808 | TRACE_IRQS_ON | 833 | TRACE_IRQS_ON |
809 | .endif | 834 | .endif |
810 | sti | 835 | ENABLE_INTERRUPTS(CLBR_ANY) |
811 | call schedule | 836 | call schedule |
812 | cli | 837 | DISABLE_INTERRUPTS(CLBR_ANY) |
813 | .if \trace | 838 | .if \trace |
814 | TRACE_IRQS_OFF | 839 | TRACE_IRQS_OFF |
815 | .endif | 840 | .endif |
@@ -862,7 +887,7 @@ KPROBE_ENTRY(error_entry) | |||
862 | testl $3,CS(%rsp) | 887 | testl $3,CS(%rsp) |
863 | je error_kernelspace | 888 | je error_kernelspace |
864 | error_swapgs: | 889 | error_swapgs: |
865 | swapgs | 890 | SWAPGS |
866 | error_sti: | 891 | error_sti: |
867 | movq %rdi,RDI(%rsp) | 892 | movq %rdi,RDI(%rsp) |
868 | CFI_REL_OFFSET rdi,RDI | 893 | CFI_REL_OFFSET rdi,RDI |
@@ -874,7 +899,7 @@ error_sti: | |||
874 | error_exit: | 899 | error_exit: |
875 | movl %ebx,%eax | 900 | movl %ebx,%eax |
876 | RESTORE_REST | 901 | RESTORE_REST |
877 | cli | 902 | DISABLE_INTERRUPTS(CLBR_NONE) |
878 | TRACE_IRQS_OFF | 903 | TRACE_IRQS_OFF |
879 | GET_THREAD_INFO(%rcx) | 904 | GET_THREAD_INFO(%rcx) |
880 | testl %eax,%eax | 905 | testl %eax,%eax |
@@ -894,7 +919,7 @@ error_kernelspace: | |||
894 | iret run with kernel gs again, so don't set the user space flag. | 919 | iret run with kernel gs again, so don't set the user space flag. |
895 | B stepping K8s sometimes report an truncated RIP for IRET | 920 | B stepping K8s sometimes report an truncated RIP for IRET |
896 | exceptions returning to compat mode. Check for these here too. */ | 921 | exceptions returning to compat mode. Check for these here too. */ |
897 | leaq iret_label(%rip),%rbp | 922 | leaq native_iret(%rip),%rbp |
898 | cmpq %rbp,RIP(%rsp) | 923 | cmpq %rbp,RIP(%rsp) |
899 | je error_swapgs | 924 | je error_swapgs |
900 | movl %ebp,%ebp /* zero extend */ | 925 | movl %ebp,%ebp /* zero extend */ |
@@ -911,12 +936,12 @@ ENTRY(load_gs_index) | |||
911 | CFI_STARTPROC | 936 | CFI_STARTPROC |
912 | pushf | 937 | pushf |
913 | CFI_ADJUST_CFA_OFFSET 8 | 938 | CFI_ADJUST_CFA_OFFSET 8 |
914 | cli | 939 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
915 | swapgs | 940 | SWAPGS |
916 | gs_change: | 941 | gs_change: |
917 | movl %edi,%gs | 942 | movl %edi,%gs |
918 | 2: mfence /* workaround */ | 943 | 2: mfence /* workaround */ |
919 | swapgs | 944 | SWAPGS |
920 | popf | 945 | popf |
921 | CFI_ADJUST_CFA_OFFSET -8 | 946 | CFI_ADJUST_CFA_OFFSET -8 |
922 | ret | 947 | ret |
@@ -930,7 +955,7 @@ ENDPROC(load_gs_index) | |||
930 | .section .fixup,"ax" | 955 | .section .fixup,"ax" |
931 | /* running with kernelgs */ | 956 | /* running with kernelgs */ |
932 | bad_gs: | 957 | bad_gs: |
933 | swapgs /* switch back to user gs */ | 958 | SWAPGS /* switch back to user gs */ |
934 | xorl %eax,%eax | 959 | xorl %eax,%eax |
935 | movl %eax,%gs | 960 | movl %eax,%gs |
936 | jmp 2b | 961 | jmp 2b |
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index ce703e21c912..4ae7b6440260 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -24,18 +24,11 @@ | |||
24 | #include <acpi/acpi_bus.h> | 24 | #include <acpi/acpi_bus.h> |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | /* | 27 | /* which logical CPU number maps to which CPU (physical APIC ID) */ |
28 | * which logical CPU number maps to which CPU (physical APIC ID) | 28 | u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata |
29 | * | ||
30 | * The following static array is used during kernel startup | ||
31 | * and the x86_cpu_to_apicid_ptr contains the address of the | ||
32 | * array during this time. Is it zeroed when the per_cpu | ||
33 | * data area is removed. | ||
34 | */ | ||
35 | u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata | ||
36 | = { [0 ... NR_CPUS-1] = BAD_APICID }; | 29 | = { [0 ... NR_CPUS-1] = BAD_APICID }; |
37 | void *x86_cpu_to_apicid_ptr; | 30 | void *x86_cpu_to_apicid_early_ptr; |
38 | DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; | 31 | DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; |
39 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | 32 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); |
40 | 33 | ||
41 | struct genapic __read_mostly *genapic = &apic_flat; | 34 | struct genapic __read_mostly *genapic = &apic_flat; |
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index f12d8c5d9809..9c7f7d395968 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * AMD Geode southbridge support code | 2 | * AMD Geode southbridge support code |
3 | * Copyright (C) 2006, Advanced Micro Devices, Inc. | 3 | * Copyright (C) 2006, Advanced Micro Devices, Inc. |
4 | * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of version 2 of the GNU General Public License | 7 | * modify it under the terms of version 2 of the GNU General Public License |
@@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base); | |||
51 | 52 | ||
52 | /* === GPIO API === */ | 53 | /* === GPIO API === */ |
53 | 54 | ||
54 | void geode_gpio_set(unsigned int gpio, unsigned int reg) | 55 | void geode_gpio_set(u32 gpio, unsigned int reg) |
55 | { | 56 | { |
56 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | 57 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); |
57 | 58 | ||
58 | if (!base) | 59 | if (!base) |
59 | return; | 60 | return; |
60 | 61 | ||
61 | if (gpio < 16) | 62 | /* low bank register */ |
62 | outl(1 << gpio, base + reg); | 63 | if (gpio & 0xFFFF) |
63 | else | 64 | outl(gpio & 0xFFFF, base + reg); |
64 | outl(1 << (gpio - 16), base + 0x80 + reg); | 65 | /* high bank register */ |
66 | gpio >>= 16; | ||
67 | if (gpio) | ||
68 | outl(gpio, base + 0x80 + reg); | ||
65 | } | 69 | } |
66 | EXPORT_SYMBOL_GPL(geode_gpio_set); | 70 | EXPORT_SYMBOL_GPL(geode_gpio_set); |
67 | 71 | ||
68 | void geode_gpio_clear(unsigned int gpio, unsigned int reg) | 72 | void geode_gpio_clear(u32 gpio, unsigned int reg) |
69 | { | 73 | { |
70 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | 74 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); |
71 | 75 | ||
72 | if (!base) | 76 | if (!base) |
73 | return; | 77 | return; |
74 | 78 | ||
75 | if (gpio < 16) | 79 | /* low bank register */ |
76 | outl(1 << (gpio + 16), base + reg); | 80 | if (gpio & 0xFFFF) |
77 | else | 81 | outl((gpio & 0xFFFF) << 16, base + reg); |
78 | outl(1 << gpio, base + 0x80 + reg); | 82 | /* high bank register */ |
83 | gpio &= (0xFFFF << 16); | ||
84 | if (gpio) | ||
85 | outl(gpio, base + 0x80 + reg); | ||
79 | } | 86 | } |
80 | EXPORT_SYMBOL_GPL(geode_gpio_clear); | 87 | EXPORT_SYMBOL_GPL(geode_gpio_clear); |
81 | 88 | ||
82 | int geode_gpio_isset(unsigned int gpio, unsigned int reg) | 89 | int geode_gpio_isset(u32 gpio, unsigned int reg) |
83 | { | 90 | { |
84 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | 91 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); |
92 | u32 val; | ||
85 | 93 | ||
86 | if (!base) | 94 | if (!base) |
87 | return 0; | 95 | return 0; |
88 | 96 | ||
89 | if (gpio < 16) | 97 | /* low bank register */ |
90 | return (inl(base + reg) & (1 << gpio)) ? 1 : 0; | 98 | if (gpio & 0xFFFF) { |
91 | else | 99 | val = inl(base + reg) & (gpio & 0xFFFF); |
92 | return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0; | 100 | if ((gpio & 0xFFFF) == val) |
101 | return 1; | ||
102 | } | ||
103 | /* high bank register */ | ||
104 | gpio >>= 16; | ||
105 | if (gpio) { | ||
106 | val = inl(base + 0x80 + reg) & gpio; | ||
107 | if (gpio == val) | ||
108 | return 1; | ||
109 | } | ||
110 | return 0; | ||
93 | } | 111 | } |
94 | EXPORT_SYMBOL_GPL(geode_gpio_isset); | 112 | EXPORT_SYMBOL_GPL(geode_gpio_isset); |
95 | 113 | ||
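Note on the geode_32.c changes above: the GPIO helpers switch from taking a GPIO number (and shifting 1 << gpio internally) to taking a pre-built bitmask, so several lines can be driven with one register write. Bits 0-15 select the low-bank register at base + reg, bits 16-31 the high-bank register at base + 0x80 + reg, and geode_gpio_isset() now reports 1 only if every requested bit is set. The following runnable userspace sketch mirrors that split; outl() is replaced by a printf stub and the base address is an arbitrary example.

/* Userspace model of the new mask-based geode_gpio_set(); out32() stands
 * in for outl() and "base" is an example value only. */
#include <stdio.h>
#include <stdint.h>

static void out32(uint32_t val, uint32_t port)
{
        printf("outl(0x%08x, 0x%04x)\n", (unsigned)val, (unsigned)port);
}

static void geode_gpio_set_model(uint32_t gpio, unsigned int reg, uint32_t base)
{
        if (gpio & 0xFFFF)              /* low bank: GPIOs 0-15  */
                out32(gpio & 0xFFFF, base + reg);
        gpio >>= 16;                    /* high bank: GPIOs 16-31 */
        if (gpio)
                out32(gpio, base + 0x80 + reg);
}

int main(void)
{
        uint32_t base = 0x6100;         /* example I/O base */
        /* Drive GPIO 3 and GPIO 20 in one call: mask = (1 << 3) | (1 << 20). */
        geode_gpio_set_model((1u << 3) | (1u << 20), 0x00, base);
        return 0;
}

Callers are presumably expected to build the mask themselves, for example (1 << gpio_number), which is what makes touching pins in both banks with a single call possible.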
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6b3469311e42..24dbf56928d7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
13 | #include <linux/start_kernel.h> | ||
13 | 14 | ||
14 | #include <asm/processor.h> | 15 | #include <asm/processor.h> |
15 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
@@ -19,12 +20,14 @@ | |||
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
20 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
21 | #include <asm/sections.h> | 22 | #include <asm/sections.h> |
23 | #include <asm/kdebug.h> | ||
24 | #include <asm/e820.h> | ||
22 | 25 | ||
23 | static void __init zap_identity_mappings(void) | 26 | static void __init zap_identity_mappings(void) |
24 | { | 27 | { |
25 | pgd_t *pgd = pgd_offset_k(0UL); | 28 | pgd_t *pgd = pgd_offset_k(0UL); |
26 | pgd_clear(pgd); | 29 | pgd_clear(pgd); |
27 | __flush_tlb(); | 30 | __flush_tlb_all(); |
28 | } | 31 | } |
29 | 32 | ||
30 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 33 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
@@ -46,6 +49,35 @@ static void __init copy_bootdata(char *real_mode_data) | |||
46 | } | 49 | } |
47 | } | 50 | } |
48 | 51 | ||
52 | #define EBDA_ADDR_POINTER 0x40E | ||
53 | |||
54 | static __init void reserve_ebda(void) | ||
55 | { | ||
56 | unsigned ebda_addr, ebda_size; | ||
57 | |||
58 | /* | ||
59 | * there is a real-mode segmented pointer pointing to the | ||
60 | * 4K EBDA area at 0x40E | ||
61 | */ | ||
62 | ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); | ||
63 | ebda_addr <<= 4; | ||
64 | |||
65 | if (!ebda_addr) | ||
66 | return; | ||
67 | |||
68 | ebda_size = *(unsigned short *)__va(ebda_addr); | ||
69 | |||
70 | /* Round EBDA up to pages */ | ||
71 | if (ebda_size == 0) | ||
72 | ebda_size = 1; | ||
73 | ebda_size <<= 10; | ||
74 | ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); | ||
75 | if (ebda_size > 64*1024) | ||
76 | ebda_size = 64*1024; | ||
77 | |||
78 | reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA"); | ||
79 | } | ||
80 | |||
49 | void __init x86_64_start_kernel(char * real_mode_data) | 81 | void __init x86_64_start_kernel(char * real_mode_data) |
50 | { | 82 | { |
51 | int i; | 83 | int i; |
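Note on reserve_ebda() above: it recovers the Extended BIOS Data Area from real-mode BIOS data. The 16-bit word at physical 0x40E is a segment value, so shifting it left by 4 yields the EBDA's physical address; the first word of the EBDA gives its size in KiB, which is rounded up to whole pages and capped at 64 KiB before being passed to reserve_early(). A small runnable sketch of the same arithmetic, with example values standing in for the BIOS-provided words:

/* Standalone model of reserve_ebda(); the segment word "at 0x40E" and the
 * size word "at the start of the EBDA" are example values, not reads from
 * real BIOS memory. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE       4096u
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        uint16_t ebda_segment = 0x9FC0;  /* example word read from 0x40E  */
        uint16_t ebda_kib     = 1;       /* example word at start of EBDA */

        uint32_t ebda_addr = (uint32_t)ebda_segment << 4;   /* 0x9FC00 */
        if (!ebda_addr)
                return 0;                /* BIOS reported no EBDA */

        uint32_t ebda_size = ebda_kib ? ebda_kib : 1;
        ebda_size <<= 10;                                    /* KiB -> bytes */
        ebda_size = ROUND_UP(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
        if (ebda_size > 64 * 1024)
                ebda_size = 64 * 1024;

        printf("reserve_early(0x%x, 0x%x, \"EBDA\")\n",
               (unsigned)ebda_addr, (unsigned)(ebda_addr + ebda_size));
        return 0;
}

The 1 KiB fallback and the 64 KiB cap read as sanity limits on whatever the BIOS reports.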
@@ -56,8 +88,13 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
56 | /* Make NULL pointers segfault */ | 88 | /* Make NULL pointers segfault */ |
57 | zap_identity_mappings(); | 89 | zap_identity_mappings(); |
58 | 90 | ||
59 | for (i = 0; i < IDT_ENTRIES; i++) | 91 | for (i = 0; i < IDT_ENTRIES; i++) { |
92 | #ifdef CONFIG_EARLY_PRINTK | ||
93 | set_intr_gate(i, &early_idt_handlers[i]); | ||
94 | #else | ||
60 | set_intr_gate(i, early_idt_handler); | 95 | set_intr_gate(i, early_idt_handler); |
96 | #endif | ||
97 | } | ||
61 | load_idt((const struct desc_ptr *)&idt_descr); | 98 | load_idt((const struct desc_ptr *)&idt_descr); |
62 | 99 | ||
63 | early_printk("Kernel alive\n"); | 100 | early_printk("Kernel alive\n"); |
@@ -67,8 +104,24 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
67 | 104 | ||
68 | pda_init(0); | 105 | pda_init(0); |
69 | copy_bootdata(__va(real_mode_data)); | 106 | copy_bootdata(__va(real_mode_data)); |
70 | #ifdef CONFIG_SMP | 107 | |
71 | cpu_set(0, cpu_online_map); | 108 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
72 | #endif | 109 | |
110 | /* Reserve INITRD */ | ||
111 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
112 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
113 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
114 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | ||
115 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | ||
116 | } | ||
117 | |||
118 | reserve_ebda(); | ||
119 | |||
120 | /* | ||
121 | * At this point everything still needed from the boot loader | ||
122 | * or BIOS or kernel text should be early reserved or marked not | ||
123 | * RAM in e820. All other memory is free game. | ||
124 | */ | ||
125 | |||
73 | start_kernel(); | 126 | start_kernel(); |
74 | } | 127 | } |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fbad51fce672..5d8c5730686b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | .text | 10 | .text |
11 | #include <linux/threads.h> | 11 | #include <linux/threads.h> |
12 | #include <linux/init.h> | ||
12 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
13 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
14 | #include <asm/page.h> | 15 | #include <asm/page.h> |
@@ -151,7 +152,9 @@ WEAK(xen_entry) | |||
151 | /* Unknown implementation; there's really | 152 | /* Unknown implementation; there's really |
152 | nothing we can do at this point. */ | 153 | nothing we can do at this point. */ |
153 | ud2a | 154 | ud2a |
154 | .data | 155 | |
156 | __INITDATA | ||
157 | |||
155 | subarch_entries: | 158 | subarch_entries: |
156 | .long default_entry /* normal x86/PC */ | 159 | .long default_entry /* normal x86/PC */ |
157 | .long lguest_entry /* lguest hypervisor */ | 160 | .long lguest_entry /* lguest hypervisor */ |
@@ -199,7 +202,6 @@ default_entry: | |||
199 | addl $0x67, %eax /* 0x67 == _PAGE_TABLE */ | 202 | addl $0x67, %eax /* 0x67 == _PAGE_TABLE */ |
200 | movl %eax, 4092(%edx) | 203 | movl %eax, 4092(%edx) |
201 | 204 | ||
202 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ | ||
203 | jmp 3f | 205 | jmp 3f |
204 | /* | 206 | /* |
205 | * Non-boot CPU entry point; entered from trampoline.S | 207 | * Non-boot CPU entry point; entered from trampoline.S |
@@ -222,6 +224,8 @@ ENTRY(startup_32_smp) | |||
222 | movl %eax,%es | 224 | movl %eax,%es |
223 | movl %eax,%fs | 225 | movl %eax,%fs |
224 | movl %eax,%gs | 226 | movl %eax,%gs |
227 | #endif /* CONFIG_SMP */ | ||
228 | 3: | ||
225 | 229 | ||
226 | /* | 230 | /* |
227 | * New page tables may be in 4Mbyte page mode and may | 231 | * New page tables may be in 4Mbyte page mode and may |
@@ -268,12 +272,6 @@ ENTRY(startup_32_smp) | |||
268 | wrmsr | 272 | wrmsr |
269 | 273 | ||
270 | 6: | 274 | 6: |
271 | /* This is a secondary processor (AP) */ | ||
272 | xorl %ebx,%ebx | ||
273 | incl %ebx | ||
274 | |||
275 | #endif /* CONFIG_SMP */ | ||
276 | 3: | ||
277 | 275 | ||
278 | /* | 276 | /* |
279 | * Enable paging | 277 | * Enable paging |
@@ -297,7 +295,7 @@ ENTRY(startup_32_smp) | |||
297 | popfl | 295 | popfl |
298 | 296 | ||
299 | #ifdef CONFIG_SMP | 297 | #ifdef CONFIG_SMP |
300 | andl %ebx,%ebx | 298 | cmpb $0, ready |
301 | jz 1f /* Initial CPU cleans BSS */ | 299 | jz 1f /* Initial CPU cleans BSS */ |
302 | jmp checkCPUtype | 300 | jmp checkCPUtype |
303 | 1: | 301 | 1: |
@@ -502,6 +500,7 @@ early_fault: | |||
502 | call printk | 500 | call printk |
503 | #endif | 501 | #endif |
504 | #endif | 502 | #endif |
503 | call dump_stack | ||
505 | hlt_loop: | 504 | hlt_loop: |
506 | hlt | 505 | hlt |
507 | jmp hlt_loop | 506 | jmp hlt_loop |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b6167fe3330e..09b38d539b09 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -19,6 +19,13 @@ | |||
19 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
20 | #include <asm/cache.h> | 20 | #include <asm/cache.h> |
21 | 21 | ||
22 | #ifdef CONFIG_PARAVIRT | ||
23 | #include <asm/asm-offsets.h> | ||
24 | #include <asm/paravirt.h> | ||
25 | #else | ||
26 | #define GET_CR2_INTO_RCX movq %cr2, %rcx | ||
27 | #endif | ||
28 | |||
22 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE | 29 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE |
23 | * because we need identity-mapped pages. | 30 | * because we need identity-mapped pages. |
24 | * | 31 | * |
@@ -56,7 +63,7 @@ startup_64: | |||
56 | 63 | ||
57 | /* Is the address not 2M aligned? */ | 64 | /* Is the address not 2M aligned? */ |
58 | movq %rbp, %rax | 65 | movq %rbp, %rax |
59 | andl $~LARGE_PAGE_MASK, %eax | 66 | andl $~PMD_PAGE_MASK, %eax |
60 | testl %eax, %eax | 67 | testl %eax, %eax |
61 | jnz bad_address | 68 | jnz bad_address |
62 | 69 | ||
@@ -81,7 +88,7 @@ startup_64: | |||
81 | 88 | ||
82 | /* Add an Identity mapping if I am above 1G */ | 89 | /* Add an Identity mapping if I am above 1G */ |
83 | leaq _text(%rip), %rdi | 90 | leaq _text(%rip), %rdi |
84 | andq $LARGE_PAGE_MASK, %rdi | 91 | andq $PMD_PAGE_MASK, %rdi |
85 | 92 | ||
86 | movq %rdi, %rax | 93 | movq %rdi, %rax |
87 | shrq $PUD_SHIFT, %rax | 94 | shrq $PUD_SHIFT, %rax |
@@ -243,31 +250,55 @@ ENTRY(secondary_startup_64) | |||
243 | lretq | 250 | lretq |
244 | 251 | ||
245 | /* SMP bootup changes these two */ | 252 | /* SMP bootup changes these two */ |
246 | #ifndef CONFIG_HOTPLUG_CPU | 253 | __CPUINITDATA |
247 | .pushsection .init.data | ||
248 | #endif | ||
249 | .align 8 | 254 | .align 8 |
250 | .globl initial_code | 255 | ENTRY(initial_code) |
251 | initial_code: | ||
252 | .quad x86_64_start_kernel | 256 | .quad x86_64_start_kernel |
253 | #ifndef CONFIG_HOTPLUG_CPU | 257 | __FINITDATA |
254 | .popsection | 258 | |
255 | #endif | 259 | ENTRY(init_rsp) |
256 | .globl init_rsp | ||
257 | init_rsp: | ||
258 | .quad init_thread_union+THREAD_SIZE-8 | 260 | .quad init_thread_union+THREAD_SIZE-8 |
259 | 261 | ||
260 | bad_address: | 262 | bad_address: |
261 | jmp bad_address | 263 | jmp bad_address |
262 | 264 | ||
265 | #ifdef CONFIG_EARLY_PRINTK | ||
266 | .macro early_idt_tramp first, last | ||
267 | .ifgt \last-\first | ||
268 | early_idt_tramp \first, \last-1 | ||
269 | .endif | ||
270 | movl $\last,%esi | ||
271 | jmp early_idt_handler | ||
272 | .endm | ||
273 | |||
274 | .globl early_idt_handlers | ||
275 | early_idt_handlers: | ||
276 | early_idt_tramp 0, 63 | ||
277 | early_idt_tramp 64, 127 | ||
278 | early_idt_tramp 128, 191 | ||
279 | early_idt_tramp 192, 255 | ||
280 | #endif | ||
281 | |||
263 | ENTRY(early_idt_handler) | 282 | ENTRY(early_idt_handler) |
283 | #ifdef CONFIG_EARLY_PRINTK | ||
264 | cmpl $2,early_recursion_flag(%rip) | 284 | cmpl $2,early_recursion_flag(%rip) |
265 | jz 1f | 285 | jz 1f |
266 | incl early_recursion_flag(%rip) | 286 | incl early_recursion_flag(%rip) |
287 | GET_CR2_INTO_RCX | ||
288 | movq %rcx,%r9 | ||
289 | xorl %r8d,%r8d # zero for error code | ||
290 | movl %esi,%ecx # get vector number | ||
291 | # Test %ecx against mask of vectors that push error code. | ||
292 | cmpl $31,%ecx | ||
293 | ja 0f | ||
294 | movl $1,%eax | ||
295 | salq %cl,%rax | ||
296 | testl $0x27d00,%eax | ||
297 | je 0f | ||
298 | popq %r8 # get error code | ||
299 | 0: movq 0(%rsp),%rcx # get ip | ||
300 | movq 8(%rsp),%rdx # get cs | ||
267 | xorl %eax,%eax | 301 | xorl %eax,%eax |
268 | movq 8(%rsp),%rsi # get rip | ||
269 | movq (%rsp),%rdx | ||
270 | movq %cr2,%rcx | ||
271 | leaq early_idt_msg(%rip),%rdi | 302 | leaq early_idt_msg(%rip),%rdi |
272 | call early_printk | 303 | call early_printk |
273 | cmpl $2,early_recursion_flag(%rip) | 304 | cmpl $2,early_recursion_flag(%rip) |
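Note on the reworked early_idt_handler above: it now receives the vector number in %esi (placed there by the early_idt_tramp stubs) and must decide whether the CPU pushed an error code. For vectors 0-31 it forms 1 << vector and tests it against the constant 0x27d00, which is the bit set {8, 10, 11, 12, 13, 14, 17}, i.e. #DF, #TS, #NP, #SS, #GP, #PF and #AC, the exceptions that push an error code. The vector list is standard x86 behaviour rather than anything stated in the patch; the runnable check below just reconstructs the mask and mirrors the test.

/* Rebuild the 0x27d00 constant from the list of exception vectors that
 * push an error code, and mirror the early_idt_handler test. */
#include <stdio.h>
#include <stdint.h>

static const int error_code_vectors[] = { 8, 10, 11, 12, 13, 14, 17 };

static int pushes_error_code(unsigned int vector, uint32_t mask)
{
        if (vector > 31)
                return 0;                       /* "cmpl $31,%ecx; ja 0f"  */
        return (mask >> vector) & 1;            /* "salq %cl,%rax; testl"  */
}

int main(void)
{
        uint32_t mask = 0;
        unsigned int i;

        for (i = 0; i < sizeof(error_code_vectors) / sizeof(error_code_vectors[0]); i++)
                mask |= 1u << error_code_vectors[i];

        printf("mask = %#x (expected 0x27d00)\n", (unsigned)mask);
        printf("vector 14 (#PF) pushes error code: %d\n", pushes_error_code(14, mask));
        printf("vector  6 (#UD) pushes error code: %d\n", pushes_error_code(6, mask));
        return 0;
}

When the vector did push an error code, the handler pops it into %r8 first, which is why rip and cs can then always be fetched from fixed offsets 0(%rsp) and 8(%rsp).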
@@ -278,15 +309,19 @@ ENTRY(early_idt_handler) | |||
278 | movq 8(%rsp),%rsi # get rip again | 309 | movq 8(%rsp),%rsi # get rip again |
279 | call __print_symbol | 310 | call __print_symbol |
280 | #endif | 311 | #endif |
312 | #endif /* EARLY_PRINTK */ | ||
281 | 1: hlt | 313 | 1: hlt |
282 | jmp 1b | 314 | jmp 1b |
315 | |||
316 | #ifdef CONFIG_EARLY_PRINTK | ||
283 | early_recursion_flag: | 317 | early_recursion_flag: |
284 | .long 0 | 318 | .long 0 |
285 | 319 | ||
286 | early_idt_msg: | 320 | early_idt_msg: |
287 | .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" | 321 | .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" |
288 | early_idt_ripmsg: | 322 | early_idt_ripmsg: |
289 | .asciz "RIP %s\n" | 323 | .asciz "RIP %s\n" |
324 | #endif /* CONFIG_EARLY_PRINTK */ | ||
290 | 325 | ||
291 | .balign PAGE_SIZE | 326 | .balign PAGE_SIZE |
292 | 327 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 4a86ffd67ec5..429d084e014d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/sysdev.h> | 7 | #include <linux/sysdev.h> |
8 | #include <linux/pm.h> | 8 | #include <linux/pm.h> |
9 | #include <linux/delay.h> | ||
10 | 9 | ||
11 | #include <asm/fixmap.h> | 10 | #include <asm/fixmap.h> |
12 | #include <asm/hpet.h> | 11 | #include <asm/hpet.h> |
@@ -16,7 +15,8 @@ | |||
16 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 15 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
17 | #define HPET_SHIFT 22 | 16 | #define HPET_SHIFT 22 |
18 | 17 | ||
19 | /* FSEC = 10^-15 NSEC = 10^-9 */ | 18 | /* FSEC = 10^-15 |
19 | NSEC = 10^-9 */ | ||
20 | #define FSEC_PER_NSEC 1000000 | 20 | #define FSEC_PER_NSEC 1000000 |
21 | 21 | ||
22 | /* | 22 | /* |
@@ -107,6 +107,7 @@ int is_hpet_enabled(void) | |||
107 | { | 107 | { |
108 | return is_hpet_capable() && hpet_legacy_int_enabled; | 108 | return is_hpet_capable() && hpet_legacy_int_enabled; |
109 | } | 109 | } |
110 | EXPORT_SYMBOL_GPL(is_hpet_enabled); | ||
110 | 111 | ||
111 | /* | 112 | /* |
112 | * When the hpet driver (/dev/hpet) is enabled, we need to reserve | 113 | * When the hpet driver (/dev/hpet) is enabled, we need to reserve |
@@ -132,16 +133,13 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
132 | #ifdef CONFIG_HPET_EMULATE_RTC | 133 | #ifdef CONFIG_HPET_EMULATE_RTC |
133 | hpet_reserve_timer(&hd, 1); | 134 | hpet_reserve_timer(&hd, 1); |
134 | #endif | 135 | #endif |
135 | |||
136 | hd.hd_irq[0] = HPET_LEGACY_8254; | 136 | hd.hd_irq[0] = HPET_LEGACY_8254; |
137 | hd.hd_irq[1] = HPET_LEGACY_RTC; | 137 | hd.hd_irq[1] = HPET_LEGACY_RTC; |
138 | 138 | ||
139 | for (i = 2; i < nrtimers; timer++, i++) | 139 | for (i = 2; i < nrtimers; timer++, i++) |
140 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> | 140 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> |
141 | Tn_INT_ROUTE_CNF_SHIFT; | 141 | Tn_INT_ROUTE_CNF_SHIFT; |
142 | |||
143 | hpet_alloc(&hd); | 142 | hpet_alloc(&hd); |
144 | |||
145 | } | 143 | } |
146 | #else | 144 | #else |
147 | static void hpet_reserve_platform_timers(unsigned long id) { } | 145 | static void hpet_reserve_platform_timers(unsigned long id) { } |
@@ -478,6 +476,7 @@ void hpet_disable(void) | |||
478 | */ | 476 | */ |
479 | #include <linux/mc146818rtc.h> | 477 | #include <linux/mc146818rtc.h> |
480 | #include <linux/rtc.h> | 478 | #include <linux/rtc.h> |
479 | #include <asm/rtc.h> | ||
481 | 480 | ||
482 | #define DEFAULT_RTC_INT_FREQ 64 | 481 | #define DEFAULT_RTC_INT_FREQ 64 |
483 | #define DEFAULT_RTC_SHIFT 6 | 482 | #define DEFAULT_RTC_SHIFT 6 |
@@ -492,6 +491,38 @@ static unsigned long hpet_default_delta; | |||
492 | static unsigned long hpet_pie_delta; | 491 | static unsigned long hpet_pie_delta; |
493 | static unsigned long hpet_pie_limit; | 492 | static unsigned long hpet_pie_limit; |
494 | 493 | ||
494 | static rtc_irq_handler irq_handler; | ||
495 | |||
496 | /* | ||
497 | * Registers a IRQ handler. | ||
498 | */ | ||
499 | int hpet_register_irq_handler(rtc_irq_handler handler) | ||
500 | { | ||
501 | if (!is_hpet_enabled()) | ||
502 | return -ENODEV; | ||
503 | if (irq_handler) | ||
504 | return -EBUSY; | ||
505 | |||
506 | irq_handler = handler; | ||
507 | |||
508 | return 0; | ||
509 | } | ||
510 | EXPORT_SYMBOL_GPL(hpet_register_irq_handler); | ||
511 | |||
512 | /* | ||
513 | * Deregisters the IRQ handler registered with hpet_register_irq_handler() | ||
514 | * and does cleanup. | ||
515 | */ | ||
516 | void hpet_unregister_irq_handler(rtc_irq_handler handler) | ||
517 | { | ||
518 | if (!is_hpet_enabled()) | ||
519 | return; | ||
520 | |||
521 | irq_handler = NULL; | ||
522 | hpet_rtc_flags = 0; | ||
523 | } | ||
524 | EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); | ||
525 | |||
495 | /* | 526 | /* |
496 | * Timer 1 for RTC emulation. We use one shot mode, as periodic mode | 527 | * Timer 1 for RTC emulation. We use one shot mode, as periodic mode |
497 | * is not supported by all HPET implementations for timer 1. | 528 | * is not supported by all HPET implementations for timer 1. |
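Note on the hunk above: hpet_register_irq_handler() and hpet_unregister_irq_handler() decouple the HPET's RTC emulation from a hard-coded call into the CMOS RTC driver; hpet_rtc_interrupt() now forwards to whatever single rtc_irq_handler is registered, and registration fails with -ENODEV when the HPET is disabled or -EBUSY when a handler is already installed. The runnable sketch below models only that contract in userspace; the enabled flag and the fake handler are stand-ins, not kernel code.

/* Userspace model of the single-slot register/unregister contract added
 * above; errno values follow the new functions, everything else is
 * illustrative. */
#include <stdio.h>
#include <errno.h>

typedef int (*rtc_irq_handler_t)(unsigned long rtc_int_flag, void *dev_id);

static int hpet_enabled = 1;                    /* stands in for is_hpet_enabled() */
static rtc_irq_handler_t irq_handler;

static int hpet_register_irq_handler_model(rtc_irq_handler_t handler)
{
        if (!hpet_enabled)
                return -ENODEV;
        if (irq_handler)
                return -EBUSY;                  /* only one consumer at a time */
        irq_handler = handler;
        return 0;
}

static void hpet_unregister_irq_handler_model(void)
{
        irq_handler = NULL;
}

static int fake_rtc_interrupt(unsigned long flags, void *dev_id)
{
        (void)dev_id;
        printf("rtc handler called, flags=%#lx\n", flags);
        return 1;
}

int main(void)
{
        printf("first register:  %d\n", hpet_register_irq_handler_model(fake_rtc_interrupt));
        printf("second register: %d\n", hpet_register_irq_handler_model(fake_rtc_interrupt));
        if (irq_handler)                        /* what hpet_rtc_interrupt() now does */
                irq_handler(0x40, NULL);
        hpet_unregister_irq_handler_model();
        return 0;
}

The real unregister path also clears hpet_rtc_flags, as the hunk shows, so a later consumer starts from a clean emulation state.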
@@ -533,6 +564,7 @@ int hpet_rtc_timer_init(void) | |||
533 | 564 | ||
534 | return 1; | 565 | return 1; |
535 | } | 566 | } |
567 | EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); | ||
536 | 568 | ||
537 | /* | 569 | /* |
538 | * The functions below are called from rtc driver. | 570 | * The functions below are called from rtc driver. |
@@ -547,6 +579,7 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | |||
547 | hpet_rtc_flags &= ~bit_mask; | 579 | hpet_rtc_flags &= ~bit_mask; |
548 | return 1; | 580 | return 1; |
549 | } | 581 | } |
582 | EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); | ||
550 | 583 | ||
551 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | 584 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) |
552 | { | 585 | { |
@@ -562,6 +595,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) | |||
562 | 595 | ||
563 | return 1; | 596 | return 1; |
564 | } | 597 | } |
598 | EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); | ||
565 | 599 | ||
566 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, | 600 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, |
567 | unsigned char sec) | 601 | unsigned char sec) |
@@ -575,6 +609,7 @@ int hpet_set_alarm_time(unsigned char hrs, unsigned char min, | |||
575 | 609 | ||
576 | return 1; | 610 | return 1; |
577 | } | 611 | } |
612 | EXPORT_SYMBOL_GPL(hpet_set_alarm_time); | ||
578 | 613 | ||
579 | int hpet_set_periodic_freq(unsigned long freq) | 614 | int hpet_set_periodic_freq(unsigned long freq) |
580 | { | 615 | { |
@@ -593,11 +628,13 @@ int hpet_set_periodic_freq(unsigned long freq) | |||
593 | } | 628 | } |
594 | return 1; | 629 | return 1; |
595 | } | 630 | } |
631 | EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); | ||
596 | 632 | ||
597 | int hpet_rtc_dropped_irq(void) | 633 | int hpet_rtc_dropped_irq(void) |
598 | { | 634 | { |
599 | return is_hpet_enabled(); | 635 | return is_hpet_enabled(); |
600 | } | 636 | } |
637 | EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); | ||
601 | 638 | ||
602 | static void hpet_rtc_timer_reinit(void) | 639 | static void hpet_rtc_timer_reinit(void) |
603 | { | 640 | { |
@@ -641,9 +678,10 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
641 | unsigned long rtc_int_flag = 0; | 678 | unsigned long rtc_int_flag = 0; |
642 | 679 | ||
643 | hpet_rtc_timer_reinit(); | 680 | hpet_rtc_timer_reinit(); |
681 | memset(&curr_time, 0, sizeof(struct rtc_time)); | ||
644 | 682 | ||
645 | if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) | 683 | if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) |
646 | rtc_get_rtc_time(&curr_time); | 684 | get_rtc_time(&curr_time); |
647 | 685 | ||
648 | if (hpet_rtc_flags & RTC_UIE && | 686 | if (hpet_rtc_flags & RTC_UIE && |
649 | curr_time.tm_sec != hpet_prev_update_sec) { | 687 | curr_time.tm_sec != hpet_prev_update_sec) { |
@@ -657,7 +695,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
657 | hpet_pie_count = 0; | 695 | hpet_pie_count = 0; |
658 | } | 696 | } |
659 | 697 | ||
660 | if (hpet_rtc_flags & RTC_PIE && | 698 | if (hpet_rtc_flags & RTC_AIE && |
661 | (curr_time.tm_sec == hpet_alarm_time.tm_sec) && | 699 | (curr_time.tm_sec == hpet_alarm_time.tm_sec) && |
662 | (curr_time.tm_min == hpet_alarm_time.tm_min) && | 700 | (curr_time.tm_min == hpet_alarm_time.tm_min) && |
663 | (curr_time.tm_hour == hpet_alarm_time.tm_hour)) | 701 | (curr_time.tm_hour == hpet_alarm_time.tm_hour)) |
@@ -665,8 +703,10 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
665 | 703 | ||
666 | if (rtc_int_flag) { | 704 | if (rtc_int_flag) { |
667 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | 705 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); |
668 | rtc_interrupt(rtc_int_flag, dev_id); | 706 | if (irq_handler) |
707 | irq_handler(rtc_int_flag, dev_id); | ||
669 | } | 708 | } |
670 | return IRQ_HANDLED; | 709 | return IRQ_HANDLED; |
671 | } | 710 | } |
711 | EXPORT_SYMBOL_GPL(hpet_rtc_interrupt); | ||
672 | #endif | 712 | #endif |
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 02112fcc0de7..061627806a2d 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -22,12 +22,5 @@ EXPORT_SYMBOL(__put_user_8); | |||
22 | 22 | ||
23 | EXPORT_SYMBOL(strstr); | 23 | EXPORT_SYMBOL(strstr); |
24 | 24 | ||
25 | #ifdef CONFIG_SMP | ||
26 | extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); | ||
27 | extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); | ||
28 | EXPORT_SYMBOL(__write_lock_failed); | ||
29 | EXPORT_SYMBOL(__read_lock_failed); | ||
30 | #endif | ||
31 | |||
32 | EXPORT_SYMBOL(csum_partial); | 25 | EXPORT_SYMBOL(csum_partial); |
33 | EXPORT_SYMBOL(empty_zero_page); | 26 | EXPORT_SYMBOL(empty_zero_page); |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c new file mode 100644 index 000000000000..26719bd2c77c --- /dev/null +++ b/arch/x86/kernel/i387.c | |||
@@ -0,0 +1,479 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1994 Linus Torvalds | ||
3 | * | ||
4 | * Pentium III FXSR, SSE support | ||
5 | * General FPU state handling cleanups | ||
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
7 | */ | ||
8 | |||
9 | #include <linux/sched.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/regset.h> | ||
12 | #include <asm/processor.h> | ||
13 | #include <asm/i387.h> | ||
14 | #include <asm/math_emu.h> | ||
15 | #include <asm/sigcontext.h> | ||
16 | #include <asm/user.h> | ||
17 | #include <asm/ptrace.h> | ||
18 | #include <asm/uaccess.h> | ||
19 | |||
20 | #ifdef CONFIG_X86_64 | ||
21 | |||
22 | #include <asm/sigcontext32.h> | ||
23 | #include <asm/user32.h> | ||
24 | |||
25 | #else | ||
26 | |||
27 | #define save_i387_ia32 save_i387 | ||
28 | #define restore_i387_ia32 restore_i387 | ||
29 | |||
30 | #define _fpstate_ia32 _fpstate | ||
31 | #define user_i387_ia32_struct user_i387_struct | ||
32 | #define user32_fxsr_struct user_fxsr_struct | ||
33 | |||
34 | #endif | ||
35 | |||
36 | #ifdef CONFIG_MATH_EMULATION | ||
37 | #define HAVE_HWFP (boot_cpu_data.hard_math) | ||
38 | #else | ||
39 | #define HAVE_HWFP 1 | ||
40 | #endif | ||
41 | |||
42 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
43 | |||
44 | void mxcsr_feature_mask_init(void) | ||
45 | { | ||
46 | unsigned long mask = 0; | ||
47 | clts(); | ||
48 | if (cpu_has_fxsr) { | ||
49 | memset(¤t->thread.i387.fxsave, 0, | ||
50 | sizeof(struct i387_fxsave_struct)); | ||
51 | asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | ||
52 | mask = current->thread.i387.fxsave.mxcsr_mask; | ||
53 | if (mask == 0) | ||
54 | mask = 0x0000ffbf; | ||
55 | } | ||
56 | mxcsr_feature_mask &= mask; | ||
57 | stts(); | ||
58 | } | ||
59 | |||
60 | #ifdef CONFIG_X86_64 | ||
61 | /* | ||
62 | * Called at bootup to set up the initial FPU state that is later cloned | ||
63 | * into all processes. | ||
64 | */ | ||
65 | void __cpuinit fpu_init(void) | ||
66 | { | ||
67 | unsigned long oldcr0 = read_cr0(); | ||
68 | extern void __bad_fxsave_alignment(void); | ||
69 | |||
70 | if (offsetof(struct task_struct, thread.i387.fxsave) & 15) | ||
71 | __bad_fxsave_alignment(); | ||
72 | set_in_cr4(X86_CR4_OSFXSR); | ||
73 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
74 | |||
75 | write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */ | ||
76 | |||
77 | mxcsr_feature_mask_init(); | ||
78 | /* clean state in init */ | ||
79 | current_thread_info()->status = 0; | ||
80 | clear_used_math(); | ||
81 | } | ||
82 | #endif /* CONFIG_X86_64 */ | ||
83 | |||
84 | /* | ||
85 | * The _current_ task is using the FPU for the first time | ||
86 | * so initialize it and set the mxcsr to its default | ||
87 | * value at reset if we support XMM instructions and then | ||
88 | * remeber the current task has used the FPU. | ||
88 | * remember the current task has used the FPU. | ||
89 | */ | ||
90 | void init_fpu(struct task_struct *tsk) | ||
91 | { | ||
92 | if (tsk_used_math(tsk)) { | ||
93 | if (tsk == current) | ||
94 | unlazy_fpu(tsk); | ||
95 | return; | ||
96 | } | ||
97 | |||
98 | if (cpu_has_fxsr) { | ||
99 | memset(&tsk->thread.i387.fxsave, 0, | ||
100 | sizeof(struct i387_fxsave_struct)); | ||
101 | tsk->thread.i387.fxsave.cwd = 0x37f; | ||
102 | if (cpu_has_xmm) | ||
103 | tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; | ||
104 | } else { | ||
105 | memset(&tsk->thread.i387.fsave, 0, | ||
106 | sizeof(struct i387_fsave_struct)); | ||
107 | tsk->thread.i387.fsave.cwd = 0xffff037fu; | ||
108 | tsk->thread.i387.fsave.swd = 0xffff0000u; | ||
109 | tsk->thread.i387.fsave.twd = 0xffffffffu; | ||
110 | tsk->thread.i387.fsave.fos = 0xffff0000u; | ||
111 | } | ||
112 | /* | ||
113 | * Only the device not available exception or ptrace can call init_fpu. | ||
114 | */ | ||
115 | set_stopped_child_used_math(tsk); | ||
116 | } | ||
117 | |||
118 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
119 | { | ||
120 | return tsk_used_math(target) ? regset->n : 0; | ||
121 | } | ||
122 | |||
123 | int xfpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
124 | { | ||
125 | return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; | ||
126 | } | ||
127 | |||
128 | int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
129 | unsigned int pos, unsigned int count, | ||
130 | void *kbuf, void __user *ubuf) | ||
131 | { | ||
132 | if (!cpu_has_fxsr) | ||
133 | return -ENODEV; | ||
134 | |||
135 | unlazy_fpu(target); | ||
136 | |||
137 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
138 | &target->thread.i387.fxsave, 0, -1); | ||
139 | } | ||
140 | |||
141 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
142 | unsigned int pos, unsigned int count, | ||
143 | const void *kbuf, const void __user *ubuf) | ||
144 | { | ||
145 | int ret; | ||
146 | |||
147 | if (!cpu_has_fxsr) | ||
148 | return -ENODEV; | ||
149 | |||
150 | unlazy_fpu(target); | ||
151 | set_stopped_child_used_math(target); | ||
152 | |||
153 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
154 | &target->thread.i387.fxsave, 0, -1); | ||
155 | |||
156 | /* | ||
157 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
158 | */ | ||
159 | target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
160 | |||
161 | return ret; | ||
162 | } | ||
163 | |||
164 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
165 | |||
166 | /* | ||
167 | * FPU tag word conversions. | ||
168 | */ | ||
169 | |||
170 | static inline unsigned short twd_i387_to_fxsr(unsigned short twd) | ||
171 | { | ||
172 | unsigned int tmp; /* to avoid 16 bit prefixes in the code */ | ||
173 | |||
174 | /* Transform each pair of bits into 01 (valid) or 00 (empty) */ | ||
175 | tmp = ~twd; | ||
176 | tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ | ||
177 | /* and move the valid bits to the lower byte. */ | ||
178 | tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ | ||
179 | tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ | ||
180 | tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ | ||
181 | return tmp; | ||
182 | } | ||
183 | |||
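Note on twd_i387_to_fxsr() above: it compresses the legacy 16-bit tag word, which stores two bits per FPU register (00 valid, 01 zero, 10 special, 11 empty), into the FXSR format of one bit per register (1 = occupied, 0 = empty). Inverting the word turns "empty" pairs into 00 and everything else into a non-zero pair, and the OR/shift/mask cascade then folds each pair down to a single bit and packs the eight bits into the low byte. A runnable copy of the routine with a few worked inputs:

/* Same bit manipulation as twd_i387_to_fxsr(), exercised on sample tag
 * words: 0xffff = all registers empty, 0xfffe = only st(0) in use
 * (tagged "special"), 0x0000 = all registers valid. */
#include <stdio.h>

static unsigned short tag_i387_to_fxsr(unsigned short twd)
{
        unsigned int tmp;                       /* avoid 16-bit prefixes */

        tmp = ~twd;
        tmp = (tmp | (tmp >> 1)) & 0x5555;      /* 0V0V0V0V0V0V0V0V */
        tmp = (tmp | (tmp >> 1)) & 0x3333;      /* 00VV00VV00VV00VV */
        tmp = (tmp | (tmp >> 2)) & 0x0f0f;      /* 0000VVVV0000VVVV */
        tmp = (tmp | (tmp >> 4)) & 0x00ff;      /* 00000000VVVVVVVV */
        return (unsigned short)tmp;
}

int main(void)
{
        printf("0xffff -> 0x%02x (expect 0x00, all empty)\n",      tag_i387_to_fxsr(0xffff));
        printf("0xfffe -> 0x%02x (expect 0x01, only st(0) used)\n", tag_i387_to_fxsr(0xfffe));
        printf("0x0000 -> 0x%02x (expect 0xff, all in use)\n",      tag_i387_to_fxsr(0x0000));
        return 0;
}

The reverse direction, twd_fxsr_to_i387() below, has to reconstruct the two-bit tags by inspecting each saved register's exponent and significand, since the one-bit form only records occupied versus empty.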
184 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); | ||
185 | #define FP_EXP_TAG_VALID 0 | ||
186 | #define FP_EXP_TAG_ZERO 1 | ||
187 | #define FP_EXP_TAG_SPECIAL 2 | ||
188 | #define FP_EXP_TAG_EMPTY 3 | ||
189 | |||
190 | static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | ||
191 | { | ||
192 | struct _fpxreg *st; | ||
193 | u32 tos = (fxsave->swd >> 11) & 7; | ||
194 | u32 twd = (unsigned long) fxsave->twd; | ||
195 | u32 tag; | ||
196 | u32 ret = 0xffff0000u; | ||
197 | int i; | ||
198 | |||
199 | for (i = 0; i < 8; i++, twd >>= 1) { | ||
200 | if (twd & 0x1) { | ||
201 | st = FPREG_ADDR(fxsave, (i - tos) & 7); | ||
202 | |||
203 | switch (st->exponent & 0x7fff) { | ||
204 | case 0x7fff: | ||
205 | tag = FP_EXP_TAG_SPECIAL; | ||
206 | break; | ||
207 | case 0x0000: | ||
208 | if (!st->significand[0] && | ||
209 | !st->significand[1] && | ||
210 | !st->significand[2] && | ||
211 | !st->significand[3]) | ||
212 | tag = FP_EXP_TAG_ZERO; | ||
213 | else | ||
214 | tag = FP_EXP_TAG_SPECIAL; | ||
215 | break; | ||
216 | default: | ||
217 | if (st->significand[3] & 0x8000) | ||
218 | tag = FP_EXP_TAG_VALID; | ||
219 | else | ||
220 | tag = FP_EXP_TAG_SPECIAL; | ||
221 | break; | ||
222 | } | ||
223 | } else { | ||
224 | tag = FP_EXP_TAG_EMPTY; | ||
225 | } | ||
226 | ret |= tag << (2 * i); | ||
227 | } | ||
228 | return ret; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * FXSR floating point environment conversions. | ||
233 | */ | ||
234 | |||
235 | static void convert_from_fxsr(struct user_i387_ia32_struct *env, | ||
236 | struct task_struct *tsk) | ||
237 | { | ||
238 | struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | ||
239 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | ||
240 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | ||
241 | int i; | ||
242 | |||
243 | env->cwd = fxsave->cwd | 0xffff0000u; | ||
244 | env->swd = fxsave->swd | 0xffff0000u; | ||
245 | env->twd = twd_fxsr_to_i387(fxsave); | ||
246 | |||
247 | #ifdef CONFIG_X86_64 | ||
248 | env->fip = fxsave->rip; | ||
249 | env->foo = fxsave->rdp; | ||
250 | if (tsk == current) { | ||
251 | /* | ||
252 | * should be actually ds/cs at fpu exception time, but | ||
253 | * that information is not available in 64bit mode. | ||
254 | */ | ||
255 | asm("mov %%ds,%0" : "=r" (env->fos)); | ||
256 | asm("mov %%cs,%0" : "=r" (env->fcs)); | ||
257 | } else { | ||
258 | struct pt_regs *regs = task_pt_regs(tsk); | ||
259 | env->fos = 0xffff0000 | tsk->thread.ds; | ||
260 | env->fcs = regs->cs; | ||
261 | } | ||
262 | #else | ||
263 | env->fip = fxsave->fip; | ||
264 | env->fcs = fxsave->fcs; | ||
265 | env->foo = fxsave->foo; | ||
266 | env->fos = fxsave->fos; | ||
267 | #endif | ||
268 | |||
269 | for (i = 0; i < 8; ++i) | ||
270 | memcpy(&to[i], &from[i], sizeof(to[0])); | ||
271 | } | ||
272 | |||
273 | static void convert_to_fxsr(struct task_struct *tsk, | ||
274 | const struct user_i387_ia32_struct *env) | ||
275 | |||
276 | { | ||
277 | struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | ||
278 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | ||
279 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | ||
280 | int i; | ||
281 | |||
282 | fxsave->cwd = env->cwd; | ||
283 | fxsave->swd = env->swd; | ||
284 | fxsave->twd = twd_i387_to_fxsr(env->twd); | ||
285 | fxsave->fop = (u16) ((u32) env->fcs >> 16); | ||
286 | #ifdef CONFIG_X86_64 | ||
287 | fxsave->rip = env->fip; | ||
288 | fxsave->rdp = env->foo; | ||
289 | /* cs and ds ignored */ | ||
290 | #else | ||
291 | fxsave->fip = env->fip; | ||
292 | fxsave->fcs = (env->fcs & 0xffff); | ||
293 | fxsave->foo = env->foo; | ||
294 | fxsave->fos = env->fos; | ||
295 | #endif | ||
296 | |||
297 | for (i = 0; i < 8; ++i) | ||
298 | memcpy(&to[i], &from[i], sizeof(from[0])); | ||
299 | } | ||
300 | |||
301 | int fpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
302 | unsigned int pos, unsigned int count, | ||
303 | void *kbuf, void __user *ubuf) | ||
304 | { | ||
305 | struct user_i387_ia32_struct env; | ||
306 | |||
307 | if (!HAVE_HWFP) | ||
308 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
309 | |||
310 | unlazy_fpu(target); | ||
311 | |||
312 | if (!cpu_has_fxsr) | ||
313 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
314 | &target->thread.i387.fsave, 0, -1); | ||
315 | |||
316 | if (kbuf && pos == 0 && count == sizeof(env)) { | ||
317 | convert_from_fxsr(kbuf, target); | ||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | convert_from_fxsr(&env, target); | ||
322 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
323 | } | ||
324 | |||
325 | int fpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
326 | unsigned int pos, unsigned int count, | ||
327 | const void *kbuf, const void __user *ubuf) | ||
328 | { | ||
329 | struct user_i387_ia32_struct env; | ||
330 | int ret; | ||
331 | |||
332 | if (!HAVE_HWFP) | ||
333 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
334 | |||
335 | unlazy_fpu(target); | ||
336 | set_stopped_child_used_math(target); | ||
337 | |||
338 | if (!cpu_has_fxsr) | ||
339 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
340 | &target->thread.i387.fsave, 0, -1); | ||
341 | |||
342 | if (pos > 0 || count < sizeof(env)) | ||
343 | convert_from_fxsr(&env, target); | ||
344 | |||
345 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
346 | if (!ret) | ||
347 | convert_to_fxsr(target, &env); | ||
348 | |||
349 | return ret; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Signal frame handlers. | ||
354 | */ | ||
355 | |||
356 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
357 | { | ||
358 | struct task_struct *tsk = current; | ||
359 | |||
360 | unlazy_fpu(tsk); | ||
361 | tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; | ||
362 | if (__copy_to_user(buf, &tsk->thread.i387.fsave, | ||
363 | sizeof(struct i387_fsave_struct))) | ||
364 | return -1; | ||
365 | return 1; | ||
366 | } | ||
367 | |||
368 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | ||
369 | { | ||
370 | struct task_struct *tsk = current; | ||
371 | struct user_i387_ia32_struct env; | ||
372 | int err = 0; | ||
373 | |||
374 | unlazy_fpu(tsk); | ||
375 | |||
376 | convert_from_fxsr(&env, tsk); | ||
377 | if (__copy_to_user(buf, &env, sizeof(env))) | ||
378 | return -1; | ||
379 | |||
380 | err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); | ||
381 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); | ||
382 | if (err) | ||
383 | return -1; | ||
384 | |||
385 | if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, | ||
386 | sizeof(struct i387_fxsave_struct))) | ||
387 | return -1; | ||
388 | return 1; | ||
389 | } | ||
390 | |||
391 | int save_i387_ia32(struct _fpstate_ia32 __user *buf) | ||
392 | { | ||
393 | if (!used_math()) | ||
394 | return 0; | ||
395 | |||
396 | /* This will cause a "finit" to be triggered by the next | ||
397 | * attempted FPU operation by the 'current' process. | ||
398 | */ | ||
399 | clear_used_math(); | ||
400 | |||
401 | if (HAVE_HWFP) { | ||
402 | if (cpu_has_fxsr) { | ||
403 | return save_i387_fxsave(buf); | ||
404 | } else { | ||
405 | return save_i387_fsave(buf); | ||
406 | } | ||
407 | } else { | ||
408 | return fpregs_soft_get(current, NULL, | ||
409 | 0, sizeof(struct user_i387_ia32_struct), | ||
410 | NULL, buf) ? -1 : 1; | ||
411 | } | ||
412 | } | ||
413 | |||
414 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
415 | { | ||
416 | struct task_struct *tsk = current; | ||
417 | clear_fpu(tsk); | ||
418 | return __copy_from_user(&tsk->thread.i387.fsave, buf, | ||
419 | sizeof(struct i387_fsave_struct)); | ||
420 | } | ||
421 | |||
422 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | ||
423 | { | ||
424 | int err; | ||
425 | struct task_struct *tsk = current; | ||
426 | struct user_i387_ia32_struct env; | ||
427 | clear_fpu(tsk); | ||
428 | err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], | ||
429 | sizeof(struct i387_fxsave_struct)); | ||
430 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
431 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
432 | if (err || __copy_from_user(&env, buf, sizeof(env))) | ||
433 | return 1; | ||
434 | convert_to_fxsr(tsk, &env); | ||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) | ||
439 | { | ||
440 | int err; | ||
441 | |||
442 | if (HAVE_HWFP) { | ||
443 | if (cpu_has_fxsr) { | ||
444 | err = restore_i387_fxsave(buf); | ||
445 | } else { | ||
446 | err = restore_i387_fsave(buf); | ||
447 | } | ||
448 | } else { | ||
449 | err = fpregs_soft_set(current, NULL, | ||
450 | 0, sizeof(struct user_i387_ia32_struct), | ||
451 | NULL, buf) != 0; | ||
452 | } | ||
453 | set_used_math(); | ||
454 | return err; | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * FPU state for core dumps. | ||
459 | * This is only used for a.out dumps now. | ||
460 | * It is declared generically using elf_fpregset_t (which is | ||
461 | * struct user_i387_struct) but is in fact only used for 32-bit | ||
462 | * dumps, so on 64-bit it is really struct user_i387_ia32_struct. | ||
463 | */ | ||
464 | int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) | ||
465 | { | ||
466 | int fpvalid; | ||
467 | struct task_struct *tsk = current; | ||
468 | |||
469 | fpvalid = !!used_math(); | ||
470 | if (fpvalid) | ||
471 | fpvalid = !fpregs_get(tsk, NULL, | ||
472 | 0, sizeof(struct user_i387_ia32_struct), | ||
473 | fpu, NULL); | ||
474 | |||
475 | return fpvalid; | ||
476 | } | ||
477 | EXPORT_SYMBOL(dump_fpu); | ||
478 | |||
479 | #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ | ||
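Note on the regset handlers above: fpregs_get()/fpregs_set() funnel all copying through user_regset_copyout()/user_regset_copyin(), which only move the byte window selected by the caller's (pos, count) pair and advance it as they go; that is why the fast path checks pos == 0 && count == sizeof(env). The following is a stand-alone sketch of that windowing as I understand it, assuming a region that starts at offset 0 — copyout_window() is a simplified model, not the kernel helper.

#include <stdio.h>
#include <string.h>

/* Simplified model of user_regset_copyout() for a region at offset 0:
 * copy the bytes of regs[] selected by (pos, count) into dst and advance
 * pos/count/dst past what was copied. */
static void copyout_window(unsigned int *pos, unsigned int *count,
			   unsigned char **dst,
			   const unsigned char *regs, unsigned int size)
{
	unsigned int n;

	if (*pos >= size || *count == 0)
		return;
	n = size - *pos;
	if (n > *count)
		n = *count;
	memcpy(*dst, regs + *pos, n);
	*dst += n;
	*pos += n;
	*count -= n;
}

int main(void)
{
	unsigned char regs[16], out[8], *dst = out;
	unsigned int pos = 4, count = 8, i;

	for (i = 0; i < sizeof(regs); i++)
		regs[i] = (unsigned char)i;
	copyout_window(&pos, &count, &dst, regs, sizeof(regs));
	printf("copied up to pos=%u, first byte=%u\n", pos, out[0]); /* 12, 4 */
	return 0;
}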
diff --git a/arch/x86/kernel/i387_32.c b/arch/x86/kernel/i387_32.c deleted file mode 100644 index 7d2e12f6c78b..000000000000 --- a/arch/x86/kernel/i387_32.c +++ /dev/null | |||
@@ -1,544 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1994 Linus Torvalds | ||
3 | * | ||
4 | * Pentium III FXSR, SSE support | ||
5 | * General FPU state handling cleanups | ||
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
7 | */ | ||
8 | |||
9 | #include <linux/sched.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/i387.h> | ||
13 | #include <asm/math_emu.h> | ||
14 | #include <asm/sigcontext.h> | ||
15 | #include <asm/user.h> | ||
16 | #include <asm/ptrace.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #ifdef CONFIG_MATH_EMULATION | ||
20 | #define HAVE_HWFP (boot_cpu_data.hard_math) | ||
21 | #else | ||
22 | #define HAVE_HWFP 1 | ||
23 | #endif | ||
24 | |||
25 | static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff; | ||
26 | |||
27 | void mxcsr_feature_mask_init(void) | ||
28 | { | ||
29 | unsigned long mask = 0; | ||
30 | clts(); | ||
31 | if (cpu_has_fxsr) { | ||
32 | memset(¤t->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
33 | asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | ||
34 | mask = current->thread.i387.fxsave.mxcsr_mask; | ||
35 | if (mask == 0) mask = 0x0000ffbf; | ||
36 | } | ||
37 | mxcsr_feature_mask &= mask; | ||
38 | stts(); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * The _current_ task is using the FPU for the first time | ||
43 | * so initialize it and set the mxcsr to its default | ||
44 | * value at reset if we support XMM instructions and then | ||
45 | * remember that the current task has used the FPU. | ||
46 | */ | ||
47 | void init_fpu(struct task_struct *tsk) | ||
48 | { | ||
49 | if (cpu_has_fxsr) { | ||
50 | memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
51 | tsk->thread.i387.fxsave.cwd = 0x37f; | ||
52 | if (cpu_has_xmm) | ||
53 | tsk->thread.i387.fxsave.mxcsr = 0x1f80; | ||
54 | } else { | ||
55 | memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct)); | ||
56 | tsk->thread.i387.fsave.cwd = 0xffff037fu; | ||
57 | tsk->thread.i387.fsave.swd = 0xffff0000u; | ||
58 | tsk->thread.i387.fsave.twd = 0xffffffffu; | ||
59 | tsk->thread.i387.fsave.fos = 0xffff0000u; | ||
60 | } | ||
61 | /* only the device not available exception or ptrace can call init_fpu */ | ||
62 | set_stopped_child_used_math(tsk); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * FPU lazy state save handling. | ||
67 | */ | ||
68 | |||
69 | void kernel_fpu_begin(void) | ||
70 | { | ||
71 | struct thread_info *thread = current_thread_info(); | ||
72 | |||
73 | preempt_disable(); | ||
74 | if (thread->status & TS_USEDFPU) { | ||
75 | __save_init_fpu(thread->task); | ||
76 | return; | ||
77 | } | ||
78 | clts(); | ||
79 | } | ||
80 | EXPORT_SYMBOL_GPL(kernel_fpu_begin); | ||
81 | |||
82 | /* | ||
83 | * FPU tag word conversions. | ||
84 | */ | ||
85 | |||
86 | static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) | ||
87 | { | ||
88 | unsigned int tmp; /* to avoid 16 bit prefixes in the code */ | ||
89 | |||
90 | /* Transform each pair of bits into 01 (valid) or 00 (empty) */ | ||
91 | tmp = ~twd; | ||
92 | tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ | ||
93 | /* and move the valid bits to the lower byte. */ | ||
94 | tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ | ||
95 | tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ | ||
96 | tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ | ||
97 | return tmp; | ||
98 | } | ||
99 | |||
100 | static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) | ||
101 | { | ||
102 | struct _fpxreg *st = NULL; | ||
103 | unsigned long tos = (fxsave->swd >> 11) & 7; | ||
104 | unsigned long twd = (unsigned long) fxsave->twd; | ||
105 | unsigned long tag; | ||
106 | unsigned long ret = 0xffff0000u; | ||
107 | int i; | ||
108 | |||
109 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); | ||
110 | |||
111 | for ( i = 0 ; i < 8 ; i++ ) { | ||
112 | if ( twd & 0x1 ) { | ||
113 | st = FPREG_ADDR( fxsave, (i - tos) & 7 ); | ||
114 | |||
115 | switch ( st->exponent & 0x7fff ) { | ||
116 | case 0x7fff: | ||
117 | tag = 2; /* Special */ | ||
118 | break; | ||
119 | case 0x0000: | ||
120 | if ( !st->significand[0] && | ||
121 | !st->significand[1] && | ||
122 | !st->significand[2] && | ||
123 | !st->significand[3] ) { | ||
124 | tag = 1; /* Zero */ | ||
125 | } else { | ||
126 | tag = 2; /* Special */ | ||
127 | } | ||
128 | break; | ||
129 | default: | ||
130 | if ( st->significand[3] & 0x8000 ) { | ||
131 | tag = 0; /* Valid */ | ||
132 | } else { | ||
133 | tag = 2; /* Special */ | ||
134 | } | ||
135 | break; | ||
136 | } | ||
137 | } else { | ||
138 | tag = 3; /* Empty */ | ||
139 | } | ||
140 | ret |= (tag << (2 * i)); | ||
141 | twd = twd >> 1; | ||
142 | } | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * FPU state interaction. | ||
148 | */ | ||
149 | |||
150 | unsigned short get_fpu_cwd( struct task_struct *tsk ) | ||
151 | { | ||
152 | if ( cpu_has_fxsr ) { | ||
153 | return tsk->thread.i387.fxsave.cwd; | ||
154 | } else { | ||
155 | return (unsigned short)tsk->thread.i387.fsave.cwd; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | unsigned short get_fpu_swd( struct task_struct *tsk ) | ||
160 | { | ||
161 | if ( cpu_has_fxsr ) { | ||
162 | return tsk->thread.i387.fxsave.swd; | ||
163 | } else { | ||
164 | return (unsigned short)tsk->thread.i387.fsave.swd; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | #if 0 | ||
169 | unsigned short get_fpu_twd( struct task_struct *tsk ) | ||
170 | { | ||
171 | if ( cpu_has_fxsr ) { | ||
172 | return tsk->thread.i387.fxsave.twd; | ||
173 | } else { | ||
174 | return (unsigned short)tsk->thread.i387.fsave.twd; | ||
175 | } | ||
176 | } | ||
177 | #endif /* 0 */ | ||
178 | |||
179 | unsigned short get_fpu_mxcsr( struct task_struct *tsk ) | ||
180 | { | ||
181 | if ( cpu_has_xmm ) { | ||
182 | return tsk->thread.i387.fxsave.mxcsr; | ||
183 | } else { | ||
184 | return 0x1f80; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | #if 0 | ||
189 | |||
190 | void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) | ||
191 | { | ||
192 | if ( cpu_has_fxsr ) { | ||
193 | tsk->thread.i387.fxsave.cwd = cwd; | ||
194 | } else { | ||
195 | tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) | ||
200 | { | ||
201 | if ( cpu_has_fxsr ) { | ||
202 | tsk->thread.i387.fxsave.swd = swd; | ||
203 | } else { | ||
204 | tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) | ||
209 | { | ||
210 | if ( cpu_has_fxsr ) { | ||
211 | tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); | ||
212 | } else { | ||
213 | tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | #endif /* 0 */ | ||
218 | |||
219 | /* | ||
220 | * FXSR floating point environment conversions. | ||
221 | */ | ||
222 | |||
223 | static int convert_fxsr_to_user( struct _fpstate __user *buf, | ||
224 | struct i387_fxsave_struct *fxsave ) | ||
225 | { | ||
226 | unsigned long env[7]; | ||
227 | struct _fpreg __user *to; | ||
228 | struct _fpxreg *from; | ||
229 | int i; | ||
230 | |||
231 | env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; | ||
232 | env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; | ||
233 | env[2] = twd_fxsr_to_i387(fxsave); | ||
234 | env[3] = fxsave->fip; | ||
235 | env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); | ||
236 | env[5] = fxsave->foo; | ||
237 | env[6] = fxsave->fos; | ||
238 | |||
239 | if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) | ||
240 | return 1; | ||
241 | |||
242 | to = &buf->_st[0]; | ||
243 | from = (struct _fpxreg *) &fxsave->st_space[0]; | ||
244 | for ( i = 0 ; i < 8 ; i++, to++, from++ ) { | ||
245 | unsigned long __user *t = (unsigned long __user *)to; | ||
246 | unsigned long *f = (unsigned long *)from; | ||
247 | |||
248 | if (__put_user(*f, t) || | ||
249 | __put_user(*(f + 1), t + 1) || | ||
250 | __put_user(from->exponent, &to->exponent)) | ||
251 | return 1; | ||
252 | } | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, | ||
257 | struct _fpstate __user *buf ) | ||
258 | { | ||
259 | unsigned long env[7]; | ||
260 | struct _fpxreg *to; | ||
261 | struct _fpreg __user *from; | ||
262 | int i; | ||
263 | |||
264 | if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) | ||
265 | return 1; | ||
266 | |||
267 | fxsave->cwd = (unsigned short)(env[0] & 0xffff); | ||
268 | fxsave->swd = (unsigned short)(env[1] & 0xffff); | ||
269 | fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); | ||
270 | fxsave->fip = env[3]; | ||
271 | fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); | ||
272 | fxsave->fcs = (env[4] & 0xffff); | ||
273 | fxsave->foo = env[5]; | ||
274 | fxsave->fos = env[6]; | ||
275 | |||
276 | to = (struct _fpxreg *) &fxsave->st_space[0]; | ||
277 | from = &buf->_st[0]; | ||
278 | for ( i = 0 ; i < 8 ; i++, to++, from++ ) { | ||
279 | unsigned long *t = (unsigned long *)to; | ||
280 | unsigned long __user *f = (unsigned long __user *)from; | ||
281 | |||
282 | if (__get_user(*t, f) || | ||
283 | __get_user(*(t + 1), f + 1) || | ||
284 | __get_user(to->exponent, &from->exponent)) | ||
285 | return 1; | ||
286 | } | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Signal frame handlers. | ||
292 | */ | ||
293 | |||
294 | static inline int save_i387_fsave( struct _fpstate __user *buf ) | ||
295 | { | ||
296 | struct task_struct *tsk = current; | ||
297 | |||
298 | unlazy_fpu( tsk ); | ||
299 | tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; | ||
300 | if ( __copy_to_user( buf, &tsk->thread.i387.fsave, | ||
301 | sizeof(struct i387_fsave_struct) ) ) | ||
302 | return -1; | ||
303 | return 1; | ||
304 | } | ||
305 | |||
306 | static int save_i387_fxsave( struct _fpstate __user *buf ) | ||
307 | { | ||
308 | struct task_struct *tsk = current; | ||
309 | int err = 0; | ||
310 | |||
311 | unlazy_fpu( tsk ); | ||
312 | |||
313 | if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) ) | ||
314 | return -1; | ||
315 | |||
316 | err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status ); | ||
317 | err |= __put_user( X86_FXSR_MAGIC, &buf->magic ); | ||
318 | if ( err ) | ||
319 | return -1; | ||
320 | |||
321 | if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave, | ||
322 | sizeof(struct i387_fxsave_struct) ) ) | ||
323 | return -1; | ||
324 | return 1; | ||
325 | } | ||
326 | |||
327 | int save_i387( struct _fpstate __user *buf ) | ||
328 | { | ||
329 | if ( !used_math() ) | ||
330 | return 0; | ||
331 | |||
332 | /* This will cause a "finit" to be triggered by the next | ||
333 | * attempted FPU operation by the 'current' process. | ||
334 | */ | ||
335 | clear_used_math(); | ||
336 | |||
337 | if ( HAVE_HWFP ) { | ||
338 | if ( cpu_has_fxsr ) { | ||
339 | return save_i387_fxsave( buf ); | ||
340 | } else { | ||
341 | return save_i387_fsave( buf ); | ||
342 | } | ||
343 | } else { | ||
344 | return save_i387_soft( ¤t->thread.i387.soft, buf ); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static inline int restore_i387_fsave( struct _fpstate __user *buf ) | ||
349 | { | ||
350 | struct task_struct *tsk = current; | ||
351 | clear_fpu( tsk ); | ||
352 | return __copy_from_user( &tsk->thread.i387.fsave, buf, | ||
353 | sizeof(struct i387_fsave_struct) ); | ||
354 | } | ||
355 | |||
356 | static int restore_i387_fxsave( struct _fpstate __user *buf ) | ||
357 | { | ||
358 | int err; | ||
359 | struct task_struct *tsk = current; | ||
360 | clear_fpu( tsk ); | ||
361 | err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], | ||
362 | sizeof(struct i387_fxsave_struct) ); | ||
363 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
364 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
365 | return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); | ||
366 | } | ||
367 | |||
368 | int restore_i387( struct _fpstate __user *buf ) | ||
369 | { | ||
370 | int err; | ||
371 | |||
372 | if ( HAVE_HWFP ) { | ||
373 | if ( cpu_has_fxsr ) { | ||
374 | err = restore_i387_fxsave( buf ); | ||
375 | } else { | ||
376 | err = restore_i387_fsave( buf ); | ||
377 | } | ||
378 | } else { | ||
379 | err = restore_i387_soft( ¤t->thread.i387.soft, buf ); | ||
380 | } | ||
381 | set_used_math(); | ||
382 | return err; | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * ptrace request handlers. | ||
387 | */ | ||
388 | |||
389 | static inline int get_fpregs_fsave( struct user_i387_struct __user *buf, | ||
390 | struct task_struct *tsk ) | ||
391 | { | ||
392 | return __copy_to_user( buf, &tsk->thread.i387.fsave, | ||
393 | sizeof(struct user_i387_struct) ); | ||
394 | } | ||
395 | |||
396 | static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf, | ||
397 | struct task_struct *tsk ) | ||
398 | { | ||
399 | return convert_fxsr_to_user( (struct _fpstate __user *)buf, | ||
400 | &tsk->thread.i387.fxsave ); | ||
401 | } | ||
402 | |||
403 | int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk ) | ||
404 | { | ||
405 | if ( HAVE_HWFP ) { | ||
406 | if ( cpu_has_fxsr ) { | ||
407 | return get_fpregs_fxsave( buf, tsk ); | ||
408 | } else { | ||
409 | return get_fpregs_fsave( buf, tsk ); | ||
410 | } | ||
411 | } else { | ||
412 | return save_i387_soft( &tsk->thread.i387.soft, | ||
413 | (struct _fpstate __user *)buf ); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | static inline int set_fpregs_fsave( struct task_struct *tsk, | ||
418 | struct user_i387_struct __user *buf ) | ||
419 | { | ||
420 | return __copy_from_user( &tsk->thread.i387.fsave, buf, | ||
421 | sizeof(struct user_i387_struct) ); | ||
422 | } | ||
423 | |||
424 | static inline int set_fpregs_fxsave( struct task_struct *tsk, | ||
425 | struct user_i387_struct __user *buf ) | ||
426 | { | ||
427 | return convert_fxsr_from_user( &tsk->thread.i387.fxsave, | ||
428 | (struct _fpstate __user *)buf ); | ||
429 | } | ||
430 | |||
431 | int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf ) | ||
432 | { | ||
433 | if ( HAVE_HWFP ) { | ||
434 | if ( cpu_has_fxsr ) { | ||
435 | return set_fpregs_fxsave( tsk, buf ); | ||
436 | } else { | ||
437 | return set_fpregs_fsave( tsk, buf ); | ||
438 | } | ||
439 | } else { | ||
440 | return restore_i387_soft( &tsk->thread.i387.soft, | ||
441 | (struct _fpstate __user *)buf ); | ||
442 | } | ||
443 | } | ||
444 | |||
445 | int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk ) | ||
446 | { | ||
447 | if ( cpu_has_fxsr ) { | ||
448 | if (__copy_to_user( buf, &tsk->thread.i387.fxsave, | ||
449 | sizeof(struct user_fxsr_struct) )) | ||
450 | return -EFAULT; | ||
451 | return 0; | ||
452 | } else { | ||
453 | return -EIO; | ||
454 | } | ||
455 | } | ||
456 | |||
457 | int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf ) | ||
458 | { | ||
459 | int ret = 0; | ||
460 | |||
461 | if ( cpu_has_fxsr ) { | ||
462 | if (__copy_from_user( &tsk->thread.i387.fxsave, buf, | ||
463 | sizeof(struct user_fxsr_struct) )) | ||
464 | ret = -EFAULT; | ||
465 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
466 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
467 | } else { | ||
468 | ret = -EIO; | ||
469 | } | ||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * FPU state for core dumps. | ||
475 | */ | ||
476 | |||
477 | static inline void copy_fpu_fsave( struct task_struct *tsk, | ||
478 | struct user_i387_struct *fpu ) | ||
479 | { | ||
480 | memcpy( fpu, &tsk->thread.i387.fsave, | ||
481 | sizeof(struct user_i387_struct) ); | ||
482 | } | ||
483 | |||
484 | static inline void copy_fpu_fxsave( struct task_struct *tsk, | ||
485 | struct user_i387_struct *fpu ) | ||
486 | { | ||
487 | unsigned short *to; | ||
488 | unsigned short *from; | ||
489 | int i; | ||
490 | |||
491 | memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); | ||
492 | |||
493 | to = (unsigned short *)&fpu->st_space[0]; | ||
494 | from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; | ||
495 | for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { | ||
496 | memcpy( to, from, 5 * sizeof(unsigned short) ); | ||
497 | } | ||
498 | } | ||
499 | |||
500 | int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) | ||
501 | { | ||
502 | int fpvalid; | ||
503 | struct task_struct *tsk = current; | ||
504 | |||
505 | fpvalid = !!used_math(); | ||
506 | if ( fpvalid ) { | ||
507 | unlazy_fpu( tsk ); | ||
508 | if ( cpu_has_fxsr ) { | ||
509 | copy_fpu_fxsave( tsk, fpu ); | ||
510 | } else { | ||
511 | copy_fpu_fsave( tsk, fpu ); | ||
512 | } | ||
513 | } | ||
514 | |||
515 | return fpvalid; | ||
516 | } | ||
517 | EXPORT_SYMBOL(dump_fpu); | ||
518 | |||
519 | int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) | ||
520 | { | ||
521 | int fpvalid = !!tsk_used_math(tsk); | ||
522 | |||
523 | if (fpvalid) { | ||
524 | if (tsk == current) | ||
525 | unlazy_fpu(tsk); | ||
526 | if (cpu_has_fxsr) | ||
527 | copy_fpu_fxsave(tsk, fpu); | ||
528 | else | ||
529 | copy_fpu_fsave(tsk, fpu); | ||
530 | } | ||
531 | return fpvalid; | ||
532 | } | ||
533 | |||
534 | int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu) | ||
535 | { | ||
536 | int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr; | ||
537 | |||
538 | if (fpvalid) { | ||
539 | if (tsk == current) | ||
540 | unlazy_fpu(tsk); | ||
541 | memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu)); | ||
542 | } | ||
543 | return fpvalid; | ||
544 | } | ||
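For reference, the tag-word compression done by twd_i387_to_fxsr() in the deleted file above is worth spelling out: the i387 keeps two tag bits per register (00 valid, 01 zero, 10 special, 11 empty) while FXSR keeps a single in-use bit per register. A stand-alone sketch with a made-up sample tag word:

#include <stdio.h>

/* Same folding trick as twd_i387_to_fxsr() above: each 2-bit i387 tag
 * becomes one FXSR bit (1 = register in use, 0 = empty). */
static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp;

	tmp = ~twd;                        /* 11 (empty) -> 00, others -> nonzero */
	tmp = (tmp | (tmp >> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
	return (unsigned short)tmp;
}

int main(void)
{
	/* st0 valid (00), st1 zero (01), st2..st7 empty (11) */
	unsigned short twd = 0xfff4;

	printf("fxsr twd = %#x\n", twd_i387_to_fxsr(twd)); /* prints 0x3 */
	return 0;
}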
diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c deleted file mode 100644 index bfaff28fb134..000000000000 --- a/arch/x86/kernel/i387_64.c +++ /dev/null | |||
@@ -1,150 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1994 Linus Torvalds | ||
3 | * Copyright (C) 2002 Andi Kleen, SuSE Labs | ||
4 | * | ||
5 | * Pentium III FXSR, SSE support | ||
6 | * General FPU state handling cleanups | ||
7 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
8 | * | ||
9 | * x86-64 rework 2002 Andi Kleen. | ||
10 | * Does direct fxsave in and out of user space now for signal handlers. | ||
11 | * All the FSAVE<->FXSAVE conversion code has been moved to the 32-bit emulation; | ||
12 | * the 64-bit user space sees an FXSAVE frame directly. | ||
13 | */ | ||
14 | |||
15 | #include <linux/sched.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <asm/processor.h> | ||
18 | #include <asm/i387.h> | ||
19 | #include <asm/sigcontext.h> | ||
20 | #include <asm/user.h> | ||
21 | #include <asm/ptrace.h> | ||
22 | #include <asm/uaccess.h> | ||
23 | |||
24 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffff; | ||
25 | |||
26 | void mxcsr_feature_mask_init(void) | ||
27 | { | ||
28 | unsigned int mask; | ||
29 | clts(); | ||
30 | memset(¤t->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
31 | asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | ||
32 | mask = current->thread.i387.fxsave.mxcsr_mask; | ||
33 | if (mask == 0) mask = 0x0000ffbf; | ||
34 | mxcsr_feature_mask &= mask; | ||
35 | stts(); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Called at bootup to set up the initial FPU state that is later cloned | ||
40 | * into all processes. | ||
41 | */ | ||
42 | void __cpuinit fpu_init(void) | ||
43 | { | ||
44 | unsigned long oldcr0 = read_cr0(); | ||
45 | extern void __bad_fxsave_alignment(void); | ||
46 | |||
47 | if (offsetof(struct task_struct, thread.i387.fxsave) & 15) | ||
48 | __bad_fxsave_alignment(); | ||
49 | set_in_cr4(X86_CR4_OSFXSR); | ||
50 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
51 | |||
52 | write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */ | ||
53 | |||
54 | mxcsr_feature_mask_init(); | ||
55 | /* clean state in init */ | ||
56 | current_thread_info()->status = 0; | ||
57 | clear_used_math(); | ||
58 | } | ||
59 | |||
60 | void init_fpu(struct task_struct *child) | ||
61 | { | ||
62 | if (tsk_used_math(child)) { | ||
63 | if (child == current) | ||
64 | unlazy_fpu(child); | ||
65 | return; | ||
66 | } | ||
67 | memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
68 | child->thread.i387.fxsave.cwd = 0x37f; | ||
69 | child->thread.i387.fxsave.mxcsr = 0x1f80; | ||
70 | /* only the device not available exception or ptrace can call init_fpu */ | ||
71 | set_stopped_child_used_math(child); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Signal frame handlers. | ||
76 | */ | ||
77 | |||
78 | int save_i387(struct _fpstate __user *buf) | ||
79 | { | ||
80 | struct task_struct *tsk = current; | ||
81 | int err = 0; | ||
82 | |||
83 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != | ||
84 | sizeof(tsk->thread.i387.fxsave)); | ||
85 | |||
86 | if ((unsigned long)buf % 16) | ||
87 | printk("save_i387: bad fpstate %p\n",buf); | ||
88 | |||
89 | if (!used_math()) | ||
90 | return 0; | ||
91 | clear_used_math(); /* trigger finit */ | ||
92 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | ||
93 | err = save_i387_checking((struct i387_fxsave_struct __user *)buf); | ||
94 | if (err) return err; | ||
95 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
96 | stts(); | ||
97 | } else { | ||
98 | if (__copy_to_user(buf, &tsk->thread.i387.fxsave, | ||
99 | sizeof(struct i387_fxsave_struct))) | ||
100 | return -1; | ||
101 | } | ||
102 | return 1; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * ptrace request handlers. | ||
107 | */ | ||
108 | |||
109 | int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *tsk) | ||
110 | { | ||
111 | init_fpu(tsk); | ||
112 | return __copy_to_user(buf, &tsk->thread.i387.fxsave, | ||
113 | sizeof(struct user_i387_struct)) ? -EFAULT : 0; | ||
114 | } | ||
115 | |||
116 | int set_fpregs(struct task_struct *tsk, struct user_i387_struct __user *buf) | ||
117 | { | ||
118 | if (__copy_from_user(&tsk->thread.i387.fxsave, buf, | ||
119 | sizeof(struct user_i387_struct))) | ||
120 | return -EFAULT; | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * FPU state for core dumps. | ||
126 | */ | ||
127 | |||
128 | int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) | ||
129 | { | ||
130 | struct task_struct *tsk = current; | ||
131 | |||
132 | if (!used_math()) | ||
133 | return 0; | ||
134 | |||
135 | unlazy_fpu(tsk); | ||
136 | memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct)); | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) | ||
141 | { | ||
142 | int fpvalid = !!tsk_used_math(tsk); | ||
143 | |||
144 | if (fpvalid) { | ||
145 | if (tsk == current) | ||
146 | unlazy_fpu(tsk); | ||
147 | memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct)); | ||
148 | } | ||
149 | return fpvalid; | ||
150 | } | ||
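Both the 32-bit and 64-bit restore paths above AND the user-supplied MXCSR with mxcsr_feature_mask before the value can reach FXRSTOR, because setting a reserved MXCSR bit makes FXRSTOR fault. A minimal sketch of that sanitisation: 0x0000ffbf is the fallback mask the init code uses when FXSAVE reports an all-zero mxcsr_mask, and sanitize_mxcsr() is a hypothetical stand-alone helper, not a kernel function.

#include <stdio.h>
#include <stdint.h>

#define MXCSR_DEFAULT_MASK 0x0000ffbfu	/* fallback when fxsave reports 0 */

/* Mirrors the '&= mxcsr_feature_mask' step above: any MXCSR bit not
 * advertised by the CPU must be cleared before FXRSTOR sees it. */
static uint32_t sanitize_mxcsr(uint32_t user_mxcsr, uint32_t feature_mask)
{
	return user_mxcsr & feature_mask;
}

int main(void)
{
	uint32_t from_signal_frame = 0xffff1f80u;	/* reserved bits set */

	printf("mxcsr after masking: %#x\n",
	       (unsigned int)sanitize_mxcsr(from_signal_frame,
					    MXCSR_DEFAULT_MASK));
	return 0;	/* prints 0x1f80, the power-on default */
}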
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 29313832df0c..dbd6c1d1b638 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c | |||
@@ -51,7 +51,7 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) | |||
51 | } | 51 | } |
52 | 52 | ||
53 | static struct sysdev_class i8237_sysdev_class = { | 53 | static struct sysdev_class i8237_sysdev_class = { |
54 | set_kset_name("i8237"), | 54 | .name = "i8237", |
55 | .suspend = i8237A_suspend, | 55 | .suspend = i8237A_suspend, |
56 | .resume = i8237A_resume, | 56 | .resume = i8237A_resume, |
57 | }; | 57 | }; |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index a42c80745325..ef62b07b2b48 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -13,10 +13,17 @@ | |||
13 | #include <asm/delay.h> | 13 | #include <asm/delay.h> |
14 | #include <asm/i8253.h> | 14 | #include <asm/i8253.h> |
15 | #include <asm/io.h> | 15 | #include <asm/io.h> |
16 | #include <asm/hpet.h> | ||
16 | 17 | ||
17 | DEFINE_SPINLOCK(i8253_lock); | 18 | DEFINE_SPINLOCK(i8253_lock); |
18 | EXPORT_SYMBOL(i8253_lock); | 19 | EXPORT_SYMBOL(i8253_lock); |
19 | 20 | ||
21 | #ifdef CONFIG_X86_32 | ||
22 | static void pit_disable_clocksource(void); | ||
23 | #else | ||
24 | static inline void pit_disable_clocksource(void) { } | ||
25 | #endif | ||
26 | |||
20 | /* | 27 | /* |
21 | * HPET replaces the PIT, when enabled. So we need to know, which of | 28 | * HPET replaces the PIT, when enabled. So we need to know, which of |
22 | * the two timers is used | 29 | * the two timers is used |
@@ -31,38 +38,38 @@ struct clock_event_device *global_clock_event; | |||
31 | static void init_pit_timer(enum clock_event_mode mode, | 38 | static void init_pit_timer(enum clock_event_mode mode, |
32 | struct clock_event_device *evt) | 39 | struct clock_event_device *evt) |
33 | { | 40 | { |
34 | unsigned long flags; | 41 | spin_lock(&i8253_lock); |
35 | |||
36 | spin_lock_irqsave(&i8253_lock, flags); | ||
37 | 42 | ||
38 | switch(mode) { | 43 | switch(mode) { |
39 | case CLOCK_EVT_MODE_PERIODIC: | 44 | case CLOCK_EVT_MODE_PERIODIC: |
40 | /* binary, mode 2, LSB/MSB, ch 0 */ | 45 | /* binary, mode 2, LSB/MSB, ch 0 */ |
41 | outb_p(0x34, PIT_MODE); | 46 | outb_pit(0x34, PIT_MODE); |
42 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | 47 | outb_pit(LATCH & 0xff , PIT_CH0); /* LSB */ |
43 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | 48 | outb_pit(LATCH >> 8 , PIT_CH0); /* MSB */ |
44 | break; | 49 | break; |
45 | 50 | ||
46 | case CLOCK_EVT_MODE_SHUTDOWN: | 51 | case CLOCK_EVT_MODE_SHUTDOWN: |
47 | case CLOCK_EVT_MODE_UNUSED: | 52 | case CLOCK_EVT_MODE_UNUSED: |
48 | if (evt->mode == CLOCK_EVT_MODE_PERIODIC || | 53 | if (evt->mode == CLOCK_EVT_MODE_PERIODIC || |
49 | evt->mode == CLOCK_EVT_MODE_ONESHOT) { | 54 | evt->mode == CLOCK_EVT_MODE_ONESHOT) { |
50 | outb_p(0x30, PIT_MODE); | 55 | outb_pit(0x30, PIT_MODE); |
51 | outb_p(0, PIT_CH0); | 56 | outb_pit(0, PIT_CH0); |
52 | outb_p(0, PIT_CH0); | 57 | outb_pit(0, PIT_CH0); |
53 | } | 58 | } |
59 | pit_disable_clocksource(); | ||
54 | break; | 60 | break; |
55 | 61 | ||
56 | case CLOCK_EVT_MODE_ONESHOT: | 62 | case CLOCK_EVT_MODE_ONESHOT: |
57 | /* One shot setup */ | 63 | /* One shot setup */ |
58 | outb_p(0x38, PIT_MODE); | 64 | pit_disable_clocksource(); |
65 | outb_pit(0x38, PIT_MODE); | ||
59 | break; | 66 | break; |
60 | 67 | ||
61 | case CLOCK_EVT_MODE_RESUME: | 68 | case CLOCK_EVT_MODE_RESUME: |
62 | /* Nothing to do here */ | 69 | /* Nothing to do here */ |
63 | break; | 70 | break; |
64 | } | 71 | } |
65 | spin_unlock_irqrestore(&i8253_lock, flags); | 72 | spin_unlock(&i8253_lock); |
66 | } | 73 | } |
67 | 74 | ||
68 | /* | 75 | /* |
@@ -72,12 +79,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
72 | */ | 79 | */ |
73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 80 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
74 | { | 81 | { |
75 | unsigned long flags; | 82 | spin_lock(&i8253_lock); |
76 | 83 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | |
77 | spin_lock_irqsave(&i8253_lock, flags); | 84 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ |
78 | outb_p(delta & 0xff , PIT_CH0); /* LSB */ | 85 | spin_unlock(&i8253_lock); |
79 | outb(delta >> 8 , PIT_CH0); /* MSB */ | ||
80 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
81 | 86 | ||
82 | return 0; | 87 | return 0; |
83 | } | 88 | } |
@@ -148,15 +153,15 @@ static cycle_t pit_read(void) | |||
148 | * count), it cannot be newer. | 153 | * count), it cannot be newer. |
149 | */ | 154 | */ |
150 | jifs = jiffies; | 155 | jifs = jiffies; |
151 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | 156 | outb_pit(0x00, PIT_MODE); /* latch the count ASAP */ |
152 | count = inb_p(PIT_CH0); /* read the latched count */ | 157 | count = inb_pit(PIT_CH0); /* read the latched count */ |
153 | count |= inb_p(PIT_CH0) << 8; | 158 | count |= inb_pit(PIT_CH0) << 8; |
154 | 159 | ||
155 | /* VIA686a test code... reset the latch if count > max + 1 */ | 160 | /* VIA686a test code... reset the latch if count > max + 1 */ |
156 | if (count > LATCH) { | 161 | if (count > LATCH) { |
157 | outb_p(0x34, PIT_MODE); | 162 | outb_pit(0x34, PIT_MODE); |
158 | outb_p(LATCH & 0xff, PIT_CH0); | 163 | outb_pit(LATCH & 0xff, PIT_CH0); |
159 | outb(LATCH >> 8, PIT_CH0); | 164 | outb_pit(LATCH >> 8, PIT_CH0); |
160 | count = LATCH - 1; | 165 | count = LATCH - 1; |
161 | } | 166 | } |
162 | 167 | ||
@@ -195,9 +200,28 @@ static struct clocksource clocksource_pit = { | |||
195 | .shift = 20, | 200 | .shift = 20, |
196 | }; | 201 | }; |
197 | 202 | ||
203 | static void pit_disable_clocksource(void) | ||
204 | { | ||
205 | /* | ||
206 | * Use mult to check whether it is registered or not | ||
207 | */ | ||
208 | if (clocksource_pit.mult) { | ||
209 | clocksource_unregister(&clocksource_pit); | ||
210 | clocksource_pit.mult = 0; | ||
211 | } | ||
212 | } | ||
213 | |||
198 | static int __init init_pit_clocksource(void) | 214 | static int __init init_pit_clocksource(void) |
199 | { | 215 | { |
200 | if (num_possible_cpus() > 1) /* PIT does not scale! */ | 216 | /* |
217 | * Several reasons not to register PIT as a clocksource: | ||
218 | * | ||
219 | * - On SMP PIT does not scale due to i8253_lock | ||
220 | * - when HPET is enabled | ||
221 | * - when local APIC timer is active (PIT is switched off) | ||
222 | */ | ||
223 | if (num_possible_cpus() > 1 || is_hpet_enabled() || | ||
224 | pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) | ||
201 | return 0; | 225 | return 0; |
202 | 226 | ||
203 | clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); | 227 | clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); |
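init_pit_timer() and pit_next_event() above program channel 0 of the i8254 through two I/O ports: a command byte written to PIT_MODE (0x43), then the 16-bit reload value written to PIT_CH0 (0x40), low byte first. The command byte 0x34 selects channel 0, LSB-then-MSB access, mode 2 (rate generator), binary counting. A stand-alone sketch of the periodic setup, assuming the usual 1.193182 MHz input clock; outb_pit() here is a local print stub, not the kernel helper.

#include <stdio.h>
#include <stdint.h>

#define PIT_MODE 0x43
#define PIT_CH0  0x40

static void outb_pit(unsigned int val, unsigned int port)
{
	printf("outb 0x%02x -> port 0x%02x\n", val, port);	/* stub port write */
}

static void pit_program_periodic(uint16_t latch)
{
	outb_pit(0x34, PIT_MODE);        /* ch0, LSB/MSB, mode 2, binary */
	outb_pit(latch & 0xff, PIT_CH0); /* reload value, low byte first */
	outb_pit(latch >> 8, PIT_CH0);   /* then high byte */
}

int main(void)
{
	/* 1193182 Hz / HZ=1000 timer ticks per second ~= 1193 counts */
	pit_program_periodic(1193);
	return 0;
}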
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index f634fc715c99..2d25b77102fe 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include <asm/arch_hooks.h> | 21 | #include <asm/arch_hooks.h> |
22 | #include <asm/i8259.h> | 22 | #include <asm/i8259.h> |
23 | 23 | ||
24 | #include <io_ports.h> | ||
25 | |||
26 | /* | 24 | /* |
27 | * This is the 'legacy' 8259A Programmable Interrupt Controller, | 25 | * This is the 'legacy' 8259A Programmable Interrupt Controller, |
28 | * present in the majority of PC/AT boxes. | 26 | * present in the majority of PC/AT boxes. |
@@ -258,7 +256,7 @@ static int i8259A_shutdown(struct sys_device *dev) | |||
258 | } | 256 | } |
259 | 257 | ||
260 | static struct sysdev_class i8259_sysdev_class = { | 258 | static struct sysdev_class i8259_sysdev_class = { |
261 | set_kset_name("i8259"), | 259 | .name = "i8259", |
262 | .suspend = i8259A_suspend, | 260 | .suspend = i8259A_suspend, |
263 | .resume = i8259A_resume, | 261 | .resume = i8259A_resume, |
264 | .shutdown = i8259A_shutdown, | 262 | .shutdown = i8259A_shutdown, |
@@ -291,20 +289,20 @@ void init_8259A(int auto_eoi) | |||
291 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 289 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
292 | 290 | ||
293 | /* | 291 | /* |
294 | * outb_p - this has to work on a wide range of PC hardware. | 292 | * outb_pic - this has to work on a wide range of PC hardware. |
295 | */ | 293 | */ |
296 | outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | 294 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ |
297 | outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ | 295 | outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ |
298 | outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ | 296 | outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ |
299 | if (auto_eoi) /* master does Auto EOI */ | 297 | if (auto_eoi) /* master does Auto EOI */ |
300 | outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); | 298 | outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); |
301 | else /* master expects normal EOI */ | 299 | else /* master expects normal EOI */ |
302 | outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); | 300 | outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); |
303 | 301 | ||
304 | outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ | 302 | outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ |
305 | outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ | 303 | outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ |
306 | outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ | 304 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ |
307 | outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ | 305 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ |
308 | if (auto_eoi) | 306 | if (auto_eoi) |
309 | /* | 307 | /* |
310 | * In AEOI mode we just have to mask the interrupt | 308 | * In AEOI mode we just have to mask the interrupt |
@@ -341,7 +339,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
341 | outb(0,0xF0); | 339 | outb(0,0xF0); |
342 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 340 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
343 | return IRQ_NONE; | 341 | return IRQ_NONE; |
344 | math_error((void __user *)get_irq_regs()->eip); | 342 | math_error((void __user *)get_irq_regs()->ip); |
345 | return IRQ_HANDLED; | 343 | return IRQ_HANDLED; |
346 | } | 344 | } |
347 | 345 | ||
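The init_8259A() sequence in the hunk above follows the standard 8259A initialisation protocol: ICW1 starts initialisation and announces that ICW4 follows, ICW2 sets the vector base, ICW3 describes the cascade wiring on IR2, and ICW4 picks normal or automatic EOI. A stand-alone sketch of that ordering; pic_init() and the outb_pic() print stub are illustrative only, and the vector bases 0x20/0x28 match the 32-bit mapping shown above.

#include <stdio.h>

#define PIC_MASTER_CMD 0x20
#define PIC_MASTER_IMR 0x21
#define PIC_SLAVE_CMD  0xa0
#define PIC_SLAVE_IMR  0xa1

static void outb_pic(unsigned int val, unsigned int port)
{
	printf("outb 0x%02x -> 0x%02x\n", val, port);	/* stub port write */
}

static void pic_init(unsigned int master_base, unsigned int slave_base,
		     int auto_eoi)
{
	outb_pic(0x11, PIC_MASTER_CMD);          /* ICW1: init, ICW4 needed */
	outb_pic(master_base, PIC_MASTER_IMR);   /* ICW2: IR0-7 vector base */
	outb_pic(0x04, PIC_MASTER_IMR);          /* ICW3: slave on IR2      */
	outb_pic(auto_eoi ? 0x03 : 0x01, PIC_MASTER_IMR); /* ICW4           */

	outb_pic(0x11, PIC_SLAVE_CMD);           /* ICW1 for the slave      */
	outb_pic(slave_base, PIC_SLAVE_IMR);     /* ICW2: IR0-7 vector base */
	outb_pic(0x02, PIC_SLAVE_IMR);           /* ICW3: cascade identity  */
	outb_pic(0x01, PIC_SLAVE_IMR);           /* ICW4: normal EOI        */
}

int main(void)
{
	pic_init(0x20, 0x28, 0);	/* 32-bit kernel: IRQ0-7 at 0x20-0x27 */
	return 0;
}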
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 3f27ea0b9816..fa57a1568508 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/delay.h> | 21 | #include <asm/delay.h> |
22 | #include <asm/desc.h> | 22 | #include <asm/desc.h> |
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <asm/i8259.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Common place to define all x86 IRQ vectors | 27 | * Common place to define all x86 IRQ vectors |
@@ -48,7 +49,7 @@ | |||
48 | */ | 49 | */ |
49 | 50 | ||
50 | /* | 51 | /* |
51 | * The IO-APIC gives us many more interrupt sources. Most of these | 52 | * The IO-APIC gives us many more interrupt sources. Most of these |
52 | * are unused but an SMP system is supposed to have enough memory ... | 53 | * are unused but an SMP system is supposed to have enough memory ... |
53 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all | 54 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all |
54 | * across the spectrum, so we really want to be prepared to get all | 55 | * across the spectrum, so we really want to be prepared to get all |
@@ -76,7 +77,7 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | |||
76 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | 77 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) |
77 | 78 | ||
78 | /* for the irq vectors */ | 79 | /* for the irq vectors */ |
79 | static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | 80 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { |
80 | IRQLIST_16(0x2), IRQLIST_16(0x3), | 81 | IRQLIST_16(0x2), IRQLIST_16(0x3), |
81 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | 82 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), |
82 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | 83 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), |
@@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = { | |||
114 | /* | 115 | /* |
115 | * This contains the irq mask for both 8259A irq controllers, | 116 | * This contains the irq mask for both 8259A irq controllers, |
116 | */ | 117 | */ |
117 | static unsigned int cached_irq_mask = 0xffff; | 118 | unsigned int cached_irq_mask = 0xffff; |
118 | |||
119 | #define __byte(x,y) (((unsigned char *)&(y))[x]) | ||
120 | #define cached_21 (__byte(0,cached_irq_mask)) | ||
121 | #define cached_A1 (__byte(1,cached_irq_mask)) | ||
122 | 119 | ||
123 | /* | 120 | /* |
124 | * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) | 121 | * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) |
@@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq) | |||
139 | spin_lock_irqsave(&i8259A_lock, flags); | 136 | spin_lock_irqsave(&i8259A_lock, flags); |
140 | cached_irq_mask |= mask; | 137 | cached_irq_mask |= mask; |
141 | if (irq & 8) | 138 | if (irq & 8) |
142 | outb(cached_A1,0xA1); | 139 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
143 | else | 140 | else |
144 | outb(cached_21,0x21); | 141 | outb(cached_master_mask, PIC_MASTER_IMR); |
145 | spin_unlock_irqrestore(&i8259A_lock, flags); | 142 | spin_unlock_irqrestore(&i8259A_lock, flags); |
146 | } | 143 | } |
147 | 144 | ||
@@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq) | |||
153 | spin_lock_irqsave(&i8259A_lock, flags); | 150 | spin_lock_irqsave(&i8259A_lock, flags); |
154 | cached_irq_mask &= mask; | 151 | cached_irq_mask &= mask; |
155 | if (irq & 8) | 152 | if (irq & 8) |
156 | outb(cached_A1,0xA1); | 153 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
157 | else | 154 | else |
158 | outb(cached_21,0x21); | 155 | outb(cached_master_mask, PIC_MASTER_IMR); |
159 | spin_unlock_irqrestore(&i8259A_lock, flags); | 156 | spin_unlock_irqrestore(&i8259A_lock, flags); |
160 | } | 157 | } |
161 | 158 | ||
@@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq) | |||
167 | 164 | ||
168 | spin_lock_irqsave(&i8259A_lock, flags); | 165 | spin_lock_irqsave(&i8259A_lock, flags); |
169 | if (irq < 8) | 166 | if (irq < 8) |
170 | ret = inb(0x20) & mask; | 167 | ret = inb(PIC_MASTER_CMD) & mask; |
171 | else | 168 | else |
172 | ret = inb(0xA0) & (mask >> 8); | 169 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); |
173 | spin_unlock_irqrestore(&i8259A_lock, flags); | 170 | spin_unlock_irqrestore(&i8259A_lock, flags); |
174 | 171 | ||
175 | return ret; | 172 | return ret; |
@@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsigned int irq) | |||
196 | int irqmask = 1<<irq; | 193 | int irqmask = 1<<irq; |
197 | 194 | ||
198 | if (irq < 8) { | 195 | if (irq < 8) { |
199 | outb(0x0B,0x20); /* ISR register */ | 196 | outb(0x0B,PIC_MASTER_CMD); /* ISR register */ |
200 | value = inb(0x20) & irqmask; | 197 | value = inb(PIC_MASTER_CMD) & irqmask; |
201 | outb(0x0A,0x20); /* back to the IRR register */ | 198 | outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ |
202 | return value; | 199 | return value; |
203 | } | 200 | } |
204 | outb(0x0B,0xA0); /* ISR register */ | 201 | outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ |
205 | value = inb(0xA0) & (irqmask >> 8); | 202 | value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); |
206 | outb(0x0A,0xA0); /* back to the IRR register */ | 203 | outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ |
207 | return value; | 204 | return value; |
208 | } | 205 | } |
209 | 206 | ||
@@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
240 | 237 | ||
241 | handle_real_irq: | 238 | handle_real_irq: |
242 | if (irq & 8) { | 239 | if (irq & 8) { |
243 | inb(0xA1); /* DUMMY - (do we need this?) */ | 240 | inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ |
244 | outb(cached_A1,0xA1); | 241 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
245 | outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ | 242 | /* 'Specific EOI' to slave */ |
246 | outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ | 243 | outb(0x60+(irq&7),PIC_SLAVE_CMD); |
244 | /* 'Specific EOI' to master-IRQ2 */ | ||
245 | outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); | ||
247 | } else { | 246 | } else { |
248 | inb(0x21); /* DUMMY - (do we need this?) */ | 247 | inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ |
249 | outb(cached_21,0x21); | 248 | outb(cached_master_mask, PIC_MASTER_IMR); |
250 | outb(0x60+irq,0x20); /* 'Specific EOI' to master */ | 249 | /* 'Specific EOI' to master */ |
250 | outb(0x60+irq,PIC_MASTER_CMD); | ||
251 | } | 251 | } |
252 | spin_unlock_irqrestore(&i8259A_lock, flags); | 252 | spin_unlock_irqrestore(&i8259A_lock, flags); |
253 | return; | 253 | return; |
@@ -270,7 +270,8 @@ spurious_8259A_irq: | |||
270 | * lets ACK and report it. [once per IRQ] | 270 | * lets ACK and report it. [once per IRQ] |
271 | */ | 271 | */ |
272 | if (!(spurious_irq_mask & irqmask)) { | 272 | if (!(spurious_irq_mask & irqmask)) { |
273 | printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); | 273 | printk(KERN_DEBUG |
274 | "spurious 8259A interrupt: IRQ%d.\n", irq); | ||
274 | spurious_irq_mask |= irqmask; | 275 | spurious_irq_mask |= irqmask; |
275 | } | 276 | } |
276 | atomic_inc(&irq_err_count); | 277 | atomic_inc(&irq_err_count); |
@@ -283,51 +284,6 @@ spurious_8259A_irq: | |||
283 | } | 284 | } |
284 | } | 285 | } |
285 | 286 | ||
286 | void init_8259A(int auto_eoi) | ||
287 | { | ||
288 | unsigned long flags; | ||
289 | |||
290 | i8259A_auto_eoi = auto_eoi; | ||
291 | |||
292 | spin_lock_irqsave(&i8259A_lock, flags); | ||
293 | |||
294 | outb(0xff, 0x21); /* mask all of 8259A-1 */ | ||
295 | outb(0xff, 0xA1); /* mask all of 8259A-2 */ | ||
296 | |||
297 | /* | ||
298 | * outb_p - this has to work on a wide range of PC hardware. | ||
299 | */ | ||
300 | outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ | ||
301 | outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ | ||
302 | outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ | ||
303 | if (auto_eoi) | ||
304 | outb_p(0x03, 0x21); /* master does Auto EOI */ | ||
305 | else | ||
306 | outb_p(0x01, 0x21); /* master expects normal EOI */ | ||
307 | |||
308 | outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ | ||
309 | outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ | ||
310 | outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ | ||
311 | outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode | ||
312 | is to be investigated) */ | ||
313 | |||
314 | if (auto_eoi) | ||
315 | /* | ||
316 | * in AEOI mode we just have to mask the interrupt | ||
317 | * when acking. | ||
318 | */ | ||
319 | i8259A_chip.mask_ack = disable_8259A_irq; | ||
320 | else | ||
321 | i8259A_chip.mask_ack = mask_and_ack_8259A; | ||
322 | |||
323 | udelay(100); /* wait for 8259A to initialize */ | ||
324 | |||
325 | outb(cached_21, 0x21); /* restore master IRQ mask */ | ||
326 | outb(cached_A1, 0xA1); /* restore slave IRQ mask */ | ||
327 | |||
328 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
329 | } | ||
330 | |||
331 | static char irq_trigger[2]; | 287 | static char irq_trigger[2]; |
332 | /** | 288 | /** |
333 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ | 289 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ |
@@ -364,13 +320,13 @@ static int i8259A_shutdown(struct sys_device *dev) | |||
364 | * the kernel initialization code can get it | 320 | * the kernel initialization code can get it |
365 | * out of. | 321 | * out of. |
366 | */ | 322 | */ |
367 | outb(0xff, 0x21); /* mask all of 8259A-1 */ | 323 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
368 | outb(0xff, 0xA1); /* mask all of 8259A-1 */ | 324 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ |
369 | return 0; | 325 | return 0; |
370 | } | 326 | } |
371 | 327 | ||
372 | static struct sysdev_class i8259_sysdev_class = { | 328 | static struct sysdev_class i8259_sysdev_class = { |
373 | set_kset_name("i8259"), | 329 | .name = "i8259", |
374 | .suspend = i8259A_suspend, | 330 | .suspend = i8259A_suspend, |
375 | .resume = i8259A_resume, | 331 | .resume = i8259A_resume, |
376 | .shutdown = i8259A_shutdown, | 332 | .shutdown = i8259A_shutdown, |
@@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void) | |||
391 | 347 | ||
392 | device_initcall(i8259A_init_sysfs); | 348 | device_initcall(i8259A_init_sysfs); |
393 | 349 | ||
350 | void init_8259A(int auto_eoi) | ||
351 | { | ||
352 | unsigned long flags; | ||
353 | |||
354 | i8259A_auto_eoi = auto_eoi; | ||
355 | |||
356 | spin_lock_irqsave(&i8259A_lock, flags); | ||
357 | |||
358 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | ||
359 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | ||
360 | |||
361 | /* | ||
362 | * outb_pic - this has to work on a wide range of PC hardware. | ||
363 | */ | ||
364 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | ||
365 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ | ||
366 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); | ||
367 | /* 8259A-1 (the master) has a slave on IR2 */ | ||
368 | outb_pic(0x04, PIC_MASTER_IMR); | ||
369 | if (auto_eoi) /* master does Auto EOI */ | ||
370 | outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); | ||
371 | else /* master expects normal EOI */ | ||
372 | outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); | ||
373 | |||
374 | outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ | ||
375 | /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ | ||
376 | outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); | ||
377 | /* 8259A-2 is a slave on master's IR2 */ | ||
378 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); | ||
379 | /* (slave's support for AEOI in flat mode is to be investigated) */ | ||
380 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); | ||
381 | |||
382 | if (auto_eoi) | ||
383 | /* | ||
384 | * In AEOI mode we just have to mask the interrupt | ||
385 | * when acking. | ||
386 | */ | ||
387 | i8259A_chip.mask_ack = disable_8259A_irq; | ||
388 | else | ||
389 | i8259A_chip.mask_ack = mask_and_ack_8259A; | ||
390 | |||
391 | udelay(100); /* wait for 8259A to initialize */ | ||
392 | |||
393 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | ||
394 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | ||
395 | |||
396 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
397 | } | ||
398 | |||
399 | |||
400 | |||
401 | |||
394 | /* | 402 | /* |
395 | * IRQ2 is cascade interrupt to second interrupt controller | 403 | * IRQ2 is cascade interrupt to second interrupt controller |
396 | */ | 404 | */ |
@@ -448,7 +456,9 @@ void __init init_ISA_irqs (void) | |||
448 | } | 456 | } |
449 | } | 457 | } |
450 | 458 | ||
451 | void __init init_IRQ(void) | 459 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
460 | |||
461 | void __init native_init_IRQ(void) | ||
452 | { | 462 | { |
453 | int i; | 463 | int i; |
454 | 464 | ||
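disable_8259A_irq()/enable_8259A_irq() in the hunk above keep a single 16-bit cached_irq_mask covering both controllers and write back only the byte belonging to the PIC being touched (master IMR for IRQ0-7, slave IMR for IRQ8-15). A minimal sketch of that bookkeeping; write_imr() and the pic_mask/unmask helpers are local stand-ins, not kernel functions.

#include <stdio.h>
#include <stdint.h>

#define PIC_MASTER_IMR 0x21
#define PIC_SLAVE_IMR  0xa1

static uint16_t cached_irq_mask = 0xffff;	/* all 16 lines masked at boot */

static void write_imr(unsigned int port, unsigned int val)
{
	printf("IMR 0x%02x <- 0x%02x\n", port, val);	/* stub port write */
}

static void pic_mask_irq(unsigned int irq)
{
	cached_irq_mask |= 1u << irq;
	if (irq & 8)	/* IRQ8-15 live on the slave controller */
		write_imr(PIC_SLAVE_IMR, cached_irq_mask >> 8);
	else
		write_imr(PIC_MASTER_IMR, cached_irq_mask & 0xff);
}

static void pic_unmask_irq(unsigned int irq)
{
	cached_irq_mask &= (uint16_t)~(1u << irq);
	if (irq & 8)
		write_imr(PIC_SLAVE_IMR, cached_irq_mask >> 8);
	else
		write_imr(PIC_MASTER_IMR, cached_irq_mask & 0xff);
}

int main(void)
{
	pic_unmask_irq(2);	/* cascade line must stay open for IRQ8-15 */
	pic_mask_irq(12);	/* sets bit 4 of the slave IMR */
	return 0;
}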
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 468c9c437842..5b3ce7934363 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES; | |||
15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
17 | struct mm_struct init_mm = INIT_MM(init_mm); | 17 | struct mm_struct init_mm = INIT_MM(init_mm); |
18 | EXPORT_SYMBOL(init_mm); | ||
19 | 18 | ||
20 | /* | 19 | /* |
21 | * Initial thread structure. | 20 | * Initial thread structure. |
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c3a565bba106..4ca548632c8d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/htirq.h> | 35 | #include <linux/htirq.h> |
36 | #include <linux/freezer.h> | 36 | #include <linux/freezer.h> |
37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
38 | #include <linux/jiffies.h> /* time_after() */ | ||
38 | 39 | ||
39 | #include <asm/io.h> | 40 | #include <asm/io.h> |
40 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
@@ -48,8 +49,6 @@ | |||
48 | #include <mach_apic.h> | 49 | #include <mach_apic.h> |
49 | #include <mach_apicdef.h> | 50 | #include <mach_apicdef.h> |
50 | 51 | ||
51 | #include "io_ports.h" | ||
52 | |||
53 | int (*ioapic_renumber_irq)(int ioapic, int irq); | 52 | int (*ioapic_renumber_irq)(int ioapic, int irq); |
54 | atomic_t irq_mis_count; | 53 | atomic_t irq_mis_count; |
55 | 54 | ||
@@ -351,7 +350,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) | |||
351 | # include <asm/processor.h> /* kernel_thread() */ | 350 | # include <asm/processor.h> /* kernel_thread() */ |
352 | # include <linux/kernel_stat.h> /* kstat */ | 351 | # include <linux/kernel_stat.h> /* kstat */ |
353 | # include <linux/slab.h> /* kmalloc() */ | 352 | # include <linux/slab.h> /* kmalloc() */ |
354 | # include <linux/timer.h> /* time_after() */ | 353 | # include <linux/timer.h> |
355 | 354 | ||
356 | #define IRQBALANCE_CHECK_ARCH -999 | 355 | #define IRQBALANCE_CHECK_ARCH -999 |
357 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | 356 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) |
@@ -727,7 +726,7 @@ late_initcall(balanced_irq_init); | |||
727 | #endif /* CONFIG_SMP */ | 726 | #endif /* CONFIG_SMP */ |
728 | 727 | ||
729 | #ifndef CONFIG_SMP | 728 | #ifndef CONFIG_SMP |
730 | void fastcall send_IPI_self(int vector) | 729 | void send_IPI_self(int vector) |
731 | { | 730 | { |
732 | unsigned int cfg; | 731 | unsigned int cfg; |
733 | 732 | ||
@@ -1900,7 +1899,7 @@ static int __init timer_irq_works(void) | |||
1900 | * might have cached one ExtINT interrupt. Finally, at | 1899 | * might have cached one ExtINT interrupt. Finally, at |
1901 | * least one tick may be lost due to delays. | 1900 | * least one tick may be lost due to delays. |
1902 | */ | 1901 | */ |
1903 | if (jiffies - t1 > 4) | 1902 | if (time_after(jiffies, t1 + 4)) |
1904 | return 1; | 1903 | return 1; |
1905 | 1904 | ||
1906 | return 0; | 1905 | return 0; |
@@ -2080,7 +2079,7 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2080 | .eoi = ack_apic, | 2079 | .eoi = ack_apic, |
2081 | }; | 2080 | }; |
2082 | 2081 | ||
2083 | static void setup_nmi (void) | 2082 | static void __init setup_nmi(void) |
2084 | { | 2083 | { |
2085 | /* | 2084 | /* |
2086 | * Dirty trick to enable the NMI watchdog ... | 2085 | * Dirty trick to enable the NMI watchdog ... |
@@ -2093,7 +2092,7 @@ static void setup_nmi (void) | |||
2093 | */ | 2092 | */ |
2094 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | 2093 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); |
2095 | 2094 | ||
2096 | on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); | 2095 | enable_NMI_through_LVT0(); |
2097 | 2096 | ||
2098 | apic_printk(APIC_VERBOSE, " done.\n"); | 2097 | apic_printk(APIC_VERBOSE, " done.\n"); |
2099 | } | 2098 | } |
@@ -2169,14 +2168,10 @@ static inline void __init check_timer(void) | |||
2169 | { | 2168 | { |
2170 | int apic1, pin1, apic2, pin2; | 2169 | int apic1, pin1, apic2, pin2; |
2171 | int vector; | 2170 | int vector; |
2172 | unsigned int ver; | ||
2173 | unsigned long flags; | 2171 | unsigned long flags; |
2174 | 2172 | ||
2175 | local_irq_save(flags); | 2173 | local_irq_save(flags); |
2176 | 2174 | ||
2177 | ver = apic_read(APIC_LVR); | ||
2178 | ver = GET_APIC_VERSION(ver); | ||
2179 | |||
2180 | /* | 2175 | /* |
2181 | * get/set the timer IRQ vector: | 2176 | * get/set the timer IRQ vector: |
2182 | */ | 2177 | */ |
@@ -2189,15 +2184,11 @@ static inline void __init check_timer(void) | |||
2189 | * mode for the 8259A whenever interrupts are routed | 2184 | * mode for the 8259A whenever interrupts are routed |
2190 | * through I/O APICs. Also IRQ0 has to be enabled in | 2185 | * through I/O APICs. Also IRQ0 has to be enabled in |
2191 | * the 8259A which implies the virtual wire has to be | 2186 | * the 8259A which implies the virtual wire has to be |
2192 | * disabled in the local APIC. Finally timer interrupts | 2187 | * disabled in the local APIC. |
2193 | * need to be acknowledged manually in the 8259A for | ||
2194 | * timer_interrupt() and for the i82489DX when using | ||
2195 | * the NMI watchdog. | ||
2196 | */ | 2188 | */ |
2197 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2189 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2198 | init_8259A(1); | 2190 | init_8259A(1); |
2199 | timer_ack = !cpu_has_tsc; | 2191 | timer_ack = 1; |
2200 | timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2201 | if (timer_over_8254 > 0) | 2192 | if (timer_over_8254 > 0) |
2202 | enable_8259A_irq(0); | 2193 | enable_8259A_irq(0); |
2203 | 2194 | ||
@@ -2409,7 +2400,7 @@ static int ioapic_resume(struct sys_device *dev) | |||
2409 | } | 2400 | } |
2410 | 2401 | ||
2411 | static struct sysdev_class ioapic_sysdev_class = { | 2402 | static struct sysdev_class ioapic_sysdev_class = { |
2412 | set_kset_name("ioapic"), | 2403 | .name = "ioapic", |
2413 | .suspend = ioapic_suspend, | 2404 | .suspend = ioapic_suspend, |
2414 | .resume = ioapic_resume, | 2405 | .resume = ioapic_resume, |
2415 | }; | 2406 | }; |
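The timer_irq_works() changes above (here and in io_apic_64.c below) switch the jiffies check to time_after(). That macro reduces to a signed-difference comparison, which stays correct across a counter wrap, unlike a plain "now > deadline" test. A 32-bit stand-alone sketch; time_after32() below mimics the kernel macro's arithmetic for illustration.

#include <stdio.h>
#include <stdint.h>

/* same idea as time_after(a, b): true if a is after b, wrap-safe */
#define time_after32(a, b) ((int32_t)((b) - (a)) < 0)

int main(void)
{
	uint32_t t1 = 0xfffffffeu;	/* timestamp taken just before a wrap */
	uint32_t deadline = t1 + 4;	/* wraps around to 0x00000002 */
	uint32_t now = t1 + 1;		/* only one tick has passed */

	printf("naive now > deadline : %d\n", now > deadline);		  /* 1: wrong   */
	printf("time_after(now, dl)  : %d\n", time_after32(now, deadline)); /* 0: correct */

	now = t1 + 6;			/* two ticks past the deadline */
	printf("time_after(now, dl)  : %d\n", time_after32(now, deadline)); /* 1: correct */
	return 0;
}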
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index cbac1670c7c3..1627c0d53e0b 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -32,9 +32,11 @@ | |||
32 | #include <linux/msi.h> | 32 | #include <linux/msi.h> |
33 | #include <linux/htirq.h> | 33 | #include <linux/htirq.h> |
34 | #include <linux/dmar.h> | 34 | #include <linux/dmar.h> |
35 | #include <linux/jiffies.h> | ||
35 | #ifdef CONFIG_ACPI | 36 | #ifdef CONFIG_ACPI |
36 | #include <acpi/acpi_bus.h> | 37 | #include <acpi/acpi_bus.h> |
37 | #endif | 38 | #endif |
39 | #include <linux/bootmem.h> | ||
38 | 40 | ||
39 | #include <asm/idle.h> | 41 | #include <asm/idle.h> |
40 | #include <asm/io.h> | 42 | #include <asm/io.h> |
@@ -1069,7 +1071,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1069 | v = apic_read(APIC_LVR); | 1071 | v = apic_read(APIC_LVR); |
1070 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1072 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
1071 | ver = GET_APIC_VERSION(v); | 1073 | ver = GET_APIC_VERSION(v); |
1072 | maxlvt = get_maxlvt(); | 1074 | maxlvt = lapic_get_maxlvt(); |
1073 | 1075 | ||
1074 | v = apic_read(APIC_TASKPRI); | 1076 | v = apic_read(APIC_TASKPRI); |
1075 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | 1077 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); |
@@ -1171,7 +1173,7 @@ void __apicdebuginit print_PIC(void) | |||
1171 | 1173 | ||
1172 | #endif /* 0 */ | 1174 | #endif /* 0 */ |
1173 | 1175 | ||
1174 | static void __init enable_IO_APIC(void) | 1176 | void __init enable_IO_APIC(void) |
1175 | { | 1177 | { |
1176 | union IO_APIC_reg_01 reg_01; | 1178 | union IO_APIC_reg_01 reg_01; |
1177 | int i8259_apic, i8259_pin; | 1179 | int i8259_apic, i8259_pin; |
@@ -1298,7 +1300,7 @@ static int __init timer_irq_works(void) | |||
1298 | */ | 1300 | */ |
1299 | 1301 | ||
1300 | /* jiffies wrap? */ | 1302 | /* jiffies wrap? */ |
1301 | if (jiffies - t1 > 4) | 1303 | if (time_after(jiffies, t1 + 4)) |
1302 | return 1; | 1304 | return 1; |
1303 | return 0; | 1305 | return 0; |
1304 | } | 1306 | } |
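The switch from a raw "jiffies - t1 > 4" subtraction to time_after() makes the elapsed-time test safe across jiffies wraparound. A minimal sketch of the idiom, not part of the patch; the helper name is illustrative and only the standard <linux/jiffies.h> macro is assumed:

#include <linux/jiffies.h>

/* Has at least 'ticks' jiffies elapsed since 't0'?  time_after() compares
 * using signed arithmetic, so the result stays correct even when the
 * jiffies counter wraps around. */
static inline int elapsed_at_least(unsigned long t0, unsigned long ticks)
{
	return time_after(jiffies, t0 + ticks);
}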
@@ -1411,7 +1413,7 @@ static void irq_complete_move(unsigned int irq) | |||
1411 | if (likely(!cfg->move_in_progress)) | 1413 | if (likely(!cfg->move_in_progress)) |
1412 | return; | 1414 | return; |
1413 | 1415 | ||
1414 | vector = ~get_irq_regs()->orig_rax; | 1416 | vector = ~get_irq_regs()->orig_ax; |
1415 | me = smp_processor_id(); | 1417 | me = smp_processor_id(); |
1416 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | 1418 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { |
1417 | cpumask_t cleanup_mask; | 1419 | cpumask_t cleanup_mask; |
@@ -1438,7 +1440,7 @@ static void ack_apic_level(unsigned int irq) | |||
1438 | int do_unmask_irq = 0; | 1440 | int do_unmask_irq = 0; |
1439 | 1441 | ||
1440 | irq_complete_move(irq); | 1442 | irq_complete_move(irq); |
1441 | #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) | 1443 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
1442 | /* If we are moving the irq we need to mask it */ | 1444 | /* If we are moving the irq we need to mask it */ |
1443 | if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { | 1445 | if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { |
1444 | do_unmask_irq = 1; | 1446 | do_unmask_irq = 1; |
@@ -1565,7 +1567,7 @@ static struct hw_interrupt_type lapic_irq_type __read_mostly = { | |||
1565 | .end = end_lapic_irq, | 1567 | .end = end_lapic_irq, |
1566 | }; | 1568 | }; |
1567 | 1569 | ||
1568 | static void setup_nmi (void) | 1570 | static void __init setup_nmi(void) |
1569 | { | 1571 | { |
1570 | /* | 1572 | /* |
1571 | * Dirty trick to enable the NMI watchdog ... | 1573 | * Dirty trick to enable the NMI watchdog ... |
@@ -1578,7 +1580,7 @@ static void setup_nmi (void) | |||
1578 | */ | 1580 | */ |
1579 | printk(KERN_INFO "activating NMI Watchdog ..."); | 1581 | printk(KERN_INFO "activating NMI Watchdog ..."); |
1580 | 1582 | ||
1581 | enable_NMI_through_LVT0(NULL); | 1583 | enable_NMI_through_LVT0(); |
1582 | 1584 | ||
1583 | printk(" done.\n"); | 1585 | printk(" done.\n"); |
1584 | } | 1586 | } |
@@ -1654,7 +1656,7 @@ static inline void unlock_ExtINT_logic(void) | |||
1654 | * | 1656 | * |
1655 | * FIXME: really need to revamp this for modern platforms only. | 1657 | * FIXME: really need to revamp this for modern platforms only. |
1656 | */ | 1658 | */ |
1657 | static inline void check_timer(void) | 1659 | static inline void __init check_timer(void) |
1658 | { | 1660 | { |
1659 | struct irq_cfg *cfg = irq_cfg + 0; | 1661 | struct irq_cfg *cfg = irq_cfg + 0; |
1660 | int apic1, pin1, apic2, pin2; | 1662 | int apic1, pin1, apic2, pin2; |
@@ -1788,7 +1790,10 @@ __setup("no_timer_check", notimercheck); | |||
1788 | 1790 | ||
1789 | void __init setup_IO_APIC(void) | 1791 | void __init setup_IO_APIC(void) |
1790 | { | 1792 | { |
1791 | enable_IO_APIC(); | 1793 | |
1794 | /* | ||
1795 | * the call to enable_IO_APIC() was moved to setup_local_APIC() for the BP | ||
1796 | */ | ||
1792 | 1797 | ||
1793 | if (acpi_ioapic) | 1798 | if (acpi_ioapic) |
1794 | io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | 1799 | io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ |
@@ -1850,7 +1855,7 @@ static int ioapic_resume(struct sys_device *dev) | |||
1850 | } | 1855 | } |
1851 | 1856 | ||
1852 | static struct sysdev_class ioapic_sysdev_class = { | 1857 | static struct sysdev_class ioapic_sysdev_class = { |
1853 | set_kset_name("ioapic"), | 1858 | .name = "ioapic", |
1854 | .suspend = ioapic_suspend, | 1859 | .suspend = ioapic_suspend, |
1855 | .resume = ioapic_resume, | 1860 | .resume = ioapic_resume, |
1856 | }; | 1861 | }; |
@@ -2288,3 +2293,92 @@ void __init setup_ioapic_dest(void) | |||
2288 | } | 2293 | } |
2289 | #endif | 2294 | #endif |
2290 | 2295 | ||
2296 | #define IOAPIC_RESOURCE_NAME_SIZE 11 | ||
2297 | |||
2298 | static struct resource *ioapic_resources; | ||
2299 | |||
2300 | static struct resource * __init ioapic_setup_resources(void) | ||
2301 | { | ||
2302 | unsigned long n; | ||
2303 | struct resource *res; | ||
2304 | char *mem; | ||
2305 | int i; | ||
2306 | |||
2307 | if (nr_ioapics <= 0) | ||
2308 | return NULL; | ||
2309 | |||
2310 | n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); | ||
2311 | n *= nr_ioapics; | ||
2312 | |||
2313 | mem = alloc_bootmem(n); | ||
2314 | res = (void *)mem; | ||
2315 | |||
2316 | if (mem != NULL) { | ||
2317 | memset(mem, 0, n); | ||
2318 | mem += sizeof(struct resource) * nr_ioapics; | ||
2319 | |||
2320 | for (i = 0; i < nr_ioapics; i++) { | ||
2321 | res[i].name = mem; | ||
2322 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
2323 | sprintf(mem, "IOAPIC %u", i); | ||
2324 | mem += IOAPIC_RESOURCE_NAME_SIZE; | ||
2325 | } | ||
2326 | } | ||
2327 | |||
2328 | ioapic_resources = res; | ||
2329 | |||
2330 | return res; | ||
2331 | } | ||
2332 | |||
2333 | void __init ioapic_init_mappings(void) | ||
2334 | { | ||
2335 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | ||
2336 | struct resource *ioapic_res; | ||
2337 | int i; | ||
2338 | |||
2339 | ioapic_res = ioapic_setup_resources(); | ||
2340 | for (i = 0; i < nr_ioapics; i++) { | ||
2341 | if (smp_found_config) { | ||
2342 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | ||
2343 | } else { | ||
2344 | ioapic_phys = (unsigned long) | ||
2345 | alloc_bootmem_pages(PAGE_SIZE); | ||
2346 | ioapic_phys = __pa(ioapic_phys); | ||
2347 | } | ||
2348 | set_fixmap_nocache(idx, ioapic_phys); | ||
2349 | apic_printk(APIC_VERBOSE, | ||
2350 | "mapped IOAPIC to %016lx (%016lx)\n", | ||
2351 | __fix_to_virt(idx), ioapic_phys); | ||
2352 | idx++; | ||
2353 | |||
2354 | if (ioapic_res != NULL) { | ||
2355 | ioapic_res->start = ioapic_phys; | ||
2356 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | ||
2357 | ioapic_res++; | ||
2358 | } | ||
2359 | } | ||
2360 | } | ||
2361 | |||
2362 | static int __init ioapic_insert_resources(void) | ||
2363 | { | ||
2364 | int i; | ||
2365 | struct resource *r = ioapic_resources; | ||
2366 | |||
2367 | if (!r) { | ||
2368 | printk(KERN_ERR | ||
2369 | "IO APIC resources could be not be allocated.\n"); | ||
2370 | return -1; | ||
2371 | } | ||
2372 | |||
2373 | for (i = 0; i < nr_ioapics; i++) { | ||
2374 | insert_resource(&iomem_resource, r); | ||
2375 | r++; | ||
2376 | } | ||
2377 | |||
2378 | return 0; | ||
2379 | } | ||
2380 | |||
2381 | /* Insert the IO APIC resources after PCI initialization has occurred to handle | ||
2382 | * IO APICs that are mapped in on a BAR in PCI space. */ | ||
2383 | late_initcall(ioapic_insert_resources); | ||
2384 | |||
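ioapic_setup_resources() above sizes a single bootmem block to hold all the struct resource entries followed by their fixed-size name strings. A sketch of that layout, not part of the patch; the helper name is illustrative:

/* One block: N struct resource entries, then N name slots of
 * IOAPIC_RESOURCE_NAME_SIZE (11) bytes each, enough for "IOAPIC "
 * plus up to a three-digit index and the trailing NUL. */
static inline char *ioapic_name_slot(struct resource *res, int nr, int i)
{
	char *names = (char *)(res + nr);	/* names start after the array */

	return names + i * IOAPIC_RESOURCE_NAME_SIZE;
}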
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c new file mode 100644 index 000000000000..bd49321034db --- /dev/null +++ b/arch/x86/kernel/io_delay.c | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * I/O delay strategies for inb_p/outb_p | ||
3 | * | ||
4 | * Allow for a DMI-based override of port 0x80, needed for certain HP laptops | ||
5 | * and possibly other systems. Also allow for the gradual elimination of | ||
6 | * outb_p/inb_p API uses. | ||
7 | */ | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/delay.h> | ||
12 | #include <linux/dmi.h> | ||
13 | #include <asm/io.h> | ||
14 | |||
15 | int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; | ||
16 | EXPORT_SYMBOL_GPL(io_delay_type); | ||
17 | |||
18 | static int __initdata io_delay_override; | ||
19 | |||
20 | /* | ||
21 | * Paravirt wants native_io_delay to be a constant. | ||
22 | */ | ||
23 | void native_io_delay(void) | ||
24 | { | ||
25 | switch (io_delay_type) { | ||
26 | default: | ||
27 | case CONFIG_IO_DELAY_TYPE_0X80: | ||
28 | asm volatile ("outb %al, $0x80"); | ||
29 | break; | ||
30 | case CONFIG_IO_DELAY_TYPE_0XED: | ||
31 | asm volatile ("outb %al, $0xed"); | ||
32 | break; | ||
33 | case CONFIG_IO_DELAY_TYPE_UDELAY: | ||
34 | /* | ||
35 | * 2 usecs is an upper-bound for the outb delay but | ||
36 | * note that udelay doesn't have the bus-level | ||
37 | * side-effects that outb does, nor does udelay() have | ||
38 | * precise timings during very early bootup (the delays | ||
39 | * are shorter until calibrated): | ||
40 | */ | ||
41 | udelay(2); | ||
42 | case CONFIG_IO_DELAY_TYPE_NONE: | ||
43 | break; | ||
44 | } | ||
45 | } | ||
46 | EXPORT_SYMBOL(native_io_delay); | ||
47 | |||
48 | static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) | ||
49 | { | ||
50 | if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { | ||
51 | printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", | ||
52 | id->ident); | ||
53 | io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; | ||
54 | } | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Quirk table for systems that misbehave (lock up, etc.) if port | ||
61 | * 0x80 is used: | ||
62 | */ | ||
63 | static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | ||
64 | { | ||
65 | .callback = dmi_io_delay_0xed_port, | ||
66 | .ident = "Compaq Presario V6000", | ||
67 | .matches = { | ||
68 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
69 | DMI_MATCH(DMI_BOARD_NAME, "30B7") | ||
70 | } | ||
71 | }, | ||
72 | { | ||
73 | .callback = dmi_io_delay_0xed_port, | ||
74 | .ident = "HP Pavilion dv9000z", | ||
75 | .matches = { | ||
76 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
77 | DMI_MATCH(DMI_BOARD_NAME, "30B9") | ||
78 | } | ||
79 | }, | ||
80 | { | ||
81 | .callback = dmi_io_delay_0xed_port, | ||
82 | .ident = "HP Pavilion tx1000", | ||
83 | .matches = { | ||
84 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
85 | DMI_MATCH(DMI_BOARD_NAME, "30BF") | ||
86 | } | ||
87 | }, | ||
88 | { } | ||
89 | }; | ||
90 | |||
91 | void __init io_delay_init(void) | ||
92 | { | ||
93 | if (!io_delay_override) | ||
94 | dmi_check_system(io_delay_0xed_port_dmi_table); | ||
95 | } | ||
96 | |||
97 | static int __init io_delay_param(char *s) | ||
98 | { | ||
99 | if (!strcmp(s, "0x80")) | ||
100 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; | ||
101 | else if (!strcmp(s, "0xed")) | ||
102 | io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; | ||
103 | else if (!strcmp(s, "udelay")) | ||
104 | io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY; | ||
105 | else if (!strcmp(s, "none")) | ||
106 | io_delay_type = CONFIG_IO_DELAY_TYPE_NONE; | ||
107 | else | ||
108 | return -EINVAL; | ||
109 | |||
110 | io_delay_override = 1; | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | early_param("io_delay", io_delay_param); | ||
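The quirk table above can be extended for other machines that hang when port 0x80 is written, and the same override is available at boot time through the io_delay= early parameter (io_delay=0x80|0xed|udelay|none). A sketch of such an extension mirroring the table above; the vendor and board strings are placeholders, not a real quirk:

static struct dmi_system_id __initdata example_io_delay_dmi_table[] = {
	{
		.callback = dmi_io_delay_0xed_port,
		.ident = "Example Laptop 1234",		/* hypothetical machine */
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ExampleVendor"),
			DMI_MATCH(DMI_BOARD_NAME, "1234")
		}
	},
	{ }
};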
diff --git a/arch/x86/kernel/ioport_32.c b/arch/x86/kernel/ioport.c index 4ed48dc8df1e..50e5e4a31c85 100644 --- a/arch/x86/kernel/ioport_32.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * This contains the io-permission bitmap code - written by obz, with changes | 2 | * This contains the io-permission bitmap code - written by obz, with changes |
3 | * by Linus. | 3 | * by Linus. 32/64-bit code unification by Miguel Botón. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
@@ -16,49 +16,27 @@ | |||
16 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
17 | 17 | ||
18 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | 18 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
19 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | 19 | static void set_bitmap(unsigned long *bitmap, unsigned int base, |
20 | unsigned int extent, int new_value) | ||
20 | { | 21 | { |
21 | unsigned long mask; | 22 | unsigned int i; |
22 | unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); | ||
23 | unsigned int low_index = base & (BITS_PER_LONG-1); | ||
24 | int length = low_index + extent; | ||
25 | |||
26 | if (low_index != 0) { | ||
27 | mask = (~0UL << low_index); | ||
28 | if (length < BITS_PER_LONG) | ||
29 | mask &= ~(~0UL << length); | ||
30 | if (new_value) | ||
31 | *bitmap_base++ |= mask; | ||
32 | else | ||
33 | *bitmap_base++ &= ~mask; | ||
34 | length -= BITS_PER_LONG; | ||
35 | } | ||
36 | |||
37 | mask = (new_value ? ~0UL : 0UL); | ||
38 | while (length >= BITS_PER_LONG) { | ||
39 | *bitmap_base++ = mask; | ||
40 | length -= BITS_PER_LONG; | ||
41 | } | ||
42 | 23 | ||
43 | if (length > 0) { | 24 | for (i = base; i < base + extent; i++) { |
44 | mask = ~(~0UL << length); | ||
45 | if (new_value) | 25 | if (new_value) |
46 | *bitmap_base++ |= mask; | 26 | __set_bit(i, bitmap); |
47 | else | 27 | else |
48 | *bitmap_base++ &= ~mask; | 28 | __clear_bit(i, bitmap); |
49 | } | 29 | } |
50 | } | 30 | } |
51 | 31 | ||
52 | |||
53 | /* | 32 | /* |
54 | * this changes the io permissions bitmap in the current task. | 33 | * this changes the io permissions bitmap in the current task. |
55 | */ | 34 | */ |
56 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | 35 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) |
57 | { | 36 | { |
58 | unsigned long i, max_long, bytes, bytes_updated; | ||
59 | struct thread_struct * t = ¤t->thread; | 37 | struct thread_struct * t = ¤t->thread; |
60 | struct tss_struct * tss; | 38 | struct tss_struct * tss; |
61 | unsigned long *bitmap; | 39 | unsigned int i, max_long, bytes, bytes_updated; |
62 | 40 | ||
63 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | 41 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) |
64 | return -EINVAL; | 42 | return -EINVAL; |
@@ -71,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
71 | * this is why we delay this operation until now: | 49 | * this is why we delay this operation until now: |
72 | */ | 50 | */ |
73 | if (!t->io_bitmap_ptr) { | 51 | if (!t->io_bitmap_ptr) { |
74 | bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 52 | unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
53 | |||
75 | if (!bitmap) | 54 | if (!bitmap) |
76 | return -ENOMEM; | 55 | return -ENOMEM; |
77 | 56 | ||
@@ -100,11 +79,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
100 | if (t->io_bitmap_ptr[i] != ~0UL) | 79 | if (t->io_bitmap_ptr[i] != ~0UL) |
101 | max_long = i; | 80 | max_long = i; |
102 | 81 | ||
103 | bytes = (max_long + 1) * sizeof(long); | 82 | bytes = (max_long + 1) * sizeof(unsigned long); |
104 | bytes_updated = max(bytes, t->io_bitmap_max); | 83 | bytes_updated = max(bytes, t->io_bitmap_max); |
105 | 84 | ||
106 | t->io_bitmap_max = bytes; | 85 | t->io_bitmap_max = bytes; |
107 | 86 | ||
87 | #ifdef CONFIG_X86_32 | ||
108 | /* | 88 | /* |
109 | * Sets the lazy trigger so that the next I/O operation will | 89 | * Sets the lazy trigger so that the next I/O operation will |
110 | * reload the correct bitmap. | 90 | * reload the correct bitmap. |
@@ -113,6 +93,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
113 | */ | 93 | */ |
114 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | 94 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; |
115 | tss->io_bitmap_owner = NULL; | 95 | tss->io_bitmap_owner = NULL; |
96 | #else | ||
97 | /* Update the TSS: */ | ||
98 | memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); | ||
99 | #endif | ||
116 | 100 | ||
117 | put_cpu(); | 101 | put_cpu(); |
118 | 102 | ||
@@ -124,18 +108,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
124 | * beyond the 0x3ff range: to get the full 65536 ports bitmapped | 108 | * beyond the 0x3ff range: to get the full 65536 ports bitmapped |
125 | * you'd need 8kB of bitmaps/process, which is a bit excessive. | 109 | * you'd need 8kB of bitmaps/process, which is a bit excessive. |
126 | * | 110 | * |
127 | * Here we just change the eflags value on the stack: we allow | 111 | * Here we just change the flags value on the stack: we allow |
128 | * only the super-user to do it. This depends on the stack-layout | 112 | * only the super-user to do it. This depends on the stack-layout |
129 | * on system-call entry - see also fork() and the signal handling | 113 | * on system-call entry - see also fork() and the signal handling |
130 | * code. | 114 | * code. |
131 | */ | 115 | */ |
132 | 116 | static int do_iopl(unsigned int level, struct pt_regs *regs) | |
133 | asmlinkage long sys_iopl(unsigned long unused) | ||
134 | { | 117 | { |
135 | volatile struct pt_regs * regs = (struct pt_regs *) &unused; | 118 | unsigned int old = (regs->flags >> 12) & 3; |
136 | unsigned int level = regs->ebx; | ||
137 | unsigned int old = (regs->eflags >> 12) & 3; | ||
138 | struct thread_struct *t = ¤t->thread; | ||
139 | 119 | ||
140 | if (level > 3) | 120 | if (level > 3) |
141 | return -EINVAL; | 121 | return -EINVAL; |
@@ -144,8 +124,31 @@ asmlinkage long sys_iopl(unsigned long unused) | |||
144 | if (!capable(CAP_SYS_RAWIO)) | 124 | if (!capable(CAP_SYS_RAWIO)) |
145 | return -EPERM; | 125 | return -EPERM; |
146 | } | 126 | } |
127 | regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); | ||
128 | |||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | #ifdef CONFIG_X86_32 | ||
133 | asmlinkage long sys_iopl(unsigned long regsp) | ||
134 | { | ||
135 | struct pt_regs *regs = (struct pt_regs *)®sp; | ||
136 | unsigned int level = regs->bx; | ||
137 | struct thread_struct *t = ¤t->thread; | ||
138 | int rc; | ||
139 | |||
140 | rc = do_iopl(level, regs); | ||
141 | if (rc < 0) | ||
142 | goto out; | ||
143 | |||
147 | t->iopl = level << 12; | 144 | t->iopl = level << 12; |
148 | regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; | ||
149 | set_iopl_mask(t->iopl); | 145 | set_iopl_mask(t->iopl); |
150 | return 0; | 146 | out: |
147 | return rc; | ||
148 | } | ||
149 | #else | ||
150 | asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | ||
151 | { | ||
152 | return do_iopl(level, regs); | ||
151 | } | 153 | } |
154 | #endif | ||
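do_iopl() only rewrites the two IOPL bits (bits 12-13) of the saved flags word. A minimal sketch of that bit arithmetic, not part of the patch; the macro and helper names are illustrative:

#define EXAMPLE_EFLAGS_IOPL	0x00003000	/* IOPL field, bits 12-13 */

/* Extract the current IOPL (0..3) from a saved flags value. */
static inline unsigned int iopl_from_flags(unsigned long flags)
{
	return (flags >> 12) & 3;
}

/* Return a flags value with the IOPL field replaced by 'level' (0..3). */
static inline unsigned long flags_with_iopl(unsigned long flags, unsigned int level)
{
	return (flags & ~(unsigned long)EXAMPLE_EFLAGS_IOPL) |
	       ((unsigned long)level << 12);
}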
diff --git a/arch/x86/kernel/ioport_64.c b/arch/x86/kernel/ioport_64.c deleted file mode 100644 index 5f62fad64dab..000000000000 --- a/arch/x86/kernel/ioport_64.c +++ /dev/null | |||
@@ -1,117 +0,0 @@ | |||
1 | /* | ||
2 | * This contains the io-permission bitmap code - written by obz, with changes | ||
3 | * by Linus. | ||
4 | */ | ||
5 | |||
6 | #include <linux/sched.h> | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/capability.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/types.h> | ||
11 | #include <linux/ioport.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/stddef.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/thread_info.h> | ||
16 | #include <linux/syscalls.h> | ||
17 | |||
18 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | ||
19 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | ||
20 | { | ||
21 | int i; | ||
22 | if (new_value) | ||
23 | for (i = base; i < base + extent; i++) | ||
24 | __set_bit(i, bitmap); | ||
25 | else | ||
26 | for (i = base; i < base + extent; i++) | ||
27 | clear_bit(i, bitmap); | ||
28 | } | ||
29 | |||
30 | /* | ||
31 | * this changes the io permissions bitmap in the current task. | ||
32 | */ | ||
33 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | ||
34 | { | ||
35 | unsigned int i, max_long, bytes, bytes_updated; | ||
36 | struct thread_struct * t = ¤t->thread; | ||
37 | struct tss_struct * tss; | ||
38 | unsigned long *bitmap; | ||
39 | |||
40 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | ||
41 | return -EINVAL; | ||
42 | if (turn_on && !capable(CAP_SYS_RAWIO)) | ||
43 | return -EPERM; | ||
44 | |||
45 | /* | ||
46 | * If it's the first ioperm() call in this thread's lifetime, set the | ||
47 | * IO bitmap up. ioperm() is much less timing critical than clone(), | ||
48 | * this is why we delay this operation until now: | ||
49 | */ | ||
50 | if (!t->io_bitmap_ptr) { | ||
51 | bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | ||
52 | if (!bitmap) | ||
53 | return -ENOMEM; | ||
54 | |||
55 | memset(bitmap, 0xff, IO_BITMAP_BYTES); | ||
56 | t->io_bitmap_ptr = bitmap; | ||
57 | set_thread_flag(TIF_IO_BITMAP); | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * do it in the per-thread copy and in the TSS ... | ||
62 | * | ||
63 | * Disable preemption via get_cpu() - we must not switch away | ||
64 | * because the ->io_bitmap_max value must match the bitmap | ||
65 | * contents: | ||
66 | */ | ||
67 | tss = &per_cpu(init_tss, get_cpu()); | ||
68 | |||
69 | set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | ||
70 | |||
71 | /* | ||
72 | * Search for a (possibly new) maximum. This is simple and stupid, | ||
73 | * to keep it obviously correct: | ||
74 | */ | ||
75 | max_long = 0; | ||
76 | for (i = 0; i < IO_BITMAP_LONGS; i++) | ||
77 | if (t->io_bitmap_ptr[i] != ~0UL) | ||
78 | max_long = i; | ||
79 | |||
80 | bytes = (max_long + 1) * sizeof(long); | ||
81 | bytes_updated = max(bytes, t->io_bitmap_max); | ||
82 | |||
83 | t->io_bitmap_max = bytes; | ||
84 | |||
85 | /* Update the TSS: */ | ||
86 | memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); | ||
87 | |||
88 | put_cpu(); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * sys_iopl has to be used when you want to access the IO ports | ||
95 | * beyond the 0x3ff range: to get the full 65536 ports bitmapped | ||
96 | * you'd need 8kB of bitmaps/process, which is a bit excessive. | ||
97 | * | ||
98 | * Here we just change the eflags value on the stack: we allow | ||
99 | * only the super-user to do it. This depends on the stack-layout | ||
100 | * on system-call entry - see also fork() and the signal handling | ||
101 | * code. | ||
102 | */ | ||
103 | |||
104 | asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | ||
105 | { | ||
106 | unsigned int old = (regs->eflags >> 12) & 3; | ||
107 | |||
108 | if (level > 3) | ||
109 | return -EINVAL; | ||
110 | /* Trying to gain more privileges? */ | ||
111 | if (level > old) { | ||
112 | if (!capable(CAP_SYS_RAWIO)) | ||
113 | return -EPERM; | ||
114 | } | ||
115 | regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12); | ||
116 | return 0; | ||
117 | } | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d3fde94f7345..cef054b09d27 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -66,11 +66,11 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | |||
66 | * SMP cross-CPU interrupts have their own specific | 66 | * SMP cross-CPU interrupts have their own specific |
67 | * handlers). | 67 | * handlers). |
68 | */ | 68 | */ |
69 | fastcall unsigned int do_IRQ(struct pt_regs *regs) | 69 | unsigned int do_IRQ(struct pt_regs *regs) |
70 | { | 70 | { |
71 | struct pt_regs *old_regs; | 71 | struct pt_regs *old_regs; |
72 | /* high bit used in ret_from_ code */ | 72 | /* high bit used in ret_from_ code */ |
73 | int irq = ~regs->orig_eax; | 73 | int irq = ~regs->orig_ax; |
74 | struct irq_desc *desc = irq_desc + irq; | 74 | struct irq_desc *desc = irq_desc + irq; |
75 | #ifdef CONFIG_4KSTACKS | 75 | #ifdef CONFIG_4KSTACKS |
76 | union irq_ctx *curctx, *irqctx; | 76 | union irq_ctx *curctx, *irqctx; |
@@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
88 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 88 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
89 | /* Debugging check for stack overflow: is there less than 1KB free? */ | 89 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
90 | { | 90 | { |
91 | long esp; | 91 | long sp; |
92 | 92 | ||
93 | __asm__ __volatile__("andl %%esp,%0" : | 93 | __asm__ __volatile__("andl %%esp,%0" : |
94 | "=r" (esp) : "0" (THREAD_SIZE - 1)); | 94 | "=r" (sp) : "0" (THREAD_SIZE - 1)); |
95 | if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { | 95 | if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { |
96 | printk("do_IRQ: stack overflow: %ld\n", | 96 | printk("do_IRQ: stack overflow: %ld\n", |
97 | esp - sizeof(struct thread_info)); | 97 | sp - sizeof(struct thread_info)); |
98 | dump_stack(); | 98 | dump_stack(); |
99 | } | 99 | } |
100 | } | 100 | } |
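The CONFIG_DEBUG_STACKOVERFLOW check above works because the kernel stack is THREAD_SIZE-aligned, so masking the stack pointer with THREAD_SIZE - 1 yields its offset within the stack; struct thread_info sits at the bottom, so a small offset means the stack is nearly full. A sketch of the same calculation, with an illustrative helper name:

/* Offset of 'sp' within its THREAD_SIZE-aligned kernel stack; values below
 * sizeof(struct thread_info) + STACK_WARN indicate imminent overflow. */
static inline unsigned long stack_offset(unsigned long sp)
{
	return sp & (THREAD_SIZE - 1);
}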
@@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
112 | * current stack (which is the irq stack already after all) | 112 | * current stack (which is the irq stack already after all) |
113 | */ | 113 | */ |
114 | if (curctx != irqctx) { | 114 | if (curctx != irqctx) { |
115 | int arg1, arg2, ebx; | 115 | int arg1, arg2, bx; |
116 | 116 | ||
117 | /* build the stack frame on the IRQ stack */ | 117 | /* build the stack frame on the IRQ stack */ |
118 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | 118 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); |
@@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
128 | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); | 128 | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); |
129 | 129 | ||
130 | asm volatile( | 130 | asm volatile( |
131 | " xchgl %%ebx,%%esp \n" | 131 | " xchgl %%ebx,%%esp \n" |
132 | " call *%%edi \n" | 132 | " call *%%edi \n" |
133 | " movl %%ebx,%%esp \n" | 133 | " movl %%ebx,%%esp \n" |
134 | : "=a" (arg1), "=d" (arg2), "=b" (ebx) | 134 | : "=a" (arg1), "=d" (arg2), "=b" (bx) |
135 | : "0" (irq), "1" (desc), "2" (isp), | 135 | : "0" (irq), "1" (desc), "2" (isp), |
136 | "D" (desc->handle_irq) | 136 | "D" (desc->handle_irq) |
137 | : "memory", "cc" | 137 | : "memory", "cc" |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6b5c730d67b9..3aac15466a91 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -20,6 +20,26 @@ | |||
20 | 20 | ||
21 | atomic_t irq_err_count; | 21 | atomic_t irq_err_count; |
22 | 22 | ||
23 | /* | ||
24 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
25 | * each architecture has to answer this for itself. | ||
26 | */ | ||
27 | void ack_bad_irq(unsigned int irq) | ||
28 | { | ||
29 | printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); | ||
30 | /* | ||
31 | * Currently unexpected vectors happen only on SMP and APIC. | ||
32 | * We _must_ ack these because every local APIC has only N | ||
33 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
34 | * holds up an irq slot - in excessive cases (when multiple | ||
35 | * unexpected vectors occur) that might lock up the APIC | ||
36 | * completely. | ||
37 | * But don't ack when the APIC is disabled. -AK | ||
38 | */ | ||
39 | if (!disable_apic) | ||
40 | ack_APIC_irq(); | ||
41 | } | ||
42 | |||
23 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 43 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
24 | /* | 44 | /* |
25 | * Probabilistic stack overflow check: | 45 | * Probabilistic stack overflow check: |
@@ -33,11 +53,11 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
33 | u64 curbase = (u64)task_stack_page(current); | 53 | u64 curbase = (u64)task_stack_page(current); |
34 | static unsigned long warned = -60*HZ; | 54 | static unsigned long warned = -60*HZ; |
35 | 55 | ||
36 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && | 56 | if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && |
37 | regs->rsp < curbase + sizeof(struct thread_info) + 128 && | 57 | regs->sp < curbase + sizeof(struct thread_info) + 128 && |
38 | time_after(jiffies, warned + 60*HZ)) { | 58 | time_after(jiffies, warned + 60*HZ)) { |
39 | printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n", | 59 | printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", |
40 | current->comm, curbase, regs->rsp); | 60 | current->comm, curbase, regs->sp); |
41 | show_stack(NULL,NULL); | 61 | show_stack(NULL,NULL); |
42 | warned = jiffies; | 62 | warned = jiffies; |
43 | } | 63 | } |
@@ -142,7 +162,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
142 | struct pt_regs *old_regs = set_irq_regs(regs); | 162 | struct pt_regs *old_regs = set_irq_regs(regs); |
143 | 163 | ||
144 | /* high bit used in ret_from_ code */ | 164 | /* high bit used in ret_from_ code */ |
145 | unsigned vector = ~regs->orig_rax; | 165 | unsigned vector = ~regs->orig_ax; |
146 | unsigned irq; | 166 | unsigned irq; |
147 | 167 | ||
148 | exit_idle(); | 168 | exit_idle(); |
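The "~regs->orig_ax" read relies on the IRQ entry stubs storing the complement of the vector in orig_ax, so a negative value marks a hardware interrupt (the "high bit used in ret_from_ code") and complementing it again recovers the vector. A sketch of the decode, not part of the patch, with an illustrative helper name:

/* Recover the interrupt vector from the complemented value the IRQ
 * entry stub stored in orig_ax. */
static inline unsigned int vector_from_orig_ax(long orig_ax)
{
	return (unsigned int)~orig_ax;
}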
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c new file mode 100644 index 000000000000..73354302fda7 --- /dev/null +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * Architecture specific debugfs files | ||
3 | * | ||
4 | * Copyright (C) 2007, Intel Corp. | ||
5 | * Huang Ying <ying.huang@intel.com> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/debugfs.h> | ||
11 | #include <linux/stat.h> | ||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/setup.h> | ||
15 | |||
16 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | ||
17 | static struct debugfs_blob_wrapper boot_params_blob = { | ||
18 | .data = &boot_params, | ||
19 | .size = sizeof(boot_params), | ||
20 | }; | ||
21 | |||
22 | static int __init boot_params_kdebugfs_init(void) | ||
23 | { | ||
24 | int error; | ||
25 | struct dentry *dbp, *version, *data; | ||
26 | |||
27 | dbp = debugfs_create_dir("boot_params", NULL); | ||
28 | if (!dbp) { | ||
29 | error = -ENOMEM; | ||
30 | goto err_return; | ||
31 | } | ||
32 | version = debugfs_create_x16("version", S_IRUGO, dbp, | ||
33 | &boot_params.hdr.version); | ||
34 | if (!version) { | ||
35 | error = -ENOMEM; | ||
36 | goto err_dir; | ||
37 | } | ||
38 | data = debugfs_create_blob("data", S_IRUGO, dbp, | ||
39 | &boot_params_blob); | ||
40 | if (!data) { | ||
41 | error = -ENOMEM; | ||
42 | goto err_version; | ||
43 | } | ||
44 | return 0; | ||
45 | err_version: | ||
46 | debugfs_remove(version); | ||
47 | err_dir: | ||
48 | debugfs_remove(dbp); | ||
49 | err_return: | ||
50 | return error; | ||
51 | } | ||
52 | #endif | ||
53 | |||
54 | static int __init arch_kdebugfs_init(void) | ||
55 | { | ||
56 | int error = 0; | ||
57 | |||
58 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | ||
59 | error = boot_params_kdebugfs_init(); | ||
60 | #endif | ||
61 | |||
62 | return error; | ||
63 | } | ||
64 | |||
65 | arch_initcall(arch_kdebugfs_init); | ||
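With CONFIG_DEBUG_BOOT_PARAMS the raw boot_params structure becomes readable from userspace under debugfs (typically /sys/kernel/debug/boot_params/). A sketch of the same blob-export pattern applied to some other read-only structure; every name here is hypothetical, not part of the patch:

#include <linux/debugfs.h>
#include <linux/stat.h>
#include <linux/init.h>

struct example_state { int value; };		/* hypothetical data */
static struct example_state example_state;
static struct debugfs_blob_wrapper example_blob = {
	.data = &example_state,
	.size = sizeof(example_state),
};

static int __init example_debugfs_init(void)
{
	struct dentry *dir = debugfs_create_dir("example", NULL);

	if (!dir)
		return -ENOMEM;
	if (!debugfs_create_blob("state", S_IRUGO, dir, &example_blob)) {
		debugfs_remove(dir);
		return -ENOMEM;
	}
	return 0;
}
device_initcall(example_debugfs_init);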
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c new file mode 100644 index 000000000000..a99e764fd66a --- /dev/null +++ b/arch/x86/kernel/kprobes.c | |||
@@ -0,0 +1,1066 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * | ||
20 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
22 | * Probes initial implementation (includes contributions from | ||
22 | * Rusty Russell). | ||
23 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
24 | * interface to access function arguments. | ||
25 | * 2004-Oct Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | ||
26 | * <prasanna@in.ibm.com> adapted for x86_64 from i386. | ||
27 | * 2005-Mar Roland McGrath <roland@redhat.com> | ||
28 | * Fixed to handle %rip-relative addressing mode correctly. | ||
29 | * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston | ||
30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | ||
31 | * <prasanna@in.ibm.com> added function-return probes. | ||
32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | ||
33 | * Added function return probes functionality | ||
34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | ||
35 | * kprobe-booster and kretprobe-booster for i386. | ||
36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | ||
37 | * and kretprobe-booster for x86-64 | ||
38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | ||
39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | ||
40 | * unified x86 kprobes code. | ||
41 | */ | ||
42 | |||
43 | #include <linux/kprobes.h> | ||
44 | #include <linux/ptrace.h> | ||
45 | #include <linux/string.h> | ||
46 | #include <linux/slab.h> | ||
47 | #include <linux/hardirq.h> | ||
48 | #include <linux/preempt.h> | ||
49 | #include <linux/module.h> | ||
50 | #include <linux/kdebug.h> | ||
51 | |||
52 | #include <asm/cacheflush.h> | ||
53 | #include <asm/desc.h> | ||
54 | #include <asm/pgtable.h> | ||
55 | #include <asm/uaccess.h> | ||
56 | #include <asm/alternative.h> | ||
57 | |||
58 | void jprobe_return_end(void); | ||
59 | |||
60 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | ||
61 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | ||
62 | |||
63 | #ifdef CONFIG_X86_64 | ||
64 | #define stack_addr(regs) ((unsigned long *)regs->sp) | ||
65 | #else | ||
66 | /* | ||
67 | * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs | ||
68 | * don't save the ss and esp registers if the CPU is already in kernel | ||
69 | * mode when it traps. So for kprobes, regs->sp and regs->ss are not | ||
70 | * the [nonexistent] saved stack pointer and ss register, but rather | ||
71 | * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to | ||
72 | * point to the top of the pre-int3 stack. | ||
73 | */ | ||
74 | #define stack_addr(regs) ((unsigned long *)®s->sp) | ||
75 | #endif | ||
76 | |||
77 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | ||
78 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
79 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
80 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
81 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
82 | << (row % 32)) | ||
83 | /* | ||
84 | * Undefined/reserved opcodes, conditional jump, Opcode Extension | ||
85 | * Groups, and some special opcodes cannot be boosted. | ||
86 | */ | ||
87 | static const u32 twobyte_is_boostable[256 / 32] = { | ||
88 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
89 | /* ---------------------------------------------- */ | ||
90 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ | ||
91 | W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ | ||
92 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ | ||
93 | W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | ||
94 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
95 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
96 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */ | ||
97 | W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | ||
98 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */ | ||
99 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
100 | W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */ | ||
101 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */ | ||
102 | W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | ||
103 | W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */ | ||
104 | W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */ | ||
105 | W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */ | ||
106 | /* ----------------------------------------------- */ | ||
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
108 | }; | ||
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | ||
154 | |||
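The W() macro packs sixteen one-bit flags per table row, 32 opcodes per 32-bit word, so the tables are consulted with test_bit() using the opcode itself as the bit number. A small sketch of the lookup, not part of the patch; the wrapper name is illustrative:

/* Look up the per-opcode flag bit in one of the tables above. */
static inline int opcode_flagged(const u32 *table, u8 opcode)
{
	return test_bit(opcode, (unsigned long *)table);
}
/* e.g. opcode_flagged(twobyte_is_boostable, 0x90) is non-zero: setcc boosts. */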
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | ||
156 | {"__switch_to", }, /* This function switches only current task, but | ||
157 | doesn't switch kernel stack.*/ | ||
158 | {NULL, NULL} /* Terminator */ | ||
159 | }; | ||
160 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | ||
161 | |||
162 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | ||
163 | static void __kprobes set_jmp_op(void *from, void *to) | ||
164 | { | ||
165 | struct __arch_jmp_op { | ||
166 | char op; | ||
167 | s32 raddr; | ||
168 | } __attribute__((packed)) * jop; | ||
169 | jop = (struct __arch_jmp_op *)from; | ||
170 | jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); | ||
171 | jop->op = RELATIVEJUMP_INSTRUCTION; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Check for the REX prefix, which can only exist on X86_64; | ||
176 | * on X86_32 this always returns 0. | ||
177 | */ | ||
178 | static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) | ||
179 | { | ||
180 | #ifdef CONFIG_X86_64 | ||
181 | if ((*insn & 0xf0) == 0x40) | ||
182 | return 1; | ||
183 | #endif | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Returns non-zero if opcode is boostable. | ||
189 | * RIP relative instructions are adjusted at copying time in 64 bits mode | ||
190 | */ | ||
191 | static int __kprobes can_boost(kprobe_opcode_t *opcodes) | ||
192 | { | ||
193 | kprobe_opcode_t opcode; | ||
194 | kprobe_opcode_t *orig_opcodes = opcodes; | ||
195 | |||
196 | retry: | ||
197 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
198 | return 0; | ||
199 | opcode = *(opcodes++); | ||
200 | |||
201 | /* 2nd-byte opcode */ | ||
202 | if (opcode == 0x0f) { | ||
203 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
204 | return 0; | ||
205 | return test_bit(*opcodes, | ||
206 | (unsigned long *)twobyte_is_boostable); | ||
207 | } | ||
208 | |||
209 | switch (opcode & 0xf0) { | ||
210 | #ifdef CONFIG_X86_64 | ||
211 | case 0x40: | ||
212 | goto retry; /* REX prefix is boostable */ | ||
213 | #endif | ||
214 | case 0x60: | ||
215 | if (0x63 < opcode && opcode < 0x67) | ||
216 | goto retry; /* prefixes */ | ||
217 | /* can't boost Address-size override and bound */ | ||
218 | return (opcode != 0x62 && opcode != 0x67); | ||
219 | case 0x70: | ||
220 | return 0; /* can't boost conditional jump */ | ||
221 | case 0xc0: | ||
222 | /* can't boost software interrupts */ | ||
223 | return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; | ||
224 | case 0xd0: | ||
225 | /* can boost AA* and XLAT */ | ||
226 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); | ||
227 | case 0xe0: | ||
228 | /* can boost in/out and absolute jmps */ | ||
229 | return ((opcode & 0x04) || opcode == 0xea); | ||
230 | case 0xf0: | ||
231 | if ((opcode & 0x0c) == 0 && opcode != 0xf1) | ||
232 | goto retry; /* lock/rep(ne) prefix */ | ||
233 | /* clear and set flags are boostable */ | ||
234 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); | ||
235 | default: | ||
236 | /* segment override prefixes are boostable */ | ||
237 | if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) | ||
238 | goto retry; /* prefixes */ | ||
239 | /* CS override prefix and call are not boostable */ | ||
240 | return (opcode != 0x2e && opcode != 0x9a); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Returns non-zero if opcode modifies the interrupt flag. | ||
246 | */ | ||
247 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | ||
248 | { | ||
249 | switch (*insn) { | ||
250 | case 0xfa: /* cli */ | ||
251 | case 0xfb: /* sti */ | ||
252 | case 0xcf: /* iret/iretd */ | ||
253 | case 0x9d: /* popf/popfd */ | ||
254 | return 1; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * on X86_64, 0x40-0x4f are REX prefixes so we need to look | ||
259 | * at the next byte instead, but of course without recursing infinitely. | ||
260 | */ | ||
261 | if (is_REX_prefix(insn)) | ||
262 | return is_IF_modifier(++insn); | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * Adjust the displacement if the instruction uses the %rip-relative | ||
269 | * addressing mode. | ||
270 | * If it does, return the address of the 32-bit displacement word. | ||
271 | * If not, return null. | ||
272 | * Only applicable to 64-bit x86. | ||
273 | */ | ||
274 | static void __kprobes fix_riprel(struct kprobe *p) | ||
275 | { | ||
276 | #ifdef CONFIG_X86_64 | ||
277 | u8 *insn = p->ainsn.insn; | ||
278 | s64 disp; | ||
279 | int need_modrm; | ||
280 | |||
281 | /* Skip legacy instruction prefixes. */ | ||
282 | while (1) { | ||
283 | switch (*insn) { | ||
284 | case 0x66: | ||
285 | case 0x67: | ||
286 | case 0x2e: | ||
287 | case 0x3e: | ||
288 | case 0x26: | ||
289 | case 0x64: | ||
290 | case 0x65: | ||
291 | case 0x36: | ||
292 | case 0xf0: | ||
293 | case 0xf3: | ||
294 | case 0xf2: | ||
295 | ++insn; | ||
296 | continue; | ||
297 | } | ||
298 | break; | ||
299 | } | ||
300 | |||
301 | /* Skip REX instruction prefix. */ | ||
302 | if (is_REX_prefix(insn)) | ||
303 | ++insn; | ||
304 | |||
305 | if (*insn == 0x0f) { | ||
306 | /* Two-byte opcode. */ | ||
307 | ++insn; | ||
308 | need_modrm = test_bit(*insn, | ||
309 | (unsigned long *)twobyte_has_modrm); | ||
310 | } else | ||
311 | /* One-byte opcode. */ | ||
312 | need_modrm = test_bit(*insn, | ||
313 | (unsigned long *)onebyte_has_modrm); | ||
314 | |||
315 | if (need_modrm) { | ||
316 | u8 modrm = *++insn; | ||
317 | if ((modrm & 0xc7) == 0x05) { | ||
318 | /* %rip+disp32 addressing mode */ | ||
319 | /* Displacement follows ModRM byte. */ | ||
320 | ++insn; | ||
321 | /* | ||
322 | * The copied instruction uses the %rip-relative | ||
323 | * addressing mode. Adjust the displacement for the | ||
324 | * difference between the original location of this | ||
325 | * instruction and the location of the copy that will | ||
326 | * actually be run. The tricky bit here is making sure | ||
327 | * that the sign extension happens correctly in this | ||
328 | * calculation, since we need a signed 32-bit result to | ||
329 | * be sign-extended to 64 bits when it's added to the | ||
330 | * %rip value and yield the same 64-bit result that the | ||
331 | * sign-extension of the original signed 32-bit | ||
332 | * displacement would have given. | ||
333 | */ | ||
334 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
335 | (u8 *) p->ainsn.insn; | ||
336 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
337 | *(s32 *)insn = (s32) disp; | ||
338 | } | ||
339 | } | ||
340 | #endif | ||
341 | } | ||
342 | |||
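The displacement rewrite in fix_riprel() keeps a %rip-relative operand pointing at the same absolute target: since copy_addr + new_disp must equal orig_addr + old_disp, the new displacement is orig_addr + old_disp - copy_addr. A sketch of that arithmetic, not part of the patch, with illustrative names:

/* Re-target a %rip-relative displacement for an instruction copied from
 * 'orig_addr' to 'copy_addr' so it still references the same address. */
static inline s32 adjust_rip_disp(const u8 *orig_addr, const u8 *copy_addr,
				  s32 old_disp)
{
	s64 new_disp = (s64)(orig_addr - copy_addr) + old_disp;

	BUG_ON((s64)(s32)new_disp != new_disp);	/* must still fit in 32 bits */
	return (s32)new_disp;
}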
343 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | ||
344 | { | ||
345 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
346 | |||
347 | fix_riprel(p); | ||
348 | |||
349 | if (can_boost(p->addr)) | ||
350 | p->ainsn.boostable = 0; | ||
351 | else | ||
352 | p->ainsn.boostable = -1; | ||
353 | |||
354 | p->opcode = *p->addr; | ||
355 | } | ||
356 | |||
357 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | ||
358 | { | ||
359 | /* insn: must be on special executable page on x86. */ | ||
360 | p->ainsn.insn = get_insn_slot(); | ||
361 | if (!p->ainsn.insn) | ||
362 | return -ENOMEM; | ||
363 | arch_copy_kprobe(p); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | void __kprobes arch_arm_kprobe(struct kprobe *p) | ||
368 | { | ||
369 | text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); | ||
370 | } | ||
371 | |||
372 | void __kprobes arch_disarm_kprobe(struct kprobe *p) | ||
373 | { | ||
374 | text_poke(p->addr, &p->opcode, 1); | ||
375 | } | ||
376 | |||
377 | void __kprobes arch_remove_kprobe(struct kprobe *p) | ||
378 | { | ||
379 | mutex_lock(&kprobe_mutex); | ||
380 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); | ||
381 | mutex_unlock(&kprobe_mutex); | ||
382 | } | ||
383 | |||
384 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
385 | { | ||
386 | kcb->prev_kprobe.kp = kprobe_running(); | ||
387 | kcb->prev_kprobe.status = kcb->kprobe_status; | ||
388 | kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags; | ||
389 | kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; | ||
390 | } | ||
391 | |||
392 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
393 | { | ||
394 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | ||
395 | kcb->kprobe_status = kcb->prev_kprobe.status; | ||
396 | kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; | ||
397 | kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; | ||
398 | } | ||
399 | |||
400 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
401 | struct kprobe_ctlblk *kcb) | ||
402 | { | ||
403 | __get_cpu_var(current_kprobe) = p; | ||
404 | kcb->kprobe_saved_flags = kcb->kprobe_old_flags | ||
405 | = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); | ||
406 | if (is_IF_modifier(p->ainsn.insn)) | ||
407 | kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; | ||
408 | } | ||
409 | |||
410 | static void __kprobes clear_btf(void) | ||
411 | { | ||
412 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | ||
413 | wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | ||
414 | } | ||
415 | |||
416 | static void __kprobes restore_btf(void) | ||
417 | { | ||
418 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | ||
419 | wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr); | ||
420 | } | ||
421 | |||
422 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
423 | { | ||
424 | clear_btf(); | ||
425 | regs->flags |= X86_EFLAGS_TF; | ||
426 | regs->flags &= ~X86_EFLAGS_IF; | ||
427 | /* single step inline if the instruction is an int3 */ | ||
428 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
429 | regs->ip = (unsigned long)p->addr; | ||
430 | else | ||
431 | regs->ip = (unsigned long)p->ainsn.insn; | ||
432 | } | ||
433 | |||
434 | /* Called with kretprobe_lock held */ | ||
435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | ||
436 | struct pt_regs *regs) | ||
437 | { | ||
438 | unsigned long *sara = stack_addr(regs); | ||
439 | |||
440 | ri->ret_addr = (kprobe_opcode_t *) *sara; | ||
441 | |||
442 | /* Replace the return addr with trampoline addr */ | ||
443 | *sara = (unsigned long) &kretprobe_trampoline; | ||
444 | } | ||
445 | |||
446 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
447 | struct kprobe_ctlblk *kcb) | ||
448 | { | ||
449 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) | ||
450 | if (p->ainsn.boostable == 1 && !p->post_handler) { | ||
451 | /* Boost up -- we can execute copied instructions directly */ | ||
452 | reset_current_kprobe(); | ||
453 | regs->ip = (unsigned long)p->ainsn.insn; | ||
454 | preempt_enable_no_resched(); | ||
455 | return; | ||
456 | } | ||
457 | #endif | ||
458 | prepare_singlestep(p, regs); | ||
459 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | * We have reentered the kprobe_handler(), since another probe was hit while | ||
464 | * within the handler. We save the original kprobes variables and just single | ||
465 | * step on the instruction of the new probe without calling any user handlers. | ||
466 | */ | ||
467 | static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
468 | struct kprobe_ctlblk *kcb) | ||
469 | { | ||
470 | switch (kcb->kprobe_status) { | ||
471 | case KPROBE_HIT_SSDONE: | ||
472 | #ifdef CONFIG_X86_64 | ||
473 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
474 | * avoid exception stack corruption while single-stepping on | ||
475 | * the instruction of the new probe. | ||
476 | */ | ||
477 | arch_disarm_kprobe(p); | ||
478 | regs->ip = (unsigned long)p->addr; | ||
479 | reset_current_kprobe(); | ||
480 | preempt_enable_no_resched(); | ||
481 | break; | ||
482 | #endif | ||
483 | case KPROBE_HIT_ACTIVE: | ||
484 | save_previous_kprobe(kcb); | ||
485 | set_current_kprobe(p, regs, kcb); | ||
486 | kprobes_inc_nmissed_count(p); | ||
487 | prepare_singlestep(p, regs); | ||
488 | kcb->kprobe_status = KPROBE_REENTER; | ||
489 | break; | ||
490 | case KPROBE_HIT_SS: | ||
491 | if (p == kprobe_running()) { | ||
492 | regs->flags &= ~TF_MASK; | ||
493 | regs->flags |= kcb->kprobe_saved_flags; | ||
494 | return 0; | ||
495 | } else { | ||
496 | /* A probe has been hit in the codepath leading up | ||
497 | * to, or just after, single-stepping of a probed | ||
498 | * instruction. This entire codepath should strictly | ||
499 | * reside in .kprobes.text section. Raise a warning | ||
500 | * to highlight this peculiar case. | ||
501 | */ | ||
502 | } | ||
503 | default: | ||
504 | /* impossible cases */ | ||
505 | WARN_ON(1); | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | return 1; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | ||
514 | * remain disabled throughout this function. | ||
515 | */ | ||
516 | static int __kprobes kprobe_handler(struct pt_regs *regs) | ||
517 | { | ||
518 | kprobe_opcode_t *addr; | ||
519 | struct kprobe *p; | ||
520 | struct kprobe_ctlblk *kcb; | ||
521 | |||
522 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | ||
523 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
524 | /* | ||
525 | * The breakpoint instruction was removed right | ||
526 | * after we hit it. Another cpu has removed | ||
527 | * either a probepoint or a debugger breakpoint | ||
528 | * at this address. In either case, no further | ||
529 | * handling of this interrupt is appropriate. | ||
530 | * Back up over the (now missing) int3 and run | ||
531 | * the original instruction. | ||
532 | */ | ||
533 | regs->ip = (unsigned long)addr; | ||
534 | return 1; | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * We don't want to be preempted for the entire | ||
539 | * duration of kprobe processing. We conditionally | ||
540 | * re-enable preemption at the end of this function, | ||
541 | * and also in reenter_kprobe() and setup_singlestep(). | ||
542 | */ | ||
543 | preempt_disable(); | ||
544 | |||
545 | kcb = get_kprobe_ctlblk(); | ||
546 | p = get_kprobe(addr); | ||
547 | |||
548 | if (p) { | ||
549 | if (kprobe_running()) { | ||
550 | if (reenter_kprobe(p, regs, kcb)) | ||
551 | return 1; | ||
552 | } else { | ||
553 | set_current_kprobe(p, regs, kcb); | ||
554 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
555 | |||
556 | /* | ||
557 | * If we have no pre-handler or it returned 0, we | ||
558 | * continue with normal processing. If we have a | ||
559 | * pre-handler and it returned non-zero, it prepped | ||
560 | * for calling the break_handler below on re-entry | ||
561 | * for jprobe processing, so get out doing nothing | ||
562 | * more here. | ||
563 | */ | ||
564 | if (!p->pre_handler || !p->pre_handler(p, regs)) | ||
565 | setup_singlestep(p, regs, kcb); | ||
566 | return 1; | ||
567 | } | ||
568 | } else if (kprobe_running()) { | ||
569 | p = __get_cpu_var(current_kprobe); | ||
570 | if (p->break_handler && p->break_handler(p, regs)) { | ||
571 | setup_singlestep(p, regs, kcb); | ||
572 | return 1; | ||
573 | } | ||
574 | } /* else: not a kprobe fault; let the kernel handle it */ | ||
575 | |||
576 | preempt_enable_no_resched(); | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | /* | ||
581 | * When a retprobed function returns, this code saves registers and | ||
582 | * calls trampoline_handler(), which in turn calls the kretprobe's handler. | ||
583 | */ | ||
584 | void __kprobes kretprobe_trampoline_holder(void) | ||
585 | { | ||
586 | asm volatile ( | ||
587 | ".global kretprobe_trampoline\n" | ||
588 | "kretprobe_trampoline: \n" | ||
589 | #ifdef CONFIG_X86_64 | ||
590 | /* We don't bother saving the ss register */ | ||
591 | " pushq %rsp\n" | ||
592 | " pushfq\n" | ||
593 | /* | ||
594 | * Skip cs, ip, orig_ax. | ||
595 | * trampoline_handler() will plug in these values | ||
596 | */ | ||
597 | " subq $24, %rsp\n" | ||
598 | " pushq %rdi\n" | ||
599 | " pushq %rsi\n" | ||
600 | " pushq %rdx\n" | ||
601 | " pushq %rcx\n" | ||
602 | " pushq %rax\n" | ||
603 | " pushq %r8\n" | ||
604 | " pushq %r9\n" | ||
605 | " pushq %r10\n" | ||
606 | " pushq %r11\n" | ||
607 | " pushq %rbx\n" | ||
608 | " pushq %rbp\n" | ||
609 | " pushq %r12\n" | ||
610 | " pushq %r13\n" | ||
611 | " pushq %r14\n" | ||
612 | " pushq %r15\n" | ||
613 | " movq %rsp, %rdi\n" | ||
614 | " call trampoline_handler\n" | ||
615 | /* Replace saved sp with true return address. */ | ||
616 | " movq %rax, 152(%rsp)\n" | ||
617 | " popq %r15\n" | ||
618 | " popq %r14\n" | ||
619 | " popq %r13\n" | ||
620 | " popq %r12\n" | ||
621 | " popq %rbp\n" | ||
622 | " popq %rbx\n" | ||
623 | " popq %r11\n" | ||
624 | " popq %r10\n" | ||
625 | " popq %r9\n" | ||
626 | " popq %r8\n" | ||
627 | " popq %rax\n" | ||
628 | " popq %rcx\n" | ||
629 | " popq %rdx\n" | ||
630 | " popq %rsi\n" | ||
631 | " popq %rdi\n" | ||
632 | /* Skip orig_ax, ip, cs */ | ||
633 | " addq $24, %rsp\n" | ||
634 | " popfq\n" | ||
635 | #else | ||
636 | " pushf\n" | ||
637 | /* | ||
638 | * Skip cs, ip, orig_ax. | ||
639 | * trampoline_handler() will plug in these values | ||
640 | */ | ||
641 | " subl $12, %esp\n" | ||
642 | " pushl %fs\n" | ||
643 | " pushl %ds\n" | ||
644 | " pushl %es\n" | ||
645 | " pushl %eax\n" | ||
646 | " pushl %ebp\n" | ||
647 | " pushl %edi\n" | ||
648 | " pushl %esi\n" | ||
649 | " pushl %edx\n" | ||
650 | " pushl %ecx\n" | ||
651 | " pushl %ebx\n" | ||
652 | " movl %esp, %eax\n" | ||
653 | " call trampoline_handler\n" | ||
654 | /* Move flags to cs */ | ||
655 | " movl 52(%esp), %edx\n" | ||
656 | " movl %edx, 48(%esp)\n" | ||
657 | /* Replace saved flags with true return address. */ | ||
658 | " movl %eax, 52(%esp)\n" | ||
659 | " popl %ebx\n" | ||
660 | " popl %ecx\n" | ||
661 | " popl %edx\n" | ||
662 | " popl %esi\n" | ||
663 | " popl %edi\n" | ||
664 | " popl %ebp\n" | ||
665 | " popl %eax\n" | ||
666 | /* Skip ip, orig_ax, es, ds, fs */ | ||
667 | " addl $20, %esp\n" | ||
668 | " popf\n" | ||
669 | #endif | ||
670 | " ret\n"); | ||
671 | } | ||
672 | |||
673 | /* | ||
674 | * Called from kretprobe_trampoline | ||
675 | */ | ||
676 | void * __kprobes trampoline_handler(struct pt_regs *regs) | ||
677 | { | ||
678 | struct kretprobe_instance *ri = NULL; | ||
679 | struct hlist_head *head, empty_rp; | ||
680 | struct hlist_node *node, *tmp; | ||
681 | unsigned long flags, orig_ret_address = 0; | ||
682 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | ||
683 | |||
684 | INIT_HLIST_HEAD(&empty_rp); | ||
685 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
686 | head = kretprobe_inst_table_head(current); | ||
687 | /* fixup registers */ | ||
688 | #ifdef CONFIG_X86_64 | ||
689 | regs->cs = __KERNEL_CS; | ||
690 | #else | ||
691 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
692 | #endif | ||
693 | regs->ip = trampoline_address; | ||
694 | regs->orig_ax = ~0UL; | ||
695 | |||
696 | /* | ||
697 | * It is possible to have multiple instances associated with a given | ||
698 | * task either because multiple functions in the call path have | ||
699 | * return probes installed on them, and/or more than one | ||
700 | * return probe was registered for a target function. | ||
701 | * | ||
702 | * We can handle this because: | ||
703 | * - instances are always pushed into the head of the list | ||
704 | * - when multiple return probes are registered for the same | ||
705 | * function, the (chronologically) first instance's ret_addr | ||
706 | * will be the real return address, and all the rest will | ||
707 | * point to kretprobe_trampoline. | ||
708 | */ | ||
709 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
710 | if (ri->task != current) | ||
711 | /* another task is sharing our hash bucket */ | ||
712 | continue; | ||
713 | |||
714 | if (ri->rp && ri->rp->handler) { | ||
715 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | ||
716 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | ||
717 | ri->rp->handler(ri, regs); | ||
718 | __get_cpu_var(current_kprobe) = NULL; | ||
719 | } | ||
720 | |||
721 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
722 | recycle_rp_inst(ri, &empty_rp); | ||
723 | |||
724 | if (orig_ret_address != trampoline_address) | ||
725 | /* | ||
726 | * This is the real return address. Any other | ||
727 | * instances associated with this task are for | ||
728 | * other calls deeper on the call stack | ||
729 | */ | ||
730 | break; | ||
731 | } | ||
732 | |||
733 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
734 | |||
735 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
736 | |||
737 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | ||
738 | hlist_del(&ri->hlist); | ||
739 | kfree(ri); | ||
740 | } | ||
741 | return (void *)orig_ret_address; | ||
742 | } | ||
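A hedged sketch of the kretprobe side that ends up in trampoline_handler() above: register_kretprobe() makes arch_prepare_kretprobe() swap the callee's return address for kretprobe_trampoline, and ri->rp->handler invoked above is the .handler below. The probed symbol and the use of regs->ax as the return value are illustrative assumptions.

	/* Hypothetical example; "do_fork" stands in for any probed function. */
	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		/* On x86 the return value is in regs->ax by the time we run. */
		printk(KERN_INFO "%s returned %lx\n",
		       ri->rp->kp.symbol_name, regs->ax);
		return 0;
	}

	static struct kretprobe my_kretprobe = {
		.handler	= ret_handler,
		.maxactive	= 20,	/* upper bound on concurrent instances */
		.kp.symbol_name	= "do_fork",
	};

	/* registered/unregistered with register_kretprobe()/unregister_kretprobe() */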
743 | |||
744 | /* | ||
745 | * Called after single-stepping. p->addr is the address of the | ||
746 | * instruction whose first byte has been replaced by the "int 3" | ||
747 | * instruction. To avoid the SMP problems that can occur when we | ||
748 | * temporarily put back the original opcode to single-step, we | ||
749 | * single-stepped a copy of the instruction. The address of this | ||
750 | * copy is p->ainsn.insn. | ||
751 | * | ||
752 | * This function prepares to return from the post-single-step | ||
753 | * interrupt. We have to fix up the stack as follows: | ||
754 | * | ||
755 | * 0) Except in the case of absolute or indirect jump or call instructions, | ||
756 | * the new ip is relative to the copied instruction. We need to make | ||
757 | * it relative to the original instruction. | ||
758 | * | ||
759 | * 1) If the single-stepped instruction was pushfl, then the TF and IF | ||
760 | * flags are set in the just-pushed flags, and may need to be cleared. | ||
761 | * | ||
762 | * 2) If the single-stepped instruction was a call, the return address | ||
763 | * that is atop the stack is the address following the copied instruction. | ||
764 | * We need to make it the address following the original instruction. | ||
765 | * | ||
766 | * If this is the first time we've single-stepped the instruction at | ||
767 | * this probepoint, and the instruction is boostable, boost it: add a | ||
768 | * jump instruction after the copied instruction, that jumps to the next | ||
769 | * instruction after the probepoint. | ||
770 | */ | ||
771 | static void __kprobes resume_execution(struct kprobe *p, | ||
772 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | ||
773 | { | ||
774 | unsigned long *tos = stack_addr(regs); | ||
775 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | ||
776 | unsigned long orig_ip = (unsigned long)p->addr; | ||
777 | kprobe_opcode_t *insn = p->ainsn.insn; | ||
778 | |||
779 | /* Skip the REX prefix */ | ||
780 | if (is_REX_prefix(insn)) | ||
781 | insn++; | ||
782 | |||
783 | regs->flags &= ~X86_EFLAGS_TF; | ||
784 | switch (*insn) { | ||
785 | case 0x9c: /* pushfl */ | ||
786 | *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); | ||
787 | *tos |= kcb->kprobe_old_flags; | ||
788 | break; | ||
789 | case 0xc2: /* iret/ret/lret */ | ||
790 | case 0xc3: | ||
791 | case 0xca: | ||
792 | case 0xcb: | ||
793 | case 0xcf: | ||
794 | case 0xea: /* jmp absolute -- ip is correct */ | ||
795 | /* ip is already adjusted, no more changes required */ | ||
796 | p->ainsn.boostable = 1; | ||
797 | goto no_change; | ||
798 | case 0xe8: /* call relative - Fix return addr */ | ||
799 | *tos = orig_ip + (*tos - copy_ip); | ||
800 | break; | ||
801 | #ifdef CONFIG_X86_32 | ||
802 | case 0x9a: /* call absolute -- fix return addr like the indirect case */ | ||
803 | *tos = orig_ip + (*tos - copy_ip); | ||
804 | goto no_change; | ||
805 | #endif | ||
806 | case 0xff: | ||
807 | if ((insn[1] & 0x30) == 0x10) { | ||
808 | /* | ||
809 | * call absolute, indirect | ||
810 | * Fix return addr; ip is correct. | ||
811 | * But this is not boostable | ||
812 | */ | ||
813 | *tos = orig_ip + (*tos - copy_ip); | ||
814 | goto no_change; | ||
815 | } else if (((insn[1] & 0x31) == 0x20) || | ||
816 | ((insn[1] & 0x31) == 0x21)) { | ||
817 | /* | ||
818 | * jmp near and far, absolute indirect | ||
819 | * ip is correct. And this is boostable | ||
820 | */ | ||
821 | p->ainsn.boostable = 1; | ||
822 | goto no_change; | ||
823 | } | ||
824 | default: | ||
825 | break; | ||
826 | } | ||
827 | |||
828 | if (p->ainsn.boostable == 0) { | ||
829 | if ((regs->ip > copy_ip) && | ||
830 | (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) { | ||
831 | /* | ||
832 | * These instructions can be executed directly if it | ||
833 | * jumps back to correct address. | ||
834 | */ | ||
835 | set_jmp_op((void *)regs->ip, | ||
836 | (void *)orig_ip + (regs->ip - copy_ip)); | ||
837 | p->ainsn.boostable = 1; | ||
838 | } else { | ||
839 | p->ainsn.boostable = -1; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | regs->ip += orig_ip - copy_ip; | ||
844 | |||
845 | no_change: | ||
846 | restore_btf(); | ||
847 | } | ||
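A worked example of the fixup arithmetic above, with invented addresses:

	/*
	 * Suppose the probed "call rel32" sits at orig_ip = 0xc0100000 and its
	 * copy in the insn slot at copy_ip = 0xc0200000.  After single-stepping
	 * the copy, the pushed return address is copy_ip + 5 and regs->ip points
	 * just past the copy.  The fixups
	 *
	 *	*tos     = orig_ip + (*tos - copy_ip);		... -> 0xc0100005
	 *	regs->ip += orig_ip - copy_ip;
	 *
	 * rebase both the saved return address and the resume point onto the
	 * original instruction stream.
	 */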
848 | |||
849 | /* | ||
850 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | ||
851 | * remain disabled throughout this function. | ||
852 | */ | ||
853 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) | ||
854 | { | ||
855 | struct kprobe *cur = kprobe_running(); | ||
856 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
857 | |||
858 | if (!cur) | ||
859 | return 0; | ||
860 | |||
861 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
862 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
863 | cur->post_handler(cur, regs, 0); | ||
864 | } | ||
865 | |||
866 | resume_execution(cur, regs, kcb); | ||
867 | regs->flags |= kcb->kprobe_saved_flags; | ||
868 | trace_hardirqs_fixup_flags(regs->flags); | ||
869 | |||
870 | /* Restore the original saved kprobes variables and continue. */ | ||
871 | if (kcb->kprobe_status == KPROBE_REENTER) { | ||
872 | restore_previous_kprobe(kcb); | ||
873 | goto out; | ||
874 | } | ||
875 | reset_current_kprobe(); | ||
876 | out: | ||
877 | preempt_enable_no_resched(); | ||
878 | |||
879 | /* | ||
880 | * if somebody else is singlestepping across a probe point, flags | ||
881 | * will have TF set, in which case, continue the remaining processing | ||
882 | * of do_debug, as if this is not a probe hit. | ||
883 | */ | ||
884 | if (regs->flags & X86_EFLAGS_TF) | ||
885 | return 0; | ||
886 | |||
887 | return 1; | ||
888 | } | ||
889 | |||
890 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
891 | { | ||
892 | struct kprobe *cur = kprobe_running(); | ||
893 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
894 | |||
895 | switch (kcb->kprobe_status) { | ||
896 | case KPROBE_HIT_SS: | ||
897 | case KPROBE_REENTER: | ||
898 | /* | ||
899 | * We are here because the instruction being single | ||
900 | * stepped caused a page fault. We reset the current | ||
901 | * kprobe and the ip points back to the probe address | ||
902 | * and allow the page fault handler to continue as a | ||
903 | * normal page fault. | ||
904 | */ | ||
905 | regs->ip = (unsigned long)cur->addr; | ||
906 | regs->flags |= kcb->kprobe_old_flags; | ||
907 | if (kcb->kprobe_status == KPROBE_REENTER) | ||
908 | restore_previous_kprobe(kcb); | ||
909 | else | ||
910 | reset_current_kprobe(); | ||
911 | preempt_enable_no_resched(); | ||
912 | break; | ||
913 | case KPROBE_HIT_ACTIVE: | ||
914 | case KPROBE_HIT_SSDONE: | ||
915 | /* | ||
916 | * We increment the nmissed count for accounting, | ||
917 | * we can also use npre/npostfault count for accounting | ||
918 | * these specific fault cases. | ||
919 | */ | ||
920 | kprobes_inc_nmissed_count(cur); | ||
921 | |||
922 | /* | ||
923 | * We come here because instructions in the pre/post | ||
924 | * handler caused the page_fault, this could happen | ||
925 | * if handler tries to access user space by | ||
926 | * copy_from_user(), get_user() etc. Let the | ||
927 | * user-specified handler try to fix it first. | ||
928 | */ | ||
929 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
930 | return 1; | ||
931 | |||
932 | /* | ||
933 | * In case the user-specified fault handler returned | ||
934 | * zero, try to fix up. | ||
935 | */ | ||
936 | if (fixup_exception(regs)) | ||
937 | return 1; | ||
938 | |||
939 | /* | ||
940 | * The fixup routine could not handle it; | ||
941 | * let do_page_fault() fix it. | ||
942 | */ | ||
943 | break; | ||
944 | default: | ||
945 | break; | ||
946 | } | ||
947 | return 0; | ||
948 | } | ||
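The per-probe hook consulted above has the following shape; returning non-zero tells kprobe_fault_handler() the fault has already been dealt with. A hedged sketch, not part of this patch:

	/* Hypothetical handler attached through kp.fault_handler. */
	static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
	{
		printk(KERN_INFO "fault %d while handling probe at %p\n",
		       trapnr, p->addr);
		return 0;	/* 0: fall through to fixup_exception()/do_page_fault() */
	}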
949 | |||
950 | /* | ||
951 | * Wrapper routine for handling exceptions. | ||
952 | */ | ||
953 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | ||
954 | unsigned long val, void *data) | ||
955 | { | ||
956 | struct die_args *args = data; | ||
957 | int ret = NOTIFY_DONE; | ||
958 | |||
959 | if (args->regs && user_mode_vm(args->regs)) | ||
960 | return ret; | ||
961 | |||
962 | switch (val) { | ||
963 | case DIE_INT3: | ||
964 | if (kprobe_handler(args->regs)) | ||
965 | ret = NOTIFY_STOP; | ||
966 | break; | ||
967 | case DIE_DEBUG: | ||
968 | if (post_kprobe_handler(args->regs)) | ||
969 | ret = NOTIFY_STOP; | ||
970 | break; | ||
971 | case DIE_GPF: | ||
972 | /* | ||
973 | * To be potentially processing a kprobe fault and to | ||
974 | * trust the result from kprobe_running(), we have to | ||
975 | * be non-preemptible. | ||
976 | */ | ||
977 | if (!preemptible() && kprobe_running() && | ||
978 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
979 | ret = NOTIFY_STOP; | ||
980 | break; | ||
981 | default: | ||
982 | break; | ||
983 | } | ||
984 | return ret; | ||
985 | } | ||
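For reference, kprobe_exceptions_notify() is not called directly by the trap handlers; the generic kprobes layer hooks it onto the die-notifier chain, roughly as sketched below. The exact priority value is an assumption about kernel/kprobes.c, not something defined in this file.

	static struct notifier_block kprobe_exceptions_nb = {
		.notifier_call	= kprobe_exceptions_notify,
		.priority	= 0x7fffffff,	/* run ahead of other die-chain users */
	};

	/* ... and during init:  register_die_notifier(&kprobe_exceptions_nb); */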
986 | |||
987 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
988 | { | ||
989 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
990 | unsigned long addr; | ||
991 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
992 | |||
993 | kcb->jprobe_saved_regs = *regs; | ||
994 | kcb->jprobe_saved_sp = stack_addr(regs); | ||
995 | addr = (unsigned long)(kcb->jprobe_saved_sp); | ||
996 | |||
997 | /* | ||
998 | * As Linus pointed out, gcc assumes that the callee | ||
999 | * owns the argument space and could overwrite it, e.g. | ||
1000 | * tailcall optimization. So, to be absolutely safe | ||
1001 | * we also save and restore enough stack bytes to cover | ||
1002 | * the argument area. | ||
1003 | */ | ||
1004 | memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, | ||
1005 | MIN_STACK_SIZE(addr)); | ||
1006 | regs->flags &= ~X86_EFLAGS_IF; | ||
1007 | trace_hardirqs_off(); | ||
1008 | regs->ip = (unsigned long)(jp->entry); | ||
1009 | return 1; | ||
1010 | } | ||
1011 | |||
1012 | void __kprobes jprobe_return(void) | ||
1013 | { | ||
1014 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1015 | |||
1016 | asm volatile ( | ||
1017 | #ifdef CONFIG_X86_64 | ||
1018 | " xchg %%rbx,%%rsp \n" | ||
1019 | #else | ||
1020 | " xchgl %%ebx,%%esp \n" | ||
1021 | #endif | ||
1022 | " int3 \n" | ||
1023 | " .globl jprobe_return_end\n" | ||
1024 | " jprobe_return_end: \n" | ||
1025 | " nop \n"::"b" | ||
1026 | (kcb->jprobe_saved_sp):"memory"); | ||
1027 | } | ||
1028 | |||
1029 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
1030 | { | ||
1031 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1032 | u8 *addr = (u8 *) (regs->ip - 1); | ||
1033 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
1034 | |||
1035 | if ((addr > (u8 *) jprobe_return) && | ||
1036 | (addr < (u8 *) jprobe_return_end)) { | ||
1037 | if (stack_addr(regs) != kcb->jprobe_saved_sp) { | ||
1038 | struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; | ||
1039 | printk(KERN_ERR | ||
1040 | "current sp %p does not match saved sp %p\n", | ||
1041 | stack_addr(regs), kcb->jprobe_saved_sp); | ||
1042 | printk(KERN_ERR "Saved registers for jprobe %p\n", jp); | ||
1043 | show_registers(saved_regs); | ||
1044 | printk(KERN_ERR "Current registers\n"); | ||
1045 | show_registers(regs); | ||
1046 | BUG(); | ||
1047 | } | ||
1048 | *regs = kcb->jprobe_saved_regs; | ||
1049 | memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), | ||
1050 | kcb->jprobes_stack, | ||
1051 | MIN_STACK_SIZE(kcb->jprobe_saved_sp)); | ||
1052 | preempt_enable_no_resched(); | ||
1053 | return 1; | ||
1054 | } | ||
1055 | return 0; | ||
1056 | } | ||
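A hedged sketch of the jprobe usage these three functions implement: setjmp_pre_handler() redirects execution to the .entry routine with a saved copy of the stack, the entry must finish with jprobe_return(), and longjmp_break_handler() then restores the original frame. The probed symbol and its argument list are illustrative assumptions.

	/* Entry mirrors the probed function's signature; this one is hypothetical. */
	static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
			     struct pt_regs *regs, unsigned long stack_size,
			     int __user *parent_tidptr, int __user *child_tidptr)
	{
		printk(KERN_INFO "jprobe: clone_flags=%lx\n", clone_flags);
		jprobe_return();	/* never returns; longjmp_break_handler() takes over */
		return 0;
	}

	static struct jprobe my_jprobe = {
		.entry		= JPROBE_ENTRY(jdo_fork),
		.kp.symbol_name	= "do_fork",
	};

	/* registered/unregistered with register_jprobe()/unregister_jprobe() */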
1057 | |||
1058 | int __init arch_init_kprobes(void) | ||
1059 | { | ||
1060 | return 0; | ||
1061 | } | ||
1062 | |||
1063 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | ||
1064 | { | ||
1065 | return 0; | ||
1066 | } | ||
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c deleted file mode 100644 index 3a020f79f82b..000000000000 --- a/arch/x86/kernel/kprobes_32.c +++ /dev/null | |||
@@ -1,756 +0,0 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * | ||
20 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
21 | * Probes initial implementation ( includes contributions from | ||
22 | * Rusty Russell). | ||
23 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
24 | * interface to access function arguments. | ||
25 | * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston | ||
26 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | ||
27 | * <prasanna@in.ibm.com> added function-return probes. | ||
28 | */ | ||
29 | |||
30 | #include <linux/kprobes.h> | ||
31 | #include <linux/ptrace.h> | ||
32 | #include <linux/preempt.h> | ||
33 | #include <linux/kdebug.h> | ||
34 | #include <asm/cacheflush.h> | ||
35 | #include <asm/desc.h> | ||
36 | #include <asm/uaccess.h> | ||
37 | #include <asm/alternative.h> | ||
38 | |||
39 | void jprobe_return_end(void); | ||
40 | |||
41 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | ||
42 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | ||
43 | |||
44 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | ||
45 | {"__switch_to", }, /* This function switches only current task, but | ||
46 | doesn't switch kernel stack.*/ | ||
47 | {NULL, NULL} /* Terminator */ | ||
48 | }; | ||
49 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | ||
50 | |||
51 | /* insert a jmp code */ | ||
52 | static __always_inline void set_jmp_op(void *from, void *to) | ||
53 | { | ||
54 | struct __arch_jmp_op { | ||
55 | char op; | ||
56 | long raddr; | ||
57 | } __attribute__((packed)) *jop; | ||
58 | jop = (struct __arch_jmp_op *)from; | ||
59 | jop->raddr = (long)(to) - ((long)(from) + 5); | ||
60 | jop->op = RELATIVEJUMP_INSTRUCTION; | ||
61 | } | ||
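A quick worked example of the displacement computed above (addresses invented):

	/*
	 * from = 0xc0101000, to = 0xc0102000.  The boosted jump is the 5-byte
	 * "e9 rel32", where rel32 is counted from the end of the instruction:
	 *
	 *	jop->raddr = to - (from + 5) = 0xffb;
	 *
	 * so execution lands exactly at 'to' after running the copied insn.
	 */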
62 | |||
63 | /* | ||
64 | * returns non-zero if opcodes can be boosted. | ||
65 | */ | ||
66 | static __always_inline int can_boost(kprobe_opcode_t *opcodes) | ||
67 | { | ||
68 | #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ | ||
69 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
70 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
71 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
72 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
73 | << (row % 32)) | ||
74 | /* | ||
75 | * Undefined/reserved opcodes, conditional jump, Opcode Extension | ||
76 | * Groups, and some special opcodes cannot be boosted. | ||
77 | */ | ||
78 | static const unsigned long twobyte_is_boostable[256 / 32] = { | ||
79 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
80 | /* ------------------------------- */ | ||
81 | W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */ | ||
82 | W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ | ||
83 | W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ | ||
84 | W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ | ||
85 | W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ | ||
86 | W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ | ||
87 | W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */ | ||
88 | W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ | ||
89 | W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */ | ||
90 | W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ | ||
91 | W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */ | ||
92 | W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */ | ||
93 | W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */ | ||
94 | W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */ | ||
95 | W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */ | ||
96 | W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */ | ||
97 | /* ------------------------------- */ | ||
98 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
99 | }; | ||
100 | #undef W | ||
101 | kprobe_opcode_t opcode; | ||
102 | kprobe_opcode_t *orig_opcodes = opcodes; | ||
103 | retry: | ||
104 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
105 | return 0; | ||
106 | opcode = *(opcodes++); | ||
107 | |||
108 | /* 2nd-byte opcode */ | ||
109 | if (opcode == 0x0f) { | ||
110 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
111 | return 0; | ||
112 | return test_bit(*opcodes, twobyte_is_boostable); | ||
113 | } | ||
114 | |||
115 | switch (opcode & 0xf0) { | ||
116 | case 0x60: | ||
117 | if (0x63 < opcode && opcode < 0x67) | ||
118 | goto retry; /* prefixes */ | ||
119 | /* can't boost Address-size override and bound */ | ||
120 | return (opcode != 0x62 && opcode != 0x67); | ||
121 | case 0x70: | ||
122 | return 0; /* can't boost conditional jump */ | ||
123 | case 0xc0: | ||
124 | /* can't boost software-interruptions */ | ||
125 | return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; | ||
126 | case 0xd0: | ||
127 | /* can boost AA* and XLAT */ | ||
128 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); | ||
129 | case 0xe0: | ||
130 | /* can boost in/out and absolute jmps */ | ||
131 | return ((opcode & 0x04) || opcode == 0xea); | ||
132 | case 0xf0: | ||
133 | if ((opcode & 0x0c) == 0 && opcode != 0xf1) | ||
134 | goto retry; /* lock/rep(ne) prefix */ | ||
135 | /* clear and set flags can be boosted */ | ||
136 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); | ||
137 | default: | ||
138 | if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) | ||
139 | goto retry; /* prefixes */ | ||
140 | /* can't boost CS override and call */ | ||
141 | return (opcode != 0x2e && opcode != 0x9a); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * returns non-zero if opcode modifies the interrupt flag. | ||
147 | */ | ||
148 | static int __kprobes is_IF_modifier(kprobe_opcode_t opcode) | ||
149 | { | ||
150 | switch (opcode) { | ||
151 | case 0xfa: /* cli */ | ||
152 | case 0xfb: /* sti */ | ||
153 | case 0xcf: /* iret/iretd */ | ||
154 | case 0x9d: /* popf/popfd */ | ||
155 | return 1; | ||
156 | } | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | ||
161 | { | ||
162 | /* insn: must be on special executable page on i386. */ | ||
163 | p->ainsn.insn = get_insn_slot(); | ||
164 | if (!p->ainsn.insn) | ||
165 | return -ENOMEM; | ||
166 | |||
167 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
168 | p->opcode = *p->addr; | ||
169 | if (can_boost(p->addr)) { | ||
170 | p->ainsn.boostable = 0; | ||
171 | } else { | ||
172 | p->ainsn.boostable = -1; | ||
173 | } | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | void __kprobes arch_arm_kprobe(struct kprobe *p) | ||
178 | { | ||
179 | text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); | ||
180 | } | ||
181 | |||
182 | void __kprobes arch_disarm_kprobe(struct kprobe *p) | ||
183 | { | ||
184 | text_poke(p->addr, &p->opcode, 1); | ||
185 | } | ||
186 | |||
187 | void __kprobes arch_remove_kprobe(struct kprobe *p) | ||
188 | { | ||
189 | mutex_lock(&kprobe_mutex); | ||
190 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); | ||
191 | mutex_unlock(&kprobe_mutex); | ||
192 | } | ||
193 | |||
194 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
195 | { | ||
196 | kcb->prev_kprobe.kp = kprobe_running(); | ||
197 | kcb->prev_kprobe.status = kcb->kprobe_status; | ||
198 | kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; | ||
199 | kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; | ||
200 | } | ||
201 | |||
202 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
203 | { | ||
204 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | ||
205 | kcb->kprobe_status = kcb->prev_kprobe.status; | ||
206 | kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; | ||
207 | kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; | ||
208 | } | ||
209 | |||
210 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
211 | struct kprobe_ctlblk *kcb) | ||
212 | { | ||
213 | __get_cpu_var(current_kprobe) = p; | ||
214 | kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags | ||
215 | = (regs->eflags & (TF_MASK | IF_MASK)); | ||
216 | if (is_IF_modifier(p->opcode)) | ||
217 | kcb->kprobe_saved_eflags &= ~IF_MASK; | ||
218 | } | ||
219 | |||
220 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
221 | { | ||
222 | regs->eflags |= TF_MASK; | ||
223 | regs->eflags &= ~IF_MASK; | ||
224 | /*single step inline if the instruction is an int3*/ | ||
225 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
226 | regs->eip = (unsigned long)p->addr; | ||
227 | else | ||
228 | regs->eip = (unsigned long)p->ainsn.insn; | ||
229 | } | ||
230 | |||
231 | /* Called with kretprobe_lock held */ | ||
232 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | ||
233 | struct pt_regs *regs) | ||
234 | { | ||
235 | unsigned long *sara = (unsigned long *)®s->esp; | ||
236 | |||
237 | ri->ret_addr = (kprobe_opcode_t *) *sara; | ||
238 | |||
239 | /* Replace the return addr with trampoline addr */ | ||
240 | *sara = (unsigned long) &kretprobe_trampoline; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | ||
245 | * remain disabled throughout this function. | ||
246 | */ | ||
247 | static int __kprobes kprobe_handler(struct pt_regs *regs) | ||
248 | { | ||
249 | struct kprobe *p; | ||
250 | int ret = 0; | ||
251 | kprobe_opcode_t *addr; | ||
252 | struct kprobe_ctlblk *kcb; | ||
253 | |||
254 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); | ||
255 | |||
256 | /* | ||
257 | * We don't want to be preempted for the entire | ||
258 | * duration of kprobe processing | ||
259 | */ | ||
260 | preempt_disable(); | ||
261 | kcb = get_kprobe_ctlblk(); | ||
262 | |||
263 | /* Check we're not actually recursing */ | ||
264 | if (kprobe_running()) { | ||
265 | p = get_kprobe(addr); | ||
266 | if (p) { | ||
267 | if (kcb->kprobe_status == KPROBE_HIT_SS && | ||
268 | *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { | ||
269 | regs->eflags &= ~TF_MASK; | ||
270 | regs->eflags |= kcb->kprobe_saved_eflags; | ||
271 | goto no_kprobe; | ||
272 | } | ||
273 | /* We have reentered the kprobe_handler(), since | ||
274 | * another probe was hit while within the handler. | ||
275 | * We here save the original kprobes variables and | ||
276 | * just single step on the instruction of the new probe | ||
277 | * without calling any user handlers. | ||
278 | */ | ||
279 | save_previous_kprobe(kcb); | ||
280 | set_current_kprobe(p, regs, kcb); | ||
281 | kprobes_inc_nmissed_count(p); | ||
282 | prepare_singlestep(p, regs); | ||
283 | kcb->kprobe_status = KPROBE_REENTER; | ||
284 | return 1; | ||
285 | } else { | ||
286 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
287 | /* The breakpoint instruction was removed by | ||
288 | * another cpu right after we hit it; no further | ||
289 | * handling of this interrupt is appropriate | ||
290 | */ | ||
291 | regs->eip -= sizeof(kprobe_opcode_t); | ||
292 | ret = 1; | ||
293 | goto no_kprobe; | ||
294 | } | ||
295 | p = __get_cpu_var(current_kprobe); | ||
296 | if (p->break_handler && p->break_handler(p, regs)) { | ||
297 | goto ss_probe; | ||
298 | } | ||
299 | } | ||
300 | goto no_kprobe; | ||
301 | } | ||
302 | |||
303 | p = get_kprobe(addr); | ||
304 | if (!p) { | ||
305 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
306 | /* | ||
307 | * The breakpoint instruction was removed right | ||
308 | * after we hit it. Another cpu has removed | ||
309 | * either a probepoint or a debugger breakpoint | ||
310 | * at this address. In either case, no further | ||
311 | * handling of this interrupt is appropriate. | ||
312 | * Back up over the (now missing) int3 and run | ||
313 | * the original instruction. | ||
314 | */ | ||
315 | regs->eip -= sizeof(kprobe_opcode_t); | ||
316 | ret = 1; | ||
317 | } | ||
318 | /* Not one of ours: let kernel handle it */ | ||
319 | goto no_kprobe; | ||
320 | } | ||
321 | |||
322 | set_current_kprobe(p, regs, kcb); | ||
323 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
324 | |||
325 | if (p->pre_handler && p->pre_handler(p, regs)) | ||
326 | /* handler has already set things up, so skip ss setup */ | ||
327 | return 1; | ||
328 | |||
329 | ss_probe: | ||
330 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) | ||
331 | if (p->ainsn.boostable == 1 && !p->post_handler){ | ||
332 | /* Boost up -- we can execute copied instructions directly */ | ||
333 | reset_current_kprobe(); | ||
334 | regs->eip = (unsigned long)p->ainsn.insn; | ||
335 | preempt_enable_no_resched(); | ||
336 | return 1; | ||
337 | } | ||
338 | #endif | ||
339 | prepare_singlestep(p, regs); | ||
340 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
341 | return 1; | ||
342 | |||
343 | no_kprobe: | ||
344 | preempt_enable_no_resched(); | ||
345 | return ret; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * For function-return probes, init_kprobes() establishes a probepoint | ||
350 | * here. When a retprobed function returns, this probe is hit and | ||
351 | * trampoline_probe_handler() runs, calling the kretprobe's handler. | ||
352 | */ | ||
353 | void __kprobes kretprobe_trampoline_holder(void) | ||
354 | { | ||
355 | asm volatile ( ".global kretprobe_trampoline\n" | ||
356 | "kretprobe_trampoline: \n" | ||
357 | " pushf\n" | ||
358 | /* skip cs, eip, orig_eax */ | ||
359 | " subl $12, %esp\n" | ||
360 | " pushl %fs\n" | ||
361 | " pushl %ds\n" | ||
362 | " pushl %es\n" | ||
363 | " pushl %eax\n" | ||
364 | " pushl %ebp\n" | ||
365 | " pushl %edi\n" | ||
366 | " pushl %esi\n" | ||
367 | " pushl %edx\n" | ||
368 | " pushl %ecx\n" | ||
369 | " pushl %ebx\n" | ||
370 | " movl %esp, %eax\n" | ||
371 | " call trampoline_handler\n" | ||
372 | /* move eflags to cs */ | ||
373 | " movl 52(%esp), %edx\n" | ||
374 | " movl %edx, 48(%esp)\n" | ||
375 | /* save true return address on eflags */ | ||
376 | " movl %eax, 52(%esp)\n" | ||
377 | " popl %ebx\n" | ||
378 | " popl %ecx\n" | ||
379 | " popl %edx\n" | ||
380 | " popl %esi\n" | ||
381 | " popl %edi\n" | ||
382 | " popl %ebp\n" | ||
383 | " popl %eax\n" | ||
384 | /* skip eip, orig_eax, es, ds, fs */ | ||
385 | " addl $20, %esp\n" | ||
386 | " popf\n" | ||
387 | " ret\n"); | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | * Called from kretprobe_trampoline | ||
392 | */ | ||
393 | fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) | ||
394 | { | ||
395 | struct kretprobe_instance *ri = NULL; | ||
396 | struct hlist_head *head, empty_rp; | ||
397 | struct hlist_node *node, *tmp; | ||
398 | unsigned long flags, orig_ret_address = 0; | ||
399 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; | ||
400 | |||
401 | INIT_HLIST_HEAD(&empty_rp); | ||
402 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
403 | head = kretprobe_inst_table_head(current); | ||
404 | /* fixup registers */ | ||
405 | regs->xcs = __KERNEL_CS | get_kernel_rpl(); | ||
406 | regs->eip = trampoline_address; | ||
407 | regs->orig_eax = 0xffffffff; | ||
408 | |||
409 | /* | ||
410 | * It is possible to have multiple instances associated with a given | ||
411 | * task either because multiple functions in the call path | ||
412 | * have a return probe installed on them, and/or more than one | ||
413 | * return probe was registered for a target function. | ||
414 | * | ||
415 | * We can handle this because: | ||
416 | * - instances are always inserted at the head of the list | ||
417 | * - when multiple return probes are registered for the same | ||
418 | * function, the first instance's ret_addr will point to the | ||
419 | * real return address, and all the rest will point to | ||
420 | * kretprobe_trampoline | ||
421 | */ | ||
422 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
423 | if (ri->task != current) | ||
424 | /* another task is sharing our hash bucket */ | ||
425 | continue; | ||
426 | |||
427 | if (ri->rp && ri->rp->handler){ | ||
428 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | ||
429 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | ||
430 | ri->rp->handler(ri, regs); | ||
431 | __get_cpu_var(current_kprobe) = NULL; | ||
432 | } | ||
433 | |||
434 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
435 | recycle_rp_inst(ri, &empty_rp); | ||
436 | |||
437 | if (orig_ret_address != trampoline_address) | ||
438 | /* | ||
439 | * This is the real return address. Any other | ||
440 | * instances associated with this task are for | ||
441 | * other calls deeper on the call stack | ||
442 | */ | ||
443 | break; | ||
444 | } | ||
445 | |||
446 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
447 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
448 | |||
449 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | ||
450 | hlist_del(&ri->hlist); | ||
451 | kfree(ri); | ||
452 | } | ||
453 | return (void*)orig_ret_address; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Called after single-stepping. p->addr is the address of the | ||
458 | * instruction whose first byte has been replaced by the "int 3" | ||
459 | * instruction. To avoid the SMP problems that can occur when we | ||
460 | * temporarily put back the original opcode to single-step, we | ||
461 | * single-stepped a copy of the instruction. The address of this | ||
462 | * copy is p->ainsn.insn. | ||
463 | * | ||
464 | * This function prepares to return from the post-single-step | ||
465 | * interrupt. We have to fix up the stack as follows: | ||
466 | * | ||
467 | * 0) Except in the case of absolute or indirect jump or call instructions, | ||
468 | * the new eip is relative to the copied instruction. We need to make | ||
469 | * it relative to the original instruction. | ||
470 | * | ||
471 | * 1) If the single-stepped instruction was pushfl, then the TF and IF | ||
472 | * flags are set in the just-pushed eflags, and may need to be cleared. | ||
473 | * | ||
474 | * 2) If the single-stepped instruction was a call, the return address | ||
475 | * that is atop the stack is the address following the copied instruction. | ||
476 | * We need to make it the address following the original instruction. | ||
477 | * | ||
478 | * This function also checks instruction size for preparing direct execution. | ||
479 | */ | ||
480 | static void __kprobes resume_execution(struct kprobe *p, | ||
481 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | ||
482 | { | ||
483 | unsigned long *tos = (unsigned long *)®s->esp; | ||
484 | unsigned long copy_eip = (unsigned long)p->ainsn.insn; | ||
485 | unsigned long orig_eip = (unsigned long)p->addr; | ||
486 | |||
487 | regs->eflags &= ~TF_MASK; | ||
488 | switch (p->ainsn.insn[0]) { | ||
489 | case 0x9c: /* pushfl */ | ||
490 | *tos &= ~(TF_MASK | IF_MASK); | ||
491 | *tos |= kcb->kprobe_old_eflags; | ||
492 | break; | ||
493 | case 0xc2: /* iret/ret/lret */ | ||
494 | case 0xc3: | ||
495 | case 0xca: | ||
496 | case 0xcb: | ||
497 | case 0xcf: | ||
498 | case 0xea: /* jmp absolute -- eip is correct */ | ||
499 | /* eip is already adjusted, no more changes required */ | ||
500 | p->ainsn.boostable = 1; | ||
501 | goto no_change; | ||
502 | case 0xe8: /* call relative - Fix return addr */ | ||
503 | *tos = orig_eip + (*tos - copy_eip); | ||
504 | break; | ||
505 | case 0x9a: /* call absolute -- same as call absolute, indirect */ | ||
506 | *tos = orig_eip + (*tos - copy_eip); | ||
507 | goto no_change; | ||
508 | case 0xff: | ||
509 | if ((p->ainsn.insn[1] & 0x30) == 0x10) { | ||
510 | /* | ||
511 | * call absolute, indirect | ||
512 | * Fix return addr; eip is correct. | ||
513 | * But this is not boostable | ||
514 | */ | ||
515 | *tos = orig_eip + (*tos - copy_eip); | ||
516 | goto no_change; | ||
517 | } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ | ||
518 | ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ | ||
519 | /* eip is correct. And this is boostable */ | ||
520 | p->ainsn.boostable = 1; | ||
521 | goto no_change; | ||
522 | } | ||
523 | default: | ||
524 | break; | ||
525 | } | ||
526 | |||
527 | if (p->ainsn.boostable == 0) { | ||
528 | if ((regs->eip > copy_eip) && | ||
529 | (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { | ||
530 | /* | ||
531 | * These instructions can be executed directly if it | ||
532 | * jumps back to correct address. | ||
533 | */ | ||
534 | set_jmp_op((void *)regs->eip, | ||
535 | (void *)orig_eip + (regs->eip - copy_eip)); | ||
536 | p->ainsn.boostable = 1; | ||
537 | } else { | ||
538 | p->ainsn.boostable = -1; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | regs->eip = orig_eip + (regs->eip - copy_eip); | ||
543 | |||
544 | no_change: | ||
545 | return; | ||
546 | } | ||
547 | |||
548 | /* | ||
549 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | ||
550 | * remain disabled throughout this function. | ||
551 | */ | ||
552 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) | ||
553 | { | ||
554 | struct kprobe *cur = kprobe_running(); | ||
555 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
556 | |||
557 | if (!cur) | ||
558 | return 0; | ||
559 | |||
560 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
561 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
562 | cur->post_handler(cur, regs, 0); | ||
563 | } | ||
564 | |||
565 | resume_execution(cur, regs, kcb); | ||
566 | regs->eflags |= kcb->kprobe_saved_eflags; | ||
567 | trace_hardirqs_fixup_flags(regs->eflags); | ||
568 | |||
569 | /* Restore the original saved kprobes variables and continue. */ | ||
570 | if (kcb->kprobe_status == KPROBE_REENTER) { | ||
571 | restore_previous_kprobe(kcb); | ||
572 | goto out; | ||
573 | } | ||
574 | reset_current_kprobe(); | ||
575 | out: | ||
576 | preempt_enable_no_resched(); | ||
577 | |||
578 | /* | ||
579 | * if somebody else is singlestepping across a probe point, eflags | ||
580 | * will have TF set, in which case, continue the remaining processing | ||
581 | * of do_debug, as if this is not a probe hit. | ||
582 | */ | ||
583 | if (regs->eflags & TF_MASK) | ||
584 | return 0; | ||
585 | |||
586 | return 1; | ||
587 | } | ||
588 | |||
589 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
590 | { | ||
591 | struct kprobe *cur = kprobe_running(); | ||
592 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
593 | |||
594 | switch(kcb->kprobe_status) { | ||
595 | case KPROBE_HIT_SS: | ||
596 | case KPROBE_REENTER: | ||
597 | /* | ||
598 | * We are here because the instruction being single | ||
599 | * stepped caused a page fault. We reset the current | ||
600 | * kprobe and the eip points back to the probe address | ||
601 | * and allow the page fault handler to continue as a | ||
602 | * normal page fault. | ||
603 | */ | ||
604 | regs->eip = (unsigned long)cur->addr; | ||
605 | regs->eflags |= kcb->kprobe_old_eflags; | ||
606 | if (kcb->kprobe_status == KPROBE_REENTER) | ||
607 | restore_previous_kprobe(kcb); | ||
608 | else | ||
609 | reset_current_kprobe(); | ||
610 | preempt_enable_no_resched(); | ||
611 | break; | ||
612 | case KPROBE_HIT_ACTIVE: | ||
613 | case KPROBE_HIT_SSDONE: | ||
614 | /* | ||
615 | * We increment the nmissed count for accounting, | ||
616 | * we can also use npre/npostfault count for accounting | ||
617 | * these specific fault cases. | ||
618 | */ | ||
619 | kprobes_inc_nmissed_count(cur); | ||
620 | |||
621 | /* | ||
622 | * We come here because instructions in the pre/post | ||
623 | * handler caused the page_fault, this could happen | ||
624 | * if handler tries to access user space by | ||
625 | * copy_from_user(), get_user() etc. Let the | ||
626 | * user-specified handler try to fix it first. | ||
627 | */ | ||
628 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
629 | return 1; | ||
630 | |||
631 | /* | ||
632 | * In case the user-specified fault handler returned | ||
633 | * zero, try to fix up. | ||
634 | */ | ||
635 | if (fixup_exception(regs)) | ||
636 | return 1; | ||
637 | |||
638 | /* | ||
639 | * fixup_exception() could not handle it, | ||
640 | * Let do_page_fault() fix it. | ||
641 | */ | ||
642 | break; | ||
643 | default: | ||
644 | break; | ||
645 | } | ||
646 | return 0; | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Wrapper routine for handling exceptions. | ||
651 | */ | ||
652 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | ||
653 | unsigned long val, void *data) | ||
654 | { | ||
655 | struct die_args *args = (struct die_args *)data; | ||
656 | int ret = NOTIFY_DONE; | ||
657 | |||
658 | if (args->regs && user_mode_vm(args->regs)) | ||
659 | return ret; | ||
660 | |||
661 | switch (val) { | ||
662 | case DIE_INT3: | ||
663 | if (kprobe_handler(args->regs)) | ||
664 | ret = NOTIFY_STOP; | ||
665 | break; | ||
666 | case DIE_DEBUG: | ||
667 | if (post_kprobe_handler(args->regs)) | ||
668 | ret = NOTIFY_STOP; | ||
669 | break; | ||
670 | case DIE_GPF: | ||
671 | /* kprobe_running() needs smp_processor_id() */ | ||
672 | preempt_disable(); | ||
673 | if (kprobe_running() && | ||
674 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
675 | ret = NOTIFY_STOP; | ||
676 | preempt_enable(); | ||
677 | break; | ||
678 | default: | ||
679 | break; | ||
680 | } | ||
681 | return ret; | ||
682 | } | ||
683 | |||
684 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
685 | { | ||
686 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
687 | unsigned long addr; | ||
688 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
689 | |||
690 | kcb->jprobe_saved_regs = *regs; | ||
691 | kcb->jprobe_saved_esp = ®s->esp; | ||
692 | addr = (unsigned long)(kcb->jprobe_saved_esp); | ||
693 | |||
694 | /* | ||
695 | * TBD: As Linus pointed out, gcc assumes that the callee | ||
696 | * owns the argument space and could overwrite it, e.g. | ||
697 | * tailcall optimization. So, to be absolutely safe | ||
698 | * we also save and restore enough stack bytes to cover | ||
699 | * the argument area. | ||
700 | */ | ||
701 | memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, | ||
702 | MIN_STACK_SIZE(addr)); | ||
703 | regs->eflags &= ~IF_MASK; | ||
704 | trace_hardirqs_off(); | ||
705 | regs->eip = (unsigned long)(jp->entry); | ||
706 | return 1; | ||
707 | } | ||
708 | |||
709 | void __kprobes jprobe_return(void) | ||
710 | { | ||
711 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
712 | |||
713 | asm volatile (" xchgl %%ebx,%%esp \n" | ||
714 | " int3 \n" | ||
715 | " .globl jprobe_return_end \n" | ||
716 | " jprobe_return_end: \n" | ||
717 | " nop \n"::"b" | ||
718 | (kcb->jprobe_saved_esp):"memory"); | ||
719 | } | ||
720 | |||
721 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
722 | { | ||
723 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
724 | u8 *addr = (u8 *) (regs->eip - 1); | ||
725 | unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); | ||
726 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
727 | |||
728 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { | ||
729 | if (®s->esp != kcb->jprobe_saved_esp) { | ||
730 | struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; | ||
731 | printk("current esp %p does not match saved esp %p\n", | ||
732 | ®s->esp, kcb->jprobe_saved_esp); | ||
733 | printk("Saved registers for jprobe %p\n", jp); | ||
734 | show_registers(saved_regs); | ||
735 | printk("Current registers\n"); | ||
736 | show_registers(regs); | ||
737 | BUG(); | ||
738 | } | ||
739 | *regs = kcb->jprobe_saved_regs; | ||
740 | memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, | ||
741 | MIN_STACK_SIZE(stack_addr)); | ||
742 | preempt_enable_no_resched(); | ||
743 | return 1; | ||
744 | } | ||
745 | return 0; | ||
746 | } | ||
747 | |||
748 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | ||
749 | { | ||
750 | return 0; | ||
751 | } | ||
752 | |||
753 | int __init arch_init_kprobes(void) | ||
754 | { | ||
755 | return 0; | ||
756 | } | ||
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c deleted file mode 100644 index 5df19a9f9239..000000000000 --- a/arch/x86/kernel/kprobes_64.c +++ /dev/null | |||
@@ -1,749 +0,0 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * | ||
20 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
21 | * Probes initial implementation ( includes contributions from | ||
22 | * Rusty Russell). | ||
23 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
24 | * interface to access function arguments. | ||
25 | * 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi | ||
26 | * <prasanna@in.ibm.com> adapted for x86_64 | ||
27 | * 2005-Mar Roland McGrath <roland@redhat.com> | ||
28 | * Fixed to handle %rip-relative addressing mode correctly. | ||
29 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | ||
30 | * Added function return probes functionality | ||
31 | */ | ||
32 | |||
33 | #include <linux/kprobes.h> | ||
34 | #include <linux/ptrace.h> | ||
35 | #include <linux/string.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/preempt.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/kdebug.h> | ||
40 | |||
41 | #include <asm/pgtable.h> | ||
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/alternative.h> | ||
44 | |||
45 | void jprobe_return_end(void); | ||
46 | static void __kprobes arch_copy_kprobe(struct kprobe *p); | ||
47 | |||
48 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | ||
49 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | ||
50 | |||
51 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | ||
52 | {"__switch_to", }, /* This function switches only current task, but | ||
53 | doesn't switch kernel stack.*/ | ||
54 | {NULL, NULL} /* Terminator */ | ||
55 | }; | ||
56 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | ||
57 | |||
58 | /* | ||
59 | * returns non-zero if opcode modifies the interrupt flag. | ||
60 | */ | ||
61 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | ||
62 | { | ||
63 | switch (*insn) { | ||
64 | case 0xfa: /* cli */ | ||
65 | case 0xfb: /* sti */ | ||
66 | case 0xcf: /* iret/iretd */ | ||
67 | case 0x9d: /* popf/popfd */ | ||
68 | return 1; | ||
69 | } | ||
70 | |||
71 | if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf) | ||
72 | return 1; | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | ||
77 | { | ||
78 | /* insn: must be on special executable page on x86_64. */ | ||
79 | p->ainsn.insn = get_insn_slot(); | ||
80 | if (!p->ainsn.insn) { | ||
81 | return -ENOMEM; | ||
82 | } | ||
83 | arch_copy_kprobe(p); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Determine if the instruction uses the %rip-relative addressing mode. | ||
89 | * If it does, return the address of the 32-bit displacement word. | ||
90 | * If not, return null. | ||
91 | */ | ||
92 | static s32 __kprobes *is_riprel(u8 *insn) | ||
93 | { | ||
94 | #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ | ||
95 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
96 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
97 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
98 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
99 | << (row % 64)) | ||
100 | static const u64 onebyte_has_modrm[256 / 64] = { | ||
101 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
102 | /* ------------------------------- */ | ||
103 | W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */ | ||
104 | W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */ | ||
105 | W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */ | ||
106 | W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */ | ||
107 | W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ | ||
108 | W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */ | ||
109 | W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */ | ||
110 | W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ | ||
111 | W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ | ||
112 | W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */ | ||
113 | W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */ | ||
114 | W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */ | ||
115 | W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */ | ||
116 | W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */ | ||
117 | W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ | ||
118 | W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */ | ||
119 | /* ------------------------------- */ | ||
120 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
121 | }; | ||
122 | static const u64 twobyte_has_modrm[256 / 64] = { | ||
123 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
124 | /* ------------------------------- */ | ||
125 | W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */ | ||
126 | W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */ | ||
127 | W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */ | ||
128 | W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */ | ||
129 | W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */ | ||
130 | W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */ | ||
131 | W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */ | ||
132 | W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */ | ||
133 | W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */ | ||
134 | W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */ | ||
135 | W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */ | ||
136 | W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */ | ||
137 | W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */ | ||
138 | W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */ | ||
139 | W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */ | ||
140 | W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */ | ||
141 | /* ------------------------------- */ | ||
142 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
143 | }; | ||
144 | #undef W | ||
145 | int need_modrm; | ||
146 | |||
147 | /* Skip legacy instruction prefixes. */ | ||
148 | while (1) { | ||
149 | switch (*insn) { | ||
150 | case 0x66: | ||
151 | case 0x67: | ||
152 | case 0x2e: | ||
153 | case 0x3e: | ||
154 | case 0x26: | ||
155 | case 0x64: | ||
156 | case 0x65: | ||
157 | case 0x36: | ||
158 | case 0xf0: | ||
159 | case 0xf3: | ||
160 | case 0xf2: | ||
161 | ++insn; | ||
162 | continue; | ||
163 | } | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | /* Skip REX instruction prefix. */ | ||
168 | if ((*insn & 0xf0) == 0x40) | ||
169 | ++insn; | ||
170 | |||
171 | if (*insn == 0x0f) { /* Two-byte opcode. */ | ||
172 | ++insn; | ||
173 | need_modrm = test_bit(*insn, twobyte_has_modrm); | ||
174 | } else { /* One-byte opcode. */ | ||
175 | need_modrm = test_bit(*insn, onebyte_has_modrm); | ||
176 | } | ||
177 | |||
178 | if (need_modrm) { | ||
179 | u8 modrm = *++insn; | ||
180 | if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */ | ||
181 | /* Displacement follows ModRM byte. */ | ||
182 | return (s32 *) ++insn; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | /* No %rip-relative addressing mode here. */ | ||
187 | return NULL; | ||
188 | } | ||
189 | |||
190 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | ||
191 | { | ||
192 | s32 *ripdisp; | ||
193 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); | ||
194 | ripdisp = is_riprel(p->ainsn.insn); | ||
195 | if (ripdisp) { | ||
196 | /* | ||
197 | * The copied instruction uses the %rip-relative | ||
198 | * addressing mode. Adjust the displacement for the | ||
199 | * difference between the original location of this | ||
200 | * instruction and the location of the copy that will | ||
201 | * actually be run. The tricky bit here is making sure | ||
202 | * that the sign extension happens correctly in this | ||
203 | * calculation, since we need a signed 32-bit result to | ||
204 | * be sign-extended to 64 bits when it's added to the | ||
205 | * %rip value and yield the same 64-bit result that the | ||
206 | * sign-extension of the original signed 32-bit | ||
207 | * displacement would have given. | ||
208 | */ | ||
209 | s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn; | ||
210 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
211 | *ripdisp = disp; | ||
212 | } | ||
213 | p->opcode = *p->addr; | ||
214 | } | ||
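A hedged worked example of the displacement rewrite above (addresses invented):

	/*
	 * Suppose "mov disp32(%rip),%rax" is copied from p->addr = A to the
	 * insn slot at p->ainsn.insn = B.  Run at A it references
	 *
	 *	A + insn_len + disp32
	 *
	 * and the copy must hit the same byte, so the new displacement is
	 *
	 *	disp' = A + disp32 - B
	 *
	 * (insn_len cancels, both copies being the same length).  The BUG_ON()
	 * catches A and B lying further apart than a signed 32-bit value spans.
	 */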
215 | |||
216 | void __kprobes arch_arm_kprobe(struct kprobe *p) | ||
217 | { | ||
218 | text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); | ||
219 | } | ||
220 | |||
221 | void __kprobes arch_disarm_kprobe(struct kprobe *p) | ||
222 | { | ||
223 | text_poke(p->addr, &p->opcode, 1); | ||
224 | } | ||
225 | |||
226 | void __kprobes arch_remove_kprobe(struct kprobe *p) | ||
227 | { | ||
228 | mutex_lock(&kprobe_mutex); | ||
229 | free_insn_slot(p->ainsn.insn, 0); | ||
230 | mutex_unlock(&kprobe_mutex); | ||
231 | } | ||
232 | |||
233 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
234 | { | ||
235 | kcb->prev_kprobe.kp = kprobe_running(); | ||
236 | kcb->prev_kprobe.status = kcb->kprobe_status; | ||
237 | kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; | ||
238 | kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; | ||
239 | } | ||
240 | |||
241 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
242 | { | ||
243 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | ||
244 | kcb->kprobe_status = kcb->prev_kprobe.status; | ||
245 | kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; | ||
246 | kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; | ||
247 | } | ||
248 | |||
249 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
250 | struct kprobe_ctlblk *kcb) | ||
251 | { | ||
252 | __get_cpu_var(current_kprobe) = p; | ||
253 | kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags | ||
254 | = (regs->eflags & (TF_MASK | IF_MASK)); | ||
255 | if (is_IF_modifier(p->ainsn.insn)) | ||
256 | kcb->kprobe_saved_rflags &= ~IF_MASK; | ||
257 | } | ||
258 | |||
259 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
260 | { | ||
261 | regs->eflags |= TF_MASK; | ||
262 | regs->eflags &= ~IF_MASK; | ||
263 | /*single step inline if the instruction is an int3*/ | ||
264 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
265 | regs->rip = (unsigned long)p->addr; | ||
266 | else | ||
267 | regs->rip = (unsigned long)p->ainsn.insn; | ||
268 | } | ||
269 | |||
270 | /* Called with kretprobe_lock held */ | ||
271 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | ||
272 | struct pt_regs *regs) | ||
273 | { | ||
274 | unsigned long *sara = (unsigned long *)regs->rsp; | ||
275 | |||
276 | ri->ret_addr = (kprobe_opcode_t *) *sara; | ||
277 | /* Replace the return addr with trampoline addr */ | ||
278 | *sara = (unsigned long) &kretprobe_trampoline; | ||
279 | } | ||
280 | |||
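A hedged, standalone sketch of what arch_prepare_kretprobe() above does to the stack: the word holding the caller's return address is remembered and overwritten with the trampoline address, so the probed function "returns" into the trampoline. struct ret_slot and hijack_return are illustrative names only.

#include <assert.h>
#include <stdint.h>

/* Simplified model of the stack slot that holds the caller's return address. */
struct ret_slot {
	uintptr_t ret_addr;
};

/* Remember the real return address and point the slot at the trampoline, so
 * the probed function "returns" into the trampoline instead of its caller. */
static uintptr_t hijack_return(struct ret_slot *slot, uintptr_t trampoline)
{
	uintptr_t orig = slot->ret_addr;

	slot->ret_addr = trampoline;
	return orig;
}

int main(void)
{
	struct ret_slot frame = { .ret_addr = 0x1234 };
	uintptr_t saved = hijack_return(&frame, 0xdead);

	assert(saved == 0x1234 && frame.ret_addr == 0xdead);
	return 0;
}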
281 | int __kprobes kprobe_handler(struct pt_regs *regs) | ||
282 | { | ||
283 | struct kprobe *p; | ||
284 | int ret = 0; | ||
285 | kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); | ||
286 | struct kprobe_ctlblk *kcb; | ||
287 | |||
288 | /* | ||
289 | * We don't want to be preempted for the entire | ||
290 | * duration of kprobe processing | ||
291 | */ | ||
292 | preempt_disable(); | ||
293 | kcb = get_kprobe_ctlblk(); | ||
294 | |||
295 | /* Check we're not actually recursing */ | ||
296 | if (kprobe_running()) { | ||
297 | p = get_kprobe(addr); | ||
298 | if (p) { | ||
299 | if (kcb->kprobe_status == KPROBE_HIT_SS && | ||
300 | *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { | ||
301 | regs->eflags &= ~TF_MASK; | ||
302 | regs->eflags |= kcb->kprobe_saved_rflags; | ||
303 | goto no_kprobe; | ||
304 | } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { | ||
305 | /* TODO: Provide re-entrancy from | ||
306 | * post_kprobes_handler() and avoid exception | ||
307 | * stack corruption while single-stepping on | ||
308 | * the instruction of the new probe. | ||
309 | */ | ||
310 | arch_disarm_kprobe(p); | ||
311 | regs->rip = (unsigned long)p->addr; | ||
312 | reset_current_kprobe(); | ||
313 | ret = 1; | ||
314 | } else { | ||
315 | /* We have reentered the kprobe_handler(), since | ||
316 | * another probe was hit while within the | ||
317 | * handler. We here save the original kprobe | ||
318 | * variables and just single step on instruction | ||
319 | * of the new probe without calling any user | ||
320 | * handlers. | ||
321 | */ | ||
322 | save_previous_kprobe(kcb); | ||
323 | set_current_kprobe(p, regs, kcb); | ||
324 | kprobes_inc_nmissed_count(p); | ||
325 | prepare_singlestep(p, regs); | ||
326 | kcb->kprobe_status = KPROBE_REENTER; | ||
327 | return 1; | ||
328 | } | ||
329 | } else { | ||
330 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
331 | /* The breakpoint instruction was removed by | ||
332 | * another cpu right after we hit, no further | ||
333 | * handling of this interrupt is appropriate | ||
334 | */ | ||
335 | regs->rip = (unsigned long)addr; | ||
336 | ret = 1; | ||
337 | goto no_kprobe; | ||
338 | } | ||
339 | p = __get_cpu_var(current_kprobe); | ||
340 | if (p->break_handler && p->break_handler(p, regs)) { | ||
341 | goto ss_probe; | ||
342 | } | ||
343 | } | ||
344 | goto no_kprobe; | ||
345 | } | ||
346 | |||
347 | p = get_kprobe(addr); | ||
348 | if (!p) { | ||
349 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
350 | /* | ||
351 | * The breakpoint instruction was removed right | ||
352 | * after we hit it. Another cpu has removed | ||
353 | * either a probepoint or a debugger breakpoint | ||
354 | * at this address. In either case, no further | ||
355 | * handling of this interrupt is appropriate. | ||
356 | * Back up over the (now missing) int3 and run | ||
357 | * the original instruction. | ||
358 | */ | ||
359 | regs->rip = (unsigned long)addr; | ||
360 | ret = 1; | ||
361 | } | ||
362 | /* Not one of ours: let kernel handle it */ | ||
363 | goto no_kprobe; | ||
364 | } | ||
365 | |||
366 | set_current_kprobe(p, regs, kcb); | ||
367 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
368 | |||
369 | if (p->pre_handler && p->pre_handler(p, regs)) | ||
370 | /* handler has already set things up, so skip ss setup */ | ||
371 | return 1; | ||
372 | |||
373 | ss_probe: | ||
374 | prepare_singlestep(p, regs); | ||
375 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
376 | return 1; | ||
377 | |||
378 | no_kprobe: | ||
379 | preempt_enable_no_resched(); | ||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * For function-return probes, arch_init_kprobes() establishes a probepoint | ||
385 | * here. When a retprobed function returns, this probe is hit and | ||
386 | * trampoline_probe_handler() runs, calling the kretprobe's handler. | ||
387 | */ | ||
388 | void kretprobe_trampoline_holder(void) | ||
389 | { | ||
390 | asm volatile ( ".global kretprobe_trampoline\n" | ||
391 | "kretprobe_trampoline: \n" | ||
392 | "nop\n"); | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * Called when we hit the probe point at kretprobe_trampoline | ||
397 | */ | ||
398 | int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | ||
399 | { | ||
400 | struct kretprobe_instance *ri = NULL; | ||
401 | struct hlist_head *head, empty_rp; | ||
402 | struct hlist_node *node, *tmp; | ||
403 | unsigned long flags, orig_ret_address = 0; | ||
404 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; | ||
405 | |||
406 | INIT_HLIST_HEAD(&empty_rp); | ||
407 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
408 | head = kretprobe_inst_table_head(current); | ||
409 | |||
410 | /* | ||
411 | * It is possible to have multiple instances associated with a given | ||
412 | * task either because multiple functions in the call path | ||
413 | * have a return probe installed on them, and/or more than one | ||
414 | * return probe was registered for a target function. | ||
415 | * | ||
416 | * We can handle this because: | ||
417 | * - instances are always inserted at the head of the list | ||
418 | * - when multiple return probes are registered for the same | ||
419 | * function, the first instance's ret_addr will point to the | ||
420 | * real return address, and all the rest will point to | ||
421 | * kretprobe_trampoline | ||
422 | */ | ||
423 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
424 | if (ri->task != current) | ||
425 | /* another task is sharing our hash bucket */ | ||
426 | continue; | ||
427 | |||
428 | if (ri->rp && ri->rp->handler) | ||
429 | ri->rp->handler(ri, regs); | ||
430 | |||
431 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
432 | recycle_rp_inst(ri, &empty_rp); | ||
433 | |||
434 | if (orig_ret_address != trampoline_address) | ||
435 | /* | ||
436 | * This is the real return address. Any other | ||
437 | * instances associated with this task are for | ||
438 | * other calls deeper on the call stack | ||
439 | */ | ||
440 | break; | ||
441 | } | ||
442 | |||
443 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
444 | regs->rip = orig_ret_address; | ||
445 | |||
446 | reset_current_kprobe(); | ||
447 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
448 | preempt_enable_no_resched(); | ||
449 | |||
450 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | ||
451 | hlist_del(&ri->hlist); | ||
452 | kfree(ri); | ||
453 | } | ||
454 | /* | ||
455 | * By returning a non-zero value, we are telling | ||
456 | * kprobe_handler() that we don't want the post_handler | ||
457 | * to run (and have re-enabled preemption) | ||
458 | */ | ||
459 | return 1; | ||
460 | } | ||
461 | |||
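The invariant spelled out in the comment inside trampoline_probe_handler() above — newest instance at the head, duplicates pointing at the trampoline, the first non-trampoline address being the real one — can be shown in a small standalone sketch. struct inst and find_real_return are made-up names; the real code also skips instances belonging to other tasks in the shared hash bucket.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct inst {
	uintptr_t ret_addr;
	struct inst *next;
};

/* Newest instance sits at the head; the first saved address that is not the
 * trampoline is the real return address for the frame being unwound. */
static uintptr_t find_real_return(const struct inst *head, uintptr_t trampoline)
{
	uintptr_t ret = 0;

	for (const struct inst *ri = head; ri; ri = ri->next) {
		ret = ri->ret_addr;
		if (ret != trampoline)
			break;	/* anything deeper belongs to calls further down the stack */
	}
	return ret;
}

int main(void)
{
	struct inst first = { 0x1111, NULL };	/* armed first: holds the real address */
	struct inst dup   = { 0xdead, &first };	/* armed later: slot already held the trampoline */

	assert(find_real_return(&dup, 0xdead) == 0x1111);
	return 0;
}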
462 | /* | ||
463 | * Called after single-stepping. p->addr is the address of the | ||
464 | * instruction whose first byte has been replaced by the "int 3" | ||
465 | * instruction. To avoid the SMP problems that can occur when we | ||
466 | * temporarily put back the original opcode to single-step, we | ||
467 | * single-stepped a copy of the instruction. The address of this | ||
468 | * copy is p->ainsn.insn. | ||
469 | * | ||
470 | * This function prepares to return from the post-single-step | ||
471 | * interrupt. We have to fix up the stack as follows: | ||
472 | * | ||
473 | * 0) Except in the case of absolute or indirect jump or call instructions, | ||
474 | * the new rip is relative to the copied instruction. We need to make | ||
475 | * it relative to the original instruction. | ||
476 | * | ||
477 | * 1) If the single-stepped instruction was pushfl, then the TF and IF | ||
478 | * flags are set in the just-pushed eflags, and may need to be cleared. | ||
479 | * | ||
480 | * 2) If the single-stepped instruction was a call, the return address | ||
481 | * that is atop the stack is the address following the copied instruction. | ||
482 | * We need to make it the address following the original instruction. | ||
483 | */ | ||
484 | static void __kprobes resume_execution(struct kprobe *p, | ||
485 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | ||
486 | { | ||
487 | unsigned long *tos = (unsigned long *)regs->rsp; | ||
488 | unsigned long copy_rip = (unsigned long)p->ainsn.insn; | ||
489 | unsigned long orig_rip = (unsigned long)p->addr; | ||
490 | kprobe_opcode_t *insn = p->ainsn.insn; | ||
491 | |||
492 | /*skip the REX prefix*/ | ||
493 | if (*insn >= 0x40 && *insn <= 0x4f) | ||
494 | insn++; | ||
495 | |||
496 | regs->eflags &= ~TF_MASK; | ||
497 | switch (*insn) { | ||
498 | case 0x9c: /* pushfl */ | ||
499 | *tos &= ~(TF_MASK | IF_MASK); | ||
500 | *tos |= kcb->kprobe_old_rflags; | ||
501 | break; | ||
502 | case 0xc2: /* iret/ret/lret */ | ||
503 | case 0xc3: | ||
504 | case 0xca: | ||
505 | case 0xcb: | ||
506 | case 0xcf: | ||
507 | case 0xea: /* jmp absolute -- ip is correct */ | ||
508 | /* ip is already adjusted, no more changes required */ | ||
509 | goto no_change; | ||
510 | case 0xe8: /* call relative - Fix return addr */ | ||
511 | *tos = orig_rip + (*tos - copy_rip); | ||
512 | break; | ||
513 | case 0xff: | ||
514 | if ((insn[1] & 0x30) == 0x10) { | ||
515 | /* call absolute, indirect */ | ||
516 | /* Fix return addr; ip is correct. */ | ||
517 | *tos = orig_rip + (*tos - copy_rip); | ||
518 | goto no_change; | ||
519 | } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ | ||
520 | ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ | ||
521 | /* ip is correct. */ | ||
522 | goto no_change; | ||
523 | } | ||
524 | default: | ||
525 | break; | ||
526 | } | ||
527 | |||
528 | regs->rip = orig_rip + (regs->rip - copy_rip); | ||
529 | no_change: | ||
530 | |||
531 | return; | ||
532 | } | ||
533 | |||
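A worked example of the final rip fix-up in resume_execution() above, with made-up addresses; fixup_rip simply re-bases the post-single-step rip from the instruction copy back onto the original location.

#include <assert.h>
#include <stdint.h>

/* Re-base the post-single-step rip from the instruction copy back onto the
 * original instruction's location (hypothetical helper name). */
static uint64_t fixup_rip(uint64_t orig_rip, uint64_t copy_rip, uint64_t rip_after_step)
{
	return orig_rip + (rip_after_step - copy_rip);
}

int main(void)
{
	/* Original insn at ...0100, its 3-byte copy stepped from ...2000000 to ...2000003. */
	assert(fixup_rip(0xffffffff81000100ULL, 0xffffffff82000000ULL,
			 0xffffffff82000003ULL) == 0xffffffff81000103ULL);
	return 0;
}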
534 | int __kprobes post_kprobe_handler(struct pt_regs *regs) | ||
535 | { | ||
536 | struct kprobe *cur = kprobe_running(); | ||
537 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
538 | |||
539 | if (!cur) | ||
540 | return 0; | ||
541 | |||
542 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
543 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
544 | cur->post_handler(cur, regs, 0); | ||
545 | } | ||
546 | |||
547 | resume_execution(cur, regs, kcb); | ||
548 | regs->eflags |= kcb->kprobe_saved_rflags; | ||
549 | trace_hardirqs_fixup_flags(regs->eflags); | ||
550 | |||
551 | /* Restore the original saved kprobes variables and continue. */ | ||
552 | if (kcb->kprobe_status == KPROBE_REENTER) { | ||
553 | restore_previous_kprobe(kcb); | ||
554 | goto out; | ||
555 | } | ||
556 | reset_current_kprobe(); | ||
557 | out: | ||
558 | preempt_enable_no_resched(); | ||
559 | |||
560 | /* | ||
561 | * if somebody else is singlestepping across a probe point, eflags | ||
562 | * will have TF set, in which case, continue the remaining processing | ||
563 | * of do_debug, as if this is not a probe hit. | ||
564 | */ | ||
565 | if (regs->eflags & TF_MASK) | ||
566 | return 0; | ||
567 | |||
568 | return 1; | ||
569 | } | ||
570 | |||
571 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
572 | { | ||
573 | struct kprobe *cur = kprobe_running(); | ||
574 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
575 | const struct exception_table_entry *fixup; | ||
576 | |||
577 | switch(kcb->kprobe_status) { | ||
578 | case KPROBE_HIT_SS: | ||
579 | case KPROBE_REENTER: | ||
580 | /* | ||
581 | * We are here because the instruction being single | ||
582 | * stepped caused a page fault. We reset the current | ||
583 | * kprobe and the rip points back to the probe address | ||
584 | * and allow the page fault handler to continue as a | ||
585 | * normal page fault. | ||
586 | */ | ||
587 | regs->rip = (unsigned long)cur->addr; | ||
588 | regs->eflags |= kcb->kprobe_old_rflags; | ||
589 | if (kcb->kprobe_status == KPROBE_REENTER) | ||
590 | restore_previous_kprobe(kcb); | ||
591 | else | ||
592 | reset_current_kprobe(); | ||
593 | preempt_enable_no_resched(); | ||
594 | break; | ||
595 | case KPROBE_HIT_ACTIVE: | ||
596 | case KPROBE_HIT_SSDONE: | ||
597 | /* | ||
598 | * We increment the nmissed count for accounting, | ||
599 | * we can also use npre/npostfault count for accounting | ||
600 | * these specific fault cases. | ||
601 | */ | ||
602 | kprobes_inc_nmissed_count(cur); | ||
603 | |||
604 | /* | ||
605 | * We come here because instructions in the pre/post | ||
606 | * handler caused the page_fault, this could happen | ||
607 | * if handler tries to access user space by | ||
608 | * copy_from_user(), get_user() etc. Let the | ||
609 | * user-specified handler try to fix it first. | ||
610 | */ | ||
611 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
612 | return 1; | ||
613 | |||
614 | /* | ||
615 | * In case the user-specified fault handler returned | ||
616 | * zero, try to fix up. | ||
617 | */ | ||
618 | fixup = search_exception_tables(regs->rip); | ||
619 | if (fixup) { | ||
620 | regs->rip = fixup->fixup; | ||
621 | return 1; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * fixup() could not handle it, | ||
626 | * Let do_page_fault() fix it. | ||
627 | */ | ||
628 | break; | ||
629 | default: | ||
630 | break; | ||
631 | } | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | /* | ||
636 | * Wrapper routine for handling exceptions. | ||
637 | */ | ||
638 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | ||
639 | unsigned long val, void *data) | ||
640 | { | ||
641 | struct die_args *args = (struct die_args *)data; | ||
642 | int ret = NOTIFY_DONE; | ||
643 | |||
644 | if (args->regs && user_mode(args->regs)) | ||
645 | return ret; | ||
646 | |||
647 | switch (val) { | ||
648 | case DIE_INT3: | ||
649 | if (kprobe_handler(args->regs)) | ||
650 | ret = NOTIFY_STOP; | ||
651 | break; | ||
652 | case DIE_DEBUG: | ||
653 | if (post_kprobe_handler(args->regs)) | ||
654 | ret = NOTIFY_STOP; | ||
655 | break; | ||
656 | case DIE_GPF: | ||
657 | /* kprobe_running() needs smp_processor_id() */ | ||
658 | preempt_disable(); | ||
659 | if (kprobe_running() && | ||
660 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
661 | ret = NOTIFY_STOP; | ||
662 | preempt_enable(); | ||
663 | break; | ||
664 | default: | ||
665 | break; | ||
666 | } | ||
667 | return ret; | ||
668 | } | ||
669 | |||
670 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
671 | { | ||
672 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
673 | unsigned long addr; | ||
674 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
675 | |||
676 | kcb->jprobe_saved_regs = *regs; | ||
677 | kcb->jprobe_saved_rsp = (long *) regs->rsp; | ||
678 | addr = (unsigned long)(kcb->jprobe_saved_rsp); | ||
679 | /* | ||
680 | * As Linus pointed out, gcc assumes that the callee | ||
681 | * owns the argument space and could overwrite it, e.g. | ||
682 | * tailcall optimization. So, to be absolutely safe | ||
683 | * we also save and restore enough stack bytes to cover | ||
684 | * the argument area. | ||
685 | */ | ||
686 | memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, | ||
687 | MIN_STACK_SIZE(addr)); | ||
688 | regs->eflags &= ~IF_MASK; | ||
689 | trace_hardirqs_off(); | ||
690 | regs->rip = (unsigned long)(jp->entry); | ||
691 | return 1; | ||
692 | } | ||
693 | |||
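The argument-area save that the comment in setjmp_pre_handler() explains amounts to copying a bounded window of stack starting at the saved stack pointer. The sketch below is an editor's approximation in plain C, with MAX_SAVED_STACK standing in for the bound applied by MIN_STACK_SIZE(); none of these names come from the patch.

#include <stdint.h>
#include <string.h>

#define MAX_SAVED_STACK 128	/* stand-in for the bound inside MIN_STACK_SIZE() */

/* Copy the window of stack the callee is allowed to clobber (its argument
 * area), bounded by MAX_SAVED_STACK and by the end of the stack region, so it
 * can be restored verbatim once the jprobe handler has finished. */
static size_t save_stack_window(uint8_t *save, const uint8_t *sp,
				const uint8_t *stack_end)
{
	size_t len = (size_t)(stack_end - sp);

	if (len > MAX_SAVED_STACK)
		len = MAX_SAVED_STACK;
	memcpy(save, sp, len);
	return len;
}

int main(void)
{
	uint8_t stack[256] = { 0 }, saved[MAX_SAVED_STACK];

	return save_stack_window(saved, stack + 200, stack + 256) == 56 ? 0 : 1;
}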
694 | void __kprobes jprobe_return(void) | ||
695 | { | ||
696 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
697 | |||
698 | asm volatile (" xchg %%rbx,%%rsp \n" | ||
699 | " int3 \n" | ||
700 | " .globl jprobe_return_end \n" | ||
701 | " jprobe_return_end: \n" | ||
702 | " nop \n"::"b" | ||
703 | (kcb->jprobe_saved_rsp):"memory"); | ||
704 | } | ||
705 | |||
706 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
707 | { | ||
708 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
709 | u8 *addr = (u8 *) (regs->rip - 1); | ||
710 | unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); | ||
711 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
712 | |||
713 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { | ||
714 | if ((unsigned long *)regs->rsp != kcb->jprobe_saved_rsp) { | ||
715 | struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; | ||
716 | printk("current rsp %p does not match saved rsp %p\n", | ||
717 | (long *)regs->rsp, kcb->jprobe_saved_rsp); | ||
718 | printk("Saved registers for jprobe %p\n", jp); | ||
719 | show_registers(saved_regs); | ||
720 | printk("Current registers\n"); | ||
721 | show_registers(regs); | ||
722 | BUG(); | ||
723 | } | ||
724 | *regs = kcb->jprobe_saved_regs; | ||
725 | memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, | ||
726 | MIN_STACK_SIZE(stack_addr)); | ||
727 | preempt_enable_no_resched(); | ||
728 | return 1; | ||
729 | } | ||
730 | return 0; | ||
731 | } | ||
732 | |||
733 | static struct kprobe trampoline_p = { | ||
734 | .addr = (kprobe_opcode_t *) &kretprobe_trampoline, | ||
735 | .pre_handler = trampoline_probe_handler | ||
736 | }; | ||
737 | |||
738 | int __init arch_init_kprobes(void) | ||
739 | { | ||
740 | return register_kprobe(&trampoline_p); | ||
741 | } | ||
742 | |||
743 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | ||
744 | { | ||
745 | if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) | ||
746 | return 1; | ||
747 | |||
748 | return 0; | ||
749 | } | ||
diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt.c index 9ff90a27c45f..0224c3637c73 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -1,6 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | 2 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds |
3 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | 3 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> |
4 | * Copyright (C) 2002 Andi Kleen | ||
5 | * | ||
6 | * This handles calls from both 32bit and 64bit mode. | ||
4 | */ | 7 | */ |
5 | 8 | ||
6 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
@@ -9,7 +12,6 @@ | |||
9 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
10 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
11 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
12 | #include <linux/slab.h> | ||
13 | 15 | ||
14 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
15 | #include <asm/system.h> | 17 | #include <asm/system.h> |
@@ -17,7 +19,7 @@ | |||
17 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
18 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
19 | 21 | ||
20 | #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ | 22 | #ifdef CONFIG_SMP |
21 | static void flush_ldt(void *null) | 23 | static void flush_ldt(void *null) |
22 | { | 24 | { |
23 | if (current->active_mm) | 25 | if (current->active_mm) |
@@ -27,26 +29,32 @@ static void flush_ldt(void *null) | |||
27 | 29 | ||
28 | static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | 30 | static int alloc_ldt(mm_context_t *pc, int mincount, int reload) |
29 | { | 31 | { |
30 | void *oldldt; | 32 | void *oldldt, *newldt; |
31 | void *newldt; | ||
32 | int oldsize; | 33 | int oldsize; |
33 | 34 | ||
34 | if (mincount <= pc->size) | 35 | if (mincount <= pc->size) |
35 | return 0; | 36 | return 0; |
36 | oldsize = pc->size; | 37 | oldsize = pc->size; |
37 | mincount = (mincount+511)&(~511); | 38 | mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & |
38 | if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) | 39 | (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); |
39 | newldt = vmalloc(mincount*LDT_ENTRY_SIZE); | 40 | if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) |
41 | newldt = vmalloc(mincount * LDT_ENTRY_SIZE); | ||
40 | else | 42 | else |
41 | newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); | 43 | newldt = (void *)__get_free_page(GFP_KERNEL); |
42 | 44 | ||
43 | if (!newldt) | 45 | if (!newldt) |
44 | return -ENOMEM; | 46 | return -ENOMEM; |
45 | 47 | ||
46 | if (oldsize) | 48 | if (oldsize) |
47 | memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); | 49 | memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); |
48 | oldldt = pc->ldt; | 50 | oldldt = pc->ldt; |
49 | memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); | 51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, |
52 | (mincount - oldsize) * LDT_ENTRY_SIZE); | ||
53 | |||
54 | #ifdef CONFIG_X86_64 | ||
55 | /* CHECKME: Do we really need this ? */ | ||
56 | wmb(); | ||
57 | #endif | ||
50 | pc->ldt = newldt; | 58 | pc->ldt = newldt; |
51 | wmb(); | 59 | wmb(); |
52 | pc->size = mincount; | 60 | pc->size = mincount; |
@@ -55,6 +63,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
55 | if (reload) { | 63 | if (reload) { |
56 | #ifdef CONFIG_SMP | 64 | #ifdef CONFIG_SMP |
57 | cpumask_t mask; | 65 | cpumask_t mask; |
66 | |||
58 | preempt_disable(); | 67 | preempt_disable(); |
59 | load_LDT(pc); | 68 | load_LDT(pc); |
60 | mask = cpumask_of_cpu(smp_processor_id()); | 69 | mask = cpumask_of_cpu(smp_processor_id()); |
@@ -66,10 +75,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
66 | #endif | 75 | #endif |
67 | } | 76 | } |
68 | if (oldsize) { | 77 | if (oldsize) { |
69 | if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) | 78 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) |
70 | vfree(oldldt); | 79 | vfree(oldldt); |
71 | else | 80 | else |
72 | kfree(oldldt); | 81 | put_page(virt_to_page(oldldt)); |
73 | } | 82 | } |
74 | return 0; | 83 | return 0; |
75 | } | 84 | } |
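The new rounding expression in alloc_ldt() rounds the requested entry count up to a whole page's worth of LDT entries — 512 with 4 KiB pages and 8-byte descriptors, i.e. exactly the constant the old (mincount+511)&(~511) hard-coded. A small standalone check, with the page and entry sizes assumed for illustration only:

#include <assert.h>

#define PAGE_SIZE	4096UL
#define LDT_ENTRY_SIZE	8UL

/* Round an entry count up to a multiple of one page's worth of entries. */
static unsigned long round_ldt_entries(unsigned long mincount)
{
	unsigned long per_page = PAGE_SIZE / LDT_ENTRY_SIZE;	/* 512 */

	return (mincount + per_page - 1) & ~(per_page - 1);
}

int main(void)
{
	assert(round_ldt_entries(1) == 512);
	assert(round_ldt_entries(512) == 512);
	assert(round_ldt_entries(513) == 1024);
	return 0;
}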
@@ -77,9 +86,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
77 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | 86 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
78 | { | 87 | { |
79 | int err = alloc_ldt(new, old->size, 0); | 88 | int err = alloc_ldt(new, old->size, 0); |
89 | |||
80 | if (err < 0) | 90 | if (err < 0) |
81 | return err; | 91 | return err; |
82 | memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); | 92 | memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); |
83 | return 0; | 93 | return 0; |
84 | } | 94 | } |
85 | 95 | ||
@@ -89,7 +99,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | |||
89 | */ | 99 | */ |
90 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | 100 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) |
91 | { | 101 | { |
92 | struct mm_struct * old_mm; | 102 | struct mm_struct *old_mm; |
93 | int retval = 0; | 103 | int retval = 0; |
94 | 104 | ||
95 | mutex_init(&mm->context.lock); | 105 | mutex_init(&mm->context.lock); |
@@ -105,33 +115,38 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
105 | 115 | ||
106 | /* | 116 | /* |
107 | * No need to lock the MM as we are the last user | 117 | * No need to lock the MM as we are the last user |
118 | * | ||
119 | * 64bit: Don't touch the LDT register - we're already in the next thread. | ||
108 | */ | 120 | */ |
109 | void destroy_context(struct mm_struct *mm) | 121 | void destroy_context(struct mm_struct *mm) |
110 | { | 122 | { |
111 | if (mm->context.size) { | 123 | if (mm->context.size) { |
124 | #ifdef CONFIG_X86_32 | ||
125 | /* CHECKME: Can this ever happen ? */ | ||
112 | if (mm == current->active_mm) | 126 | if (mm == current->active_mm) |
113 | clear_LDT(); | 127 | clear_LDT(); |
114 | if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) | 128 | #endif |
129 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | ||
115 | vfree(mm->context.ldt); | 130 | vfree(mm->context.ldt); |
116 | else | 131 | else |
117 | kfree(mm->context.ldt); | 132 | put_page(virt_to_page(mm->context.ldt)); |
118 | mm->context.size = 0; | 133 | mm->context.size = 0; |
119 | } | 134 | } |
120 | } | 135 | } |
121 | 136 | ||
122 | static int read_ldt(void __user * ptr, unsigned long bytecount) | 137 | static int read_ldt(void __user *ptr, unsigned long bytecount) |
123 | { | 138 | { |
124 | int err; | 139 | int err; |
125 | unsigned long size; | 140 | unsigned long size; |
126 | struct mm_struct * mm = current->mm; | 141 | struct mm_struct *mm = current->mm; |
127 | 142 | ||
128 | if (!mm->context.size) | 143 | if (!mm->context.size) |
129 | return 0; | 144 | return 0; |
130 | if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) | 145 | if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) |
131 | bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | 146 | bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; |
132 | 147 | ||
133 | mutex_lock(&mm->context.lock); | 148 | mutex_lock(&mm->context.lock); |
134 | size = mm->context.size*LDT_ENTRY_SIZE; | 149 | size = mm->context.size * LDT_ENTRY_SIZE; |
135 | if (size > bytecount) | 150 | if (size > bytecount) |
136 | size = bytecount; | 151 | size = bytecount; |
137 | 152 | ||
@@ -143,7 +158,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) | |||
143 | goto error_return; | 158 | goto error_return; |
144 | if (size != bytecount) { | 159 | if (size != bytecount) { |
145 | /* zero-fill the rest */ | 160 | /* zero-fill the rest */ |
146 | if (clear_user(ptr+size, bytecount-size) != 0) { | 161 | if (clear_user(ptr + size, bytecount - size) != 0) { |
147 | err = -EFAULT; | 162 | err = -EFAULT; |
148 | goto error_return; | 163 | goto error_return; |
149 | } | 164 | } |
@@ -153,34 +168,32 @@ error_return: | |||
153 | return err; | 168 | return err; |
154 | } | 169 | } |
155 | 170 | ||
156 | static int read_default_ldt(void __user * ptr, unsigned long bytecount) | 171 | static int read_default_ldt(void __user *ptr, unsigned long bytecount) |
157 | { | 172 | { |
158 | int err; | 173 | /* CHECKME: Can we use _one_ random number ? */ |
159 | unsigned long size; | 174 | #ifdef CONFIG_X86_32 |
160 | 175 | unsigned long size = 5 * sizeof(struct desc_struct); | |
161 | err = 0; | 176 | #else |
162 | size = 5*sizeof(struct desc_struct); | 177 | unsigned long size = 128; |
163 | if (size > bytecount) | 178 | #endif |
164 | size = bytecount; | 179 | if (bytecount > size) |
165 | 180 | bytecount = size; | |
166 | err = size; | 181 | if (clear_user(ptr, bytecount)) |
167 | if (clear_user(ptr, size)) | 182 | return -EFAULT; |
168 | err = -EFAULT; | 183 | return bytecount; |
169 | |||
170 | return err; | ||
171 | } | 184 | } |
172 | 185 | ||
173 | static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | 186 | static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) |
174 | { | 187 | { |
175 | struct mm_struct * mm = current->mm; | 188 | struct mm_struct *mm = current->mm; |
176 | __u32 entry_1, entry_2; | 189 | struct desc_struct ldt; |
177 | int error; | 190 | int error; |
178 | struct user_desc ldt_info; | 191 | struct user_desc ldt_info; |
179 | 192 | ||
180 | error = -EINVAL; | 193 | error = -EINVAL; |
181 | if (bytecount != sizeof(ldt_info)) | 194 | if (bytecount != sizeof(ldt_info)) |
182 | goto out; | 195 | goto out; |
183 | error = -EFAULT; | 196 | error = -EFAULT; |
184 | if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) | 197 | if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) |
185 | goto out; | 198 | goto out; |
186 | 199 | ||
@@ -196,28 +209,27 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | |||
196 | 209 | ||
197 | mutex_lock(&mm->context.lock); | 210 | mutex_lock(&mm->context.lock); |
198 | if (ldt_info.entry_number >= mm->context.size) { | 211 | if (ldt_info.entry_number >= mm->context.size) { |
199 | error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); | 212 | error = alloc_ldt(¤t->mm->context, |
213 | ldt_info.entry_number + 1, 1); | ||
200 | if (error < 0) | 214 | if (error < 0) |
201 | goto out_unlock; | 215 | goto out_unlock; |
202 | } | 216 | } |
203 | 217 | ||
204 | /* Allow LDTs to be cleared by the user. */ | 218 | /* Allow LDTs to be cleared by the user. */ |
205 | if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | 219 | if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { |
206 | if (oldmode || LDT_empty(&ldt_info)) { | 220 | if (oldmode || LDT_empty(&ldt_info)) { |
207 | entry_1 = 0; | 221 | memset(&ldt, 0, sizeof(ldt)); |
208 | entry_2 = 0; | ||
209 | goto install; | 222 | goto install; |
210 | } | 223 | } |
211 | } | 224 | } |
212 | 225 | ||
213 | entry_1 = LDT_entry_a(&ldt_info); | 226 | fill_ldt(&ldt, &ldt_info); |
214 | entry_2 = LDT_entry_b(&ldt_info); | ||
215 | if (oldmode) | 227 | if (oldmode) |
216 | entry_2 &= ~(1 << 20); | 228 | ldt.avl = 0; |
217 | 229 | ||
218 | /* Install the new entry ... */ | 230 | /* Install the new entry ... */ |
219 | install: | 231 | install: |
220 | write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); | 232 | write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); |
221 | error = 0; | 233 | error = 0; |
222 | 234 | ||
223 | out_unlock: | 235 | out_unlock: |
@@ -226,7 +238,8 @@ out: | |||
226 | return error; | 238 | return error; |
227 | } | 239 | } |
228 | 240 | ||
229 | asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | 241 | asmlinkage int sys_modify_ldt(int func, void __user *ptr, |
242 | unsigned long bytecount) | ||
230 | { | 243 | { |
231 | int ret = -ENOSYS; | 244 | int ret = -ENOSYS; |
232 | 245 | ||
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c deleted file mode 100644 index 60e57abb8e90..000000000000 --- a/arch/x86/kernel/ldt_64.c +++ /dev/null | |||
@@ -1,250 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | ||
3 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | ||
4 | * Copyright (C) 2002 Andi Kleen | ||
5 | * | ||
6 | * This handles calls from both 32bit and 64bit mode. | ||
7 | */ | ||
8 | |||
9 | #include <linux/errno.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/slab.h> | ||
16 | |||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/system.h> | ||
19 | #include <asm/ldt.h> | ||
20 | #include <asm/desc.h> | ||
21 | #include <asm/proto.h> | ||
22 | |||
23 | #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ | ||
24 | static void flush_ldt(void *null) | ||
25 | { | ||
26 | if (current->active_mm) | ||
27 | load_LDT(¤t->active_mm->context); | ||
28 | } | ||
29 | #endif | ||
30 | |||
31 | static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) | ||
32 | { | ||
33 | void *oldldt; | ||
34 | void *newldt; | ||
35 | unsigned oldsize; | ||
36 | |||
37 | if (mincount <= (unsigned)pc->size) | ||
38 | return 0; | ||
39 | oldsize = pc->size; | ||
40 | mincount = (mincount+511)&(~511); | ||
41 | if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
42 | newldt = vmalloc(mincount*LDT_ENTRY_SIZE); | ||
43 | else | ||
44 | newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); | ||
45 | |||
46 | if (!newldt) | ||
47 | return -ENOMEM; | ||
48 | |||
49 | if (oldsize) | ||
50 | memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); | ||
51 | oldldt = pc->ldt; | ||
52 | memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); | ||
53 | wmb(); | ||
54 | pc->ldt = newldt; | ||
55 | wmb(); | ||
56 | pc->size = mincount; | ||
57 | wmb(); | ||
58 | if (reload) { | ||
59 | #ifdef CONFIG_SMP | ||
60 | cpumask_t mask; | ||
61 | |||
62 | preempt_disable(); | ||
63 | mask = cpumask_of_cpu(smp_processor_id()); | ||
64 | load_LDT(pc); | ||
65 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | ||
66 | smp_call_function(flush_ldt, NULL, 1, 1); | ||
67 | preempt_enable(); | ||
68 | #else | ||
69 | load_LDT(pc); | ||
70 | #endif | ||
71 | } | ||
72 | if (oldsize) { | ||
73 | if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
74 | vfree(oldldt); | ||
75 | else | ||
76 | kfree(oldldt); | ||
77 | } | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | ||
82 | { | ||
83 | int err = alloc_ldt(new, old->size, 0); | ||
84 | if (err < 0) | ||
85 | return err; | ||
86 | memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * we do not have to muck with descriptors here, that is | ||
92 | * done in switch_mm() as needed. | ||
93 | */ | ||
94 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
95 | { | ||
96 | struct mm_struct * old_mm; | ||
97 | int retval = 0; | ||
98 | |||
99 | mutex_init(&mm->context.lock); | ||
100 | mm->context.size = 0; | ||
101 | old_mm = current->mm; | ||
102 | if (old_mm && old_mm->context.size > 0) { | ||
103 | mutex_lock(&old_mm->context.lock); | ||
104 | retval = copy_ldt(&mm->context, &old_mm->context); | ||
105 | mutex_unlock(&old_mm->context.lock); | ||
106 | } | ||
107 | return retval; | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * | ||
112 | * Don't touch the LDT register - we're already in the next thread. | ||
113 | */ | ||
114 | void destroy_context(struct mm_struct *mm) | ||
115 | { | ||
116 | if (mm->context.size) { | ||
117 | if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
118 | vfree(mm->context.ldt); | ||
119 | else | ||
120 | kfree(mm->context.ldt); | ||
121 | mm->context.size = 0; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | static int read_ldt(void __user * ptr, unsigned long bytecount) | ||
126 | { | ||
127 | int err; | ||
128 | unsigned long size; | ||
129 | struct mm_struct * mm = current->mm; | ||
130 | |||
131 | if (!mm->context.size) | ||
132 | return 0; | ||
133 | if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) | ||
134 | bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | ||
135 | |||
136 | mutex_lock(&mm->context.lock); | ||
137 | size = mm->context.size*LDT_ENTRY_SIZE; | ||
138 | if (size > bytecount) | ||
139 | size = bytecount; | ||
140 | |||
141 | err = 0; | ||
142 | if (copy_to_user(ptr, mm->context.ldt, size)) | ||
143 | err = -EFAULT; | ||
144 | mutex_unlock(&mm->context.lock); | ||
145 | if (err < 0) | ||
146 | goto error_return; | ||
147 | if (size != bytecount) { | ||
148 | /* zero-fill the rest */ | ||
149 | if (clear_user(ptr+size, bytecount-size) != 0) { | ||
150 | err = -EFAULT; | ||
151 | goto error_return; | ||
152 | } | ||
153 | } | ||
154 | return bytecount; | ||
155 | error_return: | ||
156 | return err; | ||
157 | } | ||
158 | |||
159 | static int read_default_ldt(void __user * ptr, unsigned long bytecount) | ||
160 | { | ||
161 | /* Arbitrary number */ | ||
162 | /* x86-64 default LDT is all zeros */ | ||
163 | if (bytecount > 128) | ||
164 | bytecount = 128; | ||
165 | if (clear_user(ptr, bytecount)) | ||
166 | return -EFAULT; | ||
167 | return bytecount; | ||
168 | } | ||
169 | |||
170 | static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | ||
171 | { | ||
172 | struct task_struct *me = current; | ||
173 | struct mm_struct * mm = me->mm; | ||
174 | __u32 entry_1, entry_2, *lp; | ||
175 | int error; | ||
176 | struct user_desc ldt_info; | ||
177 | |||
178 | error = -EINVAL; | ||
179 | |||
180 | if (bytecount != sizeof(ldt_info)) | ||
181 | goto out; | ||
182 | error = -EFAULT; | ||
183 | if (copy_from_user(&ldt_info, ptr, bytecount)) | ||
184 | goto out; | ||
185 | |||
186 | error = -EINVAL; | ||
187 | if (ldt_info.entry_number >= LDT_ENTRIES) | ||
188 | goto out; | ||
189 | if (ldt_info.contents == 3) { | ||
190 | if (oldmode) | ||
191 | goto out; | ||
192 | if (ldt_info.seg_not_present == 0) | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | mutex_lock(&mm->context.lock); | ||
197 | if (ldt_info.entry_number >= (unsigned)mm->context.size) { | ||
198 | error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); | ||
199 | if (error < 0) | ||
200 | goto out_unlock; | ||
201 | } | ||
202 | |||
203 | lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); | ||
204 | |||
205 | /* Allow LDTs to be cleared by the user. */ | ||
206 | if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | ||
207 | if (oldmode || LDT_empty(&ldt_info)) { | ||
208 | entry_1 = 0; | ||
209 | entry_2 = 0; | ||
210 | goto install; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | entry_1 = LDT_entry_a(&ldt_info); | ||
215 | entry_2 = LDT_entry_b(&ldt_info); | ||
216 | if (oldmode) | ||
217 | entry_2 &= ~(1 << 20); | ||
218 | |||
219 | /* Install the new entry ... */ | ||
220 | install: | ||
221 | *lp = entry_1; | ||
222 | *(lp+1) = entry_2; | ||
223 | error = 0; | ||
224 | |||
225 | out_unlock: | ||
226 | mutex_unlock(&mm->context.lock); | ||
227 | out: | ||
228 | return error; | ||
229 | } | ||
230 | |||
231 | asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | ||
232 | { | ||
233 | int ret = -ENOSYS; | ||
234 | |||
235 | switch (func) { | ||
236 | case 0: | ||
237 | ret = read_ldt(ptr, bytecount); | ||
238 | break; | ||
239 | case 1: | ||
240 | ret = write_ldt(ptr, bytecount, 1); | ||
241 | break; | ||
242 | case 2: | ||
243 | ret = read_default_ldt(ptr, bytecount); | ||
244 | break; | ||
245 | case 0x11: | ||
246 | ret = write_ldt(ptr, bytecount, 0); | ||
247 | break; | ||
248 | } | ||
249 | return ret; | ||
250 | } | ||
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 11b935f4f886..c1cfd60639d4 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -32,7 +32,7 @@ static u32 kexec_pte1[1024] PAGE_ALIGNED; | |||
32 | 32 | ||
33 | static void set_idt(void *newidt, __u16 limit) | 33 | static void set_idt(void *newidt, __u16 limit) |
34 | { | 34 | { |
35 | struct Xgt_desc_struct curidt; | 35 | struct desc_ptr curidt; |
36 | 36 | ||
37 | /* ia32 supports unaliged loads & stores */ | 37 | /* ia32 supports unaliged loads & stores */ |
38 | curidt.size = limit; | 38 | curidt.size = limit; |
@@ -44,7 +44,7 @@ static void set_idt(void *newidt, __u16 limit) | |||
44 | 44 | ||
45 | static void set_gdt(void *newgdt, __u16 limit) | 45 | static void set_gdt(void *newgdt, __u16 limit) |
46 | { | 46 | { |
47 | struct Xgt_desc_struct curgdt; | 47 | struct desc_ptr curgdt; |
48 | 48 | ||
49 | /* ia32 supports unaligned loads & stores */ | 49 | /* ia32 supports unaligned loads & stores */ |
50 | curgdt.size = limit; | 50 | curgdt.size = limit; |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index aa3d2c8f7737..a1fef42f8cdb 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
234 | void arch_crash_save_vmcoreinfo(void) | 234 | void arch_crash_save_vmcoreinfo(void) |
235 | { | 235 | { |
236 | VMCOREINFO_SYMBOL(init_level4_pgt); | 236 | VMCOREINFO_SYMBOL(init_level4_pgt); |
237 | |||
238 | #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE | ||
239 | VMCOREINFO_SYMBOL(node_data); | ||
240 | VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); | ||
241 | #endif | ||
242 | } | 237 | } |
243 | 238 | ||
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 0ab680f2d9db..219f86eb6123 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -63,6 +63,21 @@ static int __init mfgpt_disable(char *s) | |||
63 | } | 63 | } |
64 | __setup("nomfgpt", mfgpt_disable); | 64 | __setup("nomfgpt", mfgpt_disable); |
65 | 65 | ||
66 | /* Reset the MFGPT timers. This is required by some broken BIOSes which already | ||
67 | * do the same and leave the system in an unstable state. TinyBIOS 0.98 is | ||
68 | * affected at least (0.99 is OK with MFGPT workaround left to off). | ||
69 | */ | ||
70 | static int __init mfgpt_fix(char *s) | ||
71 | { | ||
72 | u32 val, dummy; | ||
73 | |||
74 | /* The following undocumented bit resets the MFGPT timers */ | ||
75 | val = 0xFF; dummy = 0; | ||
76 | wrmsr(0x5140002B, val, dummy); | ||
77 | return 1; | ||
78 | } | ||
79 | __setup("mfgptfix", mfgpt_fix); | ||
80 | |||
66 | /* | 81 | /* |
67 | * Check whether any MFGPTs are available for the kernel to use. In most | 82 | * Check whether any MFGPTs are available for the kernel to use. In most |
68 | * cases, firmware that uses AMD's VSA code will claim all timers during | 83 | * cases, firmware that uses AMD's VSA code will claim all timers during |
@@ -278,12 +293,12 @@ static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) | |||
278 | 293 | ||
279 | static irqreturn_t mfgpt_tick(int irq, void *dev_id) | 294 | static irqreturn_t mfgpt_tick(int irq, void *dev_id) |
280 | { | 295 | { |
296 | /* Turn off the clock (and clear the event) */ | ||
297 | mfgpt_disable_timer(mfgpt_event_clock); | ||
298 | |||
281 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) | 299 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) |
282 | return IRQ_HANDLED; | 300 | return IRQ_HANDLED; |
283 | 301 | ||
284 | /* Turn off the clock */ | ||
285 | mfgpt_disable_timer(mfgpt_event_clock); | ||
286 | |||
287 | /* Clear the counter */ | 302 | /* Clear the counter */ |
288 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); | 303 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); |
289 | 304 | ||
@@ -319,10 +334,6 @@ static int __init mfgpt_timer_setup(void) | |||
319 | } | 334 | } |
320 | 335 | ||
321 | mfgpt_event_clock = timer; | 336 | mfgpt_event_clock = timer; |
322 | /* Set the clock scale and enable the event mode for CMP2 */ | ||
323 | val = MFGPT_SCALE | (3 << 8); | ||
324 | |||
325 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); | ||
326 | 337 | ||
327 | /* Set up the IRQ on the MFGPT side */ | 338 | /* Set up the IRQ on the MFGPT side */ |
328 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { | 339 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { |
@@ -339,6 +350,11 @@ static int __init mfgpt_timer_setup(void) | |||
339 | goto err; | 350 | goto err; |
340 | } | 351 | } |
341 | 352 | ||
353 | /* Set the clock scale and enable the event mode for CMP2 */ | ||
354 | val = MFGPT_SCALE | (3 << 8); | ||
355 | |||
356 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); | ||
357 | |||
342 | /* Set up the clock event */ | 358 | /* Set up the clock event */ |
343 | mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); | 359 | mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); |
344 | mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, | 360 | mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 09c315214a5e..f2702d01b8a8 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -244,8 +244,8 @@ static int microcode_sanity_check(void *mc) | |||
244 | return 0; | 244 | return 0; |
245 | /* check extended signature checksum */ | 245 | /* check extended signature checksum */ |
246 | for (i = 0; i < ext_sigcount; i++) { | 246 | for (i = 0; i < ext_sigcount; i++) { |
247 | ext_sig = (struct extended_signature *)((void *)ext_header | 247 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + |
248 | + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i); | 248 | EXT_SIGNATURE_SIZE * i; |
249 | sum = orig_sum | 249 | sum = orig_sum |
250 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | 250 | - (mc_header->sig + mc_header->pf + mc_header->cksum) |
251 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | 251 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); |
@@ -279,11 +279,9 @@ static int get_maching_microcode(void *mc, int cpu) | |||
279 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | 279 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) |
280 | return 0; | 280 | return 0; |
281 | 281 | ||
282 | ext_header = (struct extended_sigtable *)(mc + | 282 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; |
283 | get_datasize(mc_header) + MC_HEADER_SIZE); | ||
284 | ext_sigcount = ext_header->count; | 283 | ext_sigcount = ext_header->count; |
285 | ext_sig = (struct extended_signature *)((void *)ext_header | 284 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; |
286 | + EXT_HEADER_SIZE); | ||
287 | for (i = 0; i < ext_sigcount; i++) { | 285 | for (i = 0; i < ext_sigcount; i++) { |
288 | if (microcode_update_match(cpu, mc_header, | 286 | if (microcode_update_match(cpu, mc_header, |
289 | ext_sig->sig, ext_sig->pf)) | 287 | ext_sig->sig, ext_sig->pf)) |
@@ -436,7 +434,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ | |||
436 | return -EINVAL; | 434 | return -EINVAL; |
437 | } | 435 | } |
438 | 436 | ||
439 | lock_cpu_hotplug(); | 437 | get_online_cpus(); |
440 | mutex_lock(µcode_mutex); | 438 | mutex_lock(µcode_mutex); |
441 | 439 | ||
442 | user_buffer = (void __user *) buf; | 440 | user_buffer = (void __user *) buf; |
@@ -447,7 +445,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ | |||
447 | ret = (ssize_t)len; | 445 | ret = (ssize_t)len; |
448 | 446 | ||
449 | mutex_unlock(µcode_mutex); | 447 | mutex_unlock(µcode_mutex); |
450 | unlock_cpu_hotplug(); | 448 | put_online_cpus(); |
451 | 449 | ||
452 | return ret; | 450 | return ret; |
453 | } | 451 | } |
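The get_online_cpus()/put_online_cpus() calls that replace lock_cpu_hotplug()/unlock_cpu_hotplug() throughout this file follow the usual read-side bracket pattern; schematically it looks like the kernel-style sketch below (hypothetical function name, not part of the patch).

#include <linux/cpu.h>

/* Schematic only: bracket any section that must see a stable set of online
 * CPUs; get_online_cpus() is the read-side replacement for lock_cpu_hotplug(). */
static int hypothetical_update_all_cpus(void)
{
	int err = 0;

	get_online_cpus();
	/* ... per-CPU work, e.g. requesting and applying microcode ... */
	put_online_cpus();
	return err;
}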
@@ -539,7 +537,7 @@ static int cpu_request_microcode(int cpu) | |||
539 | pr_debug("ucode data file %s load failed\n", name); | 537 | pr_debug("ucode data file %s load failed\n", name); |
540 | return error; | 538 | return error; |
541 | } | 539 | } |
542 | buf = (void *)firmware->data; | 540 | buf = firmware->data; |
543 | size = firmware->size; | 541 | size = firmware->size; |
544 | while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) | 542 | while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) |
545 | > 0) { | 543 | > 0) { |
@@ -658,14 +656,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | |||
658 | 656 | ||
659 | old = current->cpus_allowed; | 657 | old = current->cpus_allowed; |
660 | 658 | ||
661 | lock_cpu_hotplug(); | 659 | get_online_cpus(); |
662 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | 660 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); |
663 | 661 | ||
664 | mutex_lock(µcode_mutex); | 662 | mutex_lock(µcode_mutex); |
665 | if (uci->valid) | 663 | if (uci->valid) |
666 | err = cpu_request_microcode(cpu); | 664 | err = cpu_request_microcode(cpu); |
667 | mutex_unlock(µcode_mutex); | 665 | mutex_unlock(µcode_mutex); |
668 | unlock_cpu_hotplug(); | 666 | put_online_cpus(); |
669 | set_cpus_allowed(current, old); | 667 | set_cpus_allowed(current, old); |
670 | } | 668 | } |
671 | if (err) | 669 | if (err) |
@@ -799,7 +797,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
799 | return NOTIFY_OK; | 797 | return NOTIFY_OK; |
800 | } | 798 | } |
801 | 799 | ||
802 | static struct notifier_block __cpuinitdata mc_cpu_notifier = { | 800 | static struct notifier_block __refdata mc_cpu_notifier = { |
803 | .notifier_call = mc_cpu_callback, | 801 | .notifier_call = mc_cpu_callback, |
804 | }; | 802 | }; |
805 | 803 | ||
@@ -817,9 +815,9 @@ static int __init microcode_init (void) | |||
817 | return PTR_ERR(microcode_pdev); | 815 | return PTR_ERR(microcode_pdev); |
818 | } | 816 | } |
819 | 817 | ||
820 | lock_cpu_hotplug(); | 818 | get_online_cpus(); |
821 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | 819 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); |
822 | unlock_cpu_hotplug(); | 820 | put_online_cpus(); |
823 | if (error) { | 821 | if (error) { |
824 | microcode_dev_exit(); | 822 | microcode_dev_exit(); |
825 | platform_device_unregister(microcode_pdev); | 823 | platform_device_unregister(microcode_pdev); |
@@ -839,9 +837,9 @@ static void __exit microcode_exit (void) | |||
839 | 837 | ||
840 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 838 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
841 | 839 | ||
842 | lock_cpu_hotplug(); | 840 | get_online_cpus(); |
843 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 841 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); |
844 | unlock_cpu_hotplug(); | 842 | put_online_cpus(); |
845 | 843 | ||
846 | platform_device_unregister(microcode_pdev); | 844 | platform_device_unregister(microcode_pdev); |
847 | } | 845 | } |
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 7a05a7f6099a..67009cdd5eca 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c | |||
@@ -68,7 +68,7 @@ unsigned int def_to_bigsmp = 0; | |||
68 | /* Processor that is doing the boot up */ | 68 | /* Processor that is doing the boot up */ |
69 | unsigned int boot_cpu_physical_apicid = -1U; | 69 | unsigned int boot_cpu_physical_apicid = -1U; |
70 | /* Internal processor count */ | 70 | /* Internal processor count */ |
71 | unsigned int __cpuinitdata num_processors; | 71 | unsigned int num_processors; |
72 | 72 | ||
73 | /* Bitmask of physically existing CPUs */ | 73 | /* Bitmask of physically existing CPUs */ |
74 | physid_mask_t phys_cpu_present_map; | 74 | physid_mask_t phys_cpu_present_map; |
@@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) | |||
258 | if (!(m->mpc_flags & MPC_APIC_USABLE)) | 258 | if (!(m->mpc_flags & MPC_APIC_USABLE)) |
259 | return; | 259 | return; |
260 | 260 | ||
261 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", | 261 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
262 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); | 262 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); |
263 | if (nr_ioapics >= MAX_IO_APICS) { | 263 | if (nr_ioapics >= MAX_IO_APICS) { |
264 | printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", | 264 | printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", |
@@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
405 | 405 | ||
406 | mps_oem_check(mpc, oem, str); | 406 | mps_oem_check(mpc, oem, str); |
407 | 407 | ||
408 | printk("APIC at: 0x%lX\n",mpc->mpc_lapic); | 408 | printk("APIC at: 0x%X\n", mpc->mpc_lapic); |
409 | 409 | ||
410 | /* | 410 | /* |
411 | * Save the local APIC address (it might be non-default) -- but only | 411 | * Save the local APIC address (it might be non-default) -- but only |
412 | * if we're not using ACPI. | 412 | * if we're not using ACPI. |
413 | */ | 413 | */ |
@@ -721,7 +721,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) | |||
721 | unsigned long *bp = phys_to_virt(base); | 721 | unsigned long *bp = phys_to_virt(base); |
722 | struct intel_mp_floating *mpf; | 722 | struct intel_mp_floating *mpf; |
723 | 723 | ||
724 | Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); | 724 | printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length); |
725 | if (sizeof(*mpf) != 16) | 725 | if (sizeof(*mpf) != 16) |
726 | printk("Error: MPF size\n"); | 726 | printk("Error: MPF size\n"); |
727 | 727 | ||
@@ -734,8 +734,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) | |||
734 | || (mpf->mpf_specification == 4)) ) { | 734 | || (mpf->mpf_specification == 4)) ) { |
735 | 735 | ||
736 | smp_found_config = 1; | 736 | smp_found_config = 1; |
737 | printk(KERN_INFO "found SMP MP-table at %08lx\n", | 737 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", |
738 | virt_to_phys(mpf)); | 738 | mpf, virt_to_phys(mpf)); |
739 | reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); | 739 | reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); |
740 | if (mpf->mpf_physptr) { | 740 | if (mpf->mpf_physptr) { |
741 | /* | 741 | /* |
@@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) | |||
918 | */ | 918 | */ |
919 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | 919 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; |
920 | mp_ioapic_routing[idx].gsi_base = gsi_base; | 920 | mp_ioapic_routing[idx].gsi_base = gsi_base; |
921 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 921 | mp_ioapic_routing[idx].gsi_end = gsi_base + |
922 | io_apic_get_redir_entries(idx); | 922 | io_apic_get_redir_entries(idx); |
923 | 923 | ||
924 | printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " | 924 | printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
925 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | 925 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, |
926 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | 926 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, |
927 | mp_ioapic_routing[idx].gsi_base, | 927 | mp_ioapic_routing[idx].gsi_base, |
928 | mp_ioapic_routing[idx].gsi_end); | 928 | mp_ioapic_routing[idx].gsi_end); |
929 | } | 929 | } |
930 | 930 | ||
931 | void __init | 931 | void __init |
@@ -1041,15 +1041,16 @@ void __init mp_config_acpi_legacy_irqs (void) | |||
1041 | } | 1041 | } |
1042 | 1042 | ||
1043 | #define MAX_GSI_NUM 4096 | 1043 | #define MAX_GSI_NUM 4096 |
1044 | #define IRQ_COMPRESSION_START 64 | ||
1044 | 1045 | ||
1045 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | 1046 | int mp_register_gsi(u32 gsi, int triggering, int polarity) |
1046 | { | 1047 | { |
1047 | int ioapic = -1; | 1048 | int ioapic = -1; |
1048 | int ioapic_pin = 0; | 1049 | int ioapic_pin = 0; |
1049 | int idx, bit = 0; | 1050 | int idx, bit = 0; |
1050 | static int pci_irq = 16; | 1051 | static int pci_irq = IRQ_COMPRESSION_START; |
1051 | /* | 1052 | /* |
1052 | * Mapping between Global System Interrups, which | 1053 | * Mapping between Global System Interrupts, which |
1053 | * represent all possible interrupts, and IRQs | 1054 | * represent all possible interrupts, and IRQs |
1054 | * assigned to actual devices. | 1055 | * assigned to actual devices. |
1055 | */ | 1056 | */ |
@@ -1086,12 +1087,16 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
1086 | if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { | 1087 | if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { |
1087 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1088 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", |
1088 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1089 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); |
1089 | return gsi_to_irq[gsi]; | 1090 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); |
1090 | } | 1091 | } |
1091 | 1092 | ||
1092 | mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); | 1093 | mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); |
1093 | 1094 | ||
1094 | if (triggering == ACPI_LEVEL_SENSITIVE) { | 1095 | /* |
1096 | * For GSI >= 64, use IRQ compression | ||
1097 | */ | ||
1098 | if ((gsi >= IRQ_COMPRESSION_START) | ||
1099 | && (triggering == ACPI_LEVEL_SENSITIVE)) { | ||
1095 | /* | 1100 | /* |
1096 | * For PCI devices assign IRQs in order, avoiding gaps | 1101 | * For PCI devices assign IRQs in order, avoiding gaps |
1097 | * due to unused I/O APIC pins. | 1102 | * due to unused I/O APIC pins. |
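A hedged sketch of the "IRQ compression" this hunk introduces: GSIs below IRQ_COMPRESSION_START (and edge-triggered ones) keep an identity mapping, while higher level-triggered GSIs are handed the next free IRQ number in order. The code below is a simplified standalone model — map_gsi is a made-up name and the pin_programmed bookkeeping is omitted.

#include <stdint.h>

#define MAX_GSI_NUM		4096
#define IRQ_COMPRESSION_START	64

static int gsi_to_irq[MAX_GSI_NUM];
static int next_pci_irq = IRQ_COMPRESSION_START;

/* GSIs below the compression threshold (and edge-triggered ones) keep their
 * identity mapping; higher level-triggered GSIs get the next free IRQ. */
static int map_gsi(uint32_t gsi, int level_triggered)
{
	if (gsi >= MAX_GSI_NUM)
		return -1;
	if (gsi < IRQ_COMPRESSION_START || !level_triggered)
		return (int)gsi;
	if (!gsi_to_irq[gsi])
		gsi_to_irq[gsi] = next_pci_irq++;
	return gsi_to_irq[gsi];
}

int main(void)
{
	return (map_gsi(70, 1) == 64 && map_gsi(90, 1) == 65 &&
		map_gsi(70, 1) == 64 && map_gsi(10, 1) == 10) ? 0 : 1;
}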
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index ef4aab123581..72ab1403fed7 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c | |||
@@ -60,14 +60,18 @@ unsigned int boot_cpu_id = -1U; | |||
60 | EXPORT_SYMBOL(boot_cpu_id); | 60 | EXPORT_SYMBOL(boot_cpu_id); |
61 | 61 | ||
62 | /* Internal processor count */ | 62 | /* Internal processor count */ |
63 | unsigned int num_processors __cpuinitdata = 0; | 63 | unsigned int num_processors; |
64 | 64 | ||
65 | unsigned disabled_cpus __cpuinitdata; | 65 | unsigned disabled_cpus __cpuinitdata; |
66 | 66 | ||
67 | /* Bitmask of physically existing CPUs */ | 67 | /* Bitmask of physically existing CPUs */ |
68 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; | 68 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
69 | 69 | ||
70 | u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | 70 | u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata |
71 | = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
72 | void *x86_bios_cpu_apicid_early_ptr; | ||
73 | DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; | ||
74 | EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | ||
71 | 75 | ||
72 | 76 | ||
73 | /* | 77 | /* |
@@ -118,24 +122,22 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
118 | physid_set(m->mpc_apicid, phys_cpu_present_map); | 122 | physid_set(m->mpc_apicid, phys_cpu_present_map); |
119 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 123 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
120 | /* | 124 | /* |
121 | * bios_cpu_apicid is required to have processors listed | 125 | * x86_bios_cpu_apicid is required to have processors listed |
122 | * in same order as logical cpu numbers. Hence the first | 126 | * in same order as logical cpu numbers. Hence the first |
123 | * entry is BSP, and so on. | 127 | * entry is BSP, and so on. |
124 | */ | 128 | */ |
125 | cpu = 0; | 129 | cpu = 0; |
126 | } | 130 | } |
127 | bios_cpu_apicid[cpu] = m->mpc_apicid; | 131 | /* are we being called early in kernel startup? */ |
128 | /* | 132 | if (x86_cpu_to_apicid_early_ptr) { |
129 | * We get called early in the the start_kernel initialization | 133 | u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; |
130 | * process when the per_cpu data area is not yet setup, so we | 134 | u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; |
131 | * use a static array that is removed after the per_cpu data | 135 | |
132 | * area is created. | 136 | cpu_to_apicid[cpu] = m->mpc_apicid; |
133 | */ | 137 | bios_cpu_apicid[cpu] = m->mpc_apicid; |
134 | if (x86_cpu_to_apicid_ptr) { | ||
135 | u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; | ||
136 | x86_cpu_to_apicid[cpu] = m->mpc_apicid; | ||
137 | } else { | 138 | } else { |
138 | per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; | 139 | per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; |
140 | per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid; | ||
139 | } | 141 | } |
140 | 142 | ||
141 | cpu_set(cpu, cpu_possible_map); | 143 | cpu_set(cpu, cpu_possible_map); |
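The early-pointer dance above exists because MP_processor_info() can run before the per-cpu areas are initialized. A minimal sketch of the idiom, assuming the x86_bios_cpu_apicid_early_ptr staging pointer introduced in this hunk; the helper name is illustrative.

	/* Sketch: route the write through the __initdata staging array
	 * while the early pointer is set, otherwise hit the per-cpu copy. */
	static void record_bios_apicid(int cpu, u16 apicid)
	{
		if (x86_bios_cpu_apicid_early_ptr) {
			u16 *early = x86_bios_cpu_apicid_early_ptr;

			early[cpu] = apicid;
		} else {
			per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
		}
	}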
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index ee6eba4ecfea..af51ea8400b2 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* ----------------------------------------------------------------------- * | 1 | /* ----------------------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2000 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -45,9 +45,10 @@ static struct class *msr_class; | |||
45 | 45 | ||
46 | static loff_t msr_seek(struct file *file, loff_t offset, int orig) | 46 | static loff_t msr_seek(struct file *file, loff_t offset, int orig) |
47 | { | 47 | { |
48 | loff_t ret = -EINVAL; | 48 | loff_t ret; |
49 | struct inode *inode = file->f_mapping->host; | ||
49 | 50 | ||
50 | lock_kernel(); | 51 | mutex_lock(&inode->i_mutex); |
51 | switch (orig) { | 52 | switch (orig) { |
52 | case 0: | 53 | case 0: |
53 | file->f_pos = offset; | 54 | file->f_pos = offset; |
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig) | |||
56 | case 1: | 57 | case 1: |
57 | file->f_pos += offset; | 58 | file->f_pos += offset; |
58 | ret = file->f_pos; | 59 | ret = file->f_pos; |
60 | break; | ||
61 | default: | ||
62 | ret = -EINVAL; | ||
59 | } | 63 | } |
60 | unlock_kernel(); | 64 | mutex_unlock(&inode->i_mutex); |
61 | return ret; | 65 | return ret; |
62 | } | 66 | } |
63 | 67 | ||
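For context, the seek position of this character device selects the MSR to access, which is why the seek path needs real locking rather than the BKL. A hedged user-space sketch of how the interface is commonly used; the device path and the choice of MSR (0x10) are illustrative assumptions, not taken from the hunk.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t val;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0)
			return 1;
		/* the file offset is interpreted as the MSR number */
		if (pread(fd, &val, sizeof(val), 0x10) != sizeof(val))
			return 1;
		printf("MSR 0x10 = %llu\n", (unsigned long long)val);
		close(fd);
		return 0;
	}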
@@ -155,20 +159,20 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, | |||
155 | 159 | ||
156 | switch (action) { | 160 | switch (action) { |
157 | case CPU_UP_PREPARE: | 161 | case CPU_UP_PREPARE: |
158 | case CPU_UP_PREPARE_FROZEN: | ||
159 | err = msr_device_create(cpu); | 162 | err = msr_device_create(cpu); |
160 | break; | 163 | break; |
161 | case CPU_UP_CANCELED: | 164 | case CPU_UP_CANCELED: |
162 | case CPU_UP_CANCELED_FROZEN: | ||
163 | case CPU_DEAD: | 165 | case CPU_DEAD: |
164 | case CPU_DEAD_FROZEN: | ||
165 | msr_device_destroy(cpu); | 166 | msr_device_destroy(cpu); |
166 | break; | 167 | break; |
168 | case CPU_UP_CANCELED_FROZEN: | ||
169 | destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu)); | ||
170 | break; | ||
167 | } | 171 | } |
168 | return err ? NOTIFY_BAD : NOTIFY_OK; | 172 | return err ? NOTIFY_BAD : NOTIFY_OK; |
169 | } | 173 | } |
170 | 174 | ||
171 | static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { | 175 | static struct notifier_block __refdata msr_class_cpu_notifier = { |
172 | .notifier_call = msr_class_cpu_callback, | 176 | .notifier_call = msr_class_cpu_callback, |
173 | }; | 177 | }; |
174 | 178 | ||
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 80ca72e5ac29..edd413650b3b 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c | |||
@@ -25,7 +25,6 @@ | |||
25 | 25 | ||
26 | #include <asm/smp.h> | 26 | #include <asm/smp.h> |
27 | #include <asm/nmi.h> | 27 | #include <asm/nmi.h> |
28 | #include <asm/timer.h> | ||
29 | 28 | ||
30 | #include "mach_traps.h" | 29 | #include "mach_traps.h" |
31 | 30 | ||
@@ -52,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); | |||
52 | 51 | ||
53 | static int endflag __initdata = 0; | 52 | static int endflag __initdata = 0; |
54 | 53 | ||
54 | #ifdef CONFIG_SMP | ||
55 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 55 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
56 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 56 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
57 | * CPUs during the test make them busy. | 57 | * CPUs during the test make them busy. |
58 | */ | 58 | */ |
59 | static __init void nmi_cpu_busy(void *data) | 59 | static __init void nmi_cpu_busy(void *data) |
60 | { | 60 | { |
61 | #ifdef CONFIG_SMP | ||
62 | local_irq_enable_in_hardirq(); | 61 | local_irq_enable_in_hardirq(); |
63 | /* Intentionally don't use cpu_relax here. This is | 62 | /* Intentionally don't use cpu_relax here. This is |
64 | to make sure that the performance counter really ticks, | 63 | to make sure that the performance counter really ticks, |
@@ -68,8 +67,8 @@ static __init void nmi_cpu_busy(void *data) | |||
68 | care if they get somewhat less cycles. */ | 67 | care if they get somewhat less cycles. */ |
69 | while (endflag == 0) | 68 | while (endflag == 0) |
70 | mb(); | 69 | mb(); |
71 | #endif | ||
72 | } | 70 | } |
71 | #endif | ||
73 | 72 | ||
74 | static int __init check_nmi_watchdog(void) | 73 | static int __init check_nmi_watchdog(void) |
75 | { | 74 | { |
@@ -84,15 +83,17 @@ static int __init check_nmi_watchdog(void) | |||
84 | 83 | ||
85 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 84 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
86 | if (!prev_nmi_count) | 85 | if (!prev_nmi_count) |
87 | goto error; | 86 | return -1; |
88 | 87 | ||
89 | printk(KERN_INFO "Testing NMI watchdog ... "); | 88 | printk(KERN_INFO "Testing NMI watchdog ... "); |
90 | 89 | ||
90 | #ifdef CONFIG_SMP | ||
91 | if (nmi_watchdog == NMI_LOCAL_APIC) | 91 | if (nmi_watchdog == NMI_LOCAL_APIC) |
92 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); | 92 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); |
93 | #endif | ||
93 | 94 | ||
94 | for_each_possible_cpu(cpu) | 95 | for_each_possible_cpu(cpu) |
95 | prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; | 96 | prev_nmi_count[cpu] = nmi_count(cpu); |
96 | local_irq_enable(); | 97 | local_irq_enable(); |
97 | mdelay((20*1000)/nmi_hz); // wait 20 ticks | 98 | mdelay((20*1000)/nmi_hz); // wait 20 ticks |
98 | 99 | ||
@@ -119,7 +120,7 @@ static int __init check_nmi_watchdog(void) | |||
119 | if (!atomic_read(&nmi_active)) { | 120 | if (!atomic_read(&nmi_active)) { |
120 | kfree(prev_nmi_count); | 121 | kfree(prev_nmi_count); |
121 | atomic_set(&nmi_active, -1); | 122 | atomic_set(&nmi_active, -1); |
122 | goto error; | 123 | return -1; |
123 | } | 124 | } |
124 | printk("OK.\n"); | 125 | printk("OK.\n"); |
125 | 126 | ||
@@ -130,10 +131,6 @@ static int __init check_nmi_watchdog(void) | |||
130 | 131 | ||
131 | kfree(prev_nmi_count); | 132 | kfree(prev_nmi_count); |
132 | return 0; | 133 | return 0; |
133 | error: | ||
134 | timer_ack = !cpu_has_tsc; | ||
135 | |||
136 | return -1; | ||
137 | } | 134 | } |
138 | /* This needs to happen later in boot so counters are working */ | 135 | /* This needs to happen later in boot so counters are working */ |
139 | late_initcall(check_nmi_watchdog); | 136 | late_initcall(check_nmi_watchdog); |
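The surrounding hunks simplify the watchdog self-test's error path; the test itself snapshots each CPU's NMI count, keeps the other CPUs busy so the performance-counter NMIs can fire, waits roughly twenty watchdog periods, and disables the watchdog on any CPU whose count barely moved. A condensed sketch of that flow, with the SMP busy-spin and printk reporting omitted:

	/* Simplified; mirrors check_nmi_watchdog() above, not a drop-in. */
	static int __init nmi_selftest(void)
	{
		int cpu;
		int *prev = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);

		if (!prev)
			return -1;
		for_each_possible_cpu(cpu)
			prev[cpu] = nmi_count(cpu);	/* snapshot */

		local_irq_enable();
		mdelay((20 * 1000) / nmi_hz);		/* ~20 watchdog ticks */

		for_each_online_cpu(cpu)
			if (nmi_count(cpu) - prev[cpu] <= 5)
				per_cpu(wd_enabled, cpu) = 0;	/* stuck */

		kfree(prev);
		return 0;
	}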
@@ -181,7 +178,7 @@ static int lapic_nmi_resume(struct sys_device *dev) | |||
181 | 178 | ||
182 | 179 | ||
183 | static struct sysdev_class nmi_sysclass = { | 180 | static struct sysdev_class nmi_sysclass = { |
184 | set_kset_name("lapic_nmi"), | 181 | .name = "lapic_nmi", |
185 | .resume = lapic_nmi_resume, | 182 | .resume = lapic_nmi_resume, |
186 | .suspend = lapic_nmi_suspend, | 183 | .suspend = lapic_nmi_suspend, |
187 | }; | 184 | }; |
@@ -242,10 +239,10 @@ void acpi_nmi_disable(void) | |||
242 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | 239 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); |
243 | } | 240 | } |
244 | 241 | ||
245 | void setup_apic_nmi_watchdog (void *unused) | 242 | void setup_apic_nmi_watchdog(void *unused) |
246 | { | 243 | { |
247 | if (__get_cpu_var(wd_enabled)) | 244 | if (__get_cpu_var(wd_enabled)) |
248 | return; | 245 | return; |
249 | 246 | ||
250 | /* cheap hack to support suspend/resume */ | 247 | /* cheap hack to support suspend/resume */ |
251 | /* if cpu0 is not active neither should the other cpus */ | 248 | /* if cpu0 is not active neither should the other cpus */ |
@@ -334,7 +331,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
334 | unsigned int sum; | 331 | unsigned int sum; |
335 | int touched = 0; | 332 | int touched = 0; |
336 | int cpu = smp_processor_id(); | 333 | int cpu = smp_processor_id(); |
337 | int rc=0; | 334 | int rc = 0; |
338 | 335 | ||
339 | /* check for other users first */ | 336 | /* check for other users first */ |
340 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 337 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 4253c4e8849c..fb99484d21cf 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c | |||
@@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; | |||
39 | * 0: the lapic NMI watchdog is disabled, but can be enabled | 39 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
40 | */ | 40 | */ |
41 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | 41 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
42 | int panic_on_timeout; | 42 | static int panic_on_timeout; |
43 | 43 | ||
44 | unsigned int nmi_watchdog = NMI_DEFAULT; | 44 | unsigned int nmi_watchdog = NMI_DEFAULT; |
45 | static unsigned int nmi_hz = HZ; | 45 | static unsigned int nmi_hz = HZ; |
@@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *data) | |||
78 | } | 78 | } |
79 | #endif | 79 | #endif |
80 | 80 | ||
81 | int __init check_nmi_watchdog (void) | 81 | int __init check_nmi_watchdog(void) |
82 | { | 82 | { |
83 | int *counts; | 83 | int *prev_nmi_count; |
84 | int cpu; | 84 | int cpu; |
85 | 85 | ||
86 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) | 86 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) |
87 | return 0; | 87 | return 0; |
88 | 88 | ||
89 | if (!atomic_read(&nmi_active)) | 89 | if (!atomic_read(&nmi_active)) |
90 | return 0; | 90 | return 0; |
91 | 91 | ||
92 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 92 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
93 | if (!counts) | 93 | if (!prev_nmi_count) |
94 | return -1; | 94 | return -1; |
95 | 95 | ||
96 | printk(KERN_INFO "testing NMI watchdog ... "); | 96 | printk(KERN_INFO "Testing NMI watchdog ... "); |
97 | 97 | ||
98 | #ifdef CONFIG_SMP | 98 | #ifdef CONFIG_SMP |
99 | if (nmi_watchdog == NMI_LOCAL_APIC) | 99 | if (nmi_watchdog == NMI_LOCAL_APIC) |
@@ -101,30 +101,29 @@ int __init check_nmi_watchdog (void) | |||
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 103 | for (cpu = 0; cpu < NR_CPUS; cpu++) |
104 | counts[cpu] = cpu_pda(cpu)->__nmi_count; | 104 | prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; |
105 | local_irq_enable(); | 105 | local_irq_enable(); |
106 | mdelay((20*1000)/nmi_hz); // wait 20 ticks | 106 | mdelay((20*1000)/nmi_hz); // wait 20 ticks |
107 | 107 | ||
108 | for_each_online_cpu(cpu) { | 108 | for_each_online_cpu(cpu) { |
109 | if (!per_cpu(wd_enabled, cpu)) | 109 | if (!per_cpu(wd_enabled, cpu)) |
110 | continue; | 110 | continue; |
111 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { | 111 | if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { |
112 | printk(KERN_WARNING "WARNING: CPU#%d: NMI " | 112 | printk(KERN_WARNING "WARNING: CPU#%d: NMI " |
113 | "appears to be stuck (%d->%d)!\n", | 113 | "appears to be stuck (%d->%d)!\n", |
114 | cpu, | 114 | cpu, |
115 | counts[cpu], | 115 | prev_nmi_count[cpu], |
116 | cpu_pda(cpu)->__nmi_count); | 116 | cpu_pda(cpu)->__nmi_count); |
117 | per_cpu(wd_enabled, cpu) = 0; | 117 | per_cpu(wd_enabled, cpu) = 0; |
118 | atomic_dec(&nmi_active); | 118 | atomic_dec(&nmi_active); |
119 | } | 119 | } |
120 | } | 120 | } |
121 | endflag = 1; | ||
121 | if (!atomic_read(&nmi_active)) { | 122 | if (!atomic_read(&nmi_active)) { |
122 | kfree(counts); | 123 | kfree(prev_nmi_count); |
123 | atomic_set(&nmi_active, -1); | 124 | atomic_set(&nmi_active, -1); |
124 | endflag = 1; | ||
125 | return -1; | 125 | return -1; |
126 | } | 126 | } |
127 | endflag = 1; | ||
128 | printk("OK.\n"); | 127 | printk("OK.\n"); |
129 | 128 | ||
130 | /* now that we know it works we can reduce NMI frequency to | 129 | /* now that we know it works we can reduce NMI frequency to |
@@ -132,11 +131,11 @@ int __init check_nmi_watchdog (void) | |||
132 | if (nmi_watchdog == NMI_LOCAL_APIC) | 131 | if (nmi_watchdog == NMI_LOCAL_APIC) |
133 | nmi_hz = lapic_adjust_nmi_hz(1); | 132 | nmi_hz = lapic_adjust_nmi_hz(1); |
134 | 133 | ||
135 | kfree(counts); | 134 | kfree(prev_nmi_count); |
136 | return 0; | 135 | return 0; |
137 | } | 136 | } |
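One subtle point in the hunk above: nmi_cpu_busy() keeps the other CPUs spinning on endflag, so moving "endflag = 1" ahead of the failure check guarantees those CPUs are released on every exit path, not only on success. A minimal sketch of that handshake, with the counting logic elided:

	static int endflag __initdata;

	static __init void nmi_cpu_busy(void *data)
	{
		local_irq_enable_in_hardirq();
		/* spin until the boot CPU finishes the test */
		while (endflag == 0)
			mb();
	}

	static int __init run_watchdog_test(void)
	{
		smp_call_function(nmi_cpu_busy, NULL, 0, 0);	/* no wait */

		/* ... sample NMI counts, mdelay(), compare ... */

		endflag = 1;	/* must happen before any return path */
		return 0;
	}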
138 | 137 | ||
139 | int __init setup_nmi_watchdog(char *str) | 138 | static int __init setup_nmi_watchdog(char *str) |
140 | { | 139 | { |
141 | int nmi; | 140 | int nmi; |
142 | 141 | ||
@@ -159,34 +158,6 @@ int __init setup_nmi_watchdog(char *str) | |||
159 | 158 | ||
160 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 159 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
161 | 160 | ||
162 | |||
163 | static void __acpi_nmi_disable(void *__unused) | ||
164 | { | ||
165 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * Disable timer based NMIs on all CPUs: | ||
170 | */ | ||
171 | void acpi_nmi_disable(void) | ||
172 | { | ||
173 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
174 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
175 | } | ||
176 | |||
177 | static void __acpi_nmi_enable(void *__unused) | ||
178 | { | ||
179 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Enable timer based NMIs on all CPUs: | ||
184 | */ | ||
185 | void acpi_nmi_enable(void) | ||
186 | { | ||
187 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
188 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
189 | } | ||
190 | #ifdef CONFIG_PM | 161 | #ifdef CONFIG_PM |
191 | 162 | ||
192 | static int nmi_pm_active; /* nmi_active before suspend */ | 163 | static int nmi_pm_active; /* nmi_active before suspend */ |
@@ -211,13 +182,13 @@ static int lapic_nmi_resume(struct sys_device *dev) | |||
211 | } | 182 | } |
212 | 183 | ||
213 | static struct sysdev_class nmi_sysclass = { | 184 | static struct sysdev_class nmi_sysclass = { |
214 | set_kset_name("lapic_nmi"), | 185 | .name = "lapic_nmi", |
215 | .resume = lapic_nmi_resume, | 186 | .resume = lapic_nmi_resume, |
216 | .suspend = lapic_nmi_suspend, | 187 | .suspend = lapic_nmi_suspend, |
217 | }; | 188 | }; |
218 | 189 | ||
219 | static struct sys_device device_lapic_nmi = { | 190 | static struct sys_device device_lapic_nmi = { |
220 | .id = 0, | 191 | .id = 0, |
221 | .cls = &nmi_sysclass, | 192 | .cls = &nmi_sysclass, |
222 | }; | 193 | }; |
223 | 194 | ||
@@ -231,7 +202,7 @@ static int __init init_lapic_nmi_sysfs(void) | |||
231 | if (nmi_watchdog != NMI_LOCAL_APIC) | 202 | if (nmi_watchdog != NMI_LOCAL_APIC) |
232 | return 0; | 203 | return 0; |
233 | 204 | ||
234 | if ( atomic_read(&nmi_active) < 0 ) | 205 | if (atomic_read(&nmi_active) < 0) |
235 | return 0; | 206 | return 0; |
236 | 207 | ||
237 | error = sysdev_class_register(&nmi_sysclass); | 208 | error = sysdev_class_register(&nmi_sysclass); |
@@ -244,9 +215,37 @@ late_initcall(init_lapic_nmi_sysfs); | |||
244 | 215 | ||
245 | #endif /* CONFIG_PM */ | 216 | #endif /* CONFIG_PM */ |
246 | 217 | ||
218 | static void __acpi_nmi_enable(void *__unused) | ||
219 | { | ||
220 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Enable timer based NMIs on all CPUs: | ||
225 | */ | ||
226 | void acpi_nmi_enable(void) | ||
227 | { | ||
228 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
229 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
230 | } | ||
231 | |||
232 | static void __acpi_nmi_disable(void *__unused) | ||
233 | { | ||
234 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Disable timer based NMIs on all CPUs: | ||
239 | */ | ||
240 | void acpi_nmi_disable(void) | ||
241 | { | ||
242 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
243 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
244 | } | ||
245 | |||
247 | void setup_apic_nmi_watchdog(void *unused) | 246 | void setup_apic_nmi_watchdog(void *unused) |
248 | { | 247 | { |
249 | if (__get_cpu_var(wd_enabled) == 1) | 248 | if (__get_cpu_var(wd_enabled)) |
250 | return; | 249 | return; |
251 | 250 | ||
252 | /* cheap hack to support suspend/resume */ | 251 | /* cheap hack to support suspend/resume */ |
@@ -311,8 +310,9 @@ void touch_nmi_watchdog(void) | |||
311 | } | 310 | } |
312 | } | 311 | } |
313 | 312 | ||
314 | touch_softlockup_watchdog(); | 313 | touch_softlockup_watchdog(); |
315 | } | 314 | } |
315 | EXPORT_SYMBOL(touch_nmi_watchdog); | ||
316 | 316 | ||
317 | int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | 317 | int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
318 | { | 318 | { |
@@ -479,4 +479,3 @@ void __trigger_all_cpu_backtrace(void) | |||
479 | 479 | ||
480 | EXPORT_SYMBOL(nmi_active); | 480 | EXPORT_SYMBOL(nmi_active); |
481 | EXPORT_SYMBOL(nmi_watchdog); | 481 | EXPORT_SYMBOL(nmi_watchdog); |
482 | EXPORT_SYMBOL(touch_nmi_watchdog); | ||
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 9000d82c6dc0..e65281b1634b 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -82,7 +82,7 @@ static int __init numaq_tsc_disable(void) | |||
82 | { | 82 | { |
83 | if (num_online_nodes() > 1) { | 83 | if (num_online_nodes() > 1) { |
84 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | 84 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); |
85 | tsc_disable = 1; | 85 | setup_clear_cpu_cap(X86_FEATURE_TSC); |
86 | } | 86 | } |
87 | return 0; | 87 | return 0; |
88 | } | 88 | } |
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt.c index f5000799f8ef..075962cc75ab 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -14,7 +14,10 @@ | |||
14 | You should have received a copy of the GNU General Public License | 14 | You should have received a copy of the GNU General Public License |
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | |||
18 | 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc | ||
17 | */ | 19 | */ |
20 | |||
18 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
19 | #include <linux/module.h> | 22 | #include <linux/module.h> |
20 | #include <linux/efi.h> | 23 | #include <linux/efi.h> |
@@ -55,59 +58,9 @@ char *memory_setup(void) | |||
55 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 58 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
56 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 59 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
57 | 60 | ||
58 | DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); | ||
59 | DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); | ||
60 | DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); | ||
61 | DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); | ||
62 | DEF_NATIVE(pv_cpu_ops, iret, "iret"); | ||
63 | DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); | ||
64 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); | ||
65 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); | ||
66 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | ||
67 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | ||
68 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); | ||
69 | |||
70 | /* Undefined instruction for dealing with missing ops pointers. */ | 61 | /* Undefined instruction for dealing with missing ops pointers. */ |
71 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | 62 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; |
72 | 63 | ||
73 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
74 | unsigned long addr, unsigned len) | ||
75 | { | ||
76 | const unsigned char *start, *end; | ||
77 | unsigned ret; | ||
78 | |||
79 | switch(type) { | ||
80 | #define SITE(ops, x) \ | ||
81 | case PARAVIRT_PATCH(ops.x): \ | ||
82 | start = start_##ops##_##x; \ | ||
83 | end = end_##ops##_##x; \ | ||
84 | goto patch_site | ||
85 | |||
86 | SITE(pv_irq_ops, irq_disable); | ||
87 | SITE(pv_irq_ops, irq_enable); | ||
88 | SITE(pv_irq_ops, restore_fl); | ||
89 | SITE(pv_irq_ops, save_fl); | ||
90 | SITE(pv_cpu_ops, iret); | ||
91 | SITE(pv_cpu_ops, irq_enable_sysexit); | ||
92 | SITE(pv_mmu_ops, read_cr2); | ||
93 | SITE(pv_mmu_ops, read_cr3); | ||
94 | SITE(pv_mmu_ops, write_cr3); | ||
95 | SITE(pv_cpu_ops, clts); | ||
96 | SITE(pv_cpu_ops, read_tsc); | ||
97 | #undef SITE | ||
98 | |||
99 | patch_site: | ||
100 | ret = paravirt_patch_insns(ibuf, len, start, end); | ||
101 | break; | ||
102 | |||
103 | default: | ||
104 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | unsigned paravirt_patch_nop(void) | 64 | unsigned paravirt_patch_nop(void) |
112 | { | 65 | { |
113 | return 0; | 66 | return 0; |
@@ -186,7 +139,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | |||
186 | /* If the operation is a nop, then nop the callsite */ | 139 | /* If the operation is a nop, then nop the callsite */ |
187 | ret = paravirt_patch_nop(); | 140 | ret = paravirt_patch_nop(); |
188 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || | 141 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
189 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) | 142 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) |
190 | /* If operation requires a jmp, then jmp */ | 143 | /* If operation requires a jmp, then jmp */ |
191 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); | 144 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
192 | else | 145 | else |
@@ -237,7 +190,7 @@ static void native_flush_tlb_single(unsigned long addr) | |||
237 | 190 | ||
238 | /* These are in entry.S */ | 191 | /* These are in entry.S */ |
239 | extern void native_iret(void); | 192 | extern void native_iret(void); |
240 | extern void native_irq_enable_sysexit(void); | 193 | extern void native_irq_enable_syscall_ret(void); |
241 | 194 | ||
242 | static int __init print_banner(void) | 195 | static int __init print_banner(void) |
243 | { | 196 | { |
@@ -285,18 +238,18 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA | |||
285 | 238 | ||
286 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 239 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
287 | { | 240 | { |
288 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 241 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
289 | BUG_ON(preemptible()); | 242 | BUG_ON(preemptible()); |
290 | 243 | ||
291 | x86_write_percpu(paravirt_lazy_mode, mode); | 244 | __get_cpu_var(paravirt_lazy_mode) = mode; |
292 | } | 245 | } |
293 | 246 | ||
294 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | 247 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) |
295 | { | 248 | { |
296 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); | 249 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); |
297 | BUG_ON(preemptible()); | 250 | BUG_ON(preemptible()); |
298 | 251 | ||
299 | x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); | 252 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
300 | } | 253 | } |
301 | 254 | ||
302 | void paravirt_enter_lazy_mmu(void) | 255 | void paravirt_enter_lazy_mmu(void) |
@@ -321,7 +274,7 @@ void paravirt_leave_lazy_cpu(void) | |||
321 | 274 | ||
322 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | 275 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) |
323 | { | 276 | { |
324 | return x86_read_percpu(paravirt_lazy_mode); | 277 | return __get_cpu_var(paravirt_lazy_mode); |
325 | } | 278 | } |
326 | 279 | ||
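The per-cpu accessor change above (x86_read_percpu/x86_write_percpu to __get_cpu_var) keeps the lazy-mode bookkeeping portable to x86_64 now that the file is shared. A minimal sketch of how that state is typically bracketed, assuming only the helpers visible in this file:

	/* Illustrative bracket around a batch of MMU updates. */
	static void lazy_mmu_example(void)
	{
		paravirt_enter_lazy_mmu();	/* mode becomes PARAVIRT_LAZY_MMU */

		/* ... page-table updates the backend may queue ... */

		BUG_ON(paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU);
		paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
	}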
327 | struct pv_info pv_info = { | 280 | struct pv_info pv_info = { |
@@ -366,11 +319,16 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
366 | .read_cr4 = native_read_cr4, | 319 | .read_cr4 = native_read_cr4, |
367 | .read_cr4_safe = native_read_cr4_safe, | 320 | .read_cr4_safe = native_read_cr4_safe, |
368 | .write_cr4 = native_write_cr4, | 321 | .write_cr4 = native_write_cr4, |
322 | #ifdef CONFIG_X86_64 | ||
323 | .read_cr8 = native_read_cr8, | ||
324 | .write_cr8 = native_write_cr8, | ||
325 | #endif | ||
369 | .wbinvd = native_wbinvd, | 326 | .wbinvd = native_wbinvd, |
370 | .read_msr = native_read_msr_safe, | 327 | .read_msr = native_read_msr_safe, |
371 | .write_msr = native_write_msr_safe, | 328 | .write_msr = native_write_msr_safe, |
372 | .read_tsc = native_read_tsc, | 329 | .read_tsc = native_read_tsc, |
373 | .read_pmc = native_read_pmc, | 330 | .read_pmc = native_read_pmc, |
331 | .read_tscp = native_read_tscp, | ||
374 | .load_tr_desc = native_load_tr_desc, | 332 | .load_tr_desc = native_load_tr_desc, |
375 | .set_ldt = native_set_ldt, | 333 | .set_ldt = native_set_ldt, |
376 | .load_gdt = native_load_gdt, | 334 | .load_gdt = native_load_gdt, |
@@ -379,13 +337,14 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
379 | .store_idt = native_store_idt, | 337 | .store_idt = native_store_idt, |
380 | .store_tr = native_store_tr, | 338 | .store_tr = native_store_tr, |
381 | .load_tls = native_load_tls, | 339 | .load_tls = native_load_tls, |
382 | .write_ldt_entry = write_dt_entry, | 340 | .write_ldt_entry = native_write_ldt_entry, |
383 | .write_gdt_entry = write_dt_entry, | 341 | .write_gdt_entry = native_write_gdt_entry, |
384 | .write_idt_entry = write_dt_entry, | 342 | .write_idt_entry = native_write_idt_entry, |
385 | .load_esp0 = native_load_esp0, | 343 | .load_sp0 = native_load_sp0, |
386 | 344 | ||
387 | .irq_enable_sysexit = native_irq_enable_sysexit, | 345 | .irq_enable_syscall_ret = native_irq_enable_syscall_ret, |
388 | .iret = native_iret, | 346 | .iret = native_iret, |
347 | .swapgs = native_swapgs, | ||
389 | 348 | ||
390 | .set_iopl_mask = native_set_iopl_mask, | 349 | .set_iopl_mask = native_set_iopl_mask, |
391 | .io_delay = native_io_delay, | 350 | .io_delay = native_io_delay, |
@@ -408,8 +367,10 @@ struct pv_apic_ops pv_apic_ops = { | |||
408 | }; | 367 | }; |
409 | 368 | ||
410 | struct pv_mmu_ops pv_mmu_ops = { | 369 | struct pv_mmu_ops pv_mmu_ops = { |
370 | #ifndef CONFIG_X86_64 | ||
411 | .pagetable_setup_start = native_pagetable_setup_start, | 371 | .pagetable_setup_start = native_pagetable_setup_start, |
412 | .pagetable_setup_done = native_pagetable_setup_done, | 372 | .pagetable_setup_done = native_pagetable_setup_done, |
373 | #endif | ||
413 | 374 | ||
414 | .read_cr2 = native_read_cr2, | 375 | .read_cr2 = native_read_cr2, |
415 | .write_cr2 = native_write_cr2, | 376 | .write_cr2 = native_write_cr2, |
@@ -437,16 +398,23 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
437 | .kmap_atomic_pte = kmap_atomic, | 398 | .kmap_atomic_pte = kmap_atomic, |
438 | #endif | 399 | #endif |
439 | 400 | ||
401 | #if PAGETABLE_LEVELS >= 3 | ||
440 | #ifdef CONFIG_X86_PAE | 402 | #ifdef CONFIG_X86_PAE |
441 | .set_pte_atomic = native_set_pte_atomic, | 403 | .set_pte_atomic = native_set_pte_atomic, |
442 | .set_pte_present = native_set_pte_present, | 404 | .set_pte_present = native_set_pte_present, |
443 | .set_pud = native_set_pud, | ||
444 | .pte_clear = native_pte_clear, | 405 | .pte_clear = native_pte_clear, |
445 | .pmd_clear = native_pmd_clear, | 406 | .pmd_clear = native_pmd_clear, |
446 | 407 | #endif | |
408 | .set_pud = native_set_pud, | ||
447 | .pmd_val = native_pmd_val, | 409 | .pmd_val = native_pmd_val, |
448 | .make_pmd = native_make_pmd, | 410 | .make_pmd = native_make_pmd, |
411 | |||
412 | #if PAGETABLE_LEVELS == 4 | ||
413 | .pud_val = native_pud_val, | ||
414 | .make_pud = native_make_pud, | ||
415 | .set_pgd = native_set_pgd, | ||
449 | #endif | 416 | #endif |
417 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
450 | 418 | ||
451 | .pte_val = native_pte_val, | 419 | .pte_val = native_pte_val, |
452 | .pgd_val = native_pgd_val, | 420 | .pgd_val = native_pgd_val, |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c new file mode 100644 index 000000000000..82fc5fcab4f4 --- /dev/null +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
@@ -0,0 +1,49 @@ | |||
1 | #include <asm/paravirt.h> | ||
2 | |||
3 | DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); | ||
4 | DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); | ||
5 | DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); | ||
6 | DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); | ||
7 | DEF_NATIVE(pv_cpu_ops, iret, "iret"); | ||
8 | DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); | ||
9 | DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); | ||
10 | DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); | ||
11 | DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | ||
12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | ||
13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); | ||
14 | |||
15 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
16 | unsigned long addr, unsigned len) | ||
17 | { | ||
18 | const unsigned char *start, *end; | ||
19 | unsigned ret; | ||
20 | |||
21 | #define PATCH_SITE(ops, x) \ | ||
22 | case PARAVIRT_PATCH(ops.x): \ | ||
23 | start = start_##ops##_##x; \ | ||
24 | end = end_##ops##_##x; \ | ||
25 | goto patch_site | ||
26 | switch(type) { | ||
27 | PATCH_SITE(pv_irq_ops, irq_disable); | ||
28 | PATCH_SITE(pv_irq_ops, irq_enable); | ||
29 | PATCH_SITE(pv_irq_ops, restore_fl); | ||
30 | PATCH_SITE(pv_irq_ops, save_fl); | ||
31 | PATCH_SITE(pv_cpu_ops, iret); | ||
32 | PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); | ||
33 | PATCH_SITE(pv_mmu_ops, read_cr2); | ||
34 | PATCH_SITE(pv_mmu_ops, read_cr3); | ||
35 | PATCH_SITE(pv_mmu_ops, write_cr3); | ||
36 | PATCH_SITE(pv_cpu_ops, clts); | ||
37 | PATCH_SITE(pv_cpu_ops, read_tsc); | ||
38 | |||
39 | patch_site: | ||
40 | ret = paravirt_patch_insns(ibuf, len, start, end); | ||
41 | break; | ||
42 | |||
43 | default: | ||
44 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); | ||
45 | break; | ||
46 | } | ||
47 | #undef PATCH_SITE | ||
48 | return ret; | ||
49 | } | ||
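The PATCH_SITE cases above rely on the start_/end_ label pairs emitted by DEF_NATIVE. Roughly what one instance expands to, based on the macro visible earlier in paravirt.c (a sketch, not the literal preprocessor output):

	/* DEF_NATIVE(pv_irq_ops, irq_disable, "cli") becomes approximately: */
	extern const char start_pv_irq_ops_irq_disable[],
			  end_pv_irq_ops_irq_disable[];
	asm("start_pv_irq_ops_irq_disable: cli; end_pv_irq_ops_irq_disable:");

	/*
	 * native_patch() then copies the bytes between the two labels over
	 * the paravirt call site via paravirt_patch_insns() whenever they
	 * fit, so the indirect call collapses to the bare native instruction.
	 */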
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c new file mode 100644 index 000000000000..7d904e138d7e --- /dev/null +++ b/arch/x86/kernel/paravirt_patch_64.c | |||
@@ -0,0 +1,57 @@ | |||
1 | #include <asm/paravirt.h> | ||
2 | #include <asm/asm-offsets.h> | ||
3 | #include <linux/stringify.h> | ||
4 | |||
5 | DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); | ||
6 | DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); | ||
7 | DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); | ||
8 | DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); | ||
9 | DEF_NATIVE(pv_cpu_ops, iret, "iretq"); | ||
10 | DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); | ||
11 | DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); | ||
12 | DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); | ||
13 | DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); | ||
14 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | ||
15 | DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); | ||
16 | |||
17 | /* the three commands give us more control to how to return from a syscall */ | ||
18 | DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;"); | ||
19 | DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); | ||
20 | |||
21 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
22 | unsigned long addr, unsigned len) | ||
23 | { | ||
24 | const unsigned char *start, *end; | ||
25 | unsigned ret; | ||
26 | |||
27 | #define PATCH_SITE(ops, x) \ | ||
28 | case PARAVIRT_PATCH(ops.x): \ | ||
29 | start = start_##ops##_##x; \ | ||
30 | end = end_##ops##_##x; \ | ||
31 | goto patch_site | ||
32 | switch(type) { | ||
33 | PATCH_SITE(pv_irq_ops, restore_fl); | ||
34 | PATCH_SITE(pv_irq_ops, save_fl); | ||
35 | PATCH_SITE(pv_irq_ops, irq_enable); | ||
36 | PATCH_SITE(pv_irq_ops, irq_disable); | ||
37 | PATCH_SITE(pv_cpu_ops, iret); | ||
38 | PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); | ||
39 | PATCH_SITE(pv_cpu_ops, swapgs); | ||
40 | PATCH_SITE(pv_mmu_ops, read_cr2); | ||
41 | PATCH_SITE(pv_mmu_ops, read_cr3); | ||
42 | PATCH_SITE(pv_mmu_ops, write_cr3); | ||
43 | PATCH_SITE(pv_cpu_ops, clts); | ||
44 | PATCH_SITE(pv_mmu_ops, flush_tlb_single); | ||
45 | PATCH_SITE(pv_cpu_ops, wbinvd); | ||
46 | |||
47 | patch_site: | ||
48 | ret = paravirt_patch_insns(ibuf, len, start, end); | ||
49 | break; | ||
50 | |||
51 | default: | ||
52 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); | ||
53 | break; | ||
54 | } | ||
55 | #undef PATCH_SITE | ||
56 | return ret; | ||
57 | } | ||
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6bf1f716909d..1b5464c2434f 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -30,12 +30,12 @@ | |||
30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/dma-mapping.h> | 32 | #include <linux/dma-mapping.h> |
33 | #include <linux/init.h> | ||
34 | #include <linux/bitops.h> | 33 | #include <linux/bitops.h> |
35 | #include <linux/pci_ids.h> | 34 | #include <linux/pci_ids.h> |
36 | #include <linux/pci.h> | 35 | #include <linux/pci.h> |
37 | #include <linux/delay.h> | 36 | #include <linux/delay.h> |
38 | #include <linux/scatterlist.h> | 37 | #include <linux/scatterlist.h> |
38 | #include <linux/iommu-helper.h> | ||
39 | #include <asm/gart.h> | 39 | #include <asm/gart.h> |
40 | #include <asm/calgary.h> | 40 | #include <asm/calgary.h> |
41 | #include <asm/tce.h> | 41 | #include <asm/tce.h> |
@@ -183,7 +183,7 @@ static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; | |||
183 | 183 | ||
184 | /* enable this to stress test the chip's TCE cache */ | 184 | /* enable this to stress test the chip's TCE cache */ |
185 | #ifdef CONFIG_IOMMU_DEBUG | 185 | #ifdef CONFIG_IOMMU_DEBUG |
186 | int debugging __read_mostly = 1; | 186 | static int debugging = 1; |
187 | 187 | ||
188 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | 188 | static inline unsigned long verify_bit_range(unsigned long* bitmap, |
189 | int expected, unsigned long start, unsigned long end) | 189 | int expected, unsigned long start, unsigned long end) |
@@ -202,7 +202,7 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap, | |||
202 | return ~0UL; | 202 | return ~0UL; |
203 | } | 203 | } |
204 | #else /* debugging is disabled */ | 204 | #else /* debugging is disabled */ |
205 | int debugging __read_mostly = 0; | 205 | static int debugging; |
206 | 206 | ||
207 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | 207 | static inline unsigned long verify_bit_range(unsigned long* bitmap, |
208 | int expected, unsigned long start, unsigned long end) | 208 | int expected, unsigned long start, unsigned long end) |
@@ -261,22 +261,28 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
261 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 261 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
262 | } | 262 | } |
263 | 263 | ||
264 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, | 264 | static unsigned long iommu_range_alloc(struct device *dev, |
265 | unsigned int npages) | 265 | struct iommu_table *tbl, |
266 | unsigned int npages) | ||
266 | { | 267 | { |
267 | unsigned long flags; | 268 | unsigned long flags; |
268 | unsigned long offset; | 269 | unsigned long offset; |
270 | unsigned long boundary_size; | ||
271 | |||
272 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | ||
273 | PAGE_SIZE) >> PAGE_SHIFT; | ||
269 | 274 | ||
270 | BUG_ON(npages == 0); | 275 | BUG_ON(npages == 0); |
271 | 276 | ||
272 | spin_lock_irqsave(&tbl->it_lock, flags); | 277 | spin_lock_irqsave(&tbl->it_lock, flags); |
273 | 278 | ||
274 | offset = find_next_zero_string(tbl->it_map, tbl->it_hint, | 279 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint, |
275 | tbl->it_size, npages); | 280 | npages, 0, boundary_size, 0); |
276 | if (offset == ~0UL) { | 281 | if (offset == ~0UL) { |
277 | tbl->chip_ops->tce_cache_blast(tbl); | 282 | tbl->chip_ops->tce_cache_blast(tbl); |
278 | offset = find_next_zero_string(tbl->it_map, 0, | 283 | |
279 | tbl->it_size, npages); | 284 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, |
285 | npages, 0, boundary_size, 0); | ||
280 | if (offset == ~0UL) { | 286 | if (offset == ~0UL) { |
281 | printk(KERN_WARNING "Calgary: IOMMU full.\n"); | 287 | printk(KERN_WARNING "Calgary: IOMMU full.\n"); |
282 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 288 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
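A worked example of the boundary_size computation introduced above, assuming the common default DMA segment boundary mask of 0xffffffff and 4KB pages (both assumptions, not values taken from the hunk):

	/*
	 *   boundary_size = ALIGN(0xffffffff + 1, PAGE_SIZE) >> PAGE_SHIFT
	 *                 = 0x100000000 >> 12
	 *                 = 0x100000 pages
	 *
	 * so iommu_area_alloc() will refuse any run of TCE entries that
	 * would straddle a 4GB-aligned boundary for such a device, which
	 * find_next_zero_string() never guaranteed.
	 */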
@@ -287,7 +293,6 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, | |||
287 | } | 293 | } |
288 | } | 294 | } |
289 | 295 | ||
290 | set_bit_string(tbl->it_map, offset, npages); | ||
291 | tbl->it_hint = offset + npages; | 296 | tbl->it_hint = offset + npages; |
292 | BUG_ON(tbl->it_hint > tbl->it_size); | 297 | BUG_ON(tbl->it_hint > tbl->it_size); |
293 | 298 | ||
@@ -296,13 +301,13 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, | |||
296 | return offset; | 301 | return offset; |
297 | } | 302 | } |
298 | 303 | ||
299 | static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr, | 304 | static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, |
300 | unsigned int npages, int direction) | 305 | void *vaddr, unsigned int npages, int direction) |
301 | { | 306 | { |
302 | unsigned long entry; | 307 | unsigned long entry; |
303 | dma_addr_t ret = bad_dma_address; | 308 | dma_addr_t ret = bad_dma_address; |
304 | 309 | ||
305 | entry = iommu_range_alloc(tbl, npages); | 310 | entry = iommu_range_alloc(dev, tbl, npages); |
306 | 311 | ||
307 | if (unlikely(entry == bad_dma_address)) | 312 | if (unlikely(entry == bad_dma_address)) |
308 | goto error; | 313 | goto error; |
@@ -355,7 +360,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
355 | badbit, tbl, dma_addr, entry, npages); | 360 | badbit, tbl, dma_addr, entry, npages); |
356 | } | 361 | } |
357 | 362 | ||
358 | __clear_bit_string(tbl->it_map, entry, npages); | 363 | iommu_area_free(tbl->it_map, entry, npages); |
359 | 364 | ||
360 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 365 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
361 | } | 366 | } |
@@ -439,7 +444,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
439 | vaddr = (unsigned long) sg_virt(s); | 444 | vaddr = (unsigned long) sg_virt(s); |
440 | npages = num_dma_pages(vaddr, s->length); | 445 | npages = num_dma_pages(vaddr, s->length); |
441 | 446 | ||
442 | entry = iommu_range_alloc(tbl, npages); | 447 | entry = iommu_range_alloc(dev, tbl, npages); |
443 | if (entry == bad_dma_address) { | 448 | if (entry == bad_dma_address) { |
444 | /* makes sure unmap knows to stop */ | 449 | /* makes sure unmap knows to stop */ |
445 | s->dma_length = 0; | 450 | s->dma_length = 0; |
@@ -477,7 +482,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, | |||
477 | npages = num_dma_pages(uaddr, size); | 482 | npages = num_dma_pages(uaddr, size); |
478 | 483 | ||
479 | if (translation_enabled(tbl)) | 484 | if (translation_enabled(tbl)) |
480 | dma_handle = iommu_alloc(tbl, vaddr, npages, direction); | 485 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); |
481 | else | 486 | else |
482 | dma_handle = virt_to_bus(vaddr); | 487 | dma_handle = virt_to_bus(vaddr); |
483 | 488 | ||
@@ -517,7 +522,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
517 | 522 | ||
518 | if (translation_enabled(tbl)) { | 523 | if (translation_enabled(tbl)) { |
519 | /* set up tces to cover the allocated range */ | 524 | /* set up tces to cover the allocated range */ |
520 | mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL); | 525 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
521 | if (mapping == bad_dma_address) | 526 | if (mapping == bad_dma_address) |
522 | goto free; | 527 | goto free; |
523 | 528 | ||
@@ -1007,7 +1012,7 @@ static void __init calgary_set_split_completion_timeout(void __iomem *bbar, | |||
1007 | readq(target); /* flush */ | 1012 | readq(target); /* flush */ |
1008 | } | 1013 | } |
1009 | 1014 | ||
1010 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) | 1015 | static void __init calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) |
1011 | { | 1016 | { |
1012 | unsigned char busnum = dev->bus->number; | 1017 | unsigned char busnum = dev->bus->number; |
1013 | void __iomem *bbar = tbl->bbar; | 1018 | void __iomem *bbar = tbl->bbar; |
@@ -1023,7 +1028,7 @@ static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) | |||
1023 | writel(cpu_to_be32(val), target); | 1028 | writel(cpu_to_be32(val), target); |
1024 | } | 1029 | } |
1025 | 1030 | ||
1026 | static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) | 1031 | static void __init calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev) |
1027 | { | 1032 | { |
1028 | unsigned char busnum = dev->bus->number; | 1033 | unsigned char busnum = dev->bus->number; |
1029 | 1034 | ||
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 5552d23d23c2..a82473d192a3 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
14 | 14 | ||
15 | int iommu_merge __read_mostly = 0; | 15 | int iommu_merge __read_mostly = 0; |
16 | EXPORT_SYMBOL(iommu_merge); | ||
17 | 16 | ||
18 | dma_addr_t bad_dma_address __read_mostly; | 17 | dma_addr_t bad_dma_address __read_mostly; |
19 | EXPORT_SYMBOL(bad_dma_address); | 18 | EXPORT_SYMBOL(bad_dma_address); |
@@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask); | |||
230 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 229 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
231 | * documentation. | 230 | * documentation. |
232 | */ | 231 | */ |
233 | __init int iommu_setup(char *p) | 232 | static __init int iommu_setup(char *p) |
234 | { | 233 | { |
235 | iommu_merge = 1; | 234 | iommu_merge = 1; |
236 | 235 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 06bcba536045..65f6acb025c8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -1,12 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * Dynamic DMA mapping support for AMD Hammer. | 2 | * Dynamic DMA mapping support for AMD Hammer. |
3 | * | 3 | * |
4 | * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. | 4 | * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. |
5 | * This allows to use PCI devices that only support 32bit addresses on systems | 5 | * This allows to use PCI devices that only support 32bit addresses on systems |
6 | * with more than 4GB. | 6 | * with more than 4GB. |
7 | * | 7 | * |
8 | * See Documentation/DMA-mapping.txt for the interface specification. | 8 | * See Documentation/DMA-mapping.txt for the interface specification. |
9 | * | 9 | * |
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | 10 | * Copyright 2002 Andi Kleen, SuSE Labs. |
11 | * Subject to the GNU General Public License v2 only. | 11 | * Subject to the GNU General Public License v2 only. |
12 | */ | 12 | */ |
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/bitops.h> | 25 | #include <linux/bitops.h> |
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
28 | #include <linux/iommu-helper.h> | ||
28 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
29 | #include <asm/io.h> | 30 | #include <asm/io.h> |
30 | #include <asm/mtrr.h> | 31 | #include <asm/mtrr.h> |
@@ -37,23 +38,26 @@ | |||
37 | #include <asm/k8.h> | 38 | #include <asm/k8.h> |
38 | 39 | ||
39 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 40 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
40 | static unsigned long iommu_size; /* size of remapping area bytes */ | 41 | static unsigned long iommu_size; /* size of remapping area bytes */ |
41 | static unsigned long iommu_pages; /* .. and in pages */ | 42 | static unsigned long iommu_pages; /* .. and in pages */ |
42 | 43 | ||
43 | static u32 *iommu_gatt_base; /* Remapping table */ | 44 | static u32 *iommu_gatt_base; /* Remapping table */ |
44 | 45 | ||
45 | /* If this is disabled the IOMMU will use an optimized flushing strategy | 46 | /* |
46 | of only flushing when an mapping is reused. With it true the GART is flushed | 47 | * If this is disabled the IOMMU will use an optimized flushing strategy |
47 | for every mapping. Problem is that doing the lazy flush seems to trigger | 48 | * of only flushing when an mapping is reused. With it true the GART is |
48 | bugs with some popular PCI cards, in particular 3ware (but has been also | 49 | * flushed for every mapping. Problem is that doing the lazy flush seems |
49 | also seen with Qlogic at least). */ | 50 | * to trigger bugs with some popular PCI cards, in particular 3ware (but |
51 | * has been also also seen with Qlogic at least). | ||
52 | */ | ||
50 | int iommu_fullflush = 1; | 53 | int iommu_fullflush = 1; |
51 | 54 | ||
52 | /* Allocation bitmap for the remapping area */ | 55 | /* Allocation bitmap for the remapping area: */ |
53 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | 56 | static DEFINE_SPINLOCK(iommu_bitmap_lock); |
54 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ | 57 | /* Guarded by iommu_bitmap_lock: */ |
58 | static unsigned long *iommu_gart_bitmap; | ||
55 | 59 | ||
56 | static u32 gart_unmapped_entry; | 60 | static u32 gart_unmapped_entry; |
57 | 61 | ||
58 | #define GPTE_VALID 1 | 62 | #define GPTE_VALID 1 |
59 | #define GPTE_COHERENT 2 | 63 | #define GPTE_COHERENT 2 |
@@ -61,10 +65,10 @@ static u32 gart_unmapped_entry; | |||
61 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) | 65 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) |
62 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) | 66 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) |
63 | 67 | ||
64 | #define to_pages(addr,size) \ | 68 | #define to_pages(addr, size) \ |
65 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | 69 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) |
66 | 70 | ||
67 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 71 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
68 | 72 | ||
69 | #ifdef CONFIG_AGP | 73 | #ifdef CONFIG_AGP |
70 | #define AGPEXTERN extern | 74 | #define AGPEXTERN extern |
@@ -77,130 +81,159 @@ AGPEXTERN int agp_memory_reserved; | |||
77 | AGPEXTERN __u32 *agp_gatt_table; | 81 | AGPEXTERN __u32 *agp_gatt_table; |
78 | 82 | ||
79 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 83 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
80 | static int need_flush; /* global flush state. set for each gart wrap */ | 84 | static int need_flush; /* global flush state. set for each gart wrap */ |
81 | 85 | ||
82 | static unsigned long alloc_iommu(int size) | 86 | static unsigned long alloc_iommu(struct device *dev, int size) |
83 | { | 87 | { |
84 | unsigned long offset, flags; | 88 | unsigned long offset, flags; |
89 | unsigned long boundary_size; | ||
90 | unsigned long base_index; | ||
91 | |||
92 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | ||
93 | PAGE_SIZE) >> PAGE_SHIFT; | ||
94 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | ||
95 | PAGE_SIZE) >> PAGE_SHIFT; | ||
85 | 96 | ||
86 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 97 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
87 | offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); | 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, |
99 | size, base_index, boundary_size, 0); | ||
88 | if (offset == -1) { | 100 | if (offset == -1) { |
89 | need_flush = 1; | 101 | need_flush = 1; |
90 | offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); | 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, |
103 | size, base_index, boundary_size, 0); | ||
91 | } | 104 | } |
92 | if (offset != -1) { | 105 | if (offset != -1) { |
93 | set_bit_string(iommu_gart_bitmap, offset, size); | 106 | set_bit_string(iommu_gart_bitmap, offset, size); |
94 | next_bit = offset+size; | 107 | next_bit = offset+size; |
95 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
96 | next_bit = 0; | 109 | next_bit = 0; |
97 | need_flush = 1; | 110 | need_flush = 1; |
98 | } | 111 | } |
99 | } | 112 | } |
100 | if (iommu_fullflush) | 113 | if (iommu_fullflush) |
101 | need_flush = 1; | 114 | need_flush = 1; |
102 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
116 | |||
103 | return offset; | 117 | return offset; |
104 | } | 118 | } |
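Unlike the Calgary case, the GART bitmap indexes pages relative to iommu_bus_base, so the allocator also passes base_index as the shift argument; the boundary check then applies to the real bus address rather than the raw bitmap offset. A short sketch of that relationship, assuming the usual iommu_area_alloc() semantics:

	/*
	 * bitmap slot "offset" maps to the bus address
	 *
	 *	dma_addr = iommu_bus_base + offset * PAGE_SIZE
	 *
	 * so a segment-boundary crossing must be tested on
	 * (base_index + offset) .. (base_index + offset + size - 1),
	 * which is what passing base_index as the shift achieves.
	 */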
105 | 119 | ||
106 | static void free_iommu(unsigned long offset, int size) | 120 | static void free_iommu(unsigned long offset, int size) |
107 | { | 121 | { |
108 | unsigned long flags; | 122 | unsigned long flags; |
123 | |||
109 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 124 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
110 | __clear_bit_string(iommu_gart_bitmap, offset, size); | 125 | iommu_area_free(iommu_gart_bitmap, offset, size); |
111 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 126 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
112 | } | 127 | } |
113 | 128 | ||
114 | /* | 129 | /* |
115 | * Use global flush state to avoid races with multiple flushers. | 130 | * Use global flush state to avoid races with multiple flushers. |
116 | */ | 131 | */ |
117 | static void flush_gart(void) | 132 | static void flush_gart(void) |
118 | { | 133 | { |
119 | unsigned long flags; | 134 | unsigned long flags; |
135 | |||
120 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
121 | if (need_flush) { | 137 | if (need_flush) { |
122 | k8_flush_garts(); | 138 | k8_flush_garts(); |
123 | need_flush = 0; | 139 | need_flush = 0; |
124 | } | 140 | } |
125 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
126 | } | 142 | } |
127 | 143 | ||
128 | #ifdef CONFIG_IOMMU_LEAK | 144 | #ifdef CONFIG_IOMMU_LEAK |
129 | 145 | ||
130 | #define SET_LEAK(x) if (iommu_leak_tab) \ | 146 | #define SET_LEAK(x) \ |
131 | iommu_leak_tab[x] = __builtin_return_address(0); | 147 | do { \ |
132 | #define CLEAR_LEAK(x) if (iommu_leak_tab) \ | 148 | if (iommu_leak_tab) \ |
133 | iommu_leak_tab[x] = NULL; | 149 | iommu_leak_tab[x] = __builtin_return_address(0);\ |
150 | } while (0) | ||
151 | |||
152 | #define CLEAR_LEAK(x) \ | ||
153 | do { \ | ||
154 | if (iommu_leak_tab) \ | ||
155 | iommu_leak_tab[x] = NULL; \ | ||
156 | } while (0) | ||
134 | 157 | ||
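The do { ... } while (0) rewrite of SET_LEAK/CLEAR_LEAK above makes each macro behave as a single statement. A self-contained sketch of the failure mode the old if-based form invites (names are illustrative):

	static void *tab[16];
	static int tracking;

	#define LEAK_IF_FORM(x)	if (tab[0]) tab[x] = (void *)1		/* fragile */
	#define LEAK_DO_FORM(x)	do { if (tab[0]) tab[x] = (void *)1; } while (0)

	static void drop(int idx) { tab[idx] = 0; }

	static void example(int idx)
	{
		if (tracking)
			LEAK_DO_FORM(idx);	/* reads as one statement */
		else
			drop(idx);
		/*
		 * With LEAK_IF_FORM here the else would either pair with the
		 * macro's internal if, or fail to compile once the caller's
		 * semicolon is counted -- which is what do/while (0) avoids.
		 */
	}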
135 | /* Debugging aid for drivers that don't free their IOMMU tables */ | 158 | /* Debugging aid for drivers that don't free their IOMMU tables */ |
136 | static void **iommu_leak_tab; | 159 | static void **iommu_leak_tab; |
137 | static int leak_trace; | 160 | static int leak_trace; |
138 | static int iommu_leak_pages = 20; | 161 | static int iommu_leak_pages = 20; |
162 | |||
139 | static void dump_leak(void) | 163 | static void dump_leak(void) |
140 | { | 164 | { |
141 | int i; | 165 | int i; |
142 | static int dump; | 166 | static int dump; |
143 | if (dump || !iommu_leak_tab) return; | 167 | |
168 | if (dump || !iommu_leak_tab) | ||
169 | return; | ||
144 | dump = 1; | 170 | dump = 1; |
145 | show_stack(NULL,NULL); | 171 | show_stack(NULL, NULL); |
146 | /* Very crude. dump some from the end of the table too */ | 172 | |
147 | printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); | 173 | /* Very crude. dump some from the end of the table too */ |
148 | for (i = 0; i < iommu_leak_pages; i+=2) { | 174 | printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", |
149 | printk("%lu: ", iommu_pages-i); | 175 | iommu_leak_pages); |
150 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]); | 176 | for (i = 0; i < iommu_leak_pages; i += 2) { |
151 | printk("%c", (i+1)%2 == 0 ? '\n' : ' '); | 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); |
152 | } | 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); |
153 | printk("\n"); | 179 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); |
180 | } | ||
181 | printk(KERN_DEBUG "\n"); | ||
154 | } | 182 | } |
155 | #else | 183 | #else |
156 | #define SET_LEAK(x) | 184 | # define SET_LEAK(x) |
157 | #define CLEAR_LEAK(x) | 185 | # define CLEAR_LEAK(x) |
158 | #endif | 186 | #endif |
159 | 187 | ||
160 | static void iommu_full(struct device *dev, size_t size, int dir) | 188 | static void iommu_full(struct device *dev, size_t size, int dir) |
161 | { | 189 | { |
162 | /* | 190 | /* |
163 | * Ran out of IOMMU space for this operation. This is very bad. | 191 | * Ran out of IOMMU space for this operation. This is very bad. |
164 | * Unfortunately the drivers cannot handle this operation properly. | 192 | * Unfortunately the drivers cannot handle this operation properly. |
165 | * Return some non mapped prereserved space in the aperture and | 193 | * Return some non mapped prereserved space in the aperture and |
166 | * let the Northbridge deal with it. This will result in garbage | 194 | * let the Northbridge deal with it. This will result in garbage |
167 | * in the IO operation. When the size exceeds the prereserved space | 195 | * in the IO operation. When the size exceeds the prereserved space |
168 | * memory corruption will occur or random memory will be DMAed | 196 | * memory corruption will occur or random memory will be DMAed |
169 | * out. Hopefully no network devices use single mappings that big. | 197 | * out. Hopefully no network devices use single mappings that big. |
170 | */ | 198 | */ |
171 | 199 | ||
172 | printk(KERN_ERR | 200 | printk(KERN_ERR |
173 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | 201 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", |
174 | size, dev->bus_id); | 202 | size, dev->bus_id); |
175 | 203 | ||
176 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { | 204 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
177 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 205 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
178 | panic("PCI-DMA: Memory would be corrupted\n"); | 206 | panic("PCI-DMA: Memory would be corrupted\n"); |
179 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 207 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
180 | panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); | 208 | panic(KERN_ERR |
181 | } | 209 | "PCI-DMA: Random memory would be DMAed\n"); |
182 | 210 | } | |
183 | #ifdef CONFIG_IOMMU_LEAK | 211 | #ifdef CONFIG_IOMMU_LEAK |
184 | dump_leak(); | 212 | dump_leak(); |
185 | #endif | 213 | #endif |
186 | } | 214 | } |
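
The comment block above spells out what iommu_full() does when the aperture runs dry: small requests are pointed at a pre-reserved, unmapped chunk of the aperture and only produce garbage I/O, while anything larger is fatal. A minimal user-space sketch of that decision, assuming a 4 KB page size and an EMERGENCY_PAGES reserve of 32; the device name string and the direction enum are stand-ins, not the kernel's types.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE        4096UL
#define EMERGENCY_PAGES  32   /* assumed size of the reserved aperture prefix */

enum dma_dir { DMA_TODEVICE, DMA_FROMDEVICE, DMA_BIDIRECTIONAL };

/* Model of the overflow handling: requests that fit in the reserved pages
 * merely corrupt the I/O data; larger ones would corrupt memory or DMA
 * random memory out, so they are treated as fatal. */
static void iommu_full_sketch(const char *dev, size_t size, enum dma_dir dir)
{
    fprintf(stderr, "PCI-DMA: Out of IOMMU space for %zu bytes at device %s\n",
            size, dev);

    if (size > PAGE_SIZE * EMERGENCY_PAGES) {
        if (dir == DMA_FROMDEVICE || dir == DMA_BIDIRECTIONAL)
            abort();          /* memory would be corrupted */
        if (dir == DMA_TODEVICE)
            abort();          /* random memory would be DMAed out */
    }
    /* otherwise: fall back to the prereserved aperture space and carry on */
}

int main(void)
{
    iommu_full_sketch("0000:00:01.0", 8192, DMA_TODEVICE);   /* survivable case */
    return 0;
}
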
187 | 215 | ||
188 | static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) | 216 | static inline int |
189 | { | 217 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
218 | { | ||
190 | u64 mask = *dev->dma_mask; | 219 | u64 mask = *dev->dma_mask; |
191 | int high = addr + size > mask; | 220 | int high = addr + size > mask; |
192 | int mmu = high; | 221 | int mmu = high; |
193 | if (force_iommu) | 222 | |
194 | mmu = 1; | 223 | if (force_iommu) |
195 | return mmu; | 224 | mmu = 1; |
225 | |||
226 | return mmu; | ||
196 | } | 227 | } |
197 | 228 | ||
198 | static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 229 | static inline int |
199 | { | 230 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
231 | { | ||
200 | u64 mask = *dev->dma_mask; | 232 | u64 mask = *dev->dma_mask; |
201 | int high = addr + size > mask; | 233 | int high = addr + size > mask; |
202 | int mmu = high; | 234 | int mmu = high; |
203 | return mmu; | 235 | |
236 | return mmu; | ||
204 | } | 237 | } |
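
need_iommu() and nonforced_iommu() above boil down to one test: does the buffer end above what the device's DMA mask can address? A stand-alone sketch of that check, with a plain 64-bit mask standing in for *dev->dma_mask and force_iommu passed explicitly (it is a file-scope variable in the real source).

#include <stdint.h>
#include <stdio.h>

/* Sketch of need_iommu(): remap through the GART when the buffer ends above
 * what the device can address, or unconditionally when force_iommu is set. */
static int need_iommu_sketch(uint64_t dma_mask, uint64_t addr, uint64_t size,
                             int force_iommu)
{
    int high = addr + size > dma_mask;
    return force_iommu ? 1 : high;
}

int main(void)
{
    uint64_t mask32 = 0xffffffffULL;                  /* 32-bit-only device */
    printf("%d\n", need_iommu_sketch(mask32, 0x100000000ULL, 0x1000, 0)); /* 1 */
    printf("%d\n", need_iommu_sketch(mask32, 0x000100000ULL, 0x1000, 0)); /* 0 */
    return 0;
}
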
205 | 238 | ||
206 | /* Map a single continuous physical area into the IOMMU. | 239 | /* Map a single continuous physical area into the IOMMU. |
@@ -208,13 +241,14 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t | |||
208 | */ | 241 | */ |
209 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 242 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
210 | size_t size, int dir) | 243 | size_t size, int dir) |
211 | { | 244 | { |
212 | unsigned long npages = to_pages(phys_mem, size); | 245 | unsigned long npages = to_pages(phys_mem, size); |
213 | unsigned long iommu_page = alloc_iommu(npages); | 246 | unsigned long iommu_page = alloc_iommu(dev, npages); |
214 | int i; | 247 | int i; |
248 | |||
215 | if (iommu_page == -1) { | 249 | if (iommu_page == -1) { |
216 | if (!nonforced_iommu(dev, phys_mem, size)) | 250 | if (!nonforced_iommu(dev, phys_mem, size)) |
217 | return phys_mem; | 251 | return phys_mem; |
218 | if (panic_on_overflow) | 252 | if (panic_on_overflow) |
219 | panic("dma_map_area overflow %lu bytes\n", size); | 253 | panic("dma_map_area overflow %lu bytes\n", size); |
220 | iommu_full(dev, size, dir); | 254 | iommu_full(dev, size, dir); |
@@ -229,35 +263,39 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
229 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 263 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
230 | } | 264 | } |
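
dma_map_area() above returns iommu_bus_base + iommu_page*PAGE_SIZE plus the sub-page offset of the original buffer. A small sketch of just that arithmetic, with made-up values for the aperture base and for the slot index that alloc_iommu() would return; to_pages_sketch() mirrors the kernel's to_pages() idea of counting the partial first page.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Pages needed to cover [phys, phys+size), counting the partial first page. */
static unsigned long to_pages_sketch(uint64_t phys, size_t size)
{
    return ((phys & ~PAGE_MASK) + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
    uint64_t iommu_bus_base = 0xe0000000ULL;  /* assumed aperture position */
    unsigned long iommu_page = 5;             /* index handed out by alloc_iommu() */
    uint64_t phys_mem = 0x12345678ULL;

    /* Bus address = aperture base + allocated slot + offset within the page */
    uint64_t bus = iommu_bus_base + iommu_page * PAGE_SIZE
                   + (phys_mem & ~PAGE_MASK);

    printf("npages=%lu bus=%#llx\n",
           to_pages_sketch(phys_mem, 0x3000),
           (unsigned long long)bus);
    return 0;
}
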
231 | 265 | ||
232 | static dma_addr_t gart_map_simple(struct device *dev, char *buf, | 266 | static dma_addr_t |
233 | size_t size, int dir) | 267 | gart_map_simple(struct device *dev, char *buf, size_t size, int dir) |
234 | { | 268 | { |
235 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | 269 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); |
270 | |||
236 | flush_gart(); | 271 | flush_gart(); |
272 | |||
237 | return map; | 273 | return map; |
238 | } | 274 | } |
239 | 275 | ||
240 | /* Map a single area into the IOMMU */ | 276 | /* Map a single area into the IOMMU */ |
241 | static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) | 277 | static dma_addr_t |
278 | gart_map_single(struct device *dev, void *addr, size_t size, int dir) | ||
242 | { | 279 | { |
243 | unsigned long phys_mem, bus; | 280 | unsigned long phys_mem, bus; |
244 | 281 | ||
245 | if (!dev) | 282 | if (!dev) |
246 | dev = &fallback_dev; | 283 | dev = &fallback_dev; |
247 | 284 | ||
248 | phys_mem = virt_to_phys(addr); | 285 | phys_mem = virt_to_phys(addr); |
249 | if (!need_iommu(dev, phys_mem, size)) | 286 | if (!need_iommu(dev, phys_mem, size)) |
250 | return phys_mem; | 287 | return phys_mem; |
251 | 288 | ||
252 | bus = gart_map_simple(dev, addr, size, dir); | 289 | bus = gart_map_simple(dev, addr, size, dir); |
253 | return bus; | 290 | |
291 | return bus; | ||
254 | } | 292 | } |
255 | 293 | ||
256 | /* | 294 | /* |
257 | * Free a DMA mapping. | 295 | * Free a DMA mapping. |
258 | */ | 296 | */ |
259 | static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | 297 | static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, |
260 | size_t size, int direction) | 298 | size_t size, int direction) |
261 | { | 299 | { |
262 | unsigned long iommu_page; | 300 | unsigned long iommu_page; |
263 | int npages; | 301 | int npages; |
@@ -266,6 +304,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
266 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | 304 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || |
267 | dma_addr >= iommu_bus_base + iommu_size) | 305 | dma_addr >= iommu_bus_base + iommu_size) |
268 | return; | 306 | return; |
307 | |||
269 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | 308 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; |
270 | npages = to_pages(dma_addr, size); | 309 | npages = to_pages(dma_addr, size); |
271 | for (i = 0; i < npages; i++) { | 310 | for (i = 0; i < npages; i++) { |
@@ -278,7 +317,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
278 | /* | 317 | /* |
279 | * Wrapper for pci_unmap_single working with scatterlists. | 318 | * Wrapper for pci_unmap_single working with scatterlists. |
280 | */ | 319 | */ |
281 | static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | 320 | static void |
321 | gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
282 | { | 322 | { |
283 | struct scatterlist *s; | 323 | struct scatterlist *s; |
284 | int i; | 324 | int i; |
@@ -303,12 +343,13 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
303 | 343 | ||
304 | for_each_sg(sg, s, nents, i) { | 344 | for_each_sg(sg, s, nents, i) { |
305 | unsigned long addr = sg_phys(s); | 345 | unsigned long addr = sg_phys(s); |
306 | if (nonforced_iommu(dev, addr, s->length)) { | 346 | |
347 | if (nonforced_iommu(dev, addr, s->length)) { | ||
307 | addr = dma_map_area(dev, addr, s->length, dir); | 348 | addr = dma_map_area(dev, addr, s->length, dir); |
308 | if (addr == bad_dma_address) { | 349 | if (addr == bad_dma_address) { |
309 | if (i > 0) | 350 | if (i > 0) |
310 | gart_unmap_sg(dev, sg, i, dir); | 351 | gart_unmap_sg(dev, sg, i, dir); |
311 | nents = 0; | 352 | nents = 0; |
312 | sg[0].dma_length = 0; | 353 | sg[0].dma_length = 0; |
313 | break; | 354 | break; |
314 | } | 355 | } |
@@ -317,15 +358,17 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
317 | s->dma_length = s->length; | 358 | s->dma_length = s->length; |
318 | } | 359 | } |
319 | flush_gart(); | 360 | flush_gart(); |
361 | |||
320 | return nents; | 362 | return nents; |
321 | } | 363 | } |
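
dma_map_sg_nonforce() above maps scatterlist entries one by one and, if any mapping fails, unwinds the ones already done and reports zero entries. A simplified user-space sketch of that unwind pattern; it skips the per-entry nonforced_iommu() test and uses a toy mapper whose failure is driven by a budget counter, so none of the names below are the kernel's.

#include <stdio.h>

#define BAD_DMA_ADDRESS 0

/* Toy mapper: pretend the aperture runs dry after a fixed number of slots. */
static unsigned long map_one(unsigned long addr, int *budget)
{
    return (*budget)-- > 0 ? addr + 0x10000000UL : BAD_DMA_ADDRESS;
}

/* Sketch of the non-forced fallback path: map entries individually and, on
 * the first failure, undo what was done so far and return 0 mapped entries. */
static int map_sg_nonforce_sketch(unsigned long *addrs, int nents, int *budget)
{
    int i;

    for (i = 0; i < nents; i++) {
        unsigned long dma = map_one(addrs[i], budget);
        if (dma == BAD_DMA_ADDRESS) {
            /* the real code calls gart_unmap_sg() on the i entries done so far */
            return 0;
        }
        addrs[i] = dma;
    }
    return nents;
}

int main(void)
{
    unsigned long sg[3] = { 0x1000, 0x2000, 0x3000 };
    int budget = 2;   /* only two slots left: the third entry will fail */
    printf("mapped %d of 3 entries\n", map_sg_nonforce_sketch(sg, 3, &budget));
    return 0;
}
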
322 | 364 | ||
323 | /* Map multiple scatterlist entries continuous into the first. */ | 365 | /* Map multiple scatterlist entries continuous into the first. */ |
324 | static int __dma_map_cont(struct scatterlist *start, int nelems, | 366 | static int __dma_map_cont(struct device *dev, struct scatterlist *start, |
325 | struct scatterlist *sout, unsigned long pages) | 367 | int nelems, struct scatterlist *sout, |
368 | unsigned long pages) | ||
326 | { | 369 | { |
327 | unsigned long iommu_start = alloc_iommu(pages); | 370 | unsigned long iommu_start = alloc_iommu(dev, pages); |
328 | unsigned long iommu_page = iommu_start; | 371 | unsigned long iommu_page = iommu_start; |
329 | struct scatterlist *s; | 372 | struct scatterlist *s; |
330 | int i; | 373 | int i; |
331 | 374 | ||
@@ -335,32 +378,33 @@ static int __dma_map_cont(struct scatterlist *start, int nelems, | |||
335 | for_each_sg(start, s, nelems, i) { | 378 | for_each_sg(start, s, nelems, i) { |
336 | unsigned long pages, addr; | 379 | unsigned long pages, addr; |
337 | unsigned long phys_addr = s->dma_address; | 380 | unsigned long phys_addr = s->dma_address; |
338 | 381 | ||
339 | BUG_ON(s != start && s->offset); | 382 | BUG_ON(s != start && s->offset); |
340 | if (s == start) { | 383 | if (s == start) { |
341 | sout->dma_address = iommu_bus_base; | 384 | sout->dma_address = iommu_bus_base; |
342 | sout->dma_address += iommu_page*PAGE_SIZE + s->offset; | 385 | sout->dma_address += iommu_page*PAGE_SIZE + s->offset; |
343 | sout->dma_length = s->length; | 386 | sout->dma_length = s->length; |
344 | } else { | 387 | } else { |
345 | sout->dma_length += s->length; | 388 | sout->dma_length += s->length; |
346 | } | 389 | } |
347 | 390 | ||
348 | addr = phys_addr; | 391 | addr = phys_addr; |
349 | pages = to_pages(s->offset, s->length); | 392 | pages = to_pages(s->offset, s->length); |
350 | while (pages--) { | 393 | while (pages--) { |
351 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 394 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
352 | SET_LEAK(iommu_page); | 395 | SET_LEAK(iommu_page); |
353 | addr += PAGE_SIZE; | 396 | addr += PAGE_SIZE; |
354 | iommu_page++; | 397 | iommu_page++; |
355 | } | 398 | } |
356 | } | 399 | } |
357 | BUG_ON(iommu_page - iommu_start != pages); | 400 | BUG_ON(iommu_page - iommu_start != pages); |
401 | |||
358 | return 0; | 402 | return 0; |
359 | } | 403 | } |
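
The inner loop of __dma_map_cont() above writes one GATT entry per page of every chunk, using consecutive slots, so all the chunks become a single contiguous bus-address range starting at the slot alloc_iommu() returned. A sketch of that fill loop over a small in-memory table; gpte_encode_sketch() is only a placeholder for the real GPTE_ENCODE() macro, whose exact bit layout is not reproduced here.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Placeholder for GPTE_ENCODE(): assumption, not the real GATT entry layout. */
static uint32_t gpte_encode_sketch(uint64_t phys)
{
    return (uint32_t)(phys >> PAGE_SHIFT);
}

int main(void)
{
    uint32_t gatt[16] = { 0 };
    unsigned long iommu_page = 3;       /* slot returned by alloc_iommu(dev, pages) */
    uint64_t chunks[2][2] = {           /* { physical address, pages } */
        { 0x10000000ULL, 2 },
        { 0x7f000000ULL, 1 },
    };

    for (int c = 0; c < 2; c++) {
        uint64_t addr = chunks[c][0];
        for (uint64_t p = 0; p < chunks[c][1]; p++) {
            gatt[iommu_page++] = gpte_encode_sketch(addr);
            addr += PAGE_SIZE;
        }
    }

    for (int i = 0; i < 8; i++)
        printf("gatt[%d] = %#x\n", i, gatt[i]);
    return 0;
}
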
360 | 404 | ||
361 | static inline int dma_map_cont(struct scatterlist *start, int nelems, | 405 | static inline int |
362 | struct scatterlist *sout, | 406 | dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, |
363 | unsigned long pages, int need) | 407 | struct scatterlist *sout, unsigned long pages, int need) |
364 | { | 408 | { |
365 | if (!need) { | 409 | if (!need) { |
366 | BUG_ON(nelems != 1); | 410 | BUG_ON(nelems != 1); |
@@ -368,24 +412,23 @@ static inline int dma_map_cont(struct scatterlist *start, int nelems, | |||
368 | sout->dma_length = start->length; | 412 | sout->dma_length = start->length; |
369 | return 0; | 413 | return 0; |
370 | } | 414 | } |
371 | return __dma_map_cont(start, nelems, sout, pages); | 415 | return __dma_map_cont(dev, start, nelems, sout, pages); |
372 | } | 416 | } |
373 | 417 | ||
374 | /* | 418 | /* |
375 | * DMA map all entries in a scatterlist. | 419 | * DMA map all entries in a scatterlist. |
376 | * Merge chunks that have page aligned sizes into a continuous mapping. | 420 | * Merge chunks that have page aligned sizes into a continuous mapping. |
377 | */ | 421 | */ |
378 | static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | 422 | static int |
379 | int dir) | 423 | gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) |
380 | { | 424 | { |
381 | int i; | ||
382 | int out; | ||
383 | int start; | ||
384 | unsigned long pages = 0; | ||
385 | int need = 0, nextneed; | ||
386 | struct scatterlist *s, *ps, *start_sg, *sgmap; | 425 | struct scatterlist *s, *ps, *start_sg, *sgmap; |
426 | int need = 0, nextneed, i, out, start; | ||
427 | unsigned long pages = 0; | ||
428 | unsigned int seg_size; | ||
429 | unsigned int max_seg_size; | ||
387 | 430 | ||
388 | if (nents == 0) | 431 | if (nents == 0) |
389 | return 0; | 432 | return 0; |
390 | 433 | ||
391 | if (!dev) | 434 | if (!dev) |
@@ -394,24 +437,32 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
394 | out = 0; | 437 | out = 0; |
395 | start = 0; | 438 | start = 0; |
396 | start_sg = sgmap = sg; | 439 | start_sg = sgmap = sg; |
440 | seg_size = 0; | ||
441 | max_seg_size = dma_get_max_seg_size(dev); | ||
397 | ps = NULL; /* shut up gcc */ | 442 | ps = NULL; /* shut up gcc */ |
398 | for_each_sg(sg, s, nents, i) { | 443 | for_each_sg(sg, s, nents, i) { |
399 | dma_addr_t addr = sg_phys(s); | 444 | dma_addr_t addr = sg_phys(s); |
445 | |||
400 | s->dma_address = addr; | 446 | s->dma_address = addr; |
401 | BUG_ON(s->length == 0); | 447 | BUG_ON(s->length == 0); |
402 | 448 | ||
403 | nextneed = need_iommu(dev, addr, s->length); | 449 | nextneed = need_iommu(dev, addr, s->length); |
404 | 450 | ||
405 | /* Handle the previous not yet processed entries */ | 451 | /* Handle the previous not yet processed entries */ |
406 | if (i > start) { | 452 | if (i > start) { |
407 | /* Can only merge when the last chunk ends on a page | 453 | /* |
408 | boundary and the new one doesn't have an offset. */ | 454 | * Can only merge when the last chunk ends on a |
455 | * page boundary and the new one doesn't have an | ||
456 | * offset. | ||
457 | */ | ||
409 | if (!iommu_merge || !nextneed || !need || s->offset || | 458 | if (!iommu_merge || !nextneed || !need || s->offset || |
459 | (s->length + seg_size > max_seg_size) || | ||
410 | (ps->offset + ps->length) % PAGE_SIZE) { | 460 | (ps->offset + ps->length) % PAGE_SIZE) { |
411 | if (dma_map_cont(start_sg, i - start, sgmap, | 461 | if (dma_map_cont(dev, start_sg, i - start, |
412 | pages, need) < 0) | 462 | sgmap, pages, need) < 0) |
413 | goto error; | 463 | goto error; |
414 | out++; | 464 | out++; |
465 | seg_size = 0; | ||
415 | sgmap = sg_next(sgmap); | 466 | sgmap = sg_next(sgmap); |
416 | pages = 0; | 467 | pages = 0; |
417 | start = i; | 468 | start = i; |
@@ -419,11 +470,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
419 | } | 470 | } |
420 | } | 471 | } |
421 | 472 | ||
473 | seg_size += s->length; | ||
422 | need = nextneed; | 474 | need = nextneed; |
423 | pages += to_pages(s->offset, s->length); | 475 | pages += to_pages(s->offset, s->length); |
424 | ps = s; | 476 | ps = s; |
425 | } | 477 | } |
426 | if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) | 478 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) |
427 | goto error; | 479 | goto error; |
428 | out++; | 480 | out++; |
429 | flush_gart(); | 481 | flush_gart(); |
@@ -436,6 +488,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
436 | error: | 488 | error: |
437 | flush_gart(); | 489 | flush_gart(); |
438 | gart_unmap_sg(dev, sg, out, dir); | 490 | gart_unmap_sg(dev, sg, out, dir); |
491 | |||
439 | /* When it was forced or merged try again in a dumb way */ | 492 | /* When it was forced or merged try again in a dumb way */ |
440 | if (force_iommu || iommu_merge) { | 493 | if (force_iommu || iommu_merge) { |
441 | out = dma_map_sg_nonforce(dev, sg, nents, dir); | 494 | out = dma_map_sg_nonforce(dev, sg, nents, dir); |
@@ -444,64 +497,68 @@ error: | |||
444 | } | 497 | } |
445 | if (panic_on_overflow) | 498 | if (panic_on_overflow) |
446 | panic("dma_map_sg: overflow on %lu pages\n", pages); | 499 | panic("dma_map_sg: overflow on %lu pages\n", pages); |
500 | |||
447 | iommu_full(dev, pages << PAGE_SHIFT, dir); | 501 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
448 | for_each_sg(sg, s, nents, i) | 502 | for_each_sg(sg, s, nents, i) |
449 | s->dma_address = bad_dma_address; | 503 | s->dma_address = bad_dma_address; |
450 | return 0; | 504 | return 0; |
451 | } | 505 | } |
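
The interesting part of the gart_map_sg() change above is the merge condition: a scatterlist entry may only be folded into the current run when merging is enabled, both sides need the IOMMU, the previous chunk ends on a page boundary, the next chunk has no offset, and (new in this patch) the accumulated segment stays within dma_get_max_seg_size(). A sketch of that predicate with the inputs passed in explicitly:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Sketch of the merge test gart_map_sg() applies before folding the next
 * scatterlist entry into the current run; the max_seg_size clamp is the
 * condition this patch adds. */
static int can_merge(unsigned long prev_offset, unsigned long prev_len,
                     unsigned long next_offset, unsigned long next_len,
                     unsigned long seg_size, unsigned long max_seg_size,
                     int iommu_merge, int need, int nextneed)
{
    if (!iommu_merge || !need || !nextneed)
        return 0;
    if (next_offset)                              /* next chunk must start a page */
        return 0;
    if (seg_size + next_len > max_seg_size)       /* would exceed the device limit */
        return 0;
    if ((prev_offset + prev_len) % PAGE_SIZE)     /* previous must end on a page */
        return 0;
    return 1;
}

int main(void)
{
    /* 60 KB already merged, device capped at 64 KB: an 8 KB chunk cannot join. */
    printf("%d\n", can_merge(0, PAGE_SIZE, 0, 8192, 61440, 65536, 1, 1, 1));
    return 0;
}
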
452 | 506 | ||
453 | static int no_agp; | 507 | static int no_agp; |
454 | 508 | ||
455 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 509 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
456 | { | 510 | { |
457 | unsigned long a; | 511 | unsigned long a; |
458 | if (!iommu_size) { | 512 | |
459 | iommu_size = aper_size; | 513 | if (!iommu_size) { |
460 | if (!no_agp) | 514 | iommu_size = aper_size; |
461 | iommu_size /= 2; | 515 | if (!no_agp) |
462 | } | 516 | iommu_size /= 2; |
463 | 517 | } | |
464 | a = aper + iommu_size; | 518 | |
465 | iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; | 519 | a = aper + iommu_size; |
466 | 520 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; | |
467 | if (iommu_size < 64*1024*1024) | 521 | |
522 | if (iommu_size < 64*1024*1024) { | ||
468 | printk(KERN_WARNING | 523 | printk(KERN_WARNING |
469 | "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); | 524 | "PCI-DMA: Warning: Small IOMMU %luMB." |
470 | 525 | " Consider increasing the AGP aperture in BIOS\n", | |
526 | iommu_size >> 20); | ||
527 | } | ||
528 | |||
471 | return iommu_size; | 529 | return iommu_size; |
472 | } | 530 | } |
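
check_iommu_size() above sizes the IOMMU window: by default half the aperture (all of it when AGP is disabled), trimmed by the distance from aperture+size to the next 2 MB boundary (PMD_PAGE_SIZE replaces the old LARGE_PAGE_SIZE name in this patch), with a warning below 64 MB. A sketch of that arithmetic, ignoring the command-line iommu_size override that the real function honours:

#include <stdio.h>

#define PMD_PAGE_SIZE (2UL << 20)     /* 2 MB large-page size on x86-64 */

static unsigned long round_up_to(unsigned long x, unsigned long align)
{
    return (x + align - 1) & ~(align - 1);
}

/* Sketch of check_iommu_size(): default window, 2 MB trim, small-window warning. */
static unsigned long iommu_window(unsigned long aper_base,
                                  unsigned long aper_size, int no_agp)
{
    unsigned long size = no_agp ? aper_size : aper_size / 2;
    unsigned long a = aper_base + size;

    size -= round_up_to(a, PMD_PAGE_SIZE) - a;

    if (size < 64UL << 20)
        fprintf(stderr, "small IOMMU window: %lu MB\n", size >> 20);
    return size;
}

int main(void)
{
    printf("%lu MB\n", iommu_window(0xd0000000UL, 256UL << 20, 0) >> 20);
    return 0;
}
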
473 | 531 | ||
474 | static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) | 532 | static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) |
475 | { | 533 | { |
476 | unsigned aper_size = 0, aper_base_32; | 534 | unsigned aper_size = 0, aper_base_32, aper_order; |
477 | u64 aper_base; | 535 | u64 aper_base; |
478 | unsigned aper_order; | ||
479 | 536 | ||
480 | pci_read_config_dword(dev, 0x94, &aper_base_32); | 537 | pci_read_config_dword(dev, 0x94, &aper_base_32); |
481 | pci_read_config_dword(dev, 0x90, &aper_order); | 538 | pci_read_config_dword(dev, 0x90, &aper_order); |
482 | aper_order = (aper_order >> 1) & 7; | 539 | aper_order = (aper_order >> 1) & 7; |
483 | 540 | ||
484 | aper_base = aper_base_32 & 0x7fff; | 541 | aper_base = aper_base_32 & 0x7fff; |
485 | aper_base <<= 25; | 542 | aper_base <<= 25; |
486 | 543 | ||
487 | aper_size = (32 * 1024 * 1024) << aper_order; | 544 | aper_size = (32 * 1024 * 1024) << aper_order; |
488 | if (aper_base + aper_size > 0x100000000UL || !aper_size) | 545 | if (aper_base + aper_size > 0x100000000UL || !aper_size) |
489 | aper_base = 0; | 546 | aper_base = 0; |
490 | 547 | ||
491 | *size = aper_size; | 548 | *size = aper_size; |
492 | return aper_base; | 549 | return aper_base; |
493 | } | 550 | } |
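
read_aperture() above recovers the aperture geometry from two K8 northbridge config registers: the order field sits in bits 1-3 of register 0x90, the base (in 32 MB units) in register 0x94, and apertures that end above 4 GB or have zero size are rejected. A sketch of the same decoding with invented raw register values in place of pci_read_config_dword():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t reg90 = 0x00000006;      /* order field = (6 >> 1) & 7 = 3 */
    uint32_t reg94 = 0x00000068;      /* base field, units of 32 MB */

    unsigned aper_order = (reg90 >> 1) & 7;
    uint64_t aper_base  = ((uint64_t)(reg94 & 0x7fff)) << 25;
    uint64_t aper_size  = (32ULL << 20) << aper_order;

    if (aper_base + aper_size > 0x100000000ULL || !aper_size)
        aper_base = 0;                /* reject apertures above 4 GB or size 0 */

    printf("base=%#llx size=%llu MB\n",
           (unsigned long long)aper_base,
           (unsigned long long)(aper_size >> 20));
    return 0;
}
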
494 | 551 | ||
495 | /* | 552 | /* |
496 | * Private Northbridge GATT initialization in case we cannot use the | 553 | * Private Northbridge GATT initialization in case we cannot use the |
497 | * AGP driver for some reason. | 554 | * AGP driver for some reason. |
498 | */ | 555 | */ |
499 | static __init int init_k8_gatt(struct agp_kern_info *info) | 556 | static __init int init_k8_gatt(struct agp_kern_info *info) |
500 | { | 557 | { |
558 | unsigned aper_size, gatt_size, new_aper_size; | ||
559 | unsigned aper_base, new_aper_base; | ||
501 | struct pci_dev *dev; | 560 | struct pci_dev *dev; |
502 | void *gatt; | 561 | void *gatt; |
503 | unsigned aper_base, new_aper_base; | ||
504 | unsigned aper_size, gatt_size, new_aper_size; | ||
505 | int i; | 562 | int i; |
506 | 563 | ||
507 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 564 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
@@ -509,75 +566,75 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
509 | dev = NULL; | 566 | dev = NULL; |
510 | for (i = 0; i < num_k8_northbridges; i++) { | 567 | for (i = 0; i < num_k8_northbridges; i++) { |
511 | dev = k8_northbridges[i]; | 568 | dev = k8_northbridges[i]; |
512 | new_aper_base = read_aperture(dev, &new_aper_size); | 569 | new_aper_base = read_aperture(dev, &new_aper_size); |
513 | if (!new_aper_base) | 570 | if (!new_aper_base) |
514 | goto nommu; | 571 | goto nommu; |
515 | 572 | ||
516 | if (!aper_base) { | 573 | if (!aper_base) { |
517 | aper_size = new_aper_size; | 574 | aper_size = new_aper_size; |
518 | aper_base = new_aper_base; | 575 | aper_base = new_aper_base; |
519 | } | 576 | } |
520 | if (aper_size != new_aper_size || aper_base != new_aper_base) | 577 | if (aper_size != new_aper_size || aper_base != new_aper_base) |
521 | goto nommu; | 578 | goto nommu; |
522 | } | 579 | } |
523 | if (!aper_base) | 580 | if (!aper_base) |
524 | goto nommu; | 581 | goto nommu; |
525 | info->aper_base = aper_base; | 582 | info->aper_base = aper_base; |
526 | info->aper_size = aper_size>>20; | 583 | info->aper_size = aper_size >> 20; |
527 | 584 | ||
528 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | 585 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); |
529 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | 586 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); |
530 | if (!gatt) | 587 | if (!gatt) |
531 | panic("Cannot allocate GATT table"); | 588 | panic("Cannot allocate GATT table"); |
532 | if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) | 589 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) |
533 | panic("Could not set GART PTEs to uncacheable pages"); | 590 | panic("Could not set GART PTEs to uncacheable pages"); |
534 | global_flush_tlb(); | ||
535 | 591 | ||
536 | memset(gatt, 0, gatt_size); | 592 | memset(gatt, 0, gatt_size); |
537 | agp_gatt_table = gatt; | 593 | agp_gatt_table = gatt; |
538 | 594 | ||
539 | for (i = 0; i < num_k8_northbridges; i++) { | 595 | for (i = 0; i < num_k8_northbridges; i++) { |
540 | u32 ctl; | 596 | u32 gatt_reg; |
541 | u32 gatt_reg; | 597 | u32 ctl; |
542 | 598 | ||
543 | dev = k8_northbridges[i]; | 599 | dev = k8_northbridges[i]; |
544 | gatt_reg = __pa(gatt) >> 12; | 600 | gatt_reg = __pa(gatt) >> 12; |
545 | gatt_reg <<= 4; | 601 | gatt_reg <<= 4; |
546 | pci_write_config_dword(dev, 0x98, gatt_reg); | 602 | pci_write_config_dword(dev, 0x98, gatt_reg); |
547 | pci_read_config_dword(dev, 0x90, &ctl); | 603 | pci_read_config_dword(dev, 0x90, &ctl); |
548 | 604 | ||
549 | ctl |= 1; | 605 | ctl |= 1; |
550 | ctl &= ~((1<<4) | (1<<5)); | 606 | ctl &= ~((1<<4) | (1<<5)); |
551 | 607 | ||
552 | pci_write_config_dword(dev, 0x90, ctl); | 608 | pci_write_config_dword(dev, 0x90, ctl); |
553 | } | 609 | } |
554 | flush_gart(); | 610 | flush_gart(); |
555 | 611 | ||
556 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); | 612 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
613 | aper_base, aper_size>>10); | ||
557 | return 0; | 614 | return 0; |
558 | 615 | ||
559 | nommu: | 616 | nommu: |
560 | /* Should not happen anymore */ | 617 | /* Should not happen anymore */ |
561 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 618 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
562 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); | 619 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); |
563 | return -1; | 620 | return -1; |
564 | } | 621 | } |
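
The per-northbridge programming inside init_k8_gatt() above writes the GATT's physical address, shifted down 12 and back up 4 bits, into config register 0x98, then sets bit 0 of register 0x90 and clears bits 4 and 5, exactly as the diff shows. A sketch of those register-value computations with an invented physical address and initial 0x90 contents:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t gatt_phys = 0x37f40000ULL;          /* assumed __pa(gatt) */
    uint32_t ctl       = 0x00000034;             /* assumed current 0x90 contents */

    uint32_t gatt_reg = (uint32_t)((gatt_phys >> 12) << 4);

    ctl |= 1;                                    /* enable bit, as the kernel sets */
    ctl &= ~((1u << 4) | (1u << 5));             /* clear bits 4 and 5, as the kernel does */

    printf("write 0x98 = %#x, 0x90 = %#x\n", gatt_reg, ctl);
    return 0;
}
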
565 | 622 | ||
566 | extern int agp_amd64_init(void); | 623 | extern int agp_amd64_init(void); |
567 | 624 | ||
568 | static const struct dma_mapping_ops gart_dma_ops = { | 625 | static const struct dma_mapping_ops gart_dma_ops = { |
569 | .mapping_error = NULL, | 626 | .mapping_error = NULL, |
570 | .map_single = gart_map_single, | 627 | .map_single = gart_map_single, |
571 | .map_simple = gart_map_simple, | 628 | .map_simple = gart_map_simple, |
572 | .unmap_single = gart_unmap_single, | 629 | .unmap_single = gart_unmap_single, |
573 | .sync_single_for_cpu = NULL, | 630 | .sync_single_for_cpu = NULL, |
574 | .sync_single_for_device = NULL, | 631 | .sync_single_for_device = NULL, |
575 | .sync_single_range_for_cpu = NULL, | 632 | .sync_single_range_for_cpu = NULL, |
576 | .sync_single_range_for_device = NULL, | 633 | .sync_single_range_for_device = NULL, |
577 | .sync_sg_for_cpu = NULL, | 634 | .sync_sg_for_cpu = NULL, |
578 | .sync_sg_for_device = NULL, | 635 | .sync_sg_for_device = NULL, |
579 | .map_sg = gart_map_sg, | 636 | .map_sg = gart_map_sg, |
580 | .unmap_sg = gart_unmap_sg, | 637 | .unmap_sg = gart_unmap_sg, |
581 | }; | 638 | }; |
582 | 639 | ||
583 | void gart_iommu_shutdown(void) | 640 | void gart_iommu_shutdown(void) |
@@ -588,23 +645,23 @@ void gart_iommu_shutdown(void) | |||
588 | if (no_agp && (dma_ops != &gart_dma_ops)) | 645 | if (no_agp && (dma_ops != &gart_dma_ops)) |
589 | return; | 646 | return; |
590 | 647 | ||
591 | for (i = 0; i < num_k8_northbridges; i++) { | 648 | for (i = 0; i < num_k8_northbridges; i++) { |
592 | u32 ctl; | 649 | u32 ctl; |
593 | 650 | ||
594 | dev = k8_northbridges[i]; | 651 | dev = k8_northbridges[i]; |
595 | pci_read_config_dword(dev, 0x90, &ctl); | 652 | pci_read_config_dword(dev, 0x90, &ctl); |
596 | 653 | ||
597 | ctl &= ~1; | 654 | ctl &= ~1; |
598 | 655 | ||
599 | pci_write_config_dword(dev, 0x90, ctl); | 656 | pci_write_config_dword(dev, 0x90, ctl); |
600 | } | 657 | } |
601 | } | 658 | } |
602 | 659 | ||
603 | void __init gart_iommu_init(void) | 660 | void __init gart_iommu_init(void) |
604 | { | 661 | { |
605 | struct agp_kern_info info; | 662 | struct agp_kern_info info; |
606 | unsigned long aper_size; | ||
607 | unsigned long iommu_start; | 663 | unsigned long iommu_start; |
664 | unsigned long aper_size; | ||
608 | unsigned long scratch; | 665 | unsigned long scratch; |
609 | long i; | 666 | long i; |
610 | 667 | ||
@@ -614,14 +671,14 @@ void __init gart_iommu_init(void) | |||
614 | } | 671 | } |
615 | 672 | ||
616 | #ifndef CONFIG_AGP_AMD64 | 673 | #ifndef CONFIG_AGP_AMD64 |
617 | no_agp = 1; | 674 | no_agp = 1; |
618 | #else | 675 | #else |
619 | /* Makefile puts PCI initialization via subsys_initcall first. */ | 676 | /* Makefile puts PCI initialization via subsys_initcall first. */ |
620 | /* Add other K8 AGP bridge drivers here */ | 677 | /* Add other K8 AGP bridge drivers here */ |
621 | no_agp = no_agp || | 678 | no_agp = no_agp || |
622 | (agp_amd64_init() < 0) || | 679 | (agp_amd64_init() < 0) || |
623 | (agp_copy_info(agp_bridge, &info) < 0); | 680 | (agp_copy_info(agp_bridge, &info) < 0); |
624 | #endif | 681 | #endif |
625 | 682 | ||
626 | if (swiotlb) | 683 | if (swiotlb) |
627 | return; | 684 | return; |
@@ -643,77 +700,79 @@ void __init gart_iommu_init(void) | |||
643 | } | 700 | } |
644 | 701 | ||
645 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 702 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
646 | aper_size = info.aper_size * 1024 * 1024; | 703 | aper_size = info.aper_size * 1024 * 1024; |
647 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 704 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
648 | iommu_pages = iommu_size >> PAGE_SHIFT; | 705 | iommu_pages = iommu_size >> PAGE_SHIFT; |
649 | 706 | ||
650 | iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, | 707 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, |
651 | get_order(iommu_pages/8)); | 708 | get_order(iommu_pages/8)); |
652 | if (!iommu_gart_bitmap) | 709 | if (!iommu_gart_bitmap) |
653 | panic("Cannot allocate iommu bitmap\n"); | 710 | panic("Cannot allocate iommu bitmap\n"); |
654 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | 711 | memset(iommu_gart_bitmap, 0, iommu_pages/8); |
655 | 712 | ||
656 | #ifdef CONFIG_IOMMU_LEAK | 713 | #ifdef CONFIG_IOMMU_LEAK |
657 | if (leak_trace) { | 714 | if (leak_trace) { |
658 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | 715 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, |
659 | get_order(iommu_pages*sizeof(void *))); | 716 | get_order(iommu_pages*sizeof(void *))); |
660 | if (iommu_leak_tab) | 717 | if (iommu_leak_tab) |
661 | memset(iommu_leak_tab, 0, iommu_pages * 8); | 718 | memset(iommu_leak_tab, 0, iommu_pages * 8); |
662 | else | 719 | else |
663 | printk("PCI-DMA: Cannot allocate leak trace area\n"); | 720 | printk(KERN_DEBUG |
664 | } | 721 | "PCI-DMA: Cannot allocate leak trace area\n"); |
722 | } | ||
665 | #endif | 723 | #endif |
666 | 724 | ||
667 | /* | 725 | /* |
668 | * Out of IOMMU space handling. | 726 | * Out of IOMMU space handling. |
669 | * Reserve some invalid pages at the beginning of the GART. | 727 | * Reserve some invalid pages at the beginning of the GART. |
670 | */ | 728 | */ |
671 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 729 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
672 | 730 | ||
673 | agp_memory_reserved = iommu_size; | 731 | agp_memory_reserved = iommu_size; |
674 | printk(KERN_INFO | 732 | printk(KERN_INFO |
675 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", | 733 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", |
676 | iommu_size>>20); | 734 | iommu_size >> 20); |
677 | 735 | ||
678 | iommu_start = aper_size - iommu_size; | 736 | iommu_start = aper_size - iommu_size; |
679 | iommu_bus_base = info.aper_base + iommu_start; | 737 | iommu_bus_base = info.aper_base + iommu_start; |
680 | bad_dma_address = iommu_bus_base; | 738 | bad_dma_address = iommu_bus_base; |
681 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | 739 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); |
682 | 740 | ||
683 | /* | 741 | /* |
684 | * Unmap the IOMMU part of the GART. The alias of the page is | 742 | * Unmap the IOMMU part of the GART. The alias of the page is |
685 | * always mapped with cache enabled and there is no full cache | 743 | * always mapped with cache enabled and there is no full cache |
686 | * coherency across the GART remapping. The unmapping avoids | 744 | * coherency across the GART remapping. The unmapping avoids |
687 | * automatic prefetches from the CPU allocating cache lines in | 745 | * automatic prefetches from the CPU allocating cache lines in |
688 | * there. All CPU accesses are done via the direct mapping to | 746 | * there. All CPU accesses are done via the direct mapping to |
689 | * the backing memory. The GART address is only used by PCI | 747 | * the backing memory. The GART address is only used by PCI |
690 | * devices. | 748 | * devices. |
691 | */ | 749 | */ |
692 | clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); | 750 | set_memory_np((unsigned long)__va(iommu_bus_base), |
751 | iommu_size >> PAGE_SHIFT); | ||
693 | 752 | ||
694 | /* | 753 | /* |
695 | * Try to workaround a bug (thanks to BenH) | 754 | * Try to workaround a bug (thanks to BenH) |
696 | * Set unmapped entries to a scratch page instead of 0. | 755 | * Set unmapped entries to a scratch page instead of 0. |
697 | * Any prefetches that hit unmapped entries won't get an bus abort | 756 | * Any prefetches that hit unmapped entries won't get an bus abort |
698 | * then. | 757 | * then. |
699 | */ | 758 | */ |
700 | scratch = get_zeroed_page(GFP_KERNEL); | 759 | scratch = get_zeroed_page(GFP_KERNEL); |
701 | if (!scratch) | 760 | if (!scratch) |
702 | panic("Cannot allocate iommu scratch page"); | 761 | panic("Cannot allocate iommu scratch page"); |
703 | gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); | 762 | gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); |
704 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) | 763 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) |
705 | iommu_gatt_base[i] = gart_unmapped_entry; | 764 | iommu_gatt_base[i] = gart_unmapped_entry; |
706 | 765 | ||
707 | flush_gart(); | 766 | flush_gart(); |
708 | dma_ops = &gart_dma_ops; | 767 | dma_ops = &gart_dma_ops; |
709 | } | 768 | } |
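
The tail of gart_iommu_init() above implements the BenH workaround: every GATT slot outside the reserved emergency prefix is pointed at one zeroed scratch page, so stray prefetches hit valid but harmless memory instead of triggering a bus abort. A sketch of that fill over a small in-memory table; the encoded value stands in for GPTE_ENCODE(__pa(scratch)) and the EMERGENCY_PAGES count of 32 is assumed.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EMERGENCY_PAGES 32            /* assumed size of the reserved prefix */

int main(void)
{
    uint32_t gatt[256];
    uint32_t gart_unmapped_entry = 0x00012003;   /* invented encoded scratch GPTE */

    memset(gatt, 0, sizeof(gatt));
    for (int i = EMERGENCY_PAGES; i < 256; i++)
        gatt[i] = gart_unmapped_entry;           /* all unmapped slots -> scratch page */

    printf("gatt[0]=%#x gatt[%d]=%#x\n",
           gatt[0], EMERGENCY_PAGES, gatt[EMERGENCY_PAGES]);
    return 0;
}
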
710 | 769 | ||
711 | void __init gart_parse_options(char *p) | 770 | void __init gart_parse_options(char *p) |
712 | { | 771 | { |
713 | int arg; | 772 | int arg; |
714 | 773 | ||
715 | #ifdef CONFIG_IOMMU_LEAK | 774 | #ifdef CONFIG_IOMMU_LEAK |
716 | if (!strncmp(p,"leak",4)) { | 775 | if (!strncmp(p, "leak", 4)) { |
717 | leak_trace = 1; | 776 | leak_trace = 1; |
718 | p += 4; | 777 | p += 4; |
719 | if (*p == '=') ++p; | 778 | if (*p == '=') ++p; |
@@ -723,18 +782,18 @@ void __init gart_parse_options(char *p) | |||
723 | #endif | 782 | #endif |
724 | if (isdigit(*p) && get_option(&p, &arg)) | 783 | if (isdigit(*p) && get_option(&p, &arg)) |
725 | iommu_size = arg; | 784 | iommu_size = arg; |
726 | if (!strncmp(p, "fullflush",8)) | 785 | if (!strncmp(p, "fullflush", 8)) |
727 | iommu_fullflush = 1; | 786 | iommu_fullflush = 1; |
728 | if (!strncmp(p, "nofullflush",11)) | 787 | if (!strncmp(p, "nofullflush", 11)) |
729 | iommu_fullflush = 0; | 788 | iommu_fullflush = 0; |
730 | if (!strncmp(p,"noagp",5)) | 789 | if (!strncmp(p, "noagp", 5)) |
731 | no_agp = 1; | 790 | no_agp = 1; |
732 | if (!strncmp(p, "noaperture",10)) | 791 | if (!strncmp(p, "noaperture", 10)) |
733 | fix_aperture = 0; | 792 | fix_aperture = 0; |
734 | /* duplicated from pci-dma.c */ | 793 | /* duplicated from pci-dma.c */ |
735 | if (!strncmp(p,"force",5)) | 794 | if (!strncmp(p, "force", 5)) |
736 | gart_iommu_aperture_allowed = 1; | 795 | gart_iommu_aperture_allowed = 1; |
737 | if (!strncmp(p,"allowed",7)) | 796 | if (!strncmp(p, "allowed", 7)) |
738 | gart_iommu_aperture_allowed = 1; | 797 | gart_iommu_aperture_allowed = 1; |
739 | if (!strncmp(p, "memaper", 7)) { | 798 | if (!strncmp(p, "memaper", 7)) { |
740 | fallback_aper_force = 1; | 799 | fallback_aper_force = 1; |
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 102866d729a5..82a0a674a003 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <asm/dma.h> | 10 | #include <asm/dma.h> |
11 | 11 | ||
12 | int swiotlb __read_mostly; | 12 | int swiotlb __read_mostly; |
13 | EXPORT_SYMBOL(swiotlb); | ||
14 | 13 | ||
15 | const struct dma_mapping_ops swiotlb_dma_ops = { | 14 | const struct dma_mapping_ops swiotlb_dma_ops = { |
16 | .mapping_error = swiotlb_dma_mapping_error, | 15 | .mapping_error = swiotlb_dma_mapping_error, |
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c index ae8f91214f15..b112406f1996 100644 --- a/arch/x86/kernel/pmtimer_64.c +++ b/arch/x86/kernel/pmtimer_64.c | |||
@@ -19,13 +19,13 @@ | |||
19 | #include <linux/time.h> | 19 | #include <linux/time.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/cpumask.h> | 21 | #include <linux/cpumask.h> |
22 | #include <linux/acpi_pmtmr.h> | ||
23 | |||
22 | #include <asm/io.h> | 24 | #include <asm/io.h> |
23 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
24 | #include <asm/msr.h> | 26 | #include <asm/msr.h> |
25 | #include <asm/vsyscall.h> | 27 | #include <asm/vsyscall.h> |
26 | 28 | ||
27 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
28 | |||
29 | static inline u32 cyc2us(u32 cycles) | 29 | static inline u32 cyc2us(u32 cycles) |
30 | { | 30 | { |
31 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | 31 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9663c2a74830..dabdbeff1f77 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,6 +55,7 @@ | |||
55 | 55 | ||
56 | #include <asm/tlbflush.h> | 56 | #include <asm/tlbflush.h> |
57 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
58 | #include <asm/kdebug.h> | ||
58 | 59 | ||
59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
60 | 61 | ||
@@ -74,7 +75,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); | |||
74 | */ | 75 | */ |
75 | unsigned long thread_saved_pc(struct task_struct *tsk) | 76 | unsigned long thread_saved_pc(struct task_struct *tsk) |
76 | { | 77 | { |
77 | return ((unsigned long *)tsk->thread.esp)[3]; | 78 | return ((unsigned long *)tsk->thread.sp)[3]; |
78 | } | 79 | } |
79 | 80 | ||
80 | /* | 81 | /* |
@@ -113,10 +114,19 @@ void default_idle(void) | |||
113 | smp_mb(); | 114 | smp_mb(); |
114 | 115 | ||
115 | local_irq_disable(); | 116 | local_irq_disable(); |
116 | if (!need_resched()) | 117 | if (!need_resched()) { |
118 | ktime_t t0, t1; | ||
119 | u64 t0n, t1n; | ||
120 | |||
121 | t0 = ktime_get(); | ||
122 | t0n = ktime_to_ns(t0); | ||
117 | safe_halt(); /* enables interrupts racelessly */ | 123 | safe_halt(); /* enables interrupts racelessly */ |
118 | else | 124 | local_irq_disable(); |
119 | local_irq_enable(); | 125 | t1 = ktime_get(); |
126 | t1n = ktime_to_ns(t1); | ||
127 | sched_clock_idle_wakeup_event(t1n - t0n); | ||
128 | } | ||
129 | local_irq_enable(); | ||
120 | current_thread_info()->status |= TS_POLLING; | 130 | current_thread_info()->status |= TS_POLLING; |
121 | } else { | 131 | } else { |
122 | /* loop is done by the caller */ | 132 | /* loop is done by the caller */ |
@@ -132,7 +142,7 @@ EXPORT_SYMBOL(default_idle); | |||
132 | * to poll the ->work.need_resched flag instead of waiting for the | 142 | * to poll the ->work.need_resched flag instead of waiting for the |
133 | * cross-CPU IPI to arrive. Use this option with caution. | 143 | * cross-CPU IPI to arrive. Use this option with caution. |
134 | */ | 144 | */ |
135 | static void poll_idle (void) | 145 | static void poll_idle(void) |
136 | { | 146 | { |
137 | cpu_relax(); | 147 | cpu_relax(); |
138 | } | 148 | } |
@@ -188,6 +198,9 @@ void cpu_idle(void) | |||
188 | rmb(); | 198 | rmb(); |
189 | idle = pm_idle; | 199 | idle = pm_idle; |
190 | 200 | ||
201 | if (rcu_pending(cpu)) | ||
202 | rcu_check_callbacks(cpu, 0); | ||
203 | |||
191 | if (!idle) | 204 | if (!idle) |
192 | idle = default_idle; | 205 | idle = default_idle; |
193 | 206 | ||
@@ -204,6 +217,10 @@ void cpu_idle(void) | |||
204 | } | 217 | } |
205 | } | 218 | } |
206 | 219 | ||
220 | static void do_nothing(void *unused) | ||
221 | { | ||
222 | } | ||
223 | |||
207 | void cpu_idle_wait(void) | 224 | void cpu_idle_wait(void) |
208 | { | 225 | { |
209 | unsigned int cpu, this_cpu = get_cpu(); | 226 | unsigned int cpu, this_cpu = get_cpu(); |
@@ -228,6 +245,13 @@ void cpu_idle_wait(void) | |||
228 | cpu_clear(cpu, map); | 245 | cpu_clear(cpu, map); |
229 | } | 246 | } |
230 | cpus_and(map, map, cpu_online_map); | 247 | cpus_and(map, map, cpu_online_map); |
248 | /* | ||
249 | * We waited 1 sec, if a CPU still did not call idle | ||
250 | * it may be because it is in idle and not waking up | ||
251 | * because it has nothing to do. | ||
252 | * Give all the remaining CPUS a kick. | ||
253 | */ | ||
254 | smp_call_function_mask(map, do_nothing, NULL, 0); | ||
231 | } while (!cpus_empty(map)); | 255 | } while (!cpus_empty(map)); |
232 | 256 | ||
233 | set_cpus_allowed(current, tmp); | 257 | set_cpus_allowed(current, tmp); |
@@ -244,13 +268,13 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
244 | * New with Core Duo processors, MWAIT can take some hints based on CPU | 268 | * New with Core Duo processors, MWAIT can take some hints based on CPU |
245 | * capability. | 269 | * capability. |
246 | */ | 270 | */ |
247 | void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | 271 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
248 | { | 272 | { |
249 | if (!need_resched()) { | 273 | if (!need_resched()) { |
250 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | 274 | __monitor((void *)¤t_thread_info()->flags, 0, 0); |
251 | smp_mb(); | 275 | smp_mb(); |
252 | if (!need_resched()) | 276 | if (!need_resched()) |
253 | __mwait(eax, ecx); | 277 | __mwait(ax, cx); |
254 | } | 278 | } |
255 | } | 279 | } |
256 | 280 | ||
@@ -261,19 +285,37 @@ static void mwait_idle(void) | |||
261 | mwait_idle_with_hints(0, 0); | 285 | mwait_idle_with_hints(0, 0); |
262 | } | 286 | } |
263 | 287 | ||
288 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | ||
289 | { | ||
290 | if (force_mwait) | ||
291 | return 1; | ||
292 | /* Any C1 states supported? */ | ||
293 | return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; | ||
294 | } | ||
295 | |||
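
The new mwait_usable() helper above gates MWAIT idle on CPUID leaf 5 existing and on EDX bits 4-7 of that leaf (the number of C1 sub-states advertised for MWAIT) being non-zero, unless force_mwait overrides it. A sketch of the same test with the EDX value hard-coded instead of executing CPUID:

#include <stdint.h>
#include <stdio.h>

static int mwait_usable_sketch(unsigned cpuid_level, uint32_t edx_leaf5,
                               int force_mwait)
{
    if (force_mwait)
        return 1;
    /* leaf 5 must exist and advertise at least one C1 sub-state */
    return cpuid_level >= 5 && ((edx_leaf5 >> 4) & 0xf) > 0;
}

int main(void)
{
    printf("%d\n", mwait_usable_sketch(10, 0x00000020, 0));  /* two C1 sub-states -> 1 */
    printf("%d\n", mwait_usable_sketch(10, 0x00000000, 0));  /* none advertised  -> 0 */
    return 0;
}
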
264 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 296 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) |
265 | { | 297 | { |
266 | if (cpu_has(c, X86_FEATURE_MWAIT)) { | 298 | static int selected; |
267 | printk("monitor/mwait feature present.\n"); | 299 | |
300 | if (selected) | ||
301 | return; | ||
302 | #ifdef CONFIG_X86_SMP | ||
303 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | ||
304 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | ||
305 | " performance may degrade.\n"); | ||
306 | } | ||
307 | #endif | ||
308 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { | ||
268 | /* | 309 | /* |
269 | * Skip, if setup has overridden idle. | 310 | * Skip, if setup has overridden idle. |
270 | * One CPU supports mwait => All CPUs supports mwait | 311 | * One CPU supports mwait => All CPUs supports mwait |
271 | */ | 312 | */ |
272 | if (!pm_idle) { | 313 | if (!pm_idle) { |
273 | printk("using mwait in idle threads.\n"); | 314 | printk(KERN_INFO "using mwait in idle threads.\n"); |
274 | pm_idle = mwait_idle; | 315 | pm_idle = mwait_idle; |
275 | } | 316 | } |
276 | } | 317 | } |
318 | selected = 1; | ||
277 | } | 319 | } |
278 | 320 | ||
279 | static int __init idle_setup(char *str) | 321 | static int __init idle_setup(char *str) |
@@ -281,10 +323,6 @@ static int __init idle_setup(char *str) | |||
281 | if (!strcmp(str, "poll")) { | 323 | if (!strcmp(str, "poll")) { |
282 | printk("using polling idle threads.\n"); | 324 | printk("using polling idle threads.\n"); |
283 | pm_idle = poll_idle; | 325 | pm_idle = poll_idle; |
284 | #ifdef CONFIG_X86_SMP | ||
285 | if (smp_num_siblings > 1) | ||
286 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); | ||
287 | #endif | ||
288 | } else if (!strcmp(str, "mwait")) | 326 | } else if (!strcmp(str, "mwait")) |
289 | force_mwait = 1; | 327 | force_mwait = 1; |
290 | else | 328 | else |
@@ -299,15 +337,15 @@ void __show_registers(struct pt_regs *regs, int all) | |||
299 | { | 337 | { |
300 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | 338 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; |
301 | unsigned long d0, d1, d2, d3, d6, d7; | 339 | unsigned long d0, d1, d2, d3, d6, d7; |
302 | unsigned long esp; | 340 | unsigned long sp; |
303 | unsigned short ss, gs; | 341 | unsigned short ss, gs; |
304 | 342 | ||
305 | if (user_mode_vm(regs)) { | 343 | if (user_mode_vm(regs)) { |
306 | esp = regs->esp; | 344 | sp = regs->sp; |
307 | ss = regs->xss & 0xffff; | 345 | ss = regs->ss & 0xffff; |
308 | savesegment(gs, gs); | 346 | savesegment(gs, gs); |
309 | } else { | 347 | } else { |
310 | esp = (unsigned long) (®s->esp); | 348 | sp = (unsigned long) (®s->sp); |
311 | savesegment(ss, ss); | 349 | savesegment(ss, ss); |
312 | savesegment(gs, gs); | 350 | savesegment(gs, gs); |
313 | } | 351 | } |
@@ -320,17 +358,17 @@ void __show_registers(struct pt_regs *regs, int all) | |||
320 | init_utsname()->version); | 358 | init_utsname()->version); |
321 | 359 | ||
322 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | 360 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
323 | 0xffff & regs->xcs, regs->eip, regs->eflags, | 361 | 0xffff & regs->cs, regs->ip, regs->flags, |
324 | smp_processor_id()); | 362 | smp_processor_id()); |
325 | print_symbol("EIP is at %s\n", regs->eip); | 363 | print_symbol("EIP is at %s\n", regs->ip); |
326 | 364 | ||
327 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | 365 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", |
328 | regs->eax, regs->ebx, regs->ecx, regs->edx); | 366 | regs->ax, regs->bx, regs->cx, regs->dx); |
329 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", | 367 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", |
330 | regs->esi, regs->edi, regs->ebp, esp); | 368 | regs->si, regs->di, regs->bp, sp); |
331 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", | 369 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", |
332 | regs->xds & 0xffff, regs->xes & 0xffff, | 370 | regs->ds & 0xffff, regs->es & 0xffff, |
333 | regs->xfs & 0xffff, gs, ss); | 371 | regs->fs & 0xffff, gs, ss); |
334 | 372 | ||
335 | if (!all) | 373 | if (!all) |
336 | return; | 374 | return; |
@@ -358,12 +396,12 @@ void __show_registers(struct pt_regs *regs, int all) | |||
358 | void show_regs(struct pt_regs *regs) | 396 | void show_regs(struct pt_regs *regs) |
359 | { | 397 | { |
360 | __show_registers(regs, 1); | 398 | __show_registers(regs, 1); |
361 | show_trace(NULL, regs, ®s->esp); | 399 | show_trace(NULL, regs, ®s->sp, regs->bp); |
362 | } | 400 | } |
363 | 401 | ||
364 | /* | 402 | /* |
365 | * This gets run with %ebx containing the | 403 | * This gets run with %bx containing the |
366 | * function to call, and %edx containing | 404 | * function to call, and %dx containing |
367 | * the "args". | 405 | * the "args". |
368 | */ | 406 | */ |
369 | extern void kernel_thread_helper(void); | 407 | extern void kernel_thread_helper(void); |
@@ -377,16 +415,16 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
377 | 415 | ||
378 | memset(®s, 0, sizeof(regs)); | 416 | memset(®s, 0, sizeof(regs)); |
379 | 417 | ||
380 | regs.ebx = (unsigned long) fn; | 418 | regs.bx = (unsigned long) fn; |
381 | regs.edx = (unsigned long) arg; | 419 | regs.dx = (unsigned long) arg; |
382 | 420 | ||
383 | regs.xds = __USER_DS; | 421 | regs.ds = __USER_DS; |
384 | regs.xes = __USER_DS; | 422 | regs.es = __USER_DS; |
385 | regs.xfs = __KERNEL_PERCPU; | 423 | regs.fs = __KERNEL_PERCPU; |
386 | regs.orig_eax = -1; | 424 | regs.orig_ax = -1; |
387 | regs.eip = (unsigned long) kernel_thread_helper; | 425 | regs.ip = (unsigned long) kernel_thread_helper; |
388 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 426 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
389 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | 427 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; |
390 | 428 | ||
391 | /* Ok, create the new process.. */ | 429 | /* Ok, create the new process.. */ |
392 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | 430 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); |
@@ -424,7 +462,12 @@ void flush_thread(void) | |||
424 | { | 462 | { |
425 | struct task_struct *tsk = current; | 463 | struct task_struct *tsk = current; |
426 | 464 | ||
427 | memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); | 465 | tsk->thread.debugreg0 = 0; |
466 | tsk->thread.debugreg1 = 0; | ||
467 | tsk->thread.debugreg2 = 0; | ||
468 | tsk->thread.debugreg3 = 0; | ||
469 | tsk->thread.debugreg6 = 0; | ||
470 | tsk->thread.debugreg7 = 0; | ||
428 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 471 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
429 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 472 | clear_tsk_thread_flag(tsk, TIF_DEBUG); |
430 | /* | 473 | /* |
@@ -449,7 +492,7 @@ void prepare_to_copy(struct task_struct *tsk) | |||
449 | unlazy_fpu(tsk); | 492 | unlazy_fpu(tsk); |
450 | } | 493 | } |
451 | 494 | ||
452 | int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | 495 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
453 | unsigned long unused, | 496 | unsigned long unused, |
454 | struct task_struct * p, struct pt_regs * regs) | 497 | struct task_struct * p, struct pt_regs * regs) |
455 | { | 498 | { |
@@ -459,15 +502,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
459 | 502 | ||
460 | childregs = task_pt_regs(p); | 503 | childregs = task_pt_regs(p); |
461 | *childregs = *regs; | 504 | *childregs = *regs; |
462 | childregs->eax = 0; | 505 | childregs->ax = 0; |
463 | childregs->esp = esp; | 506 | childregs->sp = sp; |
464 | 507 | ||
465 | p->thread.esp = (unsigned long) childregs; | 508 | p->thread.sp = (unsigned long) childregs; |
466 | p->thread.esp0 = (unsigned long) (childregs+1); | 509 | p->thread.sp0 = (unsigned long) (childregs+1); |
467 | 510 | ||
468 | p->thread.eip = (unsigned long) ret_from_fork; | 511 | p->thread.ip = (unsigned long) ret_from_fork; |
469 | 512 | ||
470 | savesegment(gs,p->thread.gs); | 513 | savesegment(gs, p->thread.gs); |
471 | 514 | ||
472 | tsk = current; | 515 | tsk = current; |
473 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 516 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -480,32 +523,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
480 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | 523 | set_tsk_thread_flag(p, TIF_IO_BITMAP); |
481 | } | 524 | } |
482 | 525 | ||
526 | err = 0; | ||
527 | |||
483 | /* | 528 | /* |
484 | * Set a new TLS for the child thread? | 529 | * Set a new TLS for the child thread? |
485 | */ | 530 | */ |
486 | if (clone_flags & CLONE_SETTLS) { | 531 | if (clone_flags & CLONE_SETTLS) |
487 | struct desc_struct *desc; | 532 | err = do_set_thread_area(p, -1, |
488 | struct user_desc info; | 533 | (struct user_desc __user *)childregs->si, 0); |
489 | int idx; | ||
490 | |||
491 | err = -EFAULT; | ||
492 | if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) | ||
493 | goto out; | ||
494 | err = -EINVAL; | ||
495 | if (LDT_empty(&info)) | ||
496 | goto out; | ||
497 | |||
498 | idx = info.entry_number; | ||
499 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
500 | goto out; | ||
501 | |||
502 | desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
503 | desc->a = LDT_entry_a(&info); | ||
504 | desc->b = LDT_entry_b(&info); | ||
505 | } | ||
506 | 534 | ||
507 | err = 0; | ||
508 | out: | ||
509 | if (err && p->thread.io_bitmap_ptr) { | 535 | if (err && p->thread.io_bitmap_ptr) { |
510 | kfree(p->thread.io_bitmap_ptr); | 536 | kfree(p->thread.io_bitmap_ptr); |
511 | p->thread.io_bitmap_max = 0; | 537 | p->thread.io_bitmap_max = 0; |
@@ -518,62 +544,52 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
518 | */ | 544 | */ |
519 | void dump_thread(struct pt_regs * regs, struct user * dump) | 545 | void dump_thread(struct pt_regs * regs, struct user * dump) |
520 | { | 546 | { |
521 | int i; | 547 | u16 gs; |
522 | 548 | ||
523 | /* changed the size calculations - should hopefully work better. lbt */ | 549 | /* changed the size calculations - should hopefully work better. lbt */ |
524 | dump->magic = CMAGIC; | 550 | dump->magic = CMAGIC; |
525 | dump->start_code = 0; | 551 | dump->start_code = 0; |
526 | dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); | 552 | dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); |
527 | dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; | 553 | dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; |
528 | dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; | 554 | dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; |
529 | dump->u_dsize -= dump->u_tsize; | 555 | dump->u_dsize -= dump->u_tsize; |
530 | dump->u_ssize = 0; | 556 | dump->u_ssize = 0; |
531 | for (i = 0; i < 8; i++) | 557 | dump->u_debugreg[0] = current->thread.debugreg0; |
532 | dump->u_debugreg[i] = current->thread.debugreg[i]; | 558 | dump->u_debugreg[1] = current->thread.debugreg1; |
559 | dump->u_debugreg[2] = current->thread.debugreg2; | ||
560 | dump->u_debugreg[3] = current->thread.debugreg3; | ||
561 | dump->u_debugreg[4] = 0; | ||
562 | dump->u_debugreg[5] = 0; | ||
563 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
564 | dump->u_debugreg[7] = current->thread.debugreg7; | ||
533 | 565 | ||
534 | if (dump->start_stack < TASK_SIZE) | 566 | if (dump->start_stack < TASK_SIZE) |
535 | dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; | 567 | dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; |
536 | 568 | ||
537 | dump->regs.ebx = regs->ebx; | 569 | dump->regs.bx = regs->bx; |
538 | dump->regs.ecx = regs->ecx; | 570 | dump->regs.cx = regs->cx; |
539 | dump->regs.edx = regs->edx; | 571 | dump->regs.dx = regs->dx; |
540 | dump->regs.esi = regs->esi; | 572 | dump->regs.si = regs->si; |
541 | dump->regs.edi = regs->edi; | 573 | dump->regs.di = regs->di; |
542 | dump->regs.ebp = regs->ebp; | 574 | dump->regs.bp = regs->bp; |
543 | dump->regs.eax = regs->eax; | 575 | dump->regs.ax = regs->ax; |
544 | dump->regs.ds = regs->xds; | 576 | dump->regs.ds = (u16)regs->ds; |
545 | dump->regs.es = regs->xes; | 577 | dump->regs.es = (u16)regs->es; |
546 | dump->regs.fs = regs->xfs; | 578 | dump->regs.fs = (u16)regs->fs; |
547 | savesegment(gs,dump->regs.gs); | 579 | savesegment(gs,gs); |
548 | dump->regs.orig_eax = regs->orig_eax; | 580 | dump->regs.orig_ax = regs->orig_ax; |
549 | dump->regs.eip = regs->eip; | 581 | dump->regs.ip = regs->ip; |
550 | dump->regs.cs = regs->xcs; | 582 | dump->regs.cs = (u16)regs->cs; |
551 | dump->regs.eflags = regs->eflags; | 583 | dump->regs.flags = regs->flags; |
552 | dump->regs.esp = regs->esp; | 584 | dump->regs.sp = regs->sp; |
553 | dump->regs.ss = regs->xss; | 585 | dump->regs.ss = (u16)regs->ss; |
554 | 586 | ||
555 | dump->u_fpvalid = dump_fpu (regs, &dump->i387); | 587 | dump->u_fpvalid = dump_fpu (regs, &dump->i387); |
556 | } | 588 | } |
557 | EXPORT_SYMBOL(dump_thread); | 589 | EXPORT_SYMBOL(dump_thread); |
558 | 590 | ||
559 | /* | ||
560 | * Capture the user space registers if the task is not running (in user space) | ||
561 | */ | ||
562 | int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | ||
563 | { | ||
564 | struct pt_regs ptregs = *task_pt_regs(tsk); | ||
565 | ptregs.xcs &= 0xffff; | ||
566 | ptregs.xds &= 0xffff; | ||
567 | ptregs.xes &= 0xffff; | ||
568 | ptregs.xss &= 0xffff; | ||
569 | |||
570 | elf_core_copy_regs(regs, &ptregs); | ||
571 | |||
572 | return 1; | ||
573 | } | ||
574 | |||
575 | #ifdef CONFIG_SECCOMP | 591 | #ifdef CONFIG_SECCOMP |
576 | void hard_disable_TSC(void) | 592 | static void hard_disable_TSC(void) |
577 | { | 593 | { |
578 | write_cr4(read_cr4() | X86_CR4_TSD); | 594 | write_cr4(read_cr4() | X86_CR4_TSD); |
579 | } | 595 | } |
@@ -588,7 +604,7 @@ void disable_TSC(void) | |||
588 | hard_disable_TSC(); | 604 | hard_disable_TSC(); |
589 | preempt_enable(); | 605 | preempt_enable(); |
590 | } | 606 | } |
591 | void hard_enable_TSC(void) | 607 | static void hard_enable_TSC(void) |
592 | { | 608 | { |
593 | write_cr4(read_cr4() & ~X86_CR4_TSD); | 609 | write_cr4(read_cr4() & ~X86_CR4_TSD); |
594 | } | 610 | } |
@@ -598,18 +614,32 @@ static noinline void | |||
598 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 614 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
599 | struct tss_struct *tss) | 615 | struct tss_struct *tss) |
600 | { | 616 | { |
601 | struct thread_struct *next; | 617 | struct thread_struct *prev, *next; |
618 | unsigned long debugctl; | ||
602 | 619 | ||
620 | prev = &prev_p->thread; | ||
603 | next = &next_p->thread; | 621 | next = &next_p->thread; |
604 | 622 | ||
623 | debugctl = prev->debugctlmsr; | ||
624 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
625 | /* we clear debugctl to make sure DS | ||
626 | * is not in use when we change it */ | ||
627 | debugctl = 0; | ||
628 | wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | ||
629 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
630 | } | ||
631 | |||
632 | if (next->debugctlmsr != debugctl) | ||
633 | wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); | ||
634 | |||
605 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 635 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
606 | set_debugreg(next->debugreg[0], 0); | 636 | set_debugreg(next->debugreg0, 0); |
607 | set_debugreg(next->debugreg[1], 1); | 637 | set_debugreg(next->debugreg1, 1); |
608 | set_debugreg(next->debugreg[2], 2); | 638 | set_debugreg(next->debugreg2, 2); |
609 | set_debugreg(next->debugreg[3], 3); | 639 | set_debugreg(next->debugreg3, 3); |
610 | /* no 4 and 5 */ | 640 | /* no 4 and 5 */ |
611 | set_debugreg(next->debugreg[6], 6); | 641 | set_debugreg(next->debugreg6, 6); |
612 | set_debugreg(next->debugreg[7], 7); | 642 | set_debugreg(next->debugreg7, 7); |
613 | } | 643 | } |
614 | 644 | ||
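
The new MSR handling above is order-sensitive: DEBUGCTL is cleared before DS_AREA is retargeted, so the debug store is never active while its pointer changes, and DEBUGCTL is rewritten afterwards only if the incoming task wants a different value than whatever is now in the register. A sketch of that sequencing with stubbed MSR writes; nothing about the real register encodings is assumed.

#include <stdint.h>
#include <stdio.h>

static void wrmsr_stub(const char *name, uint64_t val)
{
    printf("wrmsr %s <- %#llx\n", name, (unsigned long long)val);
}

/* Quiesce DEBUGCTL, retarget DS_AREA, then install the next task's DEBUGCTL
 * only if it differs from what is currently programmed. */
static void switch_ds_sketch(uint64_t prev_debugctl, uint64_t prev_ds,
                             uint64_t next_debugctl, uint64_t next_ds)
{
    uint64_t debugctl = prev_debugctl;

    if (next_ds != prev_ds) {
        debugctl = 0;
        wrmsr_stub("IA32_DEBUGCTLMSR", 0);
        wrmsr_stub("IA32_DS_AREA", next_ds);
    }
    if (next_debugctl != debugctl)
        wrmsr_stub("IA32_DEBUGCTLMSR", next_debugctl);
}

int main(void)
{
    switch_ds_sketch(0x1, 0x1000, 0x1, 0x2000);   /* same DEBUGCTL, new DS area */
    return 0;
}
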
615 | #ifdef CONFIG_SECCOMP | 645 | #ifdef CONFIG_SECCOMP |
@@ -623,6 +653,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
623 | } | 653 | } |
624 | #endif | 654 | #endif |
625 | 655 | ||
656 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
657 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
658 | |||
659 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
660 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
661 | |||
662 | |||
626 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 663 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
627 | /* | 664 | /* |
628 | * Disable the bitmap via an invalid offset. We still cache | 665 | * Disable the bitmap via an invalid offset. We still cache |
@@ -676,11 +713,11 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
676 | * More important, however, is the fact that this allows us much | 713 | * More important, however, is the fact that this allows us much |
677 | * more flexibility. | 714 | * more flexibility. |
678 | * | 715 | * |
679 | * The return value (in %eax) will be the "prev" task after | 716 | * The return value (in %ax) will be the "prev" task after |
680 | * the task-switch, and shows up in ret_from_fork in entry.S, | 717 | * the task-switch, and shows up in ret_from_fork in entry.S, |
681 | * for example. | 718 | * for example. |
682 | */ | 719 | */ |
683 | struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 720 | struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
684 | { | 721 | { |
685 | struct thread_struct *prev = &prev_p->thread, | 722 | struct thread_struct *prev = &prev_p->thread, |
686 | *next = &next_p->thread; | 723 | *next = &next_p->thread; |
@@ -699,7 +736,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
699 | /* | 736 | /* |
700 | * Reload esp0. | 737 | * Reload esp0. |
701 | */ | 738 | */ |
702 | load_esp0(tss, next); | 739 | load_sp0(tss, next); |
703 | 740 | ||
704 | /* | 741 | /* |
705 | * Save away %gs. No need to save %fs, as it was saved on the | 742 | * Save away %gs. No need to save %fs, as it was saved on the |
@@ -763,7 +800,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
763 | 800 | ||
764 | asmlinkage int sys_fork(struct pt_regs regs) | 801 | asmlinkage int sys_fork(struct pt_regs regs) |
765 | { | 802 | { |
766 | return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); | 803 | return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); |
767 | } | 804 | } |
768 | 805 | ||
769 | asmlinkage int sys_clone(struct pt_regs regs) | 806 | asmlinkage int sys_clone(struct pt_regs regs) |
@@ -772,12 +809,12 @@ asmlinkage int sys_clone(struct pt_regs regs) | |||
772 | unsigned long newsp; | 809 | unsigned long newsp; |
773 | int __user *parent_tidptr, *child_tidptr; | 810 | int __user *parent_tidptr, *child_tidptr; |
774 | 811 | ||
775 | clone_flags = regs.ebx; | 812 | clone_flags = regs.bx; |
776 | newsp = regs.ecx; | 813 | newsp = regs.cx; |
777 | parent_tidptr = (int __user *)regs.edx; | 814 | parent_tidptr = (int __user *)regs.dx; |
778 | child_tidptr = (int __user *)regs.edi; | 815 | child_tidptr = (int __user *)regs.di; |
779 | if (!newsp) | 816 | if (!newsp) |
780 | newsp = regs.esp; | 817 | newsp = regs.sp; |
781 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); | 818 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); |
782 | } | 819 | } |
783 | 820 | ||
@@ -793,7 +830,7 @@ asmlinkage int sys_clone(struct pt_regs regs) | |||
793 | */ | 830 | */ |
794 | asmlinkage int sys_vfork(struct pt_regs regs) | 831 | asmlinkage int sys_vfork(struct pt_regs regs) |
795 | { | 832 | { |
796 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); | 833 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); |
797 | } | 834 | } |
798 | 835 | ||
799 | /* | 836 | /* |
@@ -804,18 +841,15 @@ asmlinkage int sys_execve(struct pt_regs regs) | |||
804 | int error; | 841 | int error; |
805 | char * filename; | 842 | char * filename; |
806 | 843 | ||
807 | filename = getname((char __user *) regs.ebx); | 844 | filename = getname((char __user *) regs.bx); |
808 | error = PTR_ERR(filename); | 845 | error = PTR_ERR(filename); |
809 | if (IS_ERR(filename)) | 846 | if (IS_ERR(filename)) |
810 | goto out; | 847 | goto out; |
811 | error = do_execve(filename, | 848 | error = do_execve(filename, |
812 | (char __user * __user *) regs.ecx, | 849 | (char __user * __user *) regs.cx, |
813 | (char __user * __user *) regs.edx, | 850 | (char __user * __user *) regs.dx, |
814 | ®s); | 851 | ®s); |
815 | if (error == 0) { | 852 | if (error == 0) { |
816 | task_lock(current); | ||
817 | current->ptrace &= ~PT_DTRACE; | ||
818 | task_unlock(current); | ||
819 | /* Make sure we don't return using sysenter.. */ | 853 | /* Make sure we don't return using sysenter.. */ |
820 | set_thread_flag(TIF_IRET); | 854 | set_thread_flag(TIF_IRET); |
821 | } | 855 | } |
@@ -829,145 +863,37 @@ out: | |||
829 | 863 | ||
830 | unsigned long get_wchan(struct task_struct *p) | 864 | unsigned long get_wchan(struct task_struct *p) |
831 | { | 865 | { |
832 | unsigned long ebp, esp, eip; | 866 | unsigned long bp, sp, ip; |
833 | unsigned long stack_page; | 867 | unsigned long stack_page; |
834 | int count = 0; | 868 | int count = 0; |
835 | if (!p || p == current || p->state == TASK_RUNNING) | 869 | if (!p || p == current || p->state == TASK_RUNNING) |
836 | return 0; | 870 | return 0; |
837 | stack_page = (unsigned long)task_stack_page(p); | 871 | stack_page = (unsigned long)task_stack_page(p); |
838 | esp = p->thread.esp; | 872 | sp = p->thread.sp; |
839 | if (!stack_page || esp < stack_page || esp > top_esp+stack_page) | 873 | if (!stack_page || sp < stack_page || sp > top_esp+stack_page) |
840 | return 0; | 874 | return 0; |
841 | /* include/asm-i386/system.h:switch_to() pushes ebp last. */ | 875 | /* include/asm-i386/system.h:switch_to() pushes bp last. */ |
842 | ebp = *(unsigned long *) esp; | 876 | bp = *(unsigned long *) sp; |
843 | do { | 877 | do { |
844 | if (ebp < stack_page || ebp > top_ebp+stack_page) | 878 | if (bp < stack_page || bp > top_ebp+stack_page) |
845 | return 0; | 879 | return 0; |
846 | eip = *(unsigned long *) (ebp+4); | 880 | ip = *(unsigned long *) (bp+4); |
847 | if (!in_sched_functions(eip)) | 881 | if (!in_sched_functions(ip)) |
848 | return eip; | 882 | return ip; |
849 | ebp = *(unsigned long *) ebp; | 883 | bp = *(unsigned long *) bp; |
850 | } while (count++ < 16); | 884 | } while (count++ < 16); |
851 | return 0; | 885 | return 0; |
852 | } | 886 | } |
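
The loop above is a plain frame-pointer unwind: on i386 each frame stores the caller's %ebp at bp[0] and the return address at bp[1] (bp + 4). A self-contained sketch of the same walk, with the stack bounds and the skip predicate passed in where the kernel uses task_stack_page() and in_sched_functions(); the function name is illustrative.

/* Sketch of the frame-pointer walk performed by get_wchan(). */
static unsigned long walk_frames(unsigned long bp,
				 unsigned long stack_lo, unsigned long stack_hi,
				 int (*skip)(unsigned long ip))
{
	int count = 0;

	do {
		unsigned long ip;

		if (bp < stack_lo || bp > stack_hi)
			return 0;			/* frame pointer left the stack */
		ip = ((unsigned long *)bp)[1];		/* saved return address */
		if (!skip(ip))
			return ip;			/* first caller outside the scheduler */
		bp = ((unsigned long *)bp)[0];		/* follow the saved %ebp */
	} while (count++ < 16);				/* bail out on deep or looping chains */

	return 0;
}
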
853 | 887 | ||
854 | /* | ||
855 | * sys_alloc_thread_area: get a yet unused TLS descriptor index. | ||
856 | */ | ||
857 | static int get_free_idx(void) | ||
858 | { | ||
859 | struct thread_struct *t = ¤t->thread; | ||
860 | int idx; | ||
861 | |||
862 | for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) | ||
863 | if (desc_empty(t->tls_array + idx)) | ||
864 | return idx + GDT_ENTRY_TLS_MIN; | ||
865 | return -ESRCH; | ||
866 | } | ||
867 | |||
868 | /* | ||
869 | * Set a given TLS descriptor: | ||
870 | */ | ||
871 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | ||
872 | { | ||
873 | struct thread_struct *t = ¤t->thread; | ||
874 | struct user_desc info; | ||
875 | struct desc_struct *desc; | ||
876 | int cpu, idx; | ||
877 | |||
878 | if (copy_from_user(&info, u_info, sizeof(info))) | ||
879 | return -EFAULT; | ||
880 | idx = info.entry_number; | ||
881 | |||
882 | /* | ||
883 | * index -1 means the kernel should try to find and | ||
884 | * allocate an empty descriptor: | ||
885 | */ | ||
886 | if (idx == -1) { | ||
887 | idx = get_free_idx(); | ||
888 | if (idx < 0) | ||
889 | return idx; | ||
890 | if (put_user(idx, &u_info->entry_number)) | ||
891 | return -EFAULT; | ||
892 | } | ||
893 | |||
894 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
895 | return -EINVAL; | ||
896 | |||
897 | desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
898 | |||
899 | /* | ||
900 | * We must not get preempted while modifying the TLS. | ||
901 | */ | ||
902 | cpu = get_cpu(); | ||
903 | |||
904 | if (LDT_empty(&info)) { | ||
905 | desc->a = 0; | ||
906 | desc->b = 0; | ||
907 | } else { | ||
908 | desc->a = LDT_entry_a(&info); | ||
909 | desc->b = LDT_entry_b(&info); | ||
910 | } | ||
911 | load_TLS(t, cpu); | ||
912 | |||
913 | put_cpu(); | ||
914 | |||
915 | return 0; | ||
916 | } | ||
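
The function above now lives in the shared tls.c (built for 32-bit and for CONFIG_IA32_EMULATION). For reference, a hedged user-space sketch of the usual calling convention on 32-bit: pass entry_number = -1 so the kernel picks a free GDT slot and writes the chosen index back; the base address here is illustrative.

#include <asm/ldt.h>		/* struct user_desc */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc ud = {
		.entry_number	= -1,		/* ask the kernel for a free slot */
		.base_addr	= 0x1000,	/* illustrative TLS base */
		.limit		= 0xfffff,
		.seg_32bit	= 1,
		.limit_in_pages	= 1,
		.useable	= 1,
	};

	if (syscall(SYS_set_thread_area, &ud) < 0) {
		perror("set_thread_area");
		return 1;
	}
	printf("TLS descriptor installed in GDT entry %u\n", ud.entry_number);
	return 0;
}
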
917 | |||
918 | /* | ||
919 | * Get the current Thread-Local Storage area: | ||
920 | */ | ||
921 | |||
922 | #define GET_BASE(desc) ( \ | ||
923 | (((desc)->a >> 16) & 0x0000ffff) | \ | ||
924 | (((desc)->b << 16) & 0x00ff0000) | \ | ||
925 | ( (desc)->b & 0xff000000) ) | ||
926 | |||
927 | #define GET_LIMIT(desc) ( \ | ||
928 | ((desc)->a & 0x0ffff) | \ | ||
929 | ((desc)->b & 0xf0000) ) | ||
930 | |||
931 | #define GET_32BIT(desc) (((desc)->b >> 22) & 1) | ||
932 | #define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) | ||
933 | #define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) | ||
934 | #define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) | ||
935 | #define GET_PRESENT(desc) (((desc)->b >> 15) & 1) | ||
936 | #define GET_USEABLE(desc) (((desc)->b >> 20) & 1) | ||
937 | |||
938 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | ||
939 | { | ||
940 | struct user_desc info; | ||
941 | struct desc_struct *desc; | ||
942 | int idx; | ||
943 | |||
944 | if (get_user(idx, &u_info->entry_number)) | ||
945 | return -EFAULT; | ||
946 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
947 | return -EINVAL; | ||
948 | |||
949 | memset(&info, 0, sizeof(info)); | ||
950 | |||
951 | desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
952 | |||
953 | info.entry_number = idx; | ||
954 | info.base_addr = GET_BASE(desc); | ||
955 | info.limit = GET_LIMIT(desc); | ||
956 | info.seg_32bit = GET_32BIT(desc); | ||
957 | info.contents = GET_CONTENTS(desc); | ||
958 | info.read_exec_only = !GET_WRITABLE(desc); | ||
959 | info.limit_in_pages = GET_LIMIT_PAGES(desc); | ||
960 | info.seg_not_present = !GET_PRESENT(desc); | ||
961 | info.useable = GET_USEABLE(desc); | ||
962 | |||
963 | if (copy_to_user(u_info, &info, sizeof(info))) | ||
964 | return -EFAULT; | ||
965 | return 0; | ||
966 | } | ||
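
The GET_BASE()/GET_LIMIT() macros removed above exist only because a GDT descriptor scatters its fields across the two 32-bit words; the replacement code relies on the shared helpers (get_desc_base() and friends) instead. A sketch of the layout they decode; the function names are illustrative, not kernel API.

/* Word 'a' is bytes 0..3 of the descriptor, word 'b' is bytes 4..7. */
static inline unsigned long gdt_base(unsigned int a, unsigned int b)
{
	return ((a >> 16) & 0x0000ffff) |	/* base bits 15..0  from a[31:16] */
	       ((b << 16) & 0x00ff0000) |	/* base bits 23..16 from b[7:0]   */
	       ( b        & 0xff000000);	/* base bits 31..24 from b[31:24] */
}

static inline unsigned long gdt_limit(unsigned int a, unsigned int b)
{
	return (a & 0x0ffff) |			/* limit bits 15..0  from a[15:0]  */
	       (b & 0xf0000);			/* limit bits 19..16 from b[19:16] */
}
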
967 | |||
968 | unsigned long arch_align_stack(unsigned long sp) | 888 | unsigned long arch_align_stack(unsigned long sp) |
969 | { | 889 | { |
970 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | 890 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
971 | sp -= get_random_int() % 8192; | 891 | sp -= get_random_int() % 8192; |
972 | return sp & ~0xf; | 892 | return sp & ~0xf; |
973 | } | 893 | } |
894 | |||
895 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
896 | { | ||
897 | unsigned long range_end = mm->brk + 0x02000000; | ||
898 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
899 | } | ||
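
To make the two randomization helpers concrete: arch_align_stack() lowers the starting stack pointer by up to 8 KiB and re-aligns it to 16 bytes, and the new arch_randomize_brk() places the heap start anywhere in a 32 MiB window above mm->brk (0x02000000 bytes), falling back to mm->brk itself when randomize_range() returns 0. A sketch of the same arithmetic with the random source abstracted out; the names below are illustrative.

#include <stdint.h>

/* rnd stands in for get_random_int(). */
static uintptr_t align_stack_like(uintptr_t sp, unsigned int rnd)
{
	sp -= rnd % 8192;		/* drop the stack top by 0..8191 bytes */
	return sp & ~(uintptr_t)0xf;	/* keep 16-byte alignment */
}

/* picked stands in for randomize_range(brk, brk + 32 MiB, 0), which
 * returns a page-aligned address inside the window, or 0 on failure. */
static uintptr_t randomize_brk_like(uintptr_t brk, uintptr_t picked)
{
	return picked ? picked : brk;
}
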
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6309b275cb9c..137a86171c39 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Pentium III FXSR, SSE support | 4 | * Pentium III FXSR, SSE support |
5 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 5 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
6 | * | 6 | * |
7 | * X86-64 port | 7 | * X86-64 port |
8 | * Andi Kleen. | 8 | * Andi Kleen. |
9 | * | 9 | * |
@@ -19,19 +19,19 @@ | |||
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/fs.h> | ||
22 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
23 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
24 | #include <linux/fs.h> | ||
25 | #include <linux/elfcore.h> | 25 | #include <linux/elfcore.h> |
26 | #include <linux/smp.h> | 26 | #include <linux/smp.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/user.h> | 28 | #include <linux/user.h> |
29 | #include <linux/module.h> | ||
30 | #include <linux/a.out.h> | 29 | #include <linux/a.out.h> |
31 | #include <linux/interrupt.h> | 30 | #include <linux/interrupt.h> |
31 | #include <linux/utsname.h> | ||
32 | #include <linux/delay.h> | 32 | #include <linux/delay.h> |
33 | #include <linux/module.h> | ||
33 | #include <linux/ptrace.h> | 34 | #include <linux/ptrace.h> |
34 | #include <linux/utsname.h> | ||
35 | #include <linux/random.h> | 35 | #include <linux/random.h> |
36 | #include <linux/notifier.h> | 36 | #include <linux/notifier.h> |
37 | #include <linux/kprobes.h> | 37 | #include <linux/kprobes.h> |
@@ -72,13 +72,6 @@ void idle_notifier_register(struct notifier_block *n) | |||
72 | { | 72 | { |
73 | atomic_notifier_chain_register(&idle_notifier, n); | 73 | atomic_notifier_chain_register(&idle_notifier, n); |
74 | } | 74 | } |
75 | EXPORT_SYMBOL_GPL(idle_notifier_register); | ||
76 | |||
77 | void idle_notifier_unregister(struct notifier_block *n) | ||
78 | { | ||
79 | atomic_notifier_chain_unregister(&idle_notifier, n); | ||
80 | } | ||
81 | EXPORT_SYMBOL(idle_notifier_unregister); | ||
82 | 75 | ||
83 | void enter_idle(void) | 76 | void enter_idle(void) |
84 | { | 77 | { |
@@ -106,7 +99,7 @@ void exit_idle(void) | |||
106 | * We use this if we don't have any better | 99 | * We use this if we don't have any better |
107 | * idle routine.. | 100 | * idle routine.. |
108 | */ | 101 | */ |
109 | static void default_idle(void) | 102 | void default_idle(void) |
110 | { | 103 | { |
111 | current_thread_info()->status &= ~TS_POLLING; | 104 | current_thread_info()->status &= ~TS_POLLING; |
112 | /* | 105 | /* |
@@ -116,11 +109,18 @@ static void default_idle(void) | |||
116 | smp_mb(); | 109 | smp_mb(); |
117 | local_irq_disable(); | 110 | local_irq_disable(); |
118 | if (!need_resched()) { | 111 | if (!need_resched()) { |
119 | /* Enables interrupts one instruction before HLT. | 112 | ktime_t t0, t1; |
120 | x86 special cases this so there is no race. */ | 113 | u64 t0n, t1n; |
121 | safe_halt(); | 114 | |
122 | } else | 115 | t0 = ktime_get(); |
123 | local_irq_enable(); | 116 | t0n = ktime_to_ns(t0); |
117 | safe_halt(); /* enables interrupts racelessly */ | ||
118 | local_irq_disable(); | ||
119 | t1 = ktime_get(); | ||
120 | t1n = ktime_to_ns(t1); | ||
121 | sched_clock_idle_wakeup_event(t1n - t0n); | ||
122 | } | ||
123 | local_irq_enable(); | ||
124 | current_thread_info()->status |= TS_POLLING; | 124 | current_thread_info()->status |= TS_POLLING; |
125 | } | 125 | } |
126 | 126 | ||
@@ -129,43 +129,12 @@ static void default_idle(void) | |||
129 | * to poll the ->need_resched flag instead of waiting for the | 129 | * to poll the ->need_resched flag instead of waiting for the |
130 | * cross-CPU IPI to arrive. Use this option with caution. | 130 | * cross-CPU IPI to arrive. Use this option with caution. |
131 | */ | 131 | */ |
132 | static void poll_idle (void) | 132 | static void poll_idle(void) |
133 | { | 133 | { |
134 | local_irq_enable(); | 134 | local_irq_enable(); |
135 | cpu_relax(); | 135 | cpu_relax(); |
136 | } | 136 | } |
137 | 137 | ||
138 | void cpu_idle_wait(void) | ||
139 | { | ||
140 | unsigned int cpu, this_cpu = get_cpu(); | ||
141 | cpumask_t map, tmp = current->cpus_allowed; | ||
142 | |||
143 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | ||
144 | put_cpu(); | ||
145 | |||
146 | cpus_clear(map); | ||
147 | for_each_online_cpu(cpu) { | ||
148 | per_cpu(cpu_idle_state, cpu) = 1; | ||
149 | cpu_set(cpu, map); | ||
150 | } | ||
151 | |||
152 | __get_cpu_var(cpu_idle_state) = 0; | ||
153 | |||
154 | wmb(); | ||
155 | do { | ||
156 | ssleep(1); | ||
157 | for_each_online_cpu(cpu) { | ||
158 | if (cpu_isset(cpu, map) && | ||
159 | !per_cpu(cpu_idle_state, cpu)) | ||
160 | cpu_clear(cpu, map); | ||
161 | } | ||
162 | cpus_and(map, map, cpu_online_map); | ||
163 | } while (!cpus_empty(map)); | ||
164 | |||
165 | set_cpus_allowed(current, tmp); | ||
166 | } | ||
167 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | ||
168 | |||
169 | #ifdef CONFIG_HOTPLUG_CPU | 138 | #ifdef CONFIG_HOTPLUG_CPU |
170 | DECLARE_PER_CPU(int, cpu_state); | 139 | DECLARE_PER_CPU(int, cpu_state); |
171 | 140 | ||
@@ -196,19 +165,18 @@ static inline void play_dead(void) | |||
196 | * low exit latency (ie sit in a loop waiting for | 165 | * low exit latency (ie sit in a loop waiting for |
197 | * somebody to say that they'd like to reschedule) | 166 | * somebody to say that they'd like to reschedule) |
198 | */ | 167 | */ |
199 | void cpu_idle (void) | 168 | void cpu_idle(void) |
200 | { | 169 | { |
201 | current_thread_info()->status |= TS_POLLING; | 170 | current_thread_info()->status |= TS_POLLING; |
202 | /* endless idle loop with no priority at all */ | 171 | /* endless idle loop with no priority at all */ |
203 | while (1) { | 172 | while (1) { |
173 | tick_nohz_stop_sched_tick(); | ||
204 | while (!need_resched()) { | 174 | while (!need_resched()) { |
205 | void (*idle)(void); | 175 | void (*idle)(void); |
206 | 176 | ||
207 | if (__get_cpu_var(cpu_idle_state)) | 177 | if (__get_cpu_var(cpu_idle_state)) |
208 | __get_cpu_var(cpu_idle_state) = 0; | 178 | __get_cpu_var(cpu_idle_state) = 0; |
209 | 179 | ||
210 | tick_nohz_stop_sched_tick(); | ||
211 | |||
212 | rmb(); | 180 | rmb(); |
213 | idle = pm_idle; | 181 | idle = pm_idle; |
214 | if (!idle) | 182 | if (!idle) |
@@ -236,6 +204,47 @@ void cpu_idle (void) | |||
236 | } | 204 | } |
237 | } | 205 | } |
238 | 206 | ||
207 | static void do_nothing(void *unused) | ||
208 | { | ||
209 | } | ||
210 | |||
211 | void cpu_idle_wait(void) | ||
212 | { | ||
213 | unsigned int cpu, this_cpu = get_cpu(); | ||
214 | cpumask_t map, tmp = current->cpus_allowed; | ||
215 | |||
216 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | ||
217 | put_cpu(); | ||
218 | |||
219 | cpus_clear(map); | ||
220 | for_each_online_cpu(cpu) { | ||
221 | per_cpu(cpu_idle_state, cpu) = 1; | ||
222 | cpu_set(cpu, map); | ||
223 | } | ||
224 | |||
225 | __get_cpu_var(cpu_idle_state) = 0; | ||
226 | |||
227 | wmb(); | ||
228 | do { | ||
229 | ssleep(1); | ||
230 | for_each_online_cpu(cpu) { | ||
231 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | ||
232 | cpu_clear(cpu, map); | ||
233 | } | ||
234 | cpus_and(map, map, cpu_online_map); | ||
235 | /* | ||
236 | * We waited 1 sec, if a CPU still did not call idle | ||
237 | * it may be because it is in idle and not waking up | ||
238 | * because it has nothing to do. | ||
239 | * Give all the remaining CPUS a kick. | ||
240 | */ | ||
241 | smp_call_function_mask(map, do_nothing, 0, 0); | ||
242 | } while (!cpus_empty(map)); | ||
243 | |||
244 | set_cpus_allowed(current, tmp); | ||
245 | } | ||
246 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | ||
247 | |||
239 | /* | 248 | /* |
240 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | 249 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, |
241 | * which can obviate IPI to trigger checking of need_resched. | 250 | * which can obviate IPI to trigger checking of need_resched. |
@@ -246,13 +255,13 @@ void cpu_idle (void) | |||
246 | * New with Core Duo processors, MWAIT can take some hints based on CPU | 255 | * New with Core Duo processors, MWAIT can take some hints based on CPU |
247 | * capability. | 256 | * capability. |
248 | */ | 257 | */ |
249 | void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | 258 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
250 | { | 259 | { |
251 | if (!need_resched()) { | 260 | if (!need_resched()) { |
252 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | 261 | __monitor((void *)¤t_thread_info()->flags, 0, 0); |
253 | smp_mb(); | 262 | smp_mb(); |
254 | if (!need_resched()) | 263 | if (!need_resched()) |
255 | __mwait(eax, ecx); | 264 | __mwait(ax, cx); |
256 | } | 265 | } |
257 | } | 266 | } |
258 | 267 | ||
@@ -271,25 +280,41 @@ static void mwait_idle(void) | |||
271 | } | 280 | } |
272 | } | 281 | } |
273 | 282 | ||
283 | |||
284 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | ||
285 | { | ||
286 | if (force_mwait) | ||
287 | return 1; | ||
288 | /* Any C1 states supported? */ | ||
289 | return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; | ||
290 | } | ||
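
mwait_usable() is the new gate: unless force_mwait is set, MWAIT is only picked for idle when CPUID leaf 5 advertises at least one C1 sub-state (EDX bits 7:4). A user-space sketch of the same check, assuming GCC's <cpuid.h>; __get_cpuid() already verifies that leaf 5 is within the CPU's maximum leaf.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(5, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 5 (MONITOR/MWAIT) not available");
		return 1;
	}
	printf("C1 MWAIT sub-states: %u -> %s for idle\n",
	       (edx >> 4) & 0xf,
	       ((edx >> 4) & 0xf) ? "usable" : "not usable");
	return 0;
}
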
291 | |||
274 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 292 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) |
275 | { | 293 | { |
276 | static int printed; | 294 | static int selected; |
277 | if (cpu_has(c, X86_FEATURE_MWAIT)) { | 295 | |
296 | if (selected) | ||
297 | return; | ||
298 | #ifdef CONFIG_X86_SMP | ||
299 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | ||
300 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | ||
301 | " performance may degrade.\n"); | ||
302 | } | ||
303 | #endif | ||
304 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { | ||
278 | /* | 305 | /* |
279 | * Skip, if setup has overridden idle. | 306 | * Skip, if setup has overridden idle. |
280 | * One CPU supports mwait => All CPUs supports mwait | 307 | * One CPU supports mwait => All CPUs supports mwait |
281 | */ | 308 | */ |
282 | if (!pm_idle) { | 309 | if (!pm_idle) { |
283 | if (!printed) { | 310 | printk(KERN_INFO "using mwait in idle threads.\n"); |
284 | printk(KERN_INFO "using mwait in idle threads.\n"); | ||
285 | printed = 1; | ||
286 | } | ||
287 | pm_idle = mwait_idle; | 311 | pm_idle = mwait_idle; |
288 | } | 312 | } |
289 | } | 313 | } |
314 | selected = 1; | ||
290 | } | 315 | } |
291 | 316 | ||
292 | static int __init idle_setup (char *str) | 317 | static int __init idle_setup(char *str) |
293 | { | 318 | { |
294 | if (!strcmp(str, "poll")) { | 319 | if (!strcmp(str, "poll")) { |
295 | printk("using polling idle threads.\n"); | 320 | printk("using polling idle threads.\n"); |
@@ -304,13 +329,13 @@ static int __init idle_setup (char *str) | |||
304 | } | 329 | } |
305 | early_param("idle", idle_setup); | 330 | early_param("idle", idle_setup); |
306 | 331 | ||
307 | /* Prints also some state that isn't saved in the pt_regs */ | 332 | /* Prints also some state that isn't saved in the pt_regs */ |
308 | void __show_regs(struct pt_regs * regs) | 333 | void __show_regs(struct pt_regs * regs) |
309 | { | 334 | { |
310 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 335 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
311 | unsigned long d0, d1, d2, d3, d6, d7; | 336 | unsigned long d0, d1, d2, d3, d6, d7; |
312 | unsigned int fsindex,gsindex; | 337 | unsigned int fsindex, gsindex; |
313 | unsigned int ds,cs,es; | 338 | unsigned int ds, cs, es; |
314 | 339 | ||
315 | printk("\n"); | 340 | printk("\n"); |
316 | print_modules(); | 341 | print_modules(); |
@@ -319,16 +344,16 @@ void __show_regs(struct pt_regs * regs) | |||
319 | init_utsname()->release, | 344 | init_utsname()->release, |
320 | (int)strcspn(init_utsname()->version, " "), | 345 | (int)strcspn(init_utsname()->version, " "), |
321 | init_utsname()->version); | 346 | init_utsname()->version); |
322 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | 347 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
323 | printk_address(regs->rip); | 348 | printk_address(regs->ip, 1); |
324 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, | 349 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, |
325 | regs->eflags); | 350 | regs->flags); |
326 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 351 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", |
327 | regs->rax, regs->rbx, regs->rcx); | 352 | regs->ax, regs->bx, regs->cx); |
328 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 353 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", |
329 | regs->rdx, regs->rsi, regs->rdi); | 354 | regs->dx, regs->si, regs->di); |
330 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", | 355 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", |
331 | regs->rbp, regs->r8, regs->r9); | 356 | regs->bp, regs->r8, regs->r9); |
332 | printk("R10: %016lx R11: %016lx R12: %016lx\n", | 357 | printk("R10: %016lx R11: %016lx R12: %016lx\n", |
333 | regs->r10, regs->r11, regs->r12); | 358 | regs->r10, regs->r11, regs->r12); |
334 | printk("R13: %016lx R14: %016lx R15: %016lx\n", | 359 | printk("R13: %016lx R14: %016lx R15: %016lx\n", |
@@ -368,7 +393,7 @@ void show_regs(struct pt_regs *regs) | |||
368 | { | 393 | { |
369 | printk("CPU %d:", smp_processor_id()); | 394 | printk("CPU %d:", smp_processor_id()); |
370 | __show_regs(regs); | 395 | __show_regs(regs); |
371 | show_trace(NULL, regs, (void *)(regs + 1)); | 396 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
372 | } | 397 | } |
373 | 398 | ||
374 | /* | 399 | /* |
@@ -379,7 +404,7 @@ void exit_thread(void) | |||
379 | struct task_struct *me = current; | 404 | struct task_struct *me = current; |
380 | struct thread_struct *t = &me->thread; | 405 | struct thread_struct *t = &me->thread; |
381 | 406 | ||
382 | if (me->thread.io_bitmap_ptr) { | 407 | if (me->thread.io_bitmap_ptr) { |
383 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); | 408 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); |
384 | 409 | ||
385 | kfree(t->io_bitmap_ptr); | 410 | kfree(t->io_bitmap_ptr); |
@@ -415,7 +440,7 @@ void flush_thread(void) | |||
415 | tsk->thread.debugreg3 = 0; | 440 | tsk->thread.debugreg3 = 0; |
416 | tsk->thread.debugreg6 = 0; | 441 | tsk->thread.debugreg6 = 0; |
417 | tsk->thread.debugreg7 = 0; | 442 | tsk->thread.debugreg7 = 0; |
418 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 443 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
419 | /* | 444 | /* |
420 | * Forget coprocessor state.. | 445 | * Forget coprocessor state.. |
421 | */ | 446 | */ |
@@ -438,26 +463,21 @@ void release_thread(struct task_struct *dead_task) | |||
438 | 463 | ||
439 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) | 464 | static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) |
440 | { | 465 | { |
441 | struct user_desc ud = { | 466 | struct user_desc ud = { |
442 | .base_addr = addr, | 467 | .base_addr = addr, |
443 | .limit = 0xfffff, | 468 | .limit = 0xfffff, |
444 | .seg_32bit = 1, | 469 | .seg_32bit = 1, |
445 | .limit_in_pages = 1, | 470 | .limit_in_pages = 1, |
446 | .useable = 1, | 471 | .useable = 1, |
447 | }; | 472 | }; |
448 | struct n_desc_struct *desc = (void *)t->thread.tls_array; | 473 | struct desc_struct *desc = t->thread.tls_array; |
449 | desc += tls; | 474 | desc += tls; |
450 | desc->a = LDT_entry_a(&ud); | 475 | fill_ldt(desc, &ud); |
451 | desc->b = LDT_entry_b(&ud); | ||
452 | } | 476 | } |
453 | 477 | ||
454 | static inline u32 read_32bit_tls(struct task_struct *t, int tls) | 478 | static inline u32 read_32bit_tls(struct task_struct *t, int tls) |
455 | { | 479 | { |
456 | struct desc_struct *desc = (void *)t->thread.tls_array; | 480 | return get_desc_base(&t->thread.tls_array[tls]); |
457 | desc += tls; | ||
458 | return desc->base0 | | ||
459 | (((u32)desc->base1) << 16) | | ||
460 | (((u32)desc->base2) << 24); | ||
461 | } | 481 | } |
462 | 482 | ||
463 | /* | 483 | /* |
@@ -469,7 +489,7 @@ void prepare_to_copy(struct task_struct *tsk) | |||
469 | unlazy_fpu(tsk); | 489 | unlazy_fpu(tsk); |
470 | } | 490 | } |
471 | 491 | ||
472 | int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | 492 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
473 | unsigned long unused, | 493 | unsigned long unused, |
474 | struct task_struct * p, struct pt_regs * regs) | 494 | struct task_struct * p, struct pt_regs * regs) |
475 | { | 495 | { |
@@ -481,14 +501,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
481 | (THREAD_SIZE + task_stack_page(p))) - 1; | 501 | (THREAD_SIZE + task_stack_page(p))) - 1; |
482 | *childregs = *regs; | 502 | *childregs = *regs; |
483 | 503 | ||
484 | childregs->rax = 0; | 504 | childregs->ax = 0; |
485 | childregs->rsp = rsp; | 505 | childregs->sp = sp; |
486 | if (rsp == ~0UL) | 506 | if (sp == ~0UL) |
487 | childregs->rsp = (unsigned long)childregs; | 507 | childregs->sp = (unsigned long)childregs; |
488 | 508 | ||
489 | p->thread.rsp = (unsigned long) childregs; | 509 | p->thread.sp = (unsigned long) childregs; |
490 | p->thread.rsp0 = (unsigned long) (childregs+1); | 510 | p->thread.sp0 = (unsigned long) (childregs+1); |
491 | p->thread.userrsp = me->thread.userrsp; | 511 | p->thread.usersp = me->thread.usersp; |
492 | 512 | ||
493 | set_tsk_thread_flag(p, TIF_FORK); | 513 | set_tsk_thread_flag(p, TIF_FORK); |
494 | 514 | ||
@@ -509,7 +529,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
509 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, | 529 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, |
510 | IO_BITMAP_BYTES); | 530 | IO_BITMAP_BYTES); |
511 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | 531 | set_tsk_thread_flag(p, TIF_IO_BITMAP); |
512 | } | 532 | } |
513 | 533 | ||
514 | /* | 534 | /* |
515 | * Set a new TLS for the child thread? | 535 | * Set a new TLS for the child thread? |
@@ -517,7 +537,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
517 | if (clone_flags & CLONE_SETTLS) { | 537 | if (clone_flags & CLONE_SETTLS) { |
518 | #ifdef CONFIG_IA32_EMULATION | 538 | #ifdef CONFIG_IA32_EMULATION |
519 | if (test_thread_flag(TIF_IA32)) | 539 | if (test_thread_flag(TIF_IA32)) |
520 | err = ia32_child_tls(p, childregs); | 540 | err = do_set_thread_area(p, -1, |
541 | (struct user_desc __user *)childregs->si, 0); | ||
521 | else | 542 | else |
522 | #endif | 543 | #endif |
523 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); | 544 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); |
@@ -536,17 +557,30 @@ out: | |||
536 | /* | 557 | /* |
537 | * This special macro can be used to load a debugging register | 558 | * This special macro can be used to load a debugging register |
538 | */ | 559 | */ |
539 | #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) | 560 | #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) |
540 | 561 | ||
541 | static inline void __switch_to_xtra(struct task_struct *prev_p, | 562 | static inline void __switch_to_xtra(struct task_struct *prev_p, |
542 | struct task_struct *next_p, | 563 | struct task_struct *next_p, |
543 | struct tss_struct *tss) | 564 | struct tss_struct *tss) |
544 | { | 565 | { |
545 | struct thread_struct *prev, *next; | 566 | struct thread_struct *prev, *next; |
567 | unsigned long debugctl; | ||
546 | 568 | ||
547 | prev = &prev_p->thread, | 569 | prev = &prev_p->thread, |
548 | next = &next_p->thread; | 570 | next = &next_p->thread; |
549 | 571 | ||
572 | debugctl = prev->debugctlmsr; | ||
573 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
574 | /* we clear debugctl to make sure DS | ||
575 | * is not in use when we change it */ | ||
576 | debugctl = 0; | ||
577 | wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | ||
578 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | ||
579 | } | ||
580 | |||
581 | if (next->debugctlmsr != debugctl) | ||
582 | wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); | ||
583 | |||
550 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 584 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
551 | loaddebug(next, 0); | 585 | loaddebug(next, 0); |
552 | loaddebug(next, 1); | 586 | loaddebug(next, 1); |
@@ -570,12 +604,18 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
570 | */ | 604 | */ |
571 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 605 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
572 | } | 606 | } |
607 | |||
608 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
609 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
610 | |||
611 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
612 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
573 | } | 613 | } |
574 | 614 | ||
575 | /* | 615 | /* |
576 | * switch_to(x,y) should switch tasks from x to y. | 616 | * switch_to(x,y) should switch tasks from x to y. |
577 | * | 617 | * |
578 | * This could still be optimized: | 618 | * This could still be optimized: |
579 | * - fold all the options into a flag word and test it with a single test. | 619 | * - fold all the options into a flag word and test it with a single test. |
580 | * - could test fs/gs bitsliced | 620 | * - could test fs/gs bitsliced |
581 | * | 621 | * |
@@ -586,7 +626,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
586 | { | 626 | { |
587 | struct thread_struct *prev = &prev_p->thread, | 627 | struct thread_struct *prev = &prev_p->thread, |
588 | *next = &next_p->thread; | 628 | *next = &next_p->thread; |
589 | int cpu = smp_processor_id(); | 629 | int cpu = smp_processor_id(); |
590 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 630 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
591 | 631 | ||
592 | /* we're going to use this soon, after a few expensive things */ | 632 | /* we're going to use this soon, after a few expensive things */ |
@@ -596,7 +636,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
596 | /* | 636 | /* |
597 | * Reload esp0, LDT and the page table pointer: | 637 | * Reload esp0, LDT and the page table pointer: |
598 | */ | 638 | */ |
599 | tss->rsp0 = next->rsp0; | 639 | load_sp0(tss, next); |
600 | 640 | ||
601 | /* | 641 | /* |
602 | * Switch DS and ES. | 642 | * Switch DS and ES. |
@@ -655,8 +695,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
655 | /* | 695 | /* |
656 | * Switch the PDA and FPU contexts. | 696 | * Switch the PDA and FPU contexts. |
657 | */ | 697 | */ |
658 | prev->userrsp = read_pda(oldrsp); | 698 | prev->usersp = read_pda(oldrsp); |
659 | write_pda(oldrsp, next->userrsp); | 699 | write_pda(oldrsp, next->usersp); |
660 | write_pda(pcurrent, next_p); | 700 | write_pda(pcurrent, next_p); |
661 | 701 | ||
662 | write_pda(kernelstack, | 702 | write_pda(kernelstack, |
@@ -673,8 +713,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
673 | /* | 713 | /* |
674 | * Now maybe reload the debug registers and handle I/O bitmaps | 714 | * Now maybe reload the debug registers and handle I/O bitmaps |
675 | */ | 715 | */ |
676 | if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) | 716 | if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || |
677 | || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) | 717 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) |
678 | __switch_to_xtra(prev_p, next_p, tss); | 718 | __switch_to_xtra(prev_p, next_p, tss); |
679 | 719 | ||
680 | /* If the task has used fpu the last 5 timeslices, just do a full | 720 | /* If the task has used fpu the last 5 timeslices, just do a full |
@@ -689,7 +729,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
689 | /* | 729 | /* |
690 | * sys_execve() executes a new program. | 730 | * sys_execve() executes a new program. |
691 | */ | 731 | */ |
692 | asmlinkage | 732 | asmlinkage |
693 | long sys_execve(char __user *name, char __user * __user *argv, | 733 | long sys_execve(char __user *name, char __user * __user *argv, |
694 | char __user * __user *envp, struct pt_regs regs) | 734 | char __user * __user *envp, struct pt_regs regs) |
695 | { | 735 | { |
@@ -701,11 +741,6 @@ long sys_execve(char __user *name, char __user * __user *argv, | |||
701 | if (IS_ERR(filename)) | 741 | if (IS_ERR(filename)) |
702 | return error; | 742 | return error; |
703 | error = do_execve(filename, argv, envp, ®s); | 743 | error = do_execve(filename, argv, envp, ®s); |
704 | if (error == 0) { | ||
705 | task_lock(current); | ||
706 | current->ptrace &= ~PT_DTRACE; | ||
707 | task_unlock(current); | ||
708 | } | ||
709 | putname(filename); | 744 | putname(filename); |
710 | return error; | 745 | return error; |
711 | } | 746 | } |
@@ -715,18 +750,18 @@ void set_personality_64bit(void) | |||
715 | /* inherit personality from parent */ | 750 | /* inherit personality from parent */ |
716 | 751 | ||
717 | /* Make sure to be in 64bit mode */ | 752 | /* Make sure to be in 64bit mode */ |
718 | clear_thread_flag(TIF_IA32); | 753 | clear_thread_flag(TIF_IA32); |
719 | 754 | ||
720 | /* TBD: overwrites user setup. Should have two bits. | 755 | /* TBD: overwrites user setup. Should have two bits. |
721 | But 64bit processes have always behaved this way, | 756 | But 64bit processes have always behaved this way, |
722 | so it's not too bad. The main problem is just that | 757 | so it's not too bad. The main problem is just that |
723 | 32bit children are affected again. */ | 758 | 32bit children are affected again. */ |
724 | current->personality &= ~READ_IMPLIES_EXEC; | 759 | current->personality &= ~READ_IMPLIES_EXEC; |
725 | } | 760 | } |
726 | 761 | ||
727 | asmlinkage long sys_fork(struct pt_regs *regs) | 762 | asmlinkage long sys_fork(struct pt_regs *regs) |
728 | { | 763 | { |
729 | return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); | 764 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); |
730 | } | 765 | } |
731 | 766 | ||
732 | asmlinkage long | 767 | asmlinkage long |
@@ -734,7 +769,7 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, | |||
734 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | 769 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) |
735 | { | 770 | { |
736 | if (!newsp) | 771 | if (!newsp) |
737 | newsp = regs->rsp; | 772 | newsp = regs->sp; |
738 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | 773 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); |
739 | } | 774 | } |
740 | 775 | ||
@@ -750,29 +785,29 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, | |||
750 | */ | 785 | */ |
751 | asmlinkage long sys_vfork(struct pt_regs *regs) | 786 | asmlinkage long sys_vfork(struct pt_regs *regs) |
752 | { | 787 | { |
753 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0, | 788 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, |
754 | NULL, NULL); | 789 | NULL, NULL); |
755 | } | 790 | } |
756 | 791 | ||
757 | unsigned long get_wchan(struct task_struct *p) | 792 | unsigned long get_wchan(struct task_struct *p) |
758 | { | 793 | { |
759 | unsigned long stack; | 794 | unsigned long stack; |
760 | u64 fp,rip; | 795 | u64 fp,ip; |
761 | int count = 0; | 796 | int count = 0; |
762 | 797 | ||
763 | if (!p || p == current || p->state==TASK_RUNNING) | 798 | if (!p || p == current || p->state==TASK_RUNNING) |
764 | return 0; | 799 | return 0; |
765 | stack = (unsigned long)task_stack_page(p); | 800 | stack = (unsigned long)task_stack_page(p); |
766 | if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) | 801 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) |
767 | return 0; | 802 | return 0; |
768 | fp = *(u64 *)(p->thread.rsp); | 803 | fp = *(u64 *)(p->thread.sp); |
769 | do { | 804 | do { |
770 | if (fp < (unsigned long)stack || | 805 | if (fp < (unsigned long)stack || |
771 | fp > (unsigned long)stack+THREAD_SIZE) | 806 | fp > (unsigned long)stack+THREAD_SIZE) |
772 | return 0; | 807 | return 0; |
773 | rip = *(u64 *)(fp+8); | 808 | ip = *(u64 *)(fp+8); |
774 | if (!in_sched_functions(rip)) | 809 | if (!in_sched_functions(ip)) |
775 | return rip; | 810 | return ip; |
776 | fp = *(u64 *)fp; | 811 | fp = *(u64 *)fp; |
777 | } while (count++ < 16); | 812 | } while (count++ < 16); |
778 | return 0; | 813 | return 0; |
@@ -813,19 +848,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
813 | /* Not strictly needed for fs, but do it for symmetry | 848 | /* Not strictly needed for fs, but do it for symmetry |
814 | with gs */ | 849 | with gs */ |
815 | if (addr >= TASK_SIZE_OF(task)) | 850 | if (addr >= TASK_SIZE_OF(task)) |
816 | return -EPERM; | 851 | return -EPERM; |
817 | cpu = get_cpu(); | 852 | cpu = get_cpu(); |
818 | /* handle small bases via the GDT because that's faster to | 853 | /* handle small bases via the GDT because that's faster to |
819 | switch. */ | 854 | switch. */ |
820 | if (addr <= 0xffffffff) { | 855 | if (addr <= 0xffffffff) { |
821 | set_32bit_tls(task, FS_TLS, addr); | 856 | set_32bit_tls(task, FS_TLS, addr); |
822 | if (doit) { | 857 | if (doit) { |
823 | load_TLS(&task->thread, cpu); | 858 | load_TLS(&task->thread, cpu); |
824 | asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); | 859 | asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); |
825 | } | 860 | } |
826 | task->thread.fsindex = FS_TLS_SEL; | 861 | task->thread.fsindex = FS_TLS_SEL; |
827 | task->thread.fs = 0; | 862 | task->thread.fs = 0; |
828 | } else { | 863 | } else { |
829 | task->thread.fsindex = 0; | 864 | task->thread.fsindex = 0; |
830 | task->thread.fs = addr; | 865 | task->thread.fs = addr; |
831 | if (doit) { | 866 | if (doit) { |
@@ -837,24 +872,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
837 | } | 872 | } |
838 | put_cpu(); | 873 | put_cpu(); |
839 | break; | 874 | break; |
840 | case ARCH_GET_FS: { | 875 | case ARCH_GET_FS: { |
841 | unsigned long base; | 876 | unsigned long base; |
842 | if (task->thread.fsindex == FS_TLS_SEL) | 877 | if (task->thread.fsindex == FS_TLS_SEL) |
843 | base = read_32bit_tls(task, FS_TLS); | 878 | base = read_32bit_tls(task, FS_TLS); |
844 | else if (doit) | 879 | else if (doit) |
845 | rdmsrl(MSR_FS_BASE, base); | 880 | rdmsrl(MSR_FS_BASE, base); |
846 | else | 881 | else |
847 | base = task->thread.fs; | 882 | base = task->thread.fs; |
848 | ret = put_user(base, (unsigned long __user *)addr); | 883 | ret = put_user(base, (unsigned long __user *)addr); |
849 | break; | 884 | break; |
850 | } | 885 | } |
851 | case ARCH_GET_GS: { | 886 | case ARCH_GET_GS: { |
852 | unsigned long base; | 887 | unsigned long base; |
853 | unsigned gsindex; | 888 | unsigned gsindex; |
854 | if (task->thread.gsindex == GS_TLS_SEL) | 889 | if (task->thread.gsindex == GS_TLS_SEL) |
855 | base = read_32bit_tls(task, GS_TLS); | 890 | base = read_32bit_tls(task, GS_TLS); |
856 | else if (doit) { | 891 | else if (doit) { |
857 | asm("movl %%gs,%0" : "=r" (gsindex)); | 892 | asm("movl %%gs,%0" : "=r" (gsindex)); |
858 | if (gsindex) | 893 | if (gsindex) |
859 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 894 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
860 | else | 895 | else |
@@ -862,39 +897,21 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
862 | } | 897 | } |
863 | else | 898 | else |
864 | base = task->thread.gs; | 899 | base = task->thread.gs; |
865 | ret = put_user(base, (unsigned long __user *)addr); | 900 | ret = put_user(base, (unsigned long __user *)addr); |
866 | break; | 901 | break; |
867 | } | 902 | } |
868 | 903 | ||
869 | default: | 904 | default: |
870 | ret = -EINVAL; | 905 | ret = -EINVAL; |
871 | break; | 906 | break; |
872 | } | 907 | } |
873 | 908 | ||
874 | return ret; | 909 | return ret; |
875 | } | 910 | } |
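
do_arch_prctl() is reached from user space through arch_prctl(2); the GET cases store the current base through the addr argument. A minimal 64-bit sketch (error handling trimmed); the constants come from <asm/prctl.h>.

#include <asm/prctl.h>		/* ARCH_GET_FS, ARCH_GET_GS, ... */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	unsigned long fs_base = 0, gs_base = 0;

	/* FS is what 64-bit TLS uses; GS is normally 0 for user tasks. */
	syscall(SYS_arch_prctl, ARCH_GET_FS, &fs_base);
	syscall(SYS_arch_prctl, ARCH_GET_GS, &gs_base);
	printf("FS base = %#lx, GS base = %#lx\n", fs_base, gs_base);
	return 0;
}
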
876 | 911 | ||
877 | long sys_arch_prctl(int code, unsigned long addr) | 912 | long sys_arch_prctl(int code, unsigned long addr) |
878 | { | 913 | { |
879 | return do_arch_prctl(current, code, addr); | 914 | return do_arch_prctl(current, code, addr); |
880 | } | ||
881 | |||
882 | /* | ||
883 | * Capture the user space registers if the task is not running (in user space) | ||
884 | */ | ||
885 | int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | ||
886 | { | ||
887 | struct pt_regs *pp, ptregs; | ||
888 | |||
889 | pp = task_pt_regs(tsk); | ||
890 | |||
891 | ptregs = *pp; | ||
892 | ptregs.cs &= 0xffff; | ||
893 | ptregs.ss &= 0xffff; | ||
894 | |||
895 | elf_core_copy_regs(regs, &ptregs); | ||
896 | |||
897 | return 1; | ||
898 | } | 915 | } |
899 | 916 | ||
900 | unsigned long arch_align_stack(unsigned long sp) | 917 | unsigned long arch_align_stack(unsigned long sp) |
@@ -903,3 +920,9 @@ unsigned long arch_align_stack(unsigned long sp) | |||
903 | sp -= get_random_int() % 8192; | 920 | sp -= get_random_int() % 8192; |
904 | return sp & ~0xf; | 921 | return sp & ~0xf; |
905 | } | 922 | } |
923 | |||
924 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
925 | { | ||
926 | unsigned long range_end = mm->brk + 0x02000000; | ||
927 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
928 | } | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c new file mode 100644 index 000000000000..702c33efea84 --- /dev/null +++ b/arch/x86/kernel/ptrace.c | |||
@@ -0,0 +1,1566 @@ | |||
1 | /* By Ross Biro 1/23/92 */ | ||
2 | /* | ||
3 | * Pentium III FXSR, SSE support | ||
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
5 | * | ||
6 | * BTS tracing | ||
7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/ptrace.h> | ||
16 | #include <linux/regset.h> | ||
17 | #include <linux/user.h> | ||
18 | #include <linux/elf.h> | ||
19 | #include <linux/security.h> | ||
20 | #include <linux/audit.h> | ||
21 | #include <linux/seccomp.h> | ||
22 | #include <linux/signal.h> | ||
23 | |||
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/pgtable.h> | ||
26 | #include <asm/system.h> | ||
27 | #include <asm/processor.h> | ||
28 | #include <asm/i387.h> | ||
29 | #include <asm/debugreg.h> | ||
30 | #include <asm/ldt.h> | ||
31 | #include <asm/desc.h> | ||
32 | #include <asm/prctl.h> | ||
33 | #include <asm/proto.h> | ||
34 | #include <asm/ds.h> | ||
35 | |||
36 | #include "tls.h" | ||
37 | |||
38 | enum x86_regset { | ||
39 | REGSET_GENERAL, | ||
40 | REGSET_FP, | ||
41 | REGSET_XFP, | ||
42 | REGSET_TLS, | ||
43 | }; | ||
44 | |||
45 | /* | ||
46 | * does not yet catch signals sent when the child dies. | ||
47 | * in exit.c or in signal.c. | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Determines which flags the user has access to [1 = access, 0 = no access]. | ||
52 | */ | ||
53 | #define FLAG_MASK_32 ((unsigned long) \ | ||
54 | (X86_EFLAGS_CF | X86_EFLAGS_PF | \ | ||
55 | X86_EFLAGS_AF | X86_EFLAGS_ZF | \ | ||
56 | X86_EFLAGS_SF | X86_EFLAGS_TF | \ | ||
57 | X86_EFLAGS_DF | X86_EFLAGS_OF | \ | ||
58 | X86_EFLAGS_RF | X86_EFLAGS_AC)) | ||
59 | |||
60 | /* | ||
61 | * Determines whether a value may be installed in a segment register. | ||
62 | */ | ||
63 | static inline bool invalid_selector(u16 value) | ||
64 | { | ||
65 | return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); | ||
66 | } | ||
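
invalid_selector() accepts a null selector (0) or anything whose requested privilege level is USER_RPL (3): the low two bits of a selector are the RPL, bit 2 picks GDT vs. LDT, and the remaining bits are the table index. The same test spelled out with local constants; the names below are illustrative, not the kernel's.

#include <stdbool.h>
#include <stdint.h>

#define SEL_RPL_MASK	0x3	/* bits 1:0 - requested privilege level */
#define SEL_USER_RPL	0x3	/* user mode always runs at RPL 3 */

/* Reject non-null selectors that do not carry RPL 3. */
static bool selector_is_invalid(uint16_t sel)
{
	return sel != 0 && (sel & SEL_RPL_MASK) != SEL_USER_RPL;
}
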
67 | |||
68 | #ifdef CONFIG_X86_32 | ||
69 | |||
70 | #define FLAG_MASK FLAG_MASK_32 | ||
71 | |||
72 | static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | ||
73 | { | ||
74 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | ||
75 | regno >>= 2; | ||
76 | if (regno > FS) | ||
77 | --regno; | ||
78 | return ®s->bx + regno; | ||
79 | } | ||
80 | |||
81 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | ||
82 | { | ||
83 | /* | ||
84 | * Returning the value truncates it to 16 bits. | ||
85 | */ | ||
86 | unsigned int retval; | ||
87 | if (offset != offsetof(struct user_regs_struct, gs)) | ||
88 | retval = *pt_regs_access(task_pt_regs(task), offset); | ||
89 | else { | ||
90 | retval = task->thread.gs; | ||
91 | if (task == current) | ||
92 | savesegment(gs, retval); | ||
93 | } | ||
94 | return retval; | ||
95 | } | ||
96 | |||
97 | static int set_segment_reg(struct task_struct *task, | ||
98 | unsigned long offset, u16 value) | ||
99 | { | ||
100 | /* | ||
101 | * The value argument was already truncated to 16 bits. | ||
102 | */ | ||
103 | if (invalid_selector(value)) | ||
104 | return -EIO; | ||
105 | |||
106 | /* | ||
107 | * For %cs and %ss we cannot permit a null selector. | ||
108 | * We can permit a bogus selector as long as it has USER_RPL. | ||
109 | * Null selectors are fine for other segment registers, but | ||
110 | * we will never get back to user mode with invalid %cs or %ss | ||
111 | * and will take the trap in iret instead. Much code relies | ||
112 | * on user_mode() to distinguish a user trap frame (which can | ||
113 | * safely use invalid selectors) from a kernel trap frame. | ||
114 | */ | ||
115 | switch (offset) { | ||
116 | case offsetof(struct user_regs_struct, cs): | ||
117 | case offsetof(struct user_regs_struct, ss): | ||
118 | if (unlikely(value == 0)) | ||
119 | return -EIO; | ||
120 | |||
121 | default: | ||
122 | *pt_regs_access(task_pt_regs(task), offset) = value; | ||
123 | break; | ||
124 | |||
125 | case offsetof(struct user_regs_struct, gs): | ||
126 | task->thread.gs = value; | ||
127 | if (task == current) | ||
128 | /* | ||
129 | * The user-mode %gs is not affected by | ||
130 | * kernel entry, so we must update the CPU. | ||
131 | */ | ||
132 | loadsegment(gs, value); | ||
133 | } | ||
134 | |||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
139 | { | ||
140 | return TASK_SIZE - 3; | ||
141 | } | ||
142 | |||
143 | #else /* CONFIG_X86_64 */ | ||
144 | |||
145 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | ||
146 | |||
147 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) | ||
148 | { | ||
149 | BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); | ||
150 | return ®s->r15 + (offset / sizeof(regs->r15)); | ||
151 | } | ||
152 | |||
153 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | ||
154 | { | ||
155 | /* | ||
156 | * Returning the value truncates it to 16 bits. | ||
157 | */ | ||
158 | unsigned int seg; | ||
159 | |||
160 | switch (offset) { | ||
161 | case offsetof(struct user_regs_struct, fs): | ||
162 | if (task == current) { | ||
163 | /* Older gas can't assemble movq %?s,%r?? */ | ||
164 | asm("movl %%fs,%0" : "=r" (seg)); | ||
165 | return seg; | ||
166 | } | ||
167 | return task->thread.fsindex; | ||
168 | case offsetof(struct user_regs_struct, gs): | ||
169 | if (task == current) { | ||
170 | asm("movl %%gs,%0" : "=r" (seg)); | ||
171 | return seg; | ||
172 | } | ||
173 | return task->thread.gsindex; | ||
174 | case offsetof(struct user_regs_struct, ds): | ||
175 | if (task == current) { | ||
176 | asm("movl %%ds,%0" : "=r" (seg)); | ||
177 | return seg; | ||
178 | } | ||
179 | return task->thread.ds; | ||
180 | case offsetof(struct user_regs_struct, es): | ||
181 | if (task == current) { | ||
182 | asm("movl %%es,%0" : "=r" (seg)); | ||
183 | return seg; | ||
184 | } | ||
185 | return task->thread.es; | ||
186 | |||
187 | case offsetof(struct user_regs_struct, cs): | ||
188 | case offsetof(struct user_regs_struct, ss): | ||
189 | break; | ||
190 | } | ||
191 | return *pt_regs_access(task_pt_regs(task), offset); | ||
192 | } | ||
193 | |||
194 | static int set_segment_reg(struct task_struct *task, | ||
195 | unsigned long offset, u16 value) | ||
196 | { | ||
197 | /* | ||
198 | * The value argument was already truncated to 16 bits. | ||
199 | */ | ||
200 | if (invalid_selector(value)) | ||
201 | return -EIO; | ||
202 | |||
203 | switch (offset) { | ||
204 | case offsetof(struct user_regs_struct,fs): | ||
205 | /* | ||
206 | * If this is setting fs as for normal 64-bit use but | ||
207 | * setting fs_base has implicitly changed it, leave it. | ||
208 | */ | ||
209 | if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && | ||
210 | task->thread.fs != 0) || | ||
211 | (value == 0 && task->thread.fsindex == FS_TLS_SEL && | ||
212 | task->thread.fs == 0)) | ||
213 | break; | ||
214 | task->thread.fsindex = value; | ||
215 | if (task == current) | ||
216 | loadsegment(fs, task->thread.fsindex); | ||
217 | break; | ||
218 | case offsetof(struct user_regs_struct,gs): | ||
219 | /* | ||
220 | * If this is setting gs as for normal 64-bit use but | ||
221 | * setting gs_base has implicitly changed it, leave it. | ||
222 | */ | ||
223 | if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && | ||
224 | task->thread.gs != 0) || | ||
225 | (value == 0 && task->thread.gsindex == GS_TLS_SEL && | ||
226 | task->thread.gs == 0)) | ||
227 | break; | ||
228 | task->thread.gsindex = value; | ||
229 | if (task == current) | ||
230 | load_gs_index(task->thread.gsindex); | ||
231 | break; | ||
232 | case offsetof(struct user_regs_struct,ds): | ||
233 | task->thread.ds = value; | ||
234 | if (task == current) | ||
235 | loadsegment(ds, task->thread.ds); | ||
236 | break; | ||
237 | case offsetof(struct user_regs_struct,es): | ||
238 | task->thread.es = value; | ||
239 | if (task == current) | ||
240 | loadsegment(es, task->thread.es); | ||
241 | break; | ||
242 | |||
243 | /* | ||
244 | * Can't actually change these in 64-bit mode. | ||
245 | */ | ||
246 | case offsetof(struct user_regs_struct,cs): | ||
247 | if (unlikely(value == 0)) | ||
248 | return -EIO; | ||
249 | #ifdef CONFIG_IA32_EMULATION | ||
250 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
251 | task_pt_regs(task)->cs = value; | ||
252 | #endif | ||
253 | break; | ||
254 | case offsetof(struct user_regs_struct,ss): | ||
255 | if (unlikely(value == 0)) | ||
256 | return -EIO; | ||
257 | #ifdef CONFIG_IA32_EMULATION | ||
258 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
259 | task_pt_regs(task)->ss = value; | ||
260 | #endif | ||
261 | break; | ||
262 | } | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
268 | { | ||
269 | #ifdef CONFIG_IA32_EMULATION | ||
270 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
271 | return IA32_PAGE_OFFSET - 3; | ||
272 | #endif | ||
273 | return TASK_SIZE64 - 7; | ||
274 | } | ||
275 | |||
276 | #endif /* CONFIG_X86_32 */ | ||
277 | |||
278 | static unsigned long get_flags(struct task_struct *task) | ||
279 | { | ||
280 | unsigned long retval = task_pt_regs(task)->flags; | ||
281 | |||
282 | /* | ||
283 | * If the debugger set TF, hide it from the readout. | ||
284 | */ | ||
285 | if (test_tsk_thread_flag(task, TIF_FORCED_TF)) | ||
286 | retval &= ~X86_EFLAGS_TF; | ||
287 | |||
288 | return retval; | ||
289 | } | ||
290 | |||
291 | static int set_flags(struct task_struct *task, unsigned long value) | ||
292 | { | ||
293 | struct pt_regs *regs = task_pt_regs(task); | ||
294 | |||
295 | /* | ||
296 | * If the user value contains TF, mark that | ||
297 | * it was not "us" (the debugger) that set it. | ||
298 | * If not, make sure it stays set if we had. | ||
299 | */ | ||
300 | if (value & X86_EFLAGS_TF) | ||
301 | clear_tsk_thread_flag(task, TIF_FORCED_TF); | ||
302 | else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) | ||
303 | value |= X86_EFLAGS_TF; | ||
304 | |||
305 | regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); | ||
306 | |||
307 | return 0; | ||
308 | } | ||
309 | |||
310 | static int putreg(struct task_struct *child, | ||
311 | unsigned long offset, unsigned long value) | ||
312 | { | ||
313 | switch (offset) { | ||
314 | case offsetof(struct user_regs_struct, cs): | ||
315 | case offsetof(struct user_regs_struct, ds): | ||
316 | case offsetof(struct user_regs_struct, es): | ||
317 | case offsetof(struct user_regs_struct, fs): | ||
318 | case offsetof(struct user_regs_struct, gs): | ||
319 | case offsetof(struct user_regs_struct, ss): | ||
320 | return set_segment_reg(child, offset, value); | ||
321 | |||
322 | case offsetof(struct user_regs_struct, flags): | ||
323 | return set_flags(child, value); | ||
324 | |||
325 | #ifdef CONFIG_X86_64 | ||
326 | case offsetof(struct user_regs_struct,fs_base): | ||
327 | if (value >= TASK_SIZE_OF(child)) | ||
328 | return -EIO; | ||
329 | /* | ||
330 | * When changing the segment base, use do_arch_prctl | ||
331 | * to set either thread.fs or thread.fsindex and the | ||
332 | * corresponding GDT slot. | ||
333 | */ | ||
334 | if (child->thread.fs != value) | ||
335 | return do_arch_prctl(child, ARCH_SET_FS, value); | ||
336 | return 0; | ||
337 | case offsetof(struct user_regs_struct,gs_base): | ||
338 | /* | ||
339 | * Exactly the same here as the %fs handling above. | ||
340 | */ | ||
341 | if (value >= TASK_SIZE_OF(child)) | ||
342 | return -EIO; | ||
343 | if (child->thread.gs != value) | ||
344 | return do_arch_prctl(child, ARCH_SET_GS, value); | ||
345 | return 0; | ||
346 | #endif | ||
347 | } | ||
348 | |||
349 | *pt_regs_access(task_pt_regs(child), offset) = value; | ||
350 | return 0; | ||
351 | } | ||
352 | |||
353 | static unsigned long getreg(struct task_struct *task, unsigned long offset) | ||
354 | { | ||
355 | switch (offset) { | ||
356 | case offsetof(struct user_regs_struct, cs): | ||
357 | case offsetof(struct user_regs_struct, ds): | ||
358 | case offsetof(struct user_regs_struct, es): | ||
359 | case offsetof(struct user_regs_struct, fs): | ||
360 | case offsetof(struct user_regs_struct, gs): | ||
361 | case offsetof(struct user_regs_struct, ss): | ||
362 | return get_segment_reg(task, offset); | ||
363 | |||
364 | case offsetof(struct user_regs_struct, flags): | ||
365 | return get_flags(task); | ||
366 | |||
367 | #ifdef CONFIG_X86_64 | ||
368 | case offsetof(struct user_regs_struct, fs_base): { | ||
369 | /* | ||
370 | * do_arch_prctl may have used a GDT slot instead of | ||
371 | * the MSR. To userland, it appears the same either | ||
372 | * way, except the %fs segment selector might not be 0. | ||
373 | */ | ||
374 | unsigned int seg = task->thread.fsindex; | ||
375 | if (task->thread.fs != 0) | ||
376 | return task->thread.fs; | ||
377 | if (task == current) | ||
378 | asm("movl %%fs,%0" : "=r" (seg)); | ||
379 | if (seg != FS_TLS_SEL) | ||
380 | return 0; | ||
381 | return get_desc_base(&task->thread.tls_array[FS_TLS]); | ||
382 | } | ||
383 | case offsetof(struct user_regs_struct, gs_base): { | ||
384 | /* | ||
385 | * Exactly the same here as the %fs handling above. | ||
386 | */ | ||
387 | unsigned int seg = task->thread.gsindex; | ||
388 | if (task->thread.gs != 0) | ||
389 | return task->thread.gs; | ||
390 | if (task == current) | ||
391 | asm("movl %%gs,%0" : "=r" (seg)); | ||
392 | if (seg != GS_TLS_SEL) | ||
393 | return 0; | ||
394 | return get_desc_base(&task->thread.tls_array[GS_TLS]); | ||
395 | } | ||
396 | #endif | ||
397 | } | ||
398 | |||
399 | return *pt_regs_access(task_pt_regs(task), offset); | ||
400 | } | ||
401 | |||
402 | static int genregs_get(struct task_struct *target, | ||
403 | const struct user_regset *regset, | ||
404 | unsigned int pos, unsigned int count, | ||
405 | void *kbuf, void __user *ubuf) | ||
406 | { | ||
407 | if (kbuf) { | ||
408 | unsigned long *k = kbuf; | ||
409 | while (count > 0) { | ||
410 | *k++ = getreg(target, pos); | ||
411 | count -= sizeof(*k); | ||
412 | pos += sizeof(*k); | ||
413 | } | ||
414 | } else { | ||
415 | unsigned long __user *u = ubuf; | ||
416 | while (count > 0) { | ||
417 | if (__put_user(getreg(target, pos), u++)) | ||
418 | return -EFAULT; | ||
419 | count -= sizeof(*u); | ||
420 | pos += sizeof(*u); | ||
421 | } | ||
422 | } | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | static int genregs_set(struct task_struct *target, | ||
428 | const struct user_regset *regset, | ||
429 | unsigned int pos, unsigned int count, | ||
430 | const void *kbuf, const void __user *ubuf) | ||
431 | { | ||
432 | int ret = 0; | ||
433 | if (kbuf) { | ||
434 | const unsigned long *k = kbuf; | ||
435 | while (count > 0 && !ret) { | ||
436 | ret = putreg(target, pos, *k++); | ||
437 | count -= sizeof(*k); | ||
438 | pos += sizeof(*k); | ||
439 | } | ||
440 | } else { | ||
441 | const unsigned long __user *u = ubuf; | ||
442 | while (count > 0 && !ret) { | ||
443 | unsigned long word; | ||
444 | ret = __get_user(word, u++); | ||
445 | if (ret) | ||
446 | break; | ||
447 | ret = putreg(target, pos, word); | ||
448 | count -= sizeof(*u); | ||
449 | pos += sizeof(*u); | ||
450 | } | ||
451 | } | ||
452 | return ret; | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * This function is trivial and will be inlined by the compiler. | ||
457 | * Having it separates the implementation details of debug | ||
458 | * registers from the interface details of ptrace. | ||
459 | */ | ||
460 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | ||
461 | { | ||
462 | switch (n) { | ||
463 | case 0: return child->thread.debugreg0; | ||
464 | case 1: return child->thread.debugreg1; | ||
465 | case 2: return child->thread.debugreg2; | ||
466 | case 3: return child->thread.debugreg3; | ||
467 | case 6: return child->thread.debugreg6; | ||
468 | case 7: return child->thread.debugreg7; | ||
469 | } | ||
470 | return 0; | ||
471 | } | ||
472 | |||
473 | static int ptrace_set_debugreg(struct task_struct *child, | ||
474 | int n, unsigned long data) | ||
475 | { | ||
476 | int i; | ||
477 | |||
478 | if (unlikely(n == 4 || n == 5)) | ||
479 | return -EIO; | ||
480 | |||
481 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | ||
482 | return -EIO; | ||
483 | |||
484 | switch (n) { | ||
485 | case 0: child->thread.debugreg0 = data; break; | ||
486 | case 1: child->thread.debugreg1 = data; break; | ||
487 | case 2: child->thread.debugreg2 = data; break; | ||
488 | case 3: child->thread.debugreg3 = data; break; | ||
489 | |||
490 | case 6: | ||
491 | if ((data & ~0xffffffffUL) != 0) | ||
492 | return -EIO; | ||
493 | child->thread.debugreg6 = data; | ||
494 | break; | ||
495 | |||
496 | case 7: | ||
497 | /* | ||
498 | * Sanity-check data. Take one half-byte at a time with | ||
499 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | ||
500 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
501 | * 2 and 3 are LENi. Given a list of invalid values, | ||
502 | * we do mask |= 1 << invalid_value, so that | ||
503 | * (mask >> check) & 1 is a correct test for invalid | ||
504 | * values. | ||
505 | * | ||
506 | * R/Wi contains the type of the breakpoint / | ||
507 | * watchpoint, LENi contains the length of the watched | ||
508 | * data in the watchpoint case. | ||
509 | * | ||
510 | * The invalid values are: | ||
511 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
512 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
513 | * mask |= 0x4444. | ||
514 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
515 | * 0x1110. | ||
516 | * | ||
517 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
518 | * | ||
519 | * See the Intel Manual "System Programming Guide", | ||
520 | * 15.2.4 | ||
521 | * | ||
522 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
523 | * mode (i.e. even for 32-bit userspace software, but | ||
524 | * 64-bit kernel), so the x86_64 mask value is 0x5554. | ||
525 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
526 | */ | ||
527 | #ifdef CONFIG_X86_32 | ||
528 | #define DR7_MASK 0x5f54 | ||
529 | #else | ||
530 | #define DR7_MASK 0x5554 | ||
531 | #endif | ||
532 | data &= ~DR_CONTROL_RESERVED; | ||
533 | for (i = 0; i < 4; i++) | ||
534 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
535 | return -EIO; | ||
536 | child->thread.debugreg7 = data; | ||
537 | if (data) | ||
538 | set_tsk_thread_flag(child, TIF_DEBUG); | ||
539 | else | ||
540 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
541 | break; | ||
542 | } | ||
543 | |||
544 | return 0; | ||
545 | } | ||
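/*
 * Worked example for the DR7 check above (hypothetical values).  A write
 * watchpoint on 4 bytes has R/W0 == 01b and LEN0 == 11b, so
 * check = (data >> 16) & 0xf == 0b1101 == 13; bit 13 of DR7_MASK (0x5f54
 * or 0x5554) is 0, so the value is accepted.  An execute breakpoint with a
 * non-zero length (R/W0 == 00b, LEN0 == 11b) gives check == 0b1100 == 12;
 * bit 12 of the mask is 1, so ptrace_set_debugreg() returns -EIO.
 */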
546 | |||
547 | static int ptrace_bts_get_size(struct task_struct *child) | ||
548 | { | ||
549 | if (!child->thread.ds_area_msr) | ||
550 | return -ENXIO; | ||
551 | |||
552 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | ||
553 | } | ||
554 | |||
555 | static int ptrace_bts_read_record(struct task_struct *child, | ||
556 | long index, | ||
557 | struct bts_struct __user *out) | ||
558 | { | ||
559 | struct bts_struct ret; | ||
560 | int retval; | ||
561 | int bts_end; | ||
562 | int bts_index; | ||
563 | |||
564 | if (!child->thread.ds_area_msr) | ||
565 | return -ENXIO; | ||
566 | |||
567 | if (index < 0) | ||
568 | return -EINVAL; | ||
569 | |||
570 | bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); | ||
571 | if (bts_end <= index) | ||
572 | return -EINVAL; | ||
573 | |||
574 | /* translate the ptrace bts index into the ds bts index */ | ||
575 | bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); | ||
576 | bts_index -= (index + 1); | ||
577 | if (bts_index < 0) | ||
578 | bts_index += bts_end; | ||
579 | |||
580 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | ||
581 | bts_index, &ret); | ||
582 | if (retval < 0) | ||
583 | return retval; | ||
584 | |||
585 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
586 | return -EFAULT; | ||
587 | |||
588 | return sizeof(ret); | ||
589 | } | ||
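/*
 * Worked example for the index translation above (hypothetical numbers):
 * with bts_end == 8 records and the DS hardware index currently at 3,
 * ptrace index 0 (the most recent record) maps to DS index 3 - 1 == 2,
 * while ptrace index 5 maps to 3 - 6 == -3, which wraps to -3 + 8 == 5.
 */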
590 | |||
591 | static int ptrace_bts_write_record(struct task_struct *child, | ||
592 | const struct bts_struct *in) | ||
593 | { | ||
594 | int retval; | ||
595 | |||
596 | if (!child->thread.ds_area_msr) | ||
597 | return -ENXIO; | ||
598 | |||
599 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | ||
600 | if (retval) | ||
601 | return retval; | ||
602 | |||
603 | return sizeof(*in); | ||
604 | } | ||
605 | |||
606 | static int ptrace_bts_clear(struct task_struct *child) | ||
607 | { | ||
608 | if (!child->thread.ds_area_msr) | ||
609 | return -ENXIO; | ||
610 | |||
611 | return ds_clear((void *)child->thread.ds_area_msr); | ||
612 | } | ||
613 | |||
614 | static int ptrace_bts_drain(struct task_struct *child, | ||
615 | long size, | ||
616 | struct bts_struct __user *out) | ||
617 | { | ||
618 | int end, i; | ||
619 | void *ds = (void *)child->thread.ds_area_msr; | ||
620 | |||
621 | if (!ds) | ||
622 | return -ENXIO; | ||
623 | |||
624 | end = ds_get_bts_index(ds); | ||
625 | if (end <= 0) | ||
626 | return end; | ||
627 | |||
628 | if (size < (end * sizeof(struct bts_struct))) | ||
629 | return -EIO; | ||
630 | |||
631 | for (i = 0; i < end; i++, out++) { | ||
632 | struct bts_struct ret; | ||
633 | int retval; | ||
634 | |||
635 | retval = ds_read_bts(ds, i, &ret); | ||
636 | if (retval < 0) | ||
637 | return retval; | ||
638 | |||
639 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
640 | return -EFAULT; | ||
641 | } | ||
642 | |||
643 | ds_clear(ds); | ||
644 | |||
645 | return end; | ||
646 | } | ||
647 | |||
648 | static int ptrace_bts_realloc(struct task_struct *child, | ||
649 | int size, int reduce_size) | ||
650 | { | ||
651 | unsigned long rlim, vm; | ||
652 | int ret, old_size; | ||
653 | |||
654 | if (size < 0) | ||
655 | return -EINVAL; | ||
656 | |||
657 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
658 | if (old_size < 0) | ||
659 | return old_size; | ||
660 | |||
661 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
662 | if (ret < 0) | ||
663 | goto out; | ||
664 | |||
665 | size >>= PAGE_SHIFT; | ||
666 | old_size >>= PAGE_SHIFT; | ||
667 | |||
668 | current->mm->total_vm -= old_size; | ||
669 | current->mm->locked_vm -= old_size; | ||
670 | |||
671 | if (size == 0) | ||
672 | goto out; | ||
673 | |||
674 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
675 | vm = current->mm->total_vm + size; | ||
676 | if (rlim < vm) { | ||
677 | ret = -ENOMEM; | ||
678 | |||
679 | if (!reduce_size) | ||
680 | goto out; | ||
681 | |||
682 | size = rlim - current->mm->total_vm; | ||
683 | if (size <= 0) | ||
684 | goto out; | ||
685 | } | ||
686 | |||
687 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
688 | vm = current->mm->locked_vm + size; | ||
689 | if (rlim < vm) { | ||
690 | ret = -ENOMEM; | ||
691 | |||
692 | if (!reduce_size) | ||
693 | goto out; | ||
694 | |||
695 | size = rlim - current->mm->locked_vm; | ||
696 | if (size <= 0) | ||
697 | goto out; | ||
698 | } | ||
699 | |||
700 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | ||
701 | size << PAGE_SHIFT); | ||
702 | if (ret < 0) | ||
703 | goto out; | ||
704 | |||
705 | current->mm->total_vm += size; | ||
706 | current->mm->locked_vm += size; | ||
707 | |||
708 | out: | ||
709 | if (child->thread.ds_area_msr) | ||
710 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
711 | else | ||
712 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
713 | |||
714 | return ret; | ||
715 | } | ||
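/*
 * Worked example for the rlimit clamping above (hypothetical numbers,
 * 4 KiB pages): with RLIMIT_MEMLOCK == 64 KiB (16 pages), locked_vm == 10
 * pages and a requested buffer of 10 pages, 10 + 10 exceeds the limit;
 * with PTRACE_BTS_O_CUT_SIZE set the request is cut down to 16 - 10 == 6
 * pages, otherwise the reallocation fails with -ENOMEM.
 */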
716 | |||
717 | static int ptrace_bts_config(struct task_struct *child, | ||
718 | long cfg_size, | ||
719 | const struct ptrace_bts_config __user *ucfg) | ||
720 | { | ||
721 | struct ptrace_bts_config cfg; | ||
722 | int bts_size, ret = 0; | ||
723 | void *ds; | ||
724 | |||
725 | if (cfg_size < sizeof(cfg)) | ||
726 | return -EIO; | ||
727 | |||
728 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | ||
729 | return -EFAULT; | ||
730 | |||
731 | if ((int)cfg.size < 0) | ||
732 | return -EINVAL; | ||
733 | |||
734 | bts_size = 0; | ||
735 | ds = (void *)child->thread.ds_area_msr; | ||
736 | if (ds) { | ||
737 | bts_size = ds_get_bts_size(ds); | ||
738 | if (bts_size < 0) | ||
739 | return bts_size; | ||
740 | } | ||
741 | cfg.size = PAGE_ALIGN(cfg.size); | ||
742 | |||
743 | if (bts_size != cfg.size) { | ||
744 | ret = ptrace_bts_realloc(child, cfg.size, | ||
745 | cfg.flags & PTRACE_BTS_O_CUT_SIZE); | ||
746 | if (ret < 0) | ||
747 | goto errout; | ||
748 | |||
749 | ds = (void *)child->thread.ds_area_msr; | ||
750 | } | ||
751 | |||
752 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) | ||
753 | ret = ds_set_overflow(ds, DS_O_SIGNAL); | ||
754 | else | ||
755 | ret = ds_set_overflow(ds, DS_O_WRAP); | ||
756 | if (ret < 0) | ||
757 | goto errout; | ||
758 | |||
759 | if (cfg.flags & PTRACE_BTS_O_TRACE) | ||
760 | child->thread.debugctlmsr |= ds_debugctl_mask(); | ||
761 | else | ||
762 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | ||
763 | |||
764 | if (cfg.flags & PTRACE_BTS_O_SCHED) | ||
765 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
766 | else | ||
767 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
768 | |||
769 | ret = sizeof(cfg); | ||
770 | |||
771 | out: | ||
772 | if (child->thread.debugctlmsr) | ||
773 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
774 | else | ||
775 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
776 | |||
777 | return ret; | ||
778 | |||
779 | errout: | ||
780 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | ||
781 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
782 | goto out; | ||
783 | } | ||
784 | |||
785 | static int ptrace_bts_status(struct task_struct *child, | ||
786 | long cfg_size, | ||
787 | struct ptrace_bts_config __user *ucfg) | ||
788 | { | ||
789 | void *ds = (void *)child->thread.ds_area_msr; | ||
790 | struct ptrace_bts_config cfg; | ||
791 | |||
792 | if (cfg_size < sizeof(cfg)) | ||
793 | return -EIO; | ||
794 | |||
795 | memset(&cfg, 0, sizeof(cfg)); | ||
796 | |||
797 | if (ds) { | ||
798 | cfg.size = ds_get_bts_size(ds); | ||
799 | |||
800 | if (ds_get_overflow(ds) == DS_O_SIGNAL) | ||
801 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | ||
802 | |||
803 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | ||
804 | child->thread.debugctlmsr & ds_debugctl_mask()) | ||
805 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
806 | |||
807 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | ||
808 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
809 | } | ||
810 | |||
811 | cfg.bts_size = sizeof(struct bts_struct); | ||
812 | |||
813 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | ||
814 | return -EFAULT; | ||
815 | |||
816 | return sizeof(cfg); | ||
817 | } | ||
818 | |||
819 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | ||
820 | enum bts_qualifier qualifier) | ||
821 | { | ||
822 | struct bts_struct rec = { | ||
823 | .qualifier = qualifier, | ||
824 | .variant.jiffies = jiffies_64 | ||
825 | }; | ||
826 | |||
827 | ptrace_bts_write_record(tsk, &rec); | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * Called by kernel/ptrace.c when detaching.. | ||
832 | * | ||
833 | * Make sure the single step bit is not set. | ||
834 | */ | ||
835 | void ptrace_disable(struct task_struct *child) | ||
836 | { | ||
837 | user_disable_single_step(child); | ||
838 | #ifdef TIF_SYSCALL_EMU | ||
839 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
840 | #endif | ||
841 | if (child->thread.ds_area_msr) { | ||
842 | ptrace_bts_realloc(child, 0, 0); | ||
843 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | ||
844 | if (!child->thread.debugctlmsr) | ||
845 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
846 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
847 | } | ||
848 | } | ||
849 | |||
850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
851 | static const struct user_regset_view user_x86_32_view; /* Initialized below. */ | ||
852 | #endif | ||
853 | |||
854 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) | ||
855 | { | ||
856 | int ret; | ||
857 | unsigned long __user *datap = (unsigned long __user *)data; | ||
858 | |||
859 | switch (request) { | ||
860 | /* read the word at location addr in the USER area. */ | ||
861 | case PTRACE_PEEKUSR: { | ||
862 | unsigned long tmp; | ||
863 | |||
864 | ret = -EIO; | ||
865 | if ((addr & (sizeof(data) - 1)) || addr < 0 || | ||
866 | addr >= sizeof(struct user)) | ||
867 | break; | ||
868 | |||
869 | tmp = 0; /* Default return condition */ | ||
870 | if (addr < sizeof(struct user_regs_struct)) | ||
871 | tmp = getreg(child, addr); | ||
872 | else if (addr >= offsetof(struct user, u_debugreg[0]) && | ||
873 | addr <= offsetof(struct user, u_debugreg[7])) { | ||
874 | addr -= offsetof(struct user, u_debugreg[0]); | ||
875 | tmp = ptrace_get_debugreg(child, addr / sizeof(data)); | ||
876 | } | ||
877 | ret = put_user(tmp, datap); | ||
878 | break; | ||
879 | } | ||
880 | |||
881 | case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ | ||
882 | ret = -EIO; | ||
883 | if ((addr & (sizeof(data) - 1)) || addr < 0 || | ||
884 | addr >= sizeof(struct user)) | ||
885 | break; | ||
886 | |||
887 | if (addr < sizeof(struct user_regs_struct)) | ||
888 | ret = putreg(child, addr, data); | ||
889 | else if (addr >= offsetof(struct user, u_debugreg[0]) && | ||
890 | addr <= offsetof(struct user, u_debugreg[7])) { | ||
891 | addr -= offsetof(struct user, u_debugreg[0]); | ||
892 | ret = ptrace_set_debugreg(child, | ||
893 | addr / sizeof(data), data); | ||
894 | } | ||
895 | break; | ||
896 | |||
897 | case PTRACE_GETREGS: /* Get all gp regs from the child. */ | ||
898 | return copy_regset_to_user(child, | ||
899 | task_user_regset_view(current), | ||
900 | REGSET_GENERAL, | ||
901 | 0, sizeof(struct user_regs_struct), | ||
902 | datap); | ||
903 | |||
904 | case PTRACE_SETREGS: /* Set all gp regs in the child. */ | ||
905 | return copy_regset_from_user(child, | ||
906 | task_user_regset_view(current), | ||
907 | REGSET_GENERAL, | ||
908 | 0, sizeof(struct user_regs_struct), | ||
909 | datap); | ||
910 | |||
911 | case PTRACE_GETFPREGS: /* Get the child FPU state. */ | ||
912 | return copy_regset_to_user(child, | ||
913 | task_user_regset_view(current), | ||
914 | REGSET_FP, | ||
915 | 0, sizeof(struct user_i387_struct), | ||
916 | datap); | ||
917 | |||
918 | case PTRACE_SETFPREGS: /* Set the child FPU state. */ | ||
919 | return copy_regset_from_user(child, | ||
920 | task_user_regset_view(current), | ||
921 | REGSET_FP, | ||
922 | 0, sizeof(struct user_i387_struct), | ||
923 | datap); | ||
924 | |||
925 | #ifdef CONFIG_X86_32 | ||
926 | case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ | ||
927 | return copy_regset_to_user(child, &user_x86_32_view, | ||
928 | REGSET_XFP, | ||
929 | 0, sizeof(struct user_fxsr_struct), | ||
930 | datap); | ||
931 | |||
932 | case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ | ||
933 | return copy_regset_from_user(child, &user_x86_32_view, | ||
934 | REGSET_XFP, | ||
935 | 0, sizeof(struct user_fxsr_struct), | ||
936 | datap); | ||
937 | #endif | ||
938 | |||
939 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
940 | case PTRACE_GET_THREAD_AREA: | ||
941 | if (addr < 0) | ||
942 | return -EIO; | ||
943 | ret = do_get_thread_area(child, addr, | ||
944 | (struct user_desc __user *) data); | ||
945 | break; | ||
946 | |||
947 | case PTRACE_SET_THREAD_AREA: | ||
948 | if (addr < 0) | ||
949 | return -EIO; | ||
950 | ret = do_set_thread_area(child, addr, | ||
951 | (struct user_desc __user *) data, 0); | ||
952 | break; | ||
953 | #endif | ||
954 | |||
955 | #ifdef CONFIG_X86_64 | ||
956 | /* normal 64bit interface to access TLS data. | ||
957 | Works just like arch_prctl, except that the arguments | ||
958 | are reversed. */ | ||
959 | case PTRACE_ARCH_PRCTL: | ||
960 | ret = do_arch_prctl(child, data, addr); | ||
961 | break; | ||
962 | #endif | ||
963 | |||
964 | case PTRACE_BTS_CONFIG: | ||
965 | ret = ptrace_bts_config | ||
966 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
967 | break; | ||
968 | |||
969 | case PTRACE_BTS_STATUS: | ||
970 | ret = ptrace_bts_status | ||
971 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
972 | break; | ||
973 | |||
974 | case PTRACE_BTS_SIZE: | ||
975 | ret = ptrace_bts_get_size(child); | ||
976 | break; | ||
977 | |||
978 | case PTRACE_BTS_GET: | ||
979 | ret = ptrace_bts_read_record | ||
980 | (child, data, (struct bts_struct __user *) addr); | ||
981 | break; | ||
982 | |||
983 | case PTRACE_BTS_CLEAR: | ||
984 | ret = ptrace_bts_clear(child); | ||
985 | break; | ||
986 | |||
987 | case PTRACE_BTS_DRAIN: | ||
988 | ret = ptrace_bts_drain | ||
989 | (child, data, (struct bts_struct __user *) addr); | ||
990 | break; | ||
991 | |||
992 | default: | ||
993 | ret = ptrace_request(child, request, addr, data); | ||
994 | break; | ||
995 | } | ||
996 | |||
997 | return ret; | ||
998 | } | ||
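/*
 * Rough user-space sketch of a tracer driving the BTS requests handled in
 * arch_ptrace() above.  It assumes the PTRACE_BTS_* request numbers,
 * struct ptrace_bts_config and struct bts_struct introduced by this patch
 * are visible through the exported ptrace headers; error handling is
 * omitted for brevity.
 */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdio.h>
#include <stdlib.h>

static void collect_branch_trace(pid_t child)
{
	struct ptrace_bts_config cfg = {
		.size  = 4096,			/* bytes; the kernel page-aligns this */
		.flags = PTRACE_BTS_O_TRACE,	/* record branches from now on */
	};
	struct bts_struct *buf;
	long n;

	waitpid(child, NULL, 0);		/* child stopped after PTRACE_TRACEME + exec */
	ptrace(PTRACE_BTS_CONFIG, child, &cfg, sizeof(cfg));

	ptrace(PTRACE_CONT, child, 0, 0);
	waitpid(child, NULL, 0);		/* wait for the next stop */

	n = ptrace(PTRACE_BTS_SIZE, child, 0, 0);	/* records currently buffered */
	buf = calloc(n, sizeof(*buf));
	ptrace(PTRACE_BTS_DRAIN, child, buf, n * sizeof(*buf));
	printf("collected %ld branch records\n", n);
	free(buf);
}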
999 | |||
1000 | #ifdef CONFIG_IA32_EMULATION | ||
1001 | |||
1002 | #include <linux/compat.h> | ||
1003 | #include <linux/syscalls.h> | ||
1004 | #include <asm/ia32.h> | ||
1005 | #include <asm/user32.h> | ||
1006 | |||
1007 | #define R32(l,q) \ | ||
1008 | case offsetof(struct user32, regs.l): \ | ||
1009 | regs->q = value; break | ||
1010 | |||
1011 | #define SEG32(rs) \ | ||
1012 | case offsetof(struct user32, regs.rs): \ | ||
1013 | return set_segment_reg(child, \ | ||
1014 | offsetof(struct user_regs_struct, rs), \ | ||
1015 | value); \ | ||
1016 | break | ||
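/*
 * For illustration, R32(ebx, bx) above expands to
 *
 *	case offsetof(struct user32, regs.ebx):
 *		regs->bx = value; break;
 *
 * i.e. a 32-bit register slot in the compat user area is mapped onto the
 * corresponding 64-bit pt_regs field.
 */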
1017 | |||
1018 | static int putreg32(struct task_struct *child, unsigned regno, u32 value) | ||
1019 | { | ||
1020 | struct pt_regs *regs = task_pt_regs(child); | ||
1021 | |||
1022 | switch (regno) { | ||
1023 | |||
1024 | SEG32(cs); | ||
1025 | SEG32(ds); | ||
1026 | SEG32(es); | ||
1027 | SEG32(fs); | ||
1028 | SEG32(gs); | ||
1029 | SEG32(ss); | ||
1030 | |||
1031 | R32(ebx, bx); | ||
1032 | R32(ecx, cx); | ||
1033 | R32(edx, dx); | ||
1034 | R32(edi, di); | ||
1035 | R32(esi, si); | ||
1036 | R32(ebp, bp); | ||
1037 | R32(eax, ax); | ||
1038 | R32(orig_eax, orig_ax); | ||
1039 | R32(eip, ip); | ||
1040 | R32(esp, sp); | ||
1041 | |||
1042 | case offsetof(struct user32, regs.eflags): | ||
1043 | return set_flags(child, value); | ||
1044 | |||
1045 | case offsetof(struct user32, u_debugreg[0]) ... | ||
1046 | offsetof(struct user32, u_debugreg[7]): | ||
1047 | regno -= offsetof(struct user32, u_debugreg[0]); | ||
1048 | return ptrace_set_debugreg(child, regno / 4, value); | ||
1049 | |||
1050 | default: | ||
1051 | if (regno > sizeof(struct user32) || (regno & 3)) | ||
1052 | return -EIO; | ||
1053 | |||
1054 | /* | ||
1055 | * Other dummy fields in the virtual user structure | ||
1056 | * are ignored | ||
1057 | */ | ||
1058 | break; | ||
1059 | } | ||
1060 | return 0; | ||
1061 | } | ||
1062 | |||
1063 | #undef R32 | ||
1064 | #undef SEG32 | ||
1065 | |||
1066 | #define R32(l,q) \ | ||
1067 | case offsetof(struct user32, regs.l): \ | ||
1068 | *val = regs->q; break | ||
1069 | |||
1070 | #define SEG32(rs) \ | ||
1071 | case offsetof(struct user32, regs.rs): \ | ||
1072 | *val = get_segment_reg(child, \ | ||
1073 | offsetof(struct user_regs_struct, rs)); \ | ||
1074 | break | ||
1075 | |||
1076 | static int getreg32(struct task_struct *child, unsigned regno, u32 *val) | ||
1077 | { | ||
1078 | struct pt_regs *regs = task_pt_regs(child); | ||
1079 | |||
1080 | switch (regno) { | ||
1081 | |||
1082 | SEG32(ds); | ||
1083 | SEG32(es); | ||
1084 | SEG32(fs); | ||
1085 | SEG32(gs); | ||
1086 | |||
1087 | R32(cs, cs); | ||
1088 | R32(ss, ss); | ||
1089 | R32(ebx, bx); | ||
1090 | R32(ecx, cx); | ||
1091 | R32(edx, dx); | ||
1092 | R32(edi, di); | ||
1093 | R32(esi, si); | ||
1094 | R32(ebp, bp); | ||
1095 | R32(eax, ax); | ||
1096 | R32(orig_eax, orig_ax); | ||
1097 | R32(eip, ip); | ||
1098 | R32(esp, sp); | ||
1099 | |||
1100 | case offsetof(struct user32, regs.eflags): | ||
1101 | *val = get_flags(child); | ||
1102 | break; | ||
1103 | |||
1104 | case offsetof(struct user32, u_debugreg[0]) ... | ||
1105 | offsetof(struct user32, u_debugreg[7]): | ||
1106 | regno -= offsetof(struct user32, u_debugreg[0]); | ||
1107 | *val = ptrace_get_debugreg(child, regno / 4); | ||
1108 | break; | ||
1109 | |||
1110 | default: | ||
1111 | if (regno > sizeof(struct user32) || (regno & 3)) | ||
1112 | return -EIO; | ||
1113 | |||
1114 | /* | ||
1115 | * Other dummy fields in the virtual user structure | ||
1116 | * are ignored | ||
1117 | */ | ||
1118 | *val = 0; | ||
1119 | break; | ||
1120 | } | ||
1121 | return 0; | ||
1122 | } | ||
1123 | |||
1124 | #undef R32 | ||
1125 | #undef SEG32 | ||
1126 | |||
1127 | static int genregs32_get(struct task_struct *target, | ||
1128 | const struct user_regset *regset, | ||
1129 | unsigned int pos, unsigned int count, | ||
1130 | void *kbuf, void __user *ubuf) | ||
1131 | { | ||
1132 | if (kbuf) { | ||
1133 | compat_ulong_t *k = kbuf; | ||
1134 | while (count > 0) { | ||
1135 | getreg32(target, pos, k++); | ||
1136 | count -= sizeof(*k); | ||
1137 | pos += sizeof(*k); | ||
1138 | } | ||
1139 | } else { | ||
1140 | compat_ulong_t __user *u = ubuf; | ||
1141 | while (count > 0) { | ||
1142 | compat_ulong_t word; | ||
1143 | getreg32(target, pos, &word); | ||
1144 | if (__put_user(word, u++)) | ||
1145 | return -EFAULT; | ||
1146 | count -= sizeof(*u); | ||
1147 | pos += sizeof(*u); | ||
1148 | } | ||
1149 | } | ||
1150 | |||
1151 | return 0; | ||
1152 | } | ||
1153 | |||
1154 | static int genregs32_set(struct task_struct *target, | ||
1155 | const struct user_regset *regset, | ||
1156 | unsigned int pos, unsigned int count, | ||
1157 | const void *kbuf, const void __user *ubuf) | ||
1158 | { | ||
1159 | int ret = 0; | ||
1160 | if (kbuf) { | ||
1161 | const compat_ulong_t *k = kbuf; | ||
1162 | while (count > 0 && !ret) { | ||
1163 | ret = putreg(target, pos, *k++); | ||
1164 | count -= sizeof(*k); | ||
1165 | pos += sizeof(*k); | ||
1166 | } | ||
1167 | } else { | ||
1168 | const compat_ulong_t __user *u = ubuf; | ||
1169 | while (count > 0 && !ret) { | ||
1170 | compat_ulong_t word; | ||
1171 | ret = __get_user(word, u++); | ||
1172 | if (ret) | ||
1173 | break; | ||
1174 | ret = putreg(target, pos, word); | ||
1175 | count -= sizeof(*u); | ||
1176 | pos += sizeof(*u); | ||
1177 | } | ||
1178 | } | ||
1179 | return ret; | ||
1180 | } | ||
1181 | |||
1182 | static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) | ||
1183 | { | ||
1184 | siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); | ||
1185 | compat_siginfo_t __user *si32 = compat_ptr(data); | ||
1186 | siginfo_t ssi; | ||
1187 | int ret; | ||
1188 | |||
1189 | if (request == PTRACE_SETSIGINFO) { | ||
1190 | memset(&ssi, 0, sizeof(siginfo_t)); | ||
1191 | ret = copy_siginfo_from_user32(&ssi, si32); | ||
1192 | if (ret) | ||
1193 | return ret; | ||
1194 | if (copy_to_user(si, &ssi, sizeof(siginfo_t))) | ||
1195 | return -EFAULT; | ||
1196 | } | ||
1197 | ret = sys_ptrace(request, pid, addr, (unsigned long)si); | ||
1198 | if (ret) | ||
1199 | return ret; | ||
1200 | if (request == PTRACE_GETSIGINFO) { | ||
1201 | if (copy_from_user(&ssi, si, sizeof(siginfo_t))) | ||
1202 | return -EFAULT; | ||
1203 | ret = copy_siginfo_to_user32(si32, &ssi); | ||
1204 | } | ||
1205 | return ret; | ||
1206 | } | ||
1207 | |||
1208 | asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | ||
1209 | { | ||
1210 | struct task_struct *child; | ||
1211 | struct pt_regs *childregs; | ||
1212 | void __user *datap = compat_ptr(data); | ||
1213 | int ret; | ||
1214 | __u32 val; | ||
1215 | |||
1216 | switch (request) { | ||
1217 | case PTRACE_TRACEME: | ||
1218 | case PTRACE_ATTACH: | ||
1219 | case PTRACE_KILL: | ||
1220 | case PTRACE_CONT: | ||
1221 | case PTRACE_SINGLESTEP: | ||
1222 | case PTRACE_SINGLEBLOCK: | ||
1223 | case PTRACE_DETACH: | ||
1224 | case PTRACE_SYSCALL: | ||
1225 | case PTRACE_OLDSETOPTIONS: | ||
1226 | case PTRACE_SETOPTIONS: | ||
1227 | case PTRACE_SET_THREAD_AREA: | ||
1228 | case PTRACE_GET_THREAD_AREA: | ||
1229 | case PTRACE_BTS_CONFIG: | ||
1230 | case PTRACE_BTS_STATUS: | ||
1231 | case PTRACE_BTS_SIZE: | ||
1232 | case PTRACE_BTS_GET: | ||
1233 | case PTRACE_BTS_CLEAR: | ||
1234 | case PTRACE_BTS_DRAIN: | ||
1235 | return sys_ptrace(request, pid, addr, data); | ||
1236 | |||
1237 | default: | ||
1238 | return -EINVAL; | ||
1239 | |||
1240 | case PTRACE_PEEKTEXT: | ||
1241 | case PTRACE_PEEKDATA: | ||
1242 | case PTRACE_POKEDATA: | ||
1243 | case PTRACE_POKETEXT: | ||
1244 | case PTRACE_POKEUSR: | ||
1245 | case PTRACE_PEEKUSR: | ||
1246 | case PTRACE_GETREGS: | ||
1247 | case PTRACE_SETREGS: | ||
1248 | case PTRACE_SETFPREGS: | ||
1249 | case PTRACE_GETFPREGS: | ||
1250 | case PTRACE_SETFPXREGS: | ||
1251 | case PTRACE_GETFPXREGS: | ||
1252 | case PTRACE_GETEVENTMSG: | ||
1253 | break; | ||
1254 | |||
1255 | case PTRACE_SETSIGINFO: | ||
1256 | case PTRACE_GETSIGINFO: | ||
1257 | return ptrace32_siginfo(request, pid, addr, data); | ||
1258 | } | ||
1259 | |||
1260 | child = ptrace_get_task_struct(pid); | ||
1261 | if (IS_ERR(child)) | ||
1262 | return PTR_ERR(child); | ||
1263 | |||
1264 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
1265 | if (ret < 0) | ||
1266 | goto out; | ||
1267 | |||
1268 | childregs = task_pt_regs(child); | ||
1269 | |||
1270 | switch (request) { | ||
1271 | case PTRACE_PEEKUSR: | ||
1272 | ret = getreg32(child, addr, &val); | ||
1273 | if (ret == 0) | ||
1274 | ret = put_user(val, (__u32 __user *)datap); | ||
1275 | break; | ||
1276 | |||
1277 | case PTRACE_POKEUSR: | ||
1278 | ret = putreg32(child, addr, data); | ||
1279 | break; | ||
1280 | |||
1281 | case PTRACE_GETREGS: /* Get all gp regs from the child. */ | ||
1282 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1283 | REGSET_GENERAL, | ||
1284 | 0, sizeof(struct user_regs_struct32), | ||
1285 | datap); | ||
1286 | |||
1287 | case PTRACE_SETREGS: /* Set all gp regs in the child. */ | ||
1288 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1289 | REGSET_GENERAL, 0, | ||
1290 | sizeof(struct user_regs_struct32), | ||
1291 | datap); | ||
1292 | |||
1293 | case PTRACE_GETFPREGS: /* Get the child FPU state. */ | ||
1294 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1295 | REGSET_FP, 0, | ||
1296 | sizeof(struct user_i387_ia32_struct), | ||
1297 | datap); | ||
1298 | |||
1299 | case PTRACE_SETFPREGS: /* Set the child FPU state. */ | ||
1300 | return copy_regset_from_user( | ||
1301 | child, &user_x86_32_view, REGSET_FP, | ||
1302 | 0, sizeof(struct user_i387_ia32_struct), datap); | ||
1303 | |||
1304 | case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ | ||
1305 | return copy_regset_to_user(child, &user_x86_32_view, | ||
1306 | REGSET_XFP, 0, | ||
1307 | sizeof(struct user32_fxsr_struct), | ||
1308 | datap); | ||
1309 | |||
1310 | case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ | ||
1311 | return copy_regset_from_user(child, &user_x86_32_view, | ||
1312 | REGSET_XFP, 0, | ||
1313 | sizeof(struct user32_fxsr_struct), | ||
1314 | datap); | ||
1315 | |||
1316 | default: | ||
1317 | return compat_ptrace_request(child, request, addr, data); | ||
1318 | } | ||
1319 | |||
1320 | out: | ||
1321 | put_task_struct(child); | ||
1322 | return ret; | ||
1323 | } | ||
1324 | |||
1325 | #endif /* CONFIG_IA32_EMULATION */ | ||
1326 | |||
1327 | #ifdef CONFIG_X86_64 | ||
1328 | |||
1329 | static const struct user_regset x86_64_regsets[] = { | ||
1330 | [REGSET_GENERAL] = { | ||
1331 | .core_note_type = NT_PRSTATUS, | ||
1332 | .n = sizeof(struct user_regs_struct) / sizeof(long), | ||
1333 | .size = sizeof(long), .align = sizeof(long), | ||
1334 | .get = genregs_get, .set = genregs_set | ||
1335 | }, | ||
1336 | [REGSET_FP] = { | ||
1337 | .core_note_type = NT_PRFPREG, | ||
1338 | .n = sizeof(struct user_i387_struct) / sizeof(long), | ||
1339 | .size = sizeof(long), .align = sizeof(long), | ||
1340 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | ||
1341 | }, | ||
1342 | }; | ||
1343 | |||
1344 | static const struct user_regset_view user_x86_64_view = { | ||
1345 | .name = "x86_64", .e_machine = EM_X86_64, | ||
1346 | .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets) | ||
1347 | }; | ||
1348 | |||
1349 | #else /* CONFIG_X86_32 */ | ||
1350 | |||
1351 | #define user_regs_struct32 user_regs_struct | ||
1352 | #define genregs32_get genregs_get | ||
1353 | #define genregs32_set genregs_set | ||
1354 | |||
1355 | #endif /* CONFIG_X86_64 */ | ||
1356 | |||
1357 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
1358 | static const struct user_regset x86_32_regsets[] = { | ||
1359 | [REGSET_GENERAL] = { | ||
1360 | .core_note_type = NT_PRSTATUS, | ||
1361 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), | ||
1362 | .size = sizeof(u32), .align = sizeof(u32), | ||
1363 | .get = genregs32_get, .set = genregs32_set | ||
1364 | }, | ||
1365 | [REGSET_FP] = { | ||
1366 | .core_note_type = NT_PRFPREG, | ||
1367 | .n = sizeof(struct user_i387_struct) / sizeof(u32), | ||
1368 | .size = sizeof(u32), .align = sizeof(u32), | ||
1369 | .active = fpregs_active, .get = fpregs_get, .set = fpregs_set | ||
1370 | }, | ||
1371 | [REGSET_XFP] = { | ||
1372 | .core_note_type = NT_PRXFPREG, | ||
1373 | .n = sizeof(struct user_i387_struct) / sizeof(u32), | ||
1374 | .size = sizeof(u32), .align = sizeof(u32), | ||
1375 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | ||
1376 | }, | ||
1377 | [REGSET_TLS] = { | ||
1378 | .core_note_type = NT_386_TLS, | ||
1379 | .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, | ||
1380 | .size = sizeof(struct user_desc), | ||
1381 | .align = sizeof(struct user_desc), | ||
1382 | .active = regset_tls_active, | ||
1383 | .get = regset_tls_get, .set = regset_tls_set | ||
1384 | }, | ||
1385 | }; | ||
1386 | |||
1387 | static const struct user_regset_view user_x86_32_view = { | ||
1388 | .name = "i386", .e_machine = EM_386, | ||
1389 | .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets) | ||
1390 | }; | ||
1391 | #endif | ||
1392 | |||
1393 | const struct user_regset_view *task_user_regset_view(struct task_struct *task) | ||
1394 | { | ||
1395 | #ifdef CONFIG_IA32_EMULATION | ||
1396 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
1397 | #endif | ||
1398 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
1399 | return &user_x86_32_view; | ||
1400 | #endif | ||
1401 | #ifdef CONFIG_X86_64 | ||
1402 | return &user_x86_64_view; | ||
1403 | #endif | ||
1404 | } | ||
1405 | |||
1406 | #ifdef CONFIG_X86_32 | ||
1407 | |||
1408 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | ||
1409 | { | ||
1410 | struct siginfo info; | ||
1411 | |||
1412 | tsk->thread.trap_no = 1; | ||
1413 | tsk->thread.error_code = error_code; | ||
1414 | |||
1415 | memset(&info, 0, sizeof(info)); | ||
1416 | info.si_signo = SIGTRAP; | ||
1417 | info.si_code = TRAP_BRKPT; | ||
1418 | |||
1419 | /* User-mode ip? */ | ||
1420 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; | ||
1421 | |||
1422 | /* Send us the fake SIGTRAP */ | ||
1423 | force_sig_info(SIGTRAP, &info, tsk); | ||
1424 | } | ||
1425 | |||
1426 | /* notification of system call entry/exit | ||
1427 | * - triggered by current->work.syscall_trace | ||
1428 | */ | ||
1429 | __attribute__((regparm(3))) | ||
1430 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
1431 | { | ||
1432 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
1433 | /* | ||
1434 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
1435 | * interception | ||
1436 | */ | ||
1437 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
1438 | int ret = 0; | ||
1439 | |||
1440 | /* do the secure computing check first */ | ||
1441 | if (!entryexit) | ||
1442 | secure_computing(regs->orig_ax); | ||
1443 | |||
1444 | if (unlikely(current->audit_context)) { | ||
1445 | if (entryexit) | ||
1446 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), | ||
1447 | regs->ax); | ||
1448 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
1449 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
1450 | * not used, entry.S will call us only on syscall exit, not | ||
1451 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
1452 | * calling send_sigtrap() on syscall entry. | ||
1453 | * | ||
1454 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
1455 | * is_singlestep is false, despite its name, so we will still do | ||
1456 | * the correct thing. | ||
1457 | */ | ||
1458 | else if (is_singlestep) | ||
1459 | goto out; | ||
1460 | } | ||
1461 | |||
1462 | if (!(current->ptrace & PT_PTRACED)) | ||
1463 | goto out; | ||
1464 | |||
1465 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
1466 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
1467 | * here. We have to check this and return */ | ||
1468 | if (is_sysemu && entryexit) | ||
1469 | return 0; | ||
1470 | |||
1471 | /* Fake a debug trap */ | ||
1472 | if (is_singlestep) | ||
1473 | send_sigtrap(current, regs, 0); | ||
1474 | |||
1475 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
1476 | goto out; | ||
1477 | |||
1478 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
1479 | between a syscall stop and SIGTRAP delivery */ | ||
1480 | /* Note that the debugger could change the result of test_thread_flag! */ | ||
1481 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
1482 | |||
1483 | /* | ||
1484 | * this isn't the same as continuing with a signal, but it will do | ||
1485 | * for normal use. strace only continues with a signal if the | ||
1486 | * stopping signal is not SIGTRAP. -brl | ||
1487 | */ | ||
1488 | if (current->exit_code) { | ||
1489 | send_sig(current->exit_code, current, 1); | ||
1490 | current->exit_code = 0; | ||
1491 | } | ||
1492 | ret = is_sysemu; | ||
1493 | out: | ||
1494 | if (unlikely(current->audit_context) && !entryexit) | ||
1495 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, | ||
1496 | regs->bx, regs->cx, regs->dx, regs->si); | ||
1497 | if (ret == 0) | ||
1498 | return 0; | ||
1499 | |||
1500 | regs->orig_ax = -1; /* force skip of syscall restarting */ | ||
1501 | if (unlikely(current->audit_context)) | ||
1502 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1503 | return 1; | ||
1504 | } | ||
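/*
 * Seen from the tracer, the 0x80 marker above means: a parent that enabled
 * PTRACE_O_TRACESYSGOOD (which sets PT_TRACESYSGOOD) can tell a syscall
 * stop from a real SIGTRAP with a check along the lines of
 *
 *	if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
 *		handle_syscall_stop();
 *
 * where handle_syscall_stop() is just a placeholder for the tracer's own
 * code.
 */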
1505 | |||
1506 | #else /* CONFIG_X86_64 */ | ||
1507 | |||
1508 | static void syscall_trace(struct pt_regs *regs) | ||
1509 | { | ||
1510 | |||
1511 | #if 0 | ||
1512 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
1513 | current->comm, | ||
1514 | regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), | ||
1515 | current_thread_info()->flags, current->ptrace); | ||
1516 | #endif | ||
1517 | |||
1518 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
1519 | ? 0x80 : 0)); | ||
1520 | /* | ||
1521 | * this isn't the same as continuing with a signal, but it will do | ||
1522 | * for normal use. strace only continues with a signal if the | ||
1523 | * stopping signal is not SIGTRAP. -brl | ||
1524 | */ | ||
1525 | if (current->exit_code) { | ||
1526 | send_sig(current->exit_code, current, 1); | ||
1527 | current->exit_code = 0; | ||
1528 | } | ||
1529 | } | ||
1530 | |||
1531 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | ||
1532 | { | ||
1533 | /* do the secure computing check first */ | ||
1534 | secure_computing(regs->orig_ax); | ||
1535 | |||
1536 | if (test_thread_flag(TIF_SYSCALL_TRACE) | ||
1537 | && (current->ptrace & PT_PTRACED)) | ||
1538 | syscall_trace(regs); | ||
1539 | |||
1540 | if (unlikely(current->audit_context)) { | ||
1541 | if (test_thread_flag(TIF_IA32)) { | ||
1542 | audit_syscall_entry(AUDIT_ARCH_I386, | ||
1543 | regs->orig_ax, | ||
1544 | regs->bx, regs->cx, | ||
1545 | regs->dx, regs->si); | ||
1546 | } else { | ||
1547 | audit_syscall_entry(AUDIT_ARCH_X86_64, | ||
1548 | regs->orig_ax, | ||
1549 | regs->di, regs->si, | ||
1550 | regs->dx, regs->r10); | ||
1551 | } | ||
1552 | } | ||
1553 | } | ||
1554 | |||
1555 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | ||
1556 | { | ||
1557 | if (unlikely(current->audit_context)) | ||
1558 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1559 | |||
1560 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | ||
1561 | || test_thread_flag(TIF_SINGLESTEP)) | ||
1562 | && (current->ptrace & PT_PTRACED)) | ||
1563 | syscall_trace(regs); | ||
1564 | } | ||
1565 | |||
1566 | #endif /* CONFIG_X86_32 */ | ||
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c deleted file mode 100644 index ff5431cc03ee..000000000000 --- a/arch/x86/kernel/ptrace_32.c +++ /dev/null | |||
@@ -1,717 +0,0 @@ | |||
1 | /* By Ross Biro 1/23/92 */ | ||
2 | /* | ||
3 | * Pentium III FXSR, SSE support | ||
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/user.h> | ||
14 | #include <linux/security.h> | ||
15 | #include <linux/audit.h> | ||
16 | #include <linux/seccomp.h> | ||
17 | #include <linux/signal.h> | ||
18 | |||
19 | #include <asm/uaccess.h> | ||
20 | #include <asm/pgtable.h> | ||
21 | #include <asm/system.h> | ||
22 | #include <asm/processor.h> | ||
23 | #include <asm/i387.h> | ||
24 | #include <asm/debugreg.h> | ||
25 | #include <asm/ldt.h> | ||
26 | #include <asm/desc.h> | ||
27 | |||
28 | /* | ||
29 | * Does not yet catch signals sent when the child dies | ||
30 | * in exit.c or in signal.c. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * Determines which flags the user has access to [1 = access, 0 = no access]. | ||
35 | * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). | ||
36 | * Also masks reserved bits (31-22, 15, 5, 3, 1). | ||
37 | */ | ||
38 | #define FLAG_MASK 0x00050dd5 | ||
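/*
 * Spelled out, 0x00050dd5 leaves exactly these flags writable:
 * CF(0), PF(2), AF(4), ZF(6), SF(7), TF(8), DF(10), OF(11), RF(16), AC(18);
 * every other bit of a value written to the EFLAGS slot keeps its current
 * value when putreg() below applies the mask.
 */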
39 | |||
40 | /* sets the trap flag. */ | ||
41 | #define TRAP_FLAG 0x100 | ||
42 | |||
43 | /* | ||
44 | * Offset of eflags on child stack.. | ||
45 | */ | ||
46 | #define EFL_OFFSET offsetof(struct pt_regs, eflags) | ||
47 | |||
48 | static inline struct pt_regs *get_child_regs(struct task_struct *task) | ||
49 | { | ||
50 | void *stack_top = (void *)task->thread.esp0; | ||
51 | return stack_top - sizeof(struct pt_regs); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * This routine will get a word off of the process's privileged stack. | ||
56 | * The offset is in bytes into the pt_regs structure on the stack. | ||
57 | * This routine assumes that all the privileged stacks are in our | ||
58 | * data space. | ||
59 | */ | ||
60 | static inline int get_stack_long(struct task_struct *task, int offset) | ||
61 | { | ||
62 | unsigned char *stack; | ||
63 | |||
64 | stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); | ||
65 | stack += offset; | ||
66 | return (*((int *)stack)); | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * This routine will put a word on the process's privileged stack. | ||
71 | * The offset is in bytes into the pt_regs structure on the stack. | ||
72 | * This routine assumes that all the privileged stacks are in our | ||
73 | * data space. | ||
74 | */ | ||
75 | static inline int put_stack_long(struct task_struct *task, int offset, | ||
76 | unsigned long data) | ||
77 | { | ||
78 | unsigned char *stack; | ||
79 | |||
80 | stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); | ||
81 | stack += offset; | ||
82 | *(unsigned long *) stack = data; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int putreg(struct task_struct *child, | ||
87 | unsigned long regno, unsigned long value) | ||
88 | { | ||
89 | switch (regno >> 2) { | ||
90 | case GS: | ||
91 | if (value && (value & 3) != 3) | ||
92 | return -EIO; | ||
93 | child->thread.gs = value; | ||
94 | return 0; | ||
95 | case DS: | ||
96 | case ES: | ||
97 | case FS: | ||
98 | if (value && (value & 3) != 3) | ||
99 | return -EIO; | ||
100 | value &= 0xffff; | ||
101 | break; | ||
102 | case SS: | ||
103 | case CS: | ||
104 | if ((value & 3) != 3) | ||
105 | return -EIO; | ||
106 | value &= 0xffff; | ||
107 | break; | ||
108 | case EFL: | ||
109 | value &= FLAG_MASK; | ||
110 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; | ||
111 | break; | ||
112 | } | ||
113 | if (regno > FS*4) | ||
114 | regno -= 1*4; | ||
115 | put_stack_long(child, regno, value); | ||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static unsigned long getreg(struct task_struct *child, | ||
120 | unsigned long regno) | ||
121 | { | ||
122 | unsigned long retval = ~0UL; | ||
123 | |||
124 | switch (regno >> 2) { | ||
125 | case GS: | ||
126 | retval = child->thread.gs; | ||
127 | break; | ||
128 | case DS: | ||
129 | case ES: | ||
130 | case FS: | ||
131 | case SS: | ||
132 | case CS: | ||
133 | retval = 0xffff; | ||
134 | /* fall through */ | ||
135 | default: | ||
136 | if (regno > FS*4) | ||
137 | regno -= 1*4; | ||
138 | retval &= get_stack_long(child, regno); | ||
139 | } | ||
140 | return retval; | ||
141 | } | ||
142 | |||
143 | #define LDT_SEGMENT 4 | ||
144 | |||
145 | static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) | ||
146 | { | ||
147 | unsigned long addr, seg; | ||
148 | |||
149 | addr = regs->eip; | ||
150 | seg = regs->xcs & 0xffff; | ||
151 | if (regs->eflags & VM_MASK) { | ||
152 | addr = (addr & 0xffff) + (seg << 4); | ||
153 | return addr; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * We'll assume that the code segments in the GDT | ||
158 | * are all zero-based. That is largely true: the | ||
159 | * TLS segments are used for data, and the PNPBIOS | ||
160 | * and APM bios ones we just ignore here. | ||
161 | */ | ||
162 | if (seg & LDT_SEGMENT) { | ||
163 | u32 *desc; | ||
164 | unsigned long base; | ||
165 | |||
166 | seg &= ~7UL; | ||
167 | |||
168 | mutex_lock(&child->mm->context.lock); | ||
169 | if (unlikely((seg >> 3) >= child->mm->context.size)) | ||
170 | addr = -1L; /* bogus selector, access would fault */ | ||
171 | else { | ||
172 | desc = child->mm->context.ldt + seg; | ||
173 | base = ((desc[0] >> 16) | | ||
174 | ((desc[1] & 0xff) << 16) | | ||
175 | (desc[1] & 0xff000000)); | ||
176 | |||
177 | /* 16-bit code segment? */ | ||
178 | if (!((desc[1] >> 22) & 1)) | ||
179 | addr &= 0xffff; | ||
180 | addr += base; | ||
181 | } | ||
182 | mutex_unlock(&child->mm->context.lock); | ||
183 | } | ||
184 | return addr; | ||
185 | } | ||
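/*
 * Worked example for the base reassembly above (hypothetical descriptor):
 * for an LDT entry with desc[0] == 0x5678ffff and desc[1] == 0x12cf9a34,
 * the three scattered base fields recombine to
 *	(0x5678ffff >> 16) | ((0x12cf9a34 & 0xff) << 16) | (0x12cf9a34 & 0xff000000)
 *	== 0x5678 | 0x340000 | 0x12000000 == 0x12345678.
 */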
186 | |||
187 | static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | ||
188 | { | ||
189 | int i, copied; | ||
190 | unsigned char opcode[15]; | ||
191 | unsigned long addr = convert_eip_to_linear(child, regs); | ||
192 | |||
193 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | ||
194 | for (i = 0; i < copied; i++) { | ||
195 | switch (opcode[i]) { | ||
196 | /* popf and iret */ | ||
197 | case 0x9d: case 0xcf: | ||
198 | return 1; | ||
199 | /* opcode and address size prefixes */ | ||
200 | case 0x66: case 0x67: | ||
201 | continue; | ||
202 | /* irrelevant prefixes (segment overrides and repeats) */ | ||
203 | case 0x26: case 0x2e: | ||
204 | case 0x36: case 0x3e: | ||
205 | case 0x64: case 0x65: | ||
206 | case 0xf0: case 0xf2: case 0xf3: | ||
207 | continue; | ||
208 | |||
209 | /* | ||
210 | * pushf: NOTE! We should probably not let | ||
211 | * the user see the TF bit being set. But | ||
212 | * it's more pain than it's worth to avoid | ||
213 | * it, and a debugger could emulate this | ||
214 | * all in user space if it _really_ cares. | ||
215 | */ | ||
216 | case 0x9c: | ||
217 | default: | ||
218 | return 0; | ||
219 | } | ||
220 | } | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static void set_singlestep(struct task_struct *child) | ||
225 | { | ||
226 | struct pt_regs *regs = get_child_regs(child); | ||
227 | |||
228 | /* | ||
229 | * Always set TIF_SINGLESTEP - this guarantees that | ||
230 | * we single-step system calls etc.. This will also | ||
231 | * cause us to set TF when returning to user mode. | ||
232 | */ | ||
233 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
234 | |||
235 | /* | ||
236 | * If TF was already set, don't do anything else | ||
237 | */ | ||
238 | if (regs->eflags & TRAP_FLAG) | ||
239 | return; | ||
240 | |||
241 | /* Set TF on the kernel stack.. */ | ||
242 | regs->eflags |= TRAP_FLAG; | ||
243 | |||
244 | /* | ||
245 | * ..but if TF is changed by the instruction we will trace, | ||
246 | * don't mark it as being "us" that set it, so that we | ||
247 | * won't clear it by hand later. | ||
248 | */ | ||
249 | if (is_setting_trap_flag(child, regs)) | ||
250 | return; | ||
251 | |||
252 | child->ptrace |= PT_DTRACE; | ||
253 | } | ||
254 | |||
255 | static void clear_singlestep(struct task_struct *child) | ||
256 | { | ||
257 | /* Always clear TIF_SINGLESTEP... */ | ||
258 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
259 | |||
260 | /* But touch TF only if it was set by us.. */ | ||
261 | if (child->ptrace & PT_DTRACE) { | ||
262 | struct pt_regs *regs = get_child_regs(child); | ||
263 | regs->eflags &= ~TRAP_FLAG; | ||
264 | child->ptrace &= ~PT_DTRACE; | ||
265 | } | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Called by kernel/ptrace.c when detaching.. | ||
270 | * | ||
271 | * Make sure the single step bit is not set. | ||
272 | */ | ||
273 | void ptrace_disable(struct task_struct *child) | ||
274 | { | ||
275 | clear_singlestep(child); | ||
276 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
277 | } | ||
278 | |||
279 | /* | ||
280 | * Perform get_thread_area on behalf of the traced child. | ||
281 | */ | ||
282 | static int | ||
283 | ptrace_get_thread_area(struct task_struct *child, | ||
284 | int idx, struct user_desc __user *user_desc) | ||
285 | { | ||
286 | struct user_desc info; | ||
287 | struct desc_struct *desc; | ||
288 | |||
289 | /* | ||
290 | * Get the current Thread-Local Storage area: | ||
291 | */ | ||
292 | |||
293 | #define GET_BASE(desc) ( \ | ||
294 | (((desc)->a >> 16) & 0x0000ffff) | \ | ||
295 | (((desc)->b << 16) & 0x00ff0000) | \ | ||
296 | ( (desc)->b & 0xff000000) ) | ||
297 | |||
298 | #define GET_LIMIT(desc) ( \ | ||
299 | ((desc)->a & 0x0ffff) | \ | ||
300 | ((desc)->b & 0xf0000) ) | ||
301 | |||
302 | #define GET_32BIT(desc) (((desc)->b >> 22) & 1) | ||
303 | #define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) | ||
304 | #define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) | ||
305 | #define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) | ||
306 | #define GET_PRESENT(desc) (((desc)->b >> 15) & 1) | ||
307 | #define GET_USEABLE(desc) (((desc)->b >> 20) & 1) | ||
308 | |||
309 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
310 | return -EINVAL; | ||
311 | |||
312 | desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
313 | |||
314 | info.entry_number = idx; | ||
315 | info.base_addr = GET_BASE(desc); | ||
316 | info.limit = GET_LIMIT(desc); | ||
317 | info.seg_32bit = GET_32BIT(desc); | ||
318 | info.contents = GET_CONTENTS(desc); | ||
319 | info.read_exec_only = !GET_WRITABLE(desc); | ||
320 | info.limit_in_pages = GET_LIMIT_PAGES(desc); | ||
321 | info.seg_not_present = !GET_PRESENT(desc); | ||
322 | info.useable = GET_USEABLE(desc); | ||
323 | |||
324 | if (copy_to_user(user_desc, &info, sizeof(info))) | ||
325 | return -EFAULT; | ||
326 | |||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Perform set_thread_area on behalf of the traced child. | ||
332 | */ | ||
333 | static int | ||
334 | ptrace_set_thread_area(struct task_struct *child, | ||
335 | int idx, struct user_desc __user *user_desc) | ||
336 | { | ||
337 | struct user_desc info; | ||
338 | struct desc_struct *desc; | ||
339 | |||
340 | if (copy_from_user(&info, user_desc, sizeof(info))) | ||
341 | return -EFAULT; | ||
342 | |||
343 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
344 | return -EINVAL; | ||
345 | |||
346 | desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
347 | if (LDT_empty(&info)) { | ||
348 | desc->a = 0; | ||
349 | desc->b = 0; | ||
350 | } else { | ||
351 | desc->a = LDT_entry_a(&info); | ||
352 | desc->b = LDT_entry_b(&info); | ||
353 | } | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) | ||
359 | { | ||
360 | struct user * dummy = NULL; | ||
361 | int i, ret; | ||
362 | unsigned long __user *datap = (unsigned long __user *)data; | ||
363 | |||
364 | switch (request) { | ||
365 | /* when I and D space are separate, these will need to be fixed. */ | ||
366 | case PTRACE_PEEKTEXT: /* read word at location addr. */ | ||
367 | case PTRACE_PEEKDATA: | ||
368 | ret = generic_ptrace_peekdata(child, addr, data); | ||
369 | break; | ||
370 | |||
371 | /* read the word at location addr in the USER area. */ | ||
372 | case PTRACE_PEEKUSR: { | ||
373 | unsigned long tmp; | ||
374 | |||
375 | ret = -EIO; | ||
376 | if ((addr & 3) || addr < 0 || | ||
377 | addr > sizeof(struct user) - 3) | ||
378 | break; | ||
379 | |||
380 | tmp = 0; /* Default return condition */ | ||
381 | if(addr < FRAME_SIZE*sizeof(long)) | ||
382 | tmp = getreg(child, addr); | ||
383 | if(addr >= (long) &dummy->u_debugreg[0] && | ||
384 | addr <= (long) &dummy->u_debugreg[7]){ | ||
385 | addr -= (long) &dummy->u_debugreg[0]; | ||
386 | addr = addr >> 2; | ||
387 | tmp = child->thread.debugreg[addr]; | ||
388 | } | ||
389 | ret = put_user(tmp, datap); | ||
390 | break; | ||
391 | } | ||
392 | |||
393 | /* when I and D space are separate, this will have to be fixed. */ | ||
394 | case PTRACE_POKETEXT: /* write the word at location addr. */ | ||
395 | case PTRACE_POKEDATA: | ||
396 | ret = generic_ptrace_pokedata(child, addr, data); | ||
397 | break; | ||
398 | |||
399 | case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ | ||
400 | ret = -EIO; | ||
401 | if ((addr & 3) || addr < 0 || | ||
402 | addr > sizeof(struct user) - 3) | ||
403 | break; | ||
404 | |||
405 | if (addr < FRAME_SIZE*sizeof(long)) { | ||
406 | ret = putreg(child, addr, data); | ||
407 | break; | ||
408 | } | ||
409 | /* We need to be very careful here. We implicitly | ||
410 | want to modify a portion of the task_struct, and we | ||
411 | have to be selective about what portions we allow someone | ||
412 | to modify. */ | ||
413 | |||
414 | ret = -EIO; | ||
415 | if(addr >= (long) &dummy->u_debugreg[0] && | ||
416 | addr <= (long) &dummy->u_debugreg[7]){ | ||
417 | |||
418 | if(addr == (long) &dummy->u_debugreg[4]) break; | ||
419 | if(addr == (long) &dummy->u_debugreg[5]) break; | ||
420 | if(addr < (long) &dummy->u_debugreg[4] && | ||
421 | ((unsigned long) data) >= TASK_SIZE-3) break; | ||
422 | |||
423 | /* Sanity-check data. Take one half-byte at once with | ||
424 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | ||
425 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
426 | * 2 and 3 are LENi. Given a list of invalid values, | ||
427 | * we do mask |= 1 << invalid_value, so that | ||
428 | * (mask >> check) & 1 is a correct test for invalid | ||
429 | * values. | ||
430 | * | ||
431 | * R/Wi contains the type of the breakpoint / | ||
432 | * watchpoint, LENi contains the length of the watched | ||
433 | * data in the watchpoint case. | ||
434 | * | ||
435 | * The invalid values are: | ||
436 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. | ||
437 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
438 | * mask |= 0x4444. | ||
439 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
440 | * 0x1110. | ||
441 | * | ||
442 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
443 | * | ||
444 | * See the Intel Manual "System Programming Guide", | ||
445 | * 15.2.4 | ||
446 | * | ||
447 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
448 | * mode (i.e. even for 32-bit userspace software, but | ||
449 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
450 | * See the AMD manual no. 24593 (AMD64 System | ||
451 | * Programming)*/ | ||
452 | |||
453 | if(addr == (long) &dummy->u_debugreg[7]) { | ||
454 | data &= ~DR_CONTROL_RESERVED; | ||
455 | for(i=0; i<4; i++) | ||
456 | if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
457 | goto out_tsk; | ||
458 | if (data) | ||
459 | set_tsk_thread_flag(child, TIF_DEBUG); | ||
460 | else | ||
461 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
462 | } | ||
463 | addr -= (long) &dummy->u_debugreg; | ||
464 | addr = addr >> 2; | ||
465 | child->thread.debugreg[addr] = data; | ||
466 | ret = 0; | ||
467 | } | ||
468 | break; | ||
469 | |||
470 | case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ | ||
471 | case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ | ||
472 | case PTRACE_CONT: /* restart after signal. */ | ||
473 | ret = -EIO; | ||
474 | if (!valid_signal(data)) | ||
475 | break; | ||
476 | if (request == PTRACE_SYSEMU) { | ||
477 | set_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
478 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
479 | } else if (request == PTRACE_SYSCALL) { | ||
480 | set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
481 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
482 | } else { | ||
483 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
484 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
485 | } | ||
486 | child->exit_code = data; | ||
487 | /* make sure the single step bit is not set. */ | ||
488 | clear_singlestep(child); | ||
489 | wake_up_process(child); | ||
490 | ret = 0; | ||
491 | break; | ||
492 | |||
493 | /* | ||
494 | * Make the child exit. The best we can do is send it a SIGKILL; | ||
495 | * perhaps it should be put in the status that it wants to | ||
496 | * exit. | ||
497 | */ | ||
498 | case PTRACE_KILL: | ||
499 | ret = 0; | ||
500 | if (child->exit_state == EXIT_ZOMBIE) /* already dead */ | ||
501 | break; | ||
502 | child->exit_code = SIGKILL; | ||
503 | /* make sure the single step bit is not set. */ | ||
504 | clear_singlestep(child); | ||
505 | wake_up_process(child); | ||
506 | break; | ||
507 | |||
508 | case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ | ||
509 | case PTRACE_SINGLESTEP: /* set the trap flag. */ | ||
510 | ret = -EIO; | ||
511 | if (!valid_signal(data)) | ||
512 | break; | ||
513 | |||
514 | if (request == PTRACE_SYSEMU_SINGLESTEP) | ||
515 | set_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
516 | else | ||
517 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | ||
518 | |||
519 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
520 | set_singlestep(child); | ||
521 | child->exit_code = data; | ||
522 | /* give it a chance to run. */ | ||
523 | wake_up_process(child); | ||
524 | ret = 0; | ||
525 | break; | ||
526 | |||
527 | case PTRACE_GETREGS: { /* Get all gp regs from the child. */ | ||
528 | if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { | ||
529 | ret = -EIO; | ||
530 | break; | ||
531 | } | ||
532 | for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { | ||
533 | __put_user(getreg(child, i), datap); | ||
534 | datap++; | ||
535 | } | ||
536 | ret = 0; | ||
537 | break; | ||
538 | } | ||
539 | |||
540 | case PTRACE_SETREGS: { /* Set all gp regs in the child. */ | ||
541 | unsigned long tmp; | ||
542 | if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { | ||
543 | ret = -EIO; | ||
544 | break; | ||
545 | } | ||
546 | for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { | ||
547 | __get_user(tmp, datap); | ||
548 | putreg(child, i, tmp); | ||
549 | datap++; | ||
550 | } | ||
551 | ret = 0; | ||
552 | break; | ||
553 | } | ||
554 | |||
555 | case PTRACE_GETFPREGS: { /* Get the child FPU state. */ | ||
556 | if (!access_ok(VERIFY_WRITE, datap, | ||
557 | sizeof(struct user_i387_struct))) { | ||
558 | ret = -EIO; | ||
559 | break; | ||
560 | } | ||
561 | ret = 0; | ||
562 | if (!tsk_used_math(child)) | ||
563 | init_fpu(child); | ||
564 | get_fpregs((struct user_i387_struct __user *)data, child); | ||
565 | break; | ||
566 | } | ||
567 | |||
568 | case PTRACE_SETFPREGS: { /* Set the child FPU state. */ | ||
569 | if (!access_ok(VERIFY_READ, datap, | ||
570 | sizeof(struct user_i387_struct))) { | ||
571 | ret = -EIO; | ||
572 | break; | ||
573 | } | ||
574 | set_stopped_child_used_math(child); | ||
575 | set_fpregs(child, (struct user_i387_struct __user *)data); | ||
576 | ret = 0; | ||
577 | break; | ||
578 | } | ||
579 | |||
580 | case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ | ||
581 | if (!access_ok(VERIFY_WRITE, datap, | ||
582 | sizeof(struct user_fxsr_struct))) { | ||
583 | ret = -EIO; | ||
584 | break; | ||
585 | } | ||
586 | if (!tsk_used_math(child)) | ||
587 | init_fpu(child); | ||
588 | ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); | ||
589 | break; | ||
590 | } | ||
591 | |||
592 | case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ | ||
593 | if (!access_ok(VERIFY_READ, datap, | ||
594 | sizeof(struct user_fxsr_struct))) { | ||
595 | ret = -EIO; | ||
596 | break; | ||
597 | } | ||
598 | set_stopped_child_used_math(child); | ||
599 | ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); | ||
600 | break; | ||
601 | } | ||
602 | |||
603 | case PTRACE_GET_THREAD_AREA: | ||
604 | ret = ptrace_get_thread_area(child, addr, | ||
605 | (struct user_desc __user *) data); | ||
606 | break; | ||
607 | |||
608 | case PTRACE_SET_THREAD_AREA: | ||
609 | ret = ptrace_set_thread_area(child, addr, | ||
610 | (struct user_desc __user *) data); | ||
611 | break; | ||
612 | |||
613 | default: | ||
614 | ret = ptrace_request(child, request, addr, data); | ||
615 | break; | ||
616 | } | ||
617 | out_tsk: | ||
618 | return ret; | ||
619 | } | ||
620 | |||
621 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | ||
622 | { | ||
623 | struct siginfo info; | ||
624 | |||
625 | tsk->thread.trap_no = 1; | ||
626 | tsk->thread.error_code = error_code; | ||
627 | |||
628 | memset(&info, 0, sizeof(info)); | ||
629 | info.si_signo = SIGTRAP; | ||
630 | info.si_code = TRAP_BRKPT; | ||
631 | |||
632 | /* User-mode eip? */ | ||
633 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; | ||
634 | |||
635 | /* Send us the fake SIGTRAP */ | ||
636 | force_sig_info(SIGTRAP, &info, tsk); | ||
637 | } | ||
638 | |||
639 | /* notification of system call entry/exit | ||
640 | * - triggered by current->work.syscall_trace | ||
641 | */ | ||
642 | __attribute__((regparm(3))) | ||
643 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
644 | { | ||
645 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
646 | /* | ||
647 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
648 | * interception | ||
649 | */ | ||
650 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
651 | int ret = 0; | ||
652 | |||
653 | /* do the secure computing check first */ | ||
654 | if (!entryexit) | ||
655 | secure_computing(regs->orig_eax); | ||
656 | |||
657 | if (unlikely(current->audit_context)) { | ||
658 | if (entryexit) | ||
659 | audit_syscall_exit(AUDITSC_RESULT(regs->eax), | ||
660 | regs->eax); | ||
661 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
662 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
663 | * not used, entry.S will call us only on syscall exit, not | ||
664 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
665 | * calling send_sigtrap() on syscall entry. | ||
666 | * | ||
667 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
668 | * is_singlestep is false, despite its name, so we will still do | ||
669 | * the correct thing. | ||
670 | */ | ||
671 | else if (is_singlestep) | ||
672 | goto out; | ||
673 | } | ||
674 | |||
675 | if (!(current->ptrace & PT_PTRACED)) | ||
676 | goto out; | ||
677 | |||
678 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
679 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
680 | * here. We have to check this and return */ | ||
681 | if (is_sysemu && entryexit) | ||
682 | return 0; | ||
683 | |||
684 | /* Fake a debug trap */ | ||
685 | if (is_singlestep) | ||
686 | send_sigtrap(current, regs, 0); | ||
687 | |||
688 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
689 | goto out; | ||
690 | |||
691 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
692 | between a syscall stop and SIGTRAP delivery */ | ||
693 | /* Note that the debugger could change the result of test_thread_flag!*/ | ||
694 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
695 | |||
696 | /* | ||
697 | * this isn't the same as continuing with a signal, but it will do | ||
698 | * for normal use. strace only continues with a signal if the | ||
699 | * stopping signal is not SIGTRAP. -brl | ||
700 | */ | ||
701 | if (current->exit_code) { | ||
702 | send_sig(current->exit_code, current, 1); | ||
703 | current->exit_code = 0; | ||
704 | } | ||
705 | ret = is_sysemu; | ||
706 | out: | ||
707 | if (unlikely(current->audit_context) && !entryexit) | ||
708 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, | ||
709 | regs->ebx, regs->ecx, regs->edx, regs->esi); | ||
710 | if (ret == 0) | ||
711 | return 0; | ||
712 | |||
713 | regs->orig_eax = -1; /* force skip of syscall restarting */ | ||
714 | if (unlikely(current->audit_context)) | ||
715 | audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); | ||
716 | return 1; | ||
717 | } | ||
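The syscall-stop protocol that do_syscall_trace() serves is easiest to see from the tracer side. The following is a hedged user-space illustration, not part of the patch: it uses the generic PTRACE_SYSCALL request with PTRACE_O_TRACESYSGOOD, so syscall stops arrive as SIGTRAP | 0x80, and it assumes a 32-bit x86 target where ORIG_EAX is provided by <sys/reg.h>.

/* Toy tracer: print the syscall number at every syscall stop of a
 * traced child.  Error handling is omitted for brevity.
 */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/reg.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
        pid_t child = fork();

        if (child == 0) {
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                execlp("true", "true", NULL);
                return 1;
        }

        waitpid(child, NULL, 0);        /* initial stop after execve() */
        ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)PTRACE_O_TRACESYSGOOD);

        for (;;) {
                int status;

                ptrace(PTRACE_SYSCALL, child, NULL, NULL);
                waitpid(child, &status, 0);
                if (WIFEXITED(status))
                        break;
                if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) {
                        long nr = ptrace(PTRACE_PEEKUSER, child,
                                         (void *)(ORIG_EAX * sizeof(long)), NULL);
                        printf("syscall %ld\n", nr);
                }
        }
        return 0;
}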
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c deleted file mode 100644 index 607085f3f08a..000000000000 --- a/arch/x86/kernel/ptrace_64.c +++ /dev/null | |||
@@ -1,621 +0,0 @@ | |||
1 | /* By Ross Biro 1/23/92 */ | ||
2 | /* | ||
3 | * Pentium III FXSR, SSE support | ||
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
5 | * | ||
6 | * x86-64 port 2000-2002 Andi Kleen | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/ptrace.h> | ||
15 | #include <linux/user.h> | ||
16 | #include <linux/security.h> | ||
17 | #include <linux/audit.h> | ||
18 | #include <linux/seccomp.h> | ||
19 | #include <linux/signal.h> | ||
20 | |||
21 | #include <asm/uaccess.h> | ||
22 | #include <asm/pgtable.h> | ||
23 | #include <asm/system.h> | ||
24 | #include <asm/processor.h> | ||
25 | #include <asm/i387.h> | ||
26 | #include <asm/debugreg.h> | ||
27 | #include <asm/ldt.h> | ||
28 | #include <asm/desc.h> | ||
29 | #include <asm/proto.h> | ||
30 | #include <asm/ia32.h> | ||
31 | |||
32 | /* | ||
33 | * This does not yet catch signals sent when the child dies | ||
34 | * in exit.c or in signal.c. | ||
35 | */ | ||
36 | |||
37 | /* | ||
38 | * Determines which flags the user has access to [1 = access, 0 = no access]. | ||
39 | * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). | ||
40 | * Also masks reserved bits (63-22, 15, 5, 3, 1). | ||
41 | */ | ||
42 | #define FLAG_MASK 0x54dd5UL | ||
43 | |||
44 | /* sets the trap flag. */ | ||
45 | #define TRAP_FLAG 0x100UL | ||
46 | |||
47 | /* | ||
48 | * eflags and offset of eflags on child stack.. | ||
49 | */ | ||
50 | #define EFLAGS offsetof(struct pt_regs, eflags) | ||
51 | #define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs))) | ||
52 | |||
53 | /* | ||
54 | * this routine will get a word off of the process's privileged stack. | ||
55 | * the offset is how far from the base addr as stored in the TSS. | ||
56 | * this routine assumes that all the privileged stacks are in our | ||
57 | * data space. | ||
58 | */ | ||
59 | static inline unsigned long get_stack_long(struct task_struct *task, int offset) | ||
60 | { | ||
61 | unsigned char *stack; | ||
62 | |||
63 | stack = (unsigned char *)task->thread.rsp0; | ||
64 | stack += offset; | ||
65 | return (*((unsigned long *)stack)); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * this routine will put a word on the process's privileged stack. | ||
70 | * the offset is how far from the base addr as stored in the TSS. | ||
71 | * this routine assumes that all the privileged stacks are in our | ||
72 | * data space. | ||
73 | */ | ||
74 | static inline long put_stack_long(struct task_struct *task, int offset, | ||
75 | unsigned long data) | ||
76 | { | ||
77 | unsigned char * stack; | ||
78 | |||
79 | stack = (unsigned char *) task->thread.rsp0; | ||
80 | stack += offset; | ||
81 | *(unsigned long *) stack = data; | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | #define LDT_SEGMENT 4 | ||
86 | |||
87 | unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs) | ||
88 | { | ||
89 | unsigned long addr, seg; | ||
90 | |||
91 | addr = regs->rip; | ||
92 | seg = regs->cs & 0xffff; | ||
93 | |||
94 | /* | ||
95 | * We'll assume that the code segments in the GDT | ||
96 | * are all zero-based. That is largely true: the | ||
97 | * TLS segments are used for data, and the PNPBIOS | ||
98 | * and APM bios ones we just ignore here. | ||
99 | */ | ||
100 | if (seg & LDT_SEGMENT) { | ||
101 | u32 *desc; | ||
102 | unsigned long base; | ||
103 | |||
104 | seg &= ~7UL; | ||
105 | |||
106 | mutex_lock(&child->mm->context.lock); | ||
107 | if (unlikely((seg >> 3) >= child->mm->context.size)) | ||
108 | addr = -1L; /* bogus selector, access would fault */ | ||
109 | else { | ||
110 | desc = child->mm->context.ldt + seg; | ||
111 | base = ((desc[0] >> 16) | | ||
112 | ((desc[1] & 0xff) << 16) | | ||
113 | (desc[1] & 0xff000000)); | ||
114 | |||
115 | /* 16-bit code segment? */ | ||
116 | if (!((desc[1] >> 22) & 1)) | ||
117 | addr &= 0xffff; | ||
118 | addr += base; | ||
119 | } | ||
120 | mutex_unlock(&child->mm->context.lock); | ||
121 | } | ||
122 | |||
123 | return addr; | ||
124 | } | ||
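The descriptor arithmetic in convert_rip_to_linear() reassembles the segment base from the two 32-bit words of an LDT descriptor. A standalone sketch of that decoding, using a hypothetical descriptor value and assuming the standard x86 descriptor layout:

/* base[15:0] sits in desc[0] bits 31:16, base[23:16] in desc[1] bits 7:0,
 * base[31:24] in desc[1] bits 31:24; bit 22 of desc[1] is the D flag
 * (0 = 16-bit code segment, 1 = 32-bit).
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

static uint32_t descriptor_base(const uint32_t desc[2])
{
        return (desc[0] >> 16) |
               ((desc[1] & 0xff) << 16) |
               (desc[1] & 0xff000000);
}

int main(void)
{
        /* hypothetical descriptor encoding base 0x12345678 */
        uint32_t desc[2] = { 0x5678ffff, 0x1240cf34 };

        printf("base = 0x%08" PRIx32 ", 32-bit code = %d\n",
               descriptor_base(desc), (int)((desc[1] >> 22) & 1));
        return 0;
}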
125 | |||
126 | static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | ||
127 | { | ||
128 | int i, copied; | ||
129 | unsigned char opcode[15]; | ||
130 | unsigned long addr = convert_rip_to_linear(child, regs); | ||
131 | |||
132 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | ||
133 | for (i = 0; i < copied; i++) { | ||
134 | switch (opcode[i]) { | ||
135 | /* popf and iret */ | ||
136 | case 0x9d: case 0xcf: | ||
137 | return 1; | ||
138 | |||
139 | /* CHECKME: 64 65 */ | ||
140 | |||
141 | /* opcode and address size prefixes */ | ||
142 | case 0x66: case 0x67: | ||
143 | continue; | ||
144 | /* irrelevant prefixes (segment overrides and repeats) */ | ||
145 | case 0x26: case 0x2e: | ||
146 | case 0x36: case 0x3e: | ||
147 | case 0x64: case 0x65: | ||
148 | case 0xf2: case 0xf3: | ||
149 | continue; | ||
150 | |||
151 | case 0x40 ... 0x4f: | ||
152 | if (regs->cs != __USER_CS) | ||
153 | /* 32-bit mode: register increment */ | ||
154 | return 0; | ||
155 | /* 64-bit mode: REX prefix */ | ||
156 | continue; | ||
157 | |||
158 | /* CHECKME: f2, f3 */ | ||
159 | |||
160 | /* | ||
161 | * pushf: NOTE! We should probably not let | ||
162 | * the user see the TF bit being set. But | ||
163 | * it's more pain than it's worth to avoid | ||
164 | * it, and a debugger could emulate this | ||
165 | * all in user space if it _really_ cares. | ||
166 | */ | ||
167 | case 0x9c: | ||
168 | default: | ||
169 | return 0; | ||
170 | } | ||
171 | } | ||
172 | return 0; | ||
173 | } | ||
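The prefix-skipping heuristic above can be exercised in isolation. The sketch below (not kernel code) mirrors the same byte walk on a hypothetical instruction buffer; like the kernel source, it relies on the GCC range-case extension.

/* Report whether the first non-prefix opcode is popf (0x9d) or iret
 * (0xcf), i.e. an instruction that may rewrite EFLAGS.TF by itself.
 */
#include <stdio.h>

static int sets_tf_itself(const unsigned char *op, int len, int long_mode)
{
        int i;

        for (i = 0; i < len; i++) {
                switch (op[i]) {
                case 0x9d: case 0xcf:                   /* popf, iret */
                        return 1;
                case 0x66: case 0x67:                   /* operand/address size */
                case 0x26: case 0x2e: case 0x36: case 0x3e:
                case 0x64: case 0x65:                   /* segment overrides */
                case 0xf2: case 0xf3:                   /* rep prefixes */
                        continue;
                case 0x40 ... 0x4f:                     /* REX, 64-bit mode only */
                        if (!long_mode)
                                return 0;
                        continue;
                default:
                        return 0;
                }
        }
        return 0;
}

int main(void)
{
        unsigned char popfq[] = { 0x48, 0x9d };         /* REX.W popfq */
        unsigned char mov[]   = { 0x89, 0xd8 };         /* mov %ebx,%eax */

        printf("%d %d\n", sets_tf_itself(popfq, 2, 1), sets_tf_itself(mov, 2, 1));
        return 0;
}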
174 | |||
175 | static void set_singlestep(struct task_struct *child) | ||
176 | { | ||
177 | struct pt_regs *regs = task_pt_regs(child); | ||
178 | |||
179 | /* | ||
180 | * Always set TIF_SINGLESTEP - this guarantees that | ||
181 | * we single-step system calls etc.. This will also | ||
182 | * cause us to set TF when returning to user mode. | ||
183 | */ | ||
184 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
185 | |||
186 | /* | ||
187 | * If TF was already set, don't do anything else | ||
188 | */ | ||
189 | if (regs->eflags & TRAP_FLAG) | ||
190 | return; | ||
191 | |||
192 | /* Set TF on the kernel stack.. */ | ||
193 | regs->eflags |= TRAP_FLAG; | ||
194 | |||
195 | /* | ||
196 | * ..but if TF is changed by the instruction we will trace, | ||
197 | * don't mark it as being "us" that set it, so that we | ||
198 | * won't clear it by hand later. | ||
199 | */ | ||
200 | if (is_setting_trap_flag(child, regs)) | ||
201 | return; | ||
202 | |||
203 | child->ptrace |= PT_DTRACE; | ||
204 | } | ||
205 | |||
206 | static void clear_singlestep(struct task_struct *child) | ||
207 | { | ||
208 | /* Always clear TIF_SINGLESTEP... */ | ||
209 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
210 | |||
211 | /* But touch TF only if it was set by us.. */ | ||
212 | if (child->ptrace & PT_DTRACE) { | ||
213 | struct pt_regs *regs = task_pt_regs(child); | ||
214 | regs->eflags &= ~TRAP_FLAG; | ||
215 | child->ptrace &= ~PT_DTRACE; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Called by kernel/ptrace.c when detaching.. | ||
221 | * | ||
222 | * Make sure the single step bit is not set. | ||
223 | */ | ||
224 | void ptrace_disable(struct task_struct *child) | ||
225 | { | ||
226 | clear_singlestep(child); | ||
227 | } | ||
228 | |||
229 | static int putreg(struct task_struct *child, | ||
230 | unsigned long regno, unsigned long value) | ||
231 | { | ||
232 | unsigned long tmp; | ||
233 | |||
234 | switch (regno) { | ||
235 | case offsetof(struct user_regs_struct,fs): | ||
236 | if (value && (value & 3) != 3) | ||
237 | return -EIO; | ||
238 | child->thread.fsindex = value & 0xffff; | ||
239 | return 0; | ||
240 | case offsetof(struct user_regs_struct,gs): | ||
241 | if (value && (value & 3) != 3) | ||
242 | return -EIO; | ||
243 | child->thread.gsindex = value & 0xffff; | ||
244 | return 0; | ||
245 | case offsetof(struct user_regs_struct,ds): | ||
246 | if (value && (value & 3) != 3) | ||
247 | return -EIO; | ||
248 | child->thread.ds = value & 0xffff; | ||
249 | return 0; | ||
250 | case offsetof(struct user_regs_struct,es): | ||
251 | if (value && (value & 3) != 3) | ||
252 | return -EIO; | ||
253 | child->thread.es = value & 0xffff; | ||
254 | return 0; | ||
255 | case offsetof(struct user_regs_struct,ss): | ||
256 | if ((value & 3) != 3) | ||
257 | return -EIO; | ||
258 | value &= 0xffff; | ||
259 | return 0; | ||
260 | case offsetof(struct user_regs_struct,fs_base): | ||
261 | if (value >= TASK_SIZE_OF(child)) | ||
262 | return -EIO; | ||
263 | child->thread.fs = value; | ||
264 | return 0; | ||
265 | case offsetof(struct user_regs_struct,gs_base): | ||
266 | if (value >= TASK_SIZE_OF(child)) | ||
267 | return -EIO; | ||
268 | child->thread.gs = value; | ||
269 | return 0; | ||
270 | case offsetof(struct user_regs_struct, eflags): | ||
271 | value &= FLAG_MASK; | ||
272 | tmp = get_stack_long(child, EFL_OFFSET); | ||
273 | tmp &= ~FLAG_MASK; | ||
274 | value |= tmp; | ||
275 | break; | ||
276 | case offsetof(struct user_regs_struct,cs): | ||
277 | if ((value & 3) != 3) | ||
278 | return -EIO; | ||
279 | value &= 0xffff; | ||
280 | break; | ||
281 | } | ||
282 | put_stack_long(child, regno - sizeof(struct pt_regs), value); | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | static unsigned long getreg(struct task_struct *child, unsigned long regno) | ||
287 | { | ||
288 | unsigned long val; | ||
289 | switch (regno) { | ||
290 | case offsetof(struct user_regs_struct, fs): | ||
291 | return child->thread.fsindex; | ||
292 | case offsetof(struct user_regs_struct, gs): | ||
293 | return child->thread.gsindex; | ||
294 | case offsetof(struct user_regs_struct, ds): | ||
295 | return child->thread.ds; | ||
296 | case offsetof(struct user_regs_struct, es): | ||
297 | return child->thread.es; | ||
298 | case offsetof(struct user_regs_struct, fs_base): | ||
299 | return child->thread.fs; | ||
300 | case offsetof(struct user_regs_struct, gs_base): | ||
301 | return child->thread.gs; | ||
302 | default: | ||
303 | regno = regno - sizeof(struct pt_regs); | ||
304 | val = get_stack_long(child, regno); | ||
305 | if (test_tsk_thread_flag(child, TIF_IA32)) | ||
306 | val &= 0xffffffff; | ||
307 | return val; | ||
308 | } | ||
309 | |||
310 | } | ||
311 | |||
312 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) | ||
313 | { | ||
314 | long i, ret; | ||
315 | unsigned ui; | ||
316 | |||
317 | switch (request) { | ||
318 | /* when I and D space are separate, these will need to be fixed. */ | ||
319 | case PTRACE_PEEKTEXT: /* read word at location addr. */ | ||
320 | case PTRACE_PEEKDATA: | ||
321 | ret = generic_ptrace_peekdata(child, addr, data); | ||
322 | break; | ||
323 | |||
324 | /* read the word at location addr in the USER area. */ | ||
325 | case PTRACE_PEEKUSR: { | ||
326 | unsigned long tmp; | ||
327 | |||
328 | ret = -EIO; | ||
329 | if ((addr & 7) || | ||
330 | addr > sizeof(struct user) - 7) | ||
331 | break; | ||
332 | |||
333 | switch (addr) { | ||
334 | case 0 ... sizeof(struct user_regs_struct) - sizeof(long): | ||
335 | tmp = getreg(child, addr); | ||
336 | break; | ||
337 | case offsetof(struct user, u_debugreg[0]): | ||
338 | tmp = child->thread.debugreg0; | ||
339 | break; | ||
340 | case offsetof(struct user, u_debugreg[1]): | ||
341 | tmp = child->thread.debugreg1; | ||
342 | break; | ||
343 | case offsetof(struct user, u_debugreg[2]): | ||
344 | tmp = child->thread.debugreg2; | ||
345 | break; | ||
346 | case offsetof(struct user, u_debugreg[3]): | ||
347 | tmp = child->thread.debugreg3; | ||
348 | break; | ||
349 | case offsetof(struct user, u_debugreg[6]): | ||
350 | tmp = child->thread.debugreg6; | ||
351 | break; | ||
352 | case offsetof(struct user, u_debugreg[7]): | ||
353 | tmp = child->thread.debugreg7; | ||
354 | break; | ||
355 | default: | ||
356 | tmp = 0; | ||
357 | break; | ||
358 | } | ||
359 | ret = put_user(tmp,(unsigned long __user *) data); | ||
360 | break; | ||
361 | } | ||
362 | |||
363 | /* when I and D space are separate, this will have to be fixed. */ | ||
364 | case PTRACE_POKETEXT: /* write the word at location addr. */ | ||
365 | case PTRACE_POKEDATA: | ||
366 | ret = generic_ptrace_pokedata(child, addr, data); | ||
367 | break; | ||
368 | |||
369 | case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ | ||
370 | { | ||
371 | int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; | ||
372 | ret = -EIO; | ||
373 | if ((addr & 7) || | ||
374 | addr > sizeof(struct user) - 7) | ||
375 | break; | ||
376 | |||
377 | switch (addr) { | ||
378 | case 0 ... sizeof(struct user_regs_struct) - sizeof(long): | ||
379 | ret = putreg(child, addr, data); | ||
380 | break; | ||
381 | /* Disallow setting a breakpoint in the vsyscall page */ | ||
382 | case offsetof(struct user, u_debugreg[0]): | ||
383 | if (data >= TASK_SIZE_OF(child) - dsize) break; | ||
384 | child->thread.debugreg0 = data; | ||
385 | ret = 0; | ||
386 | break; | ||
387 | case offsetof(struct user, u_debugreg[1]): | ||
388 | if (data >= TASK_SIZE_OF(child) - dsize) break; | ||
389 | child->thread.debugreg1 = data; | ||
390 | ret = 0; | ||
391 | break; | ||
392 | case offsetof(struct user, u_debugreg[2]): | ||
393 | if (data >= TASK_SIZE_OF(child) - dsize) break; | ||
394 | child->thread.debugreg2 = data; | ||
395 | ret = 0; | ||
396 | break; | ||
397 | case offsetof(struct user, u_debugreg[3]): | ||
398 | if (data >= TASK_SIZE_OF(child) - dsize) break; | ||
399 | child->thread.debugreg3 = data; | ||
400 | ret = 0; | ||
401 | break; | ||
402 | case offsetof(struct user, u_debugreg[6]): | ||
403 | if (data >> 32) | ||
404 | break; | ||
405 | child->thread.debugreg6 = data; | ||
406 | ret = 0; | ||
407 | break; | ||
408 | case offsetof(struct user, u_debugreg[7]): | ||
409 | /* See arch/i386/kernel/ptrace.c for an explanation of | ||
410 | * this awkward check.*/ | ||
411 | data &= ~DR_CONTROL_RESERVED; | ||
412 | for(i=0; i<4; i++) | ||
413 | if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
414 | break; | ||
415 | if (i == 4) { | ||
416 | child->thread.debugreg7 = data; | ||
417 | if (data) | ||
418 | set_tsk_thread_flag(child, TIF_DEBUG); | ||
419 | else | ||
420 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
421 | ret = 0; | ||
422 | } | ||
423 | break; | ||
424 | } | ||
425 | break; | ||
426 | } | ||
427 | case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ | ||
428 | case PTRACE_CONT: /* restart after signal. */ | ||
429 | |||
430 | ret = -EIO; | ||
431 | if (!valid_signal(data)) | ||
432 | break; | ||
433 | if (request == PTRACE_SYSCALL) | ||
434 | set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
435 | else | ||
436 | clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
437 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
438 | child->exit_code = data; | ||
439 | /* make sure the single step bit is not set. */ | ||
440 | clear_singlestep(child); | ||
441 | wake_up_process(child); | ||
442 | ret = 0; | ||
443 | break; | ||
444 | |||
445 | #ifdef CONFIG_IA32_EMULATION | ||
446 | /* This only makes sense with 32bit programs. Allow a | ||
447 | 64bit debugger to fully examine them too. Don't | ||
448 | use it against 64bit processes; use | ||
449 | PTRACE_ARCH_PRCTL instead. */ | ||
450 | case PTRACE_SET_THREAD_AREA: { | ||
451 | struct user_desc __user *p; | ||
452 | int old; | ||
453 | p = (struct user_desc __user *)data; | ||
454 | get_user(old, &p->entry_number); | ||
455 | put_user(addr, &p->entry_number); | ||
456 | ret = do_set_thread_area(&child->thread, p); | ||
457 | put_user(old, &p->entry_number); | ||
458 | break; | ||
459 | case PTRACE_GET_THREAD_AREA: | ||
460 | p = (struct user_desc __user *)data; | ||
461 | get_user(old, &p->entry_number); | ||
462 | put_user(addr, &p->entry_number); | ||
463 | ret = do_get_thread_area(&child->thread, p); | ||
464 | put_user(old, &p->entry_number); | ||
465 | break; | ||
466 | } | ||
467 | #endif | ||
468 | /* normal 64bit interface to access TLS data. | ||
469 | Works just like arch_prctl, except that the arguments | ||
470 | are reversed. */ | ||
471 | case PTRACE_ARCH_PRCTL: | ||
472 | ret = do_arch_prctl(child, data, addr); | ||
473 | break; | ||
474 | |||
475 | /* | ||
476 | * Make the child exit. The best we can do is send it a SIGKILL; | ||
477 | * perhaps it should be put in the status that it wants to | ||
478 | * exit. | ||
479 | */ | ||
480 | case PTRACE_KILL: | ||
481 | ret = 0; | ||
482 | if (child->exit_state == EXIT_ZOMBIE) /* already dead */ | ||
483 | break; | ||
484 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
485 | child->exit_code = SIGKILL; | ||
486 | /* make sure the single step bit is not set. */ | ||
487 | clear_singlestep(child); | ||
488 | wake_up_process(child); | ||
489 | break; | ||
490 | |||
491 | case PTRACE_SINGLESTEP: /* set the trap flag. */ | ||
492 | ret = -EIO; | ||
493 | if (!valid_signal(data)) | ||
494 | break; | ||
495 | clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
496 | set_singlestep(child); | ||
497 | child->exit_code = data; | ||
498 | /* give it a chance to run. */ | ||
499 | wake_up_process(child); | ||
500 | ret = 0; | ||
501 | break; | ||
502 | |||
503 | case PTRACE_GETREGS: { /* Get all gp regs from the child. */ | ||
504 | if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, | ||
505 | sizeof(struct user_regs_struct))) { | ||
506 | ret = -EIO; | ||
507 | break; | ||
508 | } | ||
509 | ret = 0; | ||
510 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { | ||
511 | ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); | ||
512 | data += sizeof(long); | ||
513 | } | ||
514 | break; | ||
515 | } | ||
516 | |||
517 | case PTRACE_SETREGS: { /* Set all gp regs in the child. */ | ||
518 | unsigned long tmp; | ||
519 | if (!access_ok(VERIFY_READ, (unsigned __user *)data, | ||
520 | sizeof(struct user_regs_struct))) { | ||
521 | ret = -EIO; | ||
522 | break; | ||
523 | } | ||
524 | ret = 0; | ||
525 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { | ||
526 | ret = __get_user(tmp, (unsigned long __user *) data); | ||
527 | if (ret) | ||
528 | break; | ||
529 | ret = putreg(child, ui, tmp); | ||
530 | if (ret) | ||
531 | break; | ||
532 | data += sizeof(long); | ||
533 | } | ||
534 | break; | ||
535 | } | ||
536 | |||
537 | case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ | ||
538 | if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, | ||
539 | sizeof(struct user_i387_struct))) { | ||
540 | ret = -EIO; | ||
541 | break; | ||
542 | } | ||
543 | ret = get_fpregs((struct user_i387_struct __user *)data, child); | ||
544 | break; | ||
545 | } | ||
546 | |||
547 | case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ | ||
548 | if (!access_ok(VERIFY_READ, (unsigned __user *)data, | ||
549 | sizeof(struct user_i387_struct))) { | ||
550 | ret = -EIO; | ||
551 | break; | ||
552 | } | ||
553 | set_stopped_child_used_math(child); | ||
554 | ret = set_fpregs(child, (struct user_i387_struct __user *)data); | ||
555 | break; | ||
556 | } | ||
557 | |||
558 | default: | ||
559 | ret = ptrace_request(child, request, addr, data); | ||
560 | break; | ||
561 | } | ||
562 | return ret; | ||
563 | } | ||
564 | |||
565 | static void syscall_trace(struct pt_regs *regs) | ||
566 | { | ||
567 | |||
568 | #if 0 | ||
569 | printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
570 | current->comm, | ||
571 | regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0), | ||
572 | current_thread_info()->flags, current->ptrace); | ||
573 | #endif | ||
574 | |||
575 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
576 | ? 0x80 : 0)); | ||
577 | /* | ||
578 | * this isn't the same as continuing with a signal, but it will do | ||
579 | * for normal use. strace only continues with a signal if the | ||
580 | * stopping signal is not SIGTRAP. -brl | ||
581 | */ | ||
582 | if (current->exit_code) { | ||
583 | send_sig(current->exit_code, current, 1); | ||
584 | current->exit_code = 0; | ||
585 | } | ||
586 | } | ||
587 | |||
588 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | ||
589 | { | ||
590 | /* do the secure computing check first */ | ||
591 | secure_computing(regs->orig_rax); | ||
592 | |||
593 | if (test_thread_flag(TIF_SYSCALL_TRACE) | ||
594 | && (current->ptrace & PT_PTRACED)) | ||
595 | syscall_trace(regs); | ||
596 | |||
597 | if (unlikely(current->audit_context)) { | ||
598 | if (test_thread_flag(TIF_IA32)) { | ||
599 | audit_syscall_entry(AUDIT_ARCH_I386, | ||
600 | regs->orig_rax, | ||
601 | regs->rbx, regs->rcx, | ||
602 | regs->rdx, regs->rsi); | ||
603 | } else { | ||
604 | audit_syscall_entry(AUDIT_ARCH_X86_64, | ||
605 | regs->orig_rax, | ||
606 | regs->rdi, regs->rsi, | ||
607 | regs->rdx, regs->r10); | ||
608 | } | ||
609 | } | ||
610 | } | ||
611 | |||
612 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | ||
613 | { | ||
614 | if (unlikely(current->audit_context)) | ||
615 | audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); | ||
616 | |||
617 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | ||
618 | || test_thread_flag(TIF_SINGLESTEP)) | ||
619 | && (current->ptrace & PT_PTRACED)) | ||
620 | syscall_trace(regs); | ||
621 | } | ||
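PTRACE_ARCH_PRCTL, noted in the comment above, takes its arguments in the opposite order from arch_prctl(): addr carries the value pointer and data the ARCH_* code. A hedged user-space sketch of a tracer reading a stopped task's FS base this way; it assumes x86-64, that <asm/prctl.h> provides ARCH_GET_FS, and that the caller is allowed to attach to the target (error handling omitted).

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <asm/prctl.h>

#ifndef PTRACE_ARCH_PRCTL
#define PTRACE_ARCH_PRCTL 30            /* x86-64 specific request number */
#endif

int main(int argc, char **argv)
{
        pid_t pid;
        unsigned long fs_base = 0;

        if (argc < 2)
                return 1;
        pid = (pid_t)atoi(argv[1]);     /* pid of a running process */

        ptrace(PTRACE_ATTACH, pid, NULL, NULL);
        waitpid(pid, NULL, 0);

        /* addr = where the kernel stores the value, data = the ARCH_* code */
        if (ptrace(PTRACE_ARCH_PRCTL, pid, &fs_base, ARCH_GET_FS) == 0)
                printf("FS base of %d: 0x%lx\n", (int)pid, fs_base);

        ptrace(PTRACE_DETACH, pid, NULL, NULL);
        return 0;
}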
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index fab30e134836..6ba33ca8715a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -30,8 +30,8 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | |||
30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | 30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); |
31 | 31 | ||
32 | if (!(word & (1 << 13))) { | 32 | if (!(word & (1 << 13))) { |
33 | printk(KERN_INFO "Intel E7520/7320/7525 detected. " | 33 | dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " |
34 | "Disabling irq balancing and affinity\n"); | 34 | "disabling irq balancing and affinity\n"); |
35 | #ifdef CONFIG_IRQBALANCE | 35 | #ifdef CONFIG_IRQBALANCE |
36 | irqbalance_disable(""); | 36 | irqbalance_disable(""); |
37 | #endif | 37 | #endif |
@@ -104,14 +104,16 @@ static void ich_force_enable_hpet(struct pci_dev *dev) | |||
104 | pci_read_config_dword(dev, 0xF0, &rcba); | 104 | pci_read_config_dword(dev, 0xF0, &rcba); |
105 | rcba &= 0xFFFFC000; | 105 | rcba &= 0xFFFFC000; |
106 | if (rcba == 0) { | 106 | if (rcba == 0) { |
107 | printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n"); | 107 | dev_printk(KERN_DEBUG, &dev->dev, "RCBA disabled; " |
108 | "cannot force enable HPET\n"); | ||
108 | return; | 109 | return; |
109 | } | 110 | } |
110 | 111 | ||
111 | /* use bits 31:14, 16 kB aligned */ | 112 | /* use bits 31:14, 16 kB aligned */ |
112 | rcba_base = ioremap_nocache(rcba, 0x4000); | 113 | rcba_base = ioremap_nocache(rcba, 0x4000); |
113 | if (rcba_base == NULL) { | 114 | if (rcba_base == NULL) { |
114 | printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n"); | 115 | dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; " |
116 | "cannot force enable HPET\n"); | ||
115 | return; | 117 | return; |
116 | } | 118 | } |
117 | 119 | ||
@@ -122,8 +124,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) | |||
122 | /* HPET is enabled in HPTC. Just not reported by BIOS */ | 124 | /* HPET is enabled in HPTC. Just not reported by BIOS */ |
123 | val = val & 0x3; | 125 | val = val & 0x3; |
124 | force_hpet_address = 0xFED00000 | (val << 12); | 126 | force_hpet_address = 0xFED00000 | (val << 12); |
125 | printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | 127 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " |
126 | force_hpet_address); | 128 | "0x%lx\n", force_hpet_address); |
127 | iounmap(rcba_base); | 129 | iounmap(rcba_base); |
128 | return; | 130 | return; |
129 | } | 131 | } |
@@ -142,11 +144,12 @@ static void ich_force_enable_hpet(struct pci_dev *dev) | |||
142 | if (err) { | 144 | if (err) { |
143 | force_hpet_address = 0; | 145 | force_hpet_address = 0; |
144 | iounmap(rcba_base); | 146 | iounmap(rcba_base); |
145 | printk(KERN_DEBUG "Failed to force enable HPET\n"); | 147 | dev_printk(KERN_DEBUG, &dev->dev, |
148 | "Failed to force enable HPET\n"); | ||
146 | } else { | 149 | } else { |
147 | force_hpet_resume_type = ICH_FORCE_HPET_RESUME; | 150 | force_hpet_resume_type = ICH_FORCE_HPET_RESUME; |
148 | printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | 151 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " |
149 | force_hpet_address); | 152 | "0x%lx\n", force_hpet_address); |
150 | } | 153 | } |
151 | } | 154 | } |
152 | 155 | ||
@@ -162,6 +165,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, | |||
162 | ich_force_enable_hpet); | 165 | ich_force_enable_hpet); |
163 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, | 166 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, |
164 | ich_force_enable_hpet); | 167 | ich_force_enable_hpet); |
168 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | ||
169 | ich_force_enable_hpet); | ||
165 | 170 | ||
166 | 171 | ||
167 | static struct pci_dev *cached_dev; | 172 | static struct pci_dev *cached_dev; |
@@ -206,8 +211,8 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) | |||
206 | if (val & 0x4) { | 211 | if (val & 0x4) { |
207 | val &= 0x3; | 212 | val &= 0x3; |
208 | force_hpet_address = 0xFED00000 | (val << 12); | 213 | force_hpet_address = 0xFED00000 | (val << 12); |
209 | printk(KERN_DEBUG "HPET at base address 0x%lx\n", | 214 | dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", |
210 | force_hpet_address); | 215 | force_hpet_address); |
211 | return; | 216 | return; |
212 | } | 217 | } |
213 | 218 | ||
@@ -227,14 +232,14 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) | |||
227 | /* HPET is enabled in HPTC. Just not reported by BIOS */ | 232 | /* HPET is enabled in HPTC. Just not reported by BIOS */ |
228 | val &= 0x3; | 233 | val &= 0x3; |
229 | force_hpet_address = 0xFED00000 | (val << 12); | 234 | force_hpet_address = 0xFED00000 | (val << 12); |
230 | printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | 235 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " |
231 | force_hpet_address); | 236 | "0x%lx\n", force_hpet_address); |
232 | cached_dev = dev; | 237 | cached_dev = dev; |
233 | force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; | 238 | force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; |
234 | return; | 239 | return; |
235 | } | 240 | } |
236 | 241 | ||
237 | printk(KERN_DEBUG "Failed to force enable HPET\n"); | 242 | dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); |
238 | } | 243 | } |
239 | 244 | ||
240 | /* | 245 | /* |
@@ -292,8 +297,8 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) | |||
292 | */ | 297 | */ |
293 | if (val & 0x80) { | 298 | if (val & 0x80) { |
294 | force_hpet_address = (val & ~0x3ff); | 299 | force_hpet_address = (val & ~0x3ff); |
295 | printk(KERN_DEBUG "HPET at base address 0x%lx\n", | 300 | dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n", |
296 | force_hpet_address); | 301 | force_hpet_address); |
297 | return; | 302 | return; |
298 | } | 303 | } |
299 | 304 | ||
@@ -307,14 +312,14 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) | |||
307 | pci_read_config_dword(dev, 0x68, &val); | 312 | pci_read_config_dword(dev, 0x68, &val); |
308 | if (val & 0x80) { | 313 | if (val & 0x80) { |
309 | force_hpet_address = (val & ~0x3ff); | 314 | force_hpet_address = (val & ~0x3ff); |
310 | printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | 315 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " |
311 | force_hpet_address); | 316 | "0x%lx\n", force_hpet_address); |
312 | cached_dev = dev; | 317 | cached_dev = dev; |
313 | force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; | 318 | force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; |
314 | return; | 319 | return; |
315 | } | 320 | } |
316 | 321 | ||
317 | printk(KERN_DEBUG "Failed to force enable HPET\n"); | 322 | dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n"); |
318 | } | 323 | } |
319 | 324 | ||
320 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, | 325 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, |
@@ -342,7 +347,7 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) | |||
342 | pci_read_config_dword(dev, 0x44, &val); | 347 | pci_read_config_dword(dev, 0x44, &val); |
343 | force_hpet_address = val & 0xfffffffe; | 348 | force_hpet_address = val & 0xfffffffe; |
344 | force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; | 349 | force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; |
345 | printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", | 350 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", |
346 | force_hpet_address); | 351 | force_hpet_address); |
347 | cached_dev = dev; | 352 | cached_dev = dev; |
348 | return; | 353 | return; |
@@ -375,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, | |||
375 | void force_hpet_resume(void) | 380 | void force_hpet_resume(void) |
376 | { | 381 | { |
377 | switch (force_hpet_resume_type) { | 382 | switch (force_hpet_resume_type) { |
378 | case ICH_FORCE_HPET_RESUME: | 383 | case ICH_FORCE_HPET_RESUME: |
379 | return ich_force_hpet_resume(); | 384 | ich_force_hpet_resume(); |
380 | 385 | return; | |
381 | case OLD_ICH_FORCE_HPET_RESUME: | 386 | case OLD_ICH_FORCE_HPET_RESUME: |
382 | return old_ich_force_hpet_resume(); | 387 | old_ich_force_hpet_resume(); |
383 | 388 | return; | |
384 | case VT8237_FORCE_HPET_RESUME: | 389 | case VT8237_FORCE_HPET_RESUME: |
385 | return vt8237_force_hpet_resume(); | 390 | vt8237_force_hpet_resume(); |
386 | 391 | return; | |
387 | case NVIDIA_FORCE_HPET_RESUME: | 392 | case NVIDIA_FORCE_HPET_RESUME: |
388 | return nvidia_force_hpet_resume(); | 393 | nvidia_force_hpet_resume(); |
389 | 394 | return; | |
390 | default: | 395 | default: |
391 | break; | 396 | break; |
392 | } | 397 | } |
393 | } | 398 | } |
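Several of the quirk paths above decode the ICH HPTC address-select field as 0xFED00000 | (val << 12), with val masked to two bits. A tiny standalone program showing the four base addresses that decoding can yield:

#include <stdio.h>

int main(void)
{
        unsigned int val;

        for (val = 0; val < 4; val++)
                printf("HPTC AS=%u -> HPET at 0x%08lx\n",
                       val, 0xFED00000UL | ((unsigned long)val << 12));
        return 0;
}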
diff --git a/arch/x86/kernel/reboot_32.c b/arch/x86/kernel/reboot.c index bb1a0f889c5e..5818dc28167d 100644 --- a/arch/x86/kernel/reboot_32.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -1,64 +1,94 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/module.h> | 1 | #include <linux/module.h> |
3 | #include <linux/delay.h> | ||
4 | #include <linux/init.h> | 2 | #include <linux/init.h> |
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/mc146818rtc.h> | ||
7 | #include <linux/efi.h> | ||
8 | #include <linux/dmi.h> | ||
9 | #include <linux/ctype.h> | ||
10 | #include <linux/pm.h> | ||
11 | #include <linux/reboot.h> | 3 | #include <linux/reboot.h> |
12 | #include <asm/uaccess.h> | 4 | #include <linux/init.h> |
5 | #include <linux/pm.h> | ||
6 | #include <linux/efi.h> | ||
7 | #include <acpi/reboot.h> | ||
8 | #include <asm/io.h> | ||
13 | #include <asm/apic.h> | 9 | #include <asm/apic.h> |
14 | #include <asm/hpet.h> | ||
15 | #include <asm/desc.h> | 10 | #include <asm/desc.h> |
16 | #include "mach_reboot.h" | 11 | #include <asm/hpet.h> |
17 | #include <asm/reboot_fixups.h> | 12 | #include <asm/reboot_fixups.h> |
18 | #include <asm/reboot.h> | 13 | #include <asm/reboot.h> |
19 | 14 | ||
15 | #ifdef CONFIG_X86_32 | ||
16 | # include <linux/dmi.h> | ||
17 | # include <linux/ctype.h> | ||
18 | # include <linux/mc146818rtc.h> | ||
19 | # include <asm/pgtable.h> | ||
20 | #else | ||
21 | # include <asm/iommu.h> | ||
22 | #endif | ||
23 | |||
20 | /* | 24 | /* |
21 | * Power off function, if any | 25 | * Power off function, if any |
22 | */ | 26 | */ |
23 | void (*pm_power_off)(void); | 27 | void (*pm_power_off)(void); |
24 | EXPORT_SYMBOL(pm_power_off); | 28 | EXPORT_SYMBOL(pm_power_off); |
25 | 29 | ||
30 | static long no_idt[3]; | ||
26 | static int reboot_mode; | 31 | static int reboot_mode; |
27 | static int reboot_thru_bios; | 32 | enum reboot_type reboot_type = BOOT_KBD; |
33 | int reboot_force; | ||
28 | 34 | ||
29 | #ifdef CONFIG_SMP | 35 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
30 | static int reboot_cpu = -1; | 36 | static int reboot_cpu = -1; |
31 | #endif | 37 | #endif |
38 | |||
39 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old] | ||
40 | warm Don't set the cold reboot flag | ||
41 | cold Set the cold reboot flag | ||
42 | bios Reboot by jumping through the BIOS (only for X86_32) | ||
43 | smp Reboot by executing reset on BSP or other CPU (only for X86_32) | ||
44 | triple Force a triple fault (init) | ||
45 | kbd Use the keyboard controller; cold reset (default) | ||
46 | acpi Use the RESET_REG in the FADT | ||
47 | efi Use efi reset_system runtime service | ||
48 | force Avoid anything that could hang. | ||
49 | */ | ||
32 | static int __init reboot_setup(char *str) | 50 | static int __init reboot_setup(char *str) |
33 | { | 51 | { |
34 | while(1) { | 52 | for (;;) { |
35 | switch (*str) { | 53 | switch (*str) { |
36 | case 'w': /* "warm" reboot (no memory testing etc) */ | 54 | case 'w': |
37 | reboot_mode = 0x1234; | 55 | reboot_mode = 0x1234; |
38 | break; | 56 | break; |
39 | case 'c': /* "cold" reboot (with memory testing etc) */ | 57 | |
40 | reboot_mode = 0x0; | 58 | case 'c': |
41 | break; | 59 | reboot_mode = 0; |
42 | case 'b': /* "bios" reboot by jumping through the BIOS */ | ||
43 | reboot_thru_bios = 1; | ||
44 | break; | ||
45 | case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ | ||
46 | reboot_thru_bios = 0; | ||
47 | break; | 60 | break; |
61 | |||
62 | #ifdef CONFIG_X86_32 | ||
48 | #ifdef CONFIG_SMP | 63 | #ifdef CONFIG_SMP |
49 | case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ | 64 | case 's': |
50 | if (isdigit(*(str+1))) { | 65 | if (isdigit(*(str+1))) { |
51 | reboot_cpu = (int) (*(str+1) - '0'); | 66 | reboot_cpu = (int) (*(str+1) - '0'); |
52 | if (isdigit(*(str+2))) | 67 | if (isdigit(*(str+2))) |
53 | reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); | 68 | reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); |
54 | } | 69 | } |
55 | /* we will leave sorting out the final value | 70 | /* we will leave sorting out the final value |
56 | when we are ready to reboot, since we might not | 71 | when we are ready to reboot, since we might not |
57 | have set up boot_cpu_id or smp_num_cpu */ | 72 | have set up boot_cpu_id or smp_num_cpu */ |
58 | break; | 73 | break; |
74 | #endif /* CONFIG_SMP */ | ||
75 | |||
76 | case 'b': | ||
59 | #endif | 77 | #endif |
78 | case 'a': | ||
79 | case 'k': | ||
80 | case 't': | ||
81 | case 'e': | ||
82 | reboot_type = *str; | ||
83 | break; | ||
84 | |||
85 | case 'f': | ||
86 | reboot_force = 1; | ||
87 | break; | ||
60 | } | 88 | } |
61 | if((str = strchr(str,',')) != NULL) | 89 | |
90 | str = strchr(str, ','); | ||
91 | if (str) | ||
62 | str++; | 92 | str++; |
63 | else | 93 | else |
64 | break; | 94 | break; |
@@ -68,18 +98,21 @@ static int __init reboot_setup(char *str) | |||
68 | 98 | ||
69 | __setup("reboot=", reboot_setup); | 99 | __setup("reboot=", reboot_setup); |
70 | 100 | ||
101 | |||
102 | #ifdef CONFIG_X86_32 | ||
71 | /* | 103 | /* |
72 | * Reboot options and system auto-detection code provided by | 104 | * Reboot options and system auto-detection code provided by |
73 | * Dell Inc. so their systems "just work". :-) | 105 | * Dell Inc. so their systems "just work". :-) |
74 | */ | 106 | */ |
75 | 107 | ||
76 | /* | 108 | /* |
77 | * Some machines require the "reboot=b" commandline option, this quirk makes that automatic. | 109 | * Some machines require the "reboot=b" commandline option, |
110 | * this quirk makes that automatic. | ||
78 | */ | 111 | */ |
79 | static int __init set_bios_reboot(const struct dmi_system_id *d) | 112 | static int __init set_bios_reboot(const struct dmi_system_id *d) |
80 | { | 113 | { |
81 | if (!reboot_thru_bios) { | 114 | if (reboot_type != BOOT_BIOS) { |
82 | reboot_thru_bios = 1; | 115 | reboot_type = BOOT_BIOS; |
83 | printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); | 116 | printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); |
84 | } | 117 | } |
85 | return 0; | 118 | return 0; |
@@ -143,7 +176,6 @@ static int __init reboot_init(void) | |||
143 | dmi_check_system(reboot_dmi_table); | 176 | dmi_check_system(reboot_dmi_table); |
144 | return 0; | 177 | return 0; |
145 | } | 178 | } |
146 | |||
147 | core_initcall(reboot_init); | 179 | core_initcall(reboot_init); |
148 | 180 | ||
149 | /* The following code and data reboots the machine by switching to real | 181 | /* The following code and data reboots the machine by switching to real |
@@ -152,7 +184,6 @@ core_initcall(reboot_init); | |||
152 | controller to pulse the CPU reset line, which is more thorough, but | 184 | controller to pulse the CPU reset line, which is more thorough, but |
153 | doesn't work with at least one type of 486 motherboard. It is easy | 185 | doesn't work with at least one type of 486 motherboard. It is easy |
154 | to stop this code working; hence the copious comments. */ | 186 | to stop this code working; hence the copious comments. */ |
155 | |||
156 | static unsigned long long | 187 | static unsigned long long |
157 | real_mode_gdt_entries [3] = | 188 | real_mode_gdt_entries [3] = |
158 | { | 189 | { |
@@ -161,11 +192,9 @@ real_mode_gdt_entries [3] = | |||
161 | 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ | 192 | 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ |
162 | }; | 193 | }; |
163 | 194 | ||
164 | static struct Xgt_desc_struct | 195 | static struct desc_ptr |
165 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, | 196 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, |
166 | real_mode_idt = { 0x3ff, 0 }, | 197 | real_mode_idt = { 0x3ff, 0 }; |
167 | no_idt = { 0, 0 }; | ||
168 | |||
169 | 198 | ||
170 | /* This is 16-bit protected mode code to disable paging and the cache, | 199 | /* This is 16-bit protected mode code to disable paging and the cache, |
171 | switch to real mode and jump to the BIOS reset code. | 200 | switch to real mode and jump to the BIOS reset code. |
@@ -185,7 +214,6 @@ no_idt = { 0, 0 }; | |||
185 | 214 | ||
186 | More could be done here to set up the registers as if a CPU reset had | 215 | More could be done here to set up the registers as if a CPU reset had |
187 | occurred; hopefully real BIOSs don't assume much. */ | 216 | occurred; hopefully real BIOSs don't assume much. */ |
188 | |||
189 | static unsigned char real_mode_switch [] = | 217 | static unsigned char real_mode_switch [] = |
190 | { | 218 | { |
191 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ | 219 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ |
@@ -223,7 +251,6 @@ void machine_real_restart(unsigned char *code, int length) | |||
223 | `outb_p' is needed instead of just `outb'. Use it to be on the | 251 | `outb_p' is needed instead of just `outb'. Use it to be on the |
224 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | 252 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) |
225 | */ | 253 | */ |
226 | |||
227 | spin_lock(&rtc_lock); | 254 | spin_lock(&rtc_lock); |
228 | CMOS_WRITE(0x00, 0x8f); | 255 | CMOS_WRITE(0x00, 0x8f); |
229 | spin_unlock(&rtc_lock); | 256 | spin_unlock(&rtc_lock); |
@@ -231,9 +258,8 @@ void machine_real_restart(unsigned char *code, int length) | |||
231 | /* Remap the kernel at virtual address zero, as well as offset zero | 258 | /* Remap the kernel at virtual address zero, as well as offset zero |
232 | from the kernel segment. This assumes the kernel segment starts at | 259 | from the kernel segment. This assumes the kernel segment starts at |
233 | virtual address PAGE_OFFSET. */ | 260 | virtual address PAGE_OFFSET. */ |
234 | 261 | memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | |
235 | memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 262 | sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); |
236 | sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); | ||
237 | 263 | ||
238 | /* | 264 | /* |
239 | * Use `swapper_pg_dir' as our page directory. | 265 | * Use `swapper_pg_dir' as our page directory. |
@@ -245,7 +271,6 @@ void machine_real_restart(unsigned char *code, int length) | |||
245 | boot)". This seems like a fairly standard thing that gets set by | 271 | boot)". This seems like a fairly standard thing that gets set by |
246 | REBOOT.COM programs, and the previous reset routine did this | 272 | REBOOT.COM programs, and the previous reset routine did this |
247 | too. */ | 273 | too. */ |
248 | |||
249 | *((unsigned short *)0x472) = reboot_mode; | 274 | *((unsigned short *)0x472) = reboot_mode; |
250 | 275 | ||
251 | /* For the switch to real mode, copy some code to low memory. It has | 276 | /* For the switch to real mode, copy some code to low memory. It has |
@@ -253,19 +278,16 @@ void machine_real_restart(unsigned char *code, int length) | |||
253 | has to have the same physical and virtual address, because it turns | 278 | has to have the same physical and virtual address, because it turns |
254 | off paging. Copy it near the end of the first page, out of the way | 279 | off paging. Copy it near the end of the first page, out of the way |
255 | of BIOS variables. */ | 280 | of BIOS variables. */ |
256 | 281 | memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), | |
257 | memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), | ||
258 | real_mode_switch, sizeof (real_mode_switch)); | 282 | real_mode_switch, sizeof (real_mode_switch)); |
259 | memcpy ((void *) (0x1000 - 100), code, length); | 283 | memcpy((void *)(0x1000 - 100), code, length); |
260 | 284 | ||
261 | /* Set up the IDT for real mode. */ | 285 | /* Set up the IDT for real mode. */ |
262 | |||
263 | load_idt(&real_mode_idt); | 286 | load_idt(&real_mode_idt); |
264 | 287 | ||
265 | /* Set up a GDT from which we can load segment descriptors for real | 288 | /* Set up a GDT from which we can load segment descriptors for real |
266 | mode. The GDT is not used in real mode; it is just needed here to | 289 | mode. The GDT is not used in real mode; it is just needed here to |
267 | prepare the descriptors. */ | 290 | prepare the descriptors. */ |
268 | |||
269 | load_gdt(&real_mode_gdt); | 291 | load_gdt(&real_mode_gdt); |
270 | 292 | ||
271 | /* Load the data segment registers, and thus the descriptors ready for | 293 | /* Load the data segment registers, and thus the descriptors ready for |
@@ -273,7 +295,6 @@ void machine_real_restart(unsigned char *code, int length) | |||
273 | selector value being loaded here. This is so that the segment | 295 | selector value being loaded here. This is so that the segment |
274 | registers don't have to be reloaded after switching to real mode: | 296 | registers don't have to be reloaded after switching to real mode: |
275 | the values are consistent for real mode operation already. */ | 297 | the values are consistent for real mode operation already. */ |
276 | |||
277 | __asm__ __volatile__ ("movl $0x0010,%%eax\n" | 298 | __asm__ __volatile__ ("movl $0x0010,%%eax\n" |
278 | "\tmovl %%eax,%%ds\n" | 299 | "\tmovl %%eax,%%ds\n" |
279 | "\tmovl %%eax,%%es\n" | 300 | "\tmovl %%eax,%%es\n" |
@@ -284,130 +305,147 @@ void machine_real_restart(unsigned char *code, int length) | |||
284 | /* Jump to the 16-bit code that we copied earlier. It disables paging | 305 | /* Jump to the 16-bit code that we copied earlier. It disables paging |
285 | and the cache, switches to real mode, and jumps to the BIOS reset | 306 | and the cache, switches to real mode, and jumps to the BIOS reset |
286 | entry point. */ | 307 | entry point. */ |
287 | |||
288 | __asm__ __volatile__ ("ljmp $0x0008,%0" | 308 | __asm__ __volatile__ ("ljmp $0x0008,%0" |
289 | : | 309 | : |
290 | : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); | 310 | : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100))); |
291 | } | 311 | } |
292 | #ifdef CONFIG_APM_MODULE | 312 | #ifdef CONFIG_APM_MODULE |
293 | EXPORT_SYMBOL(machine_real_restart); | 313 | EXPORT_SYMBOL(machine_real_restart); |
294 | #endif | 314 | #endif |
295 | 315 | ||
296 | static void native_machine_shutdown(void) | 316 | #endif /* CONFIG_X86_32 */ |
317 | |||
318 | static inline void kb_wait(void) | ||
319 | { | ||
320 | int i; | ||
321 | |||
322 | for (i = 0; i < 0x10000; i++) { | ||
323 | if ((inb(0x64) & 0x02) == 0) | ||
324 | break; | ||
325 | udelay(2); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | void machine_emergency_restart(void) | ||
330 | { | ||
331 | int i; | ||
332 | |||
333 | /* Tell the BIOS if we want cold or warm reboot */ | ||
334 | *((unsigned short *)__va(0x472)) = reboot_mode; | ||
335 | |||
336 | for (;;) { | ||
337 | /* Could also try the reset bit in the Hammer NB */ | ||
338 | switch (reboot_type) { | ||
339 | case BOOT_KBD: | ||
340 | for (i = 0; i < 10; i++) { | ||
341 | kb_wait(); | ||
342 | udelay(50); | ||
343 | outb(0xfe, 0x64); /* pulse reset low */ | ||
344 | udelay(50); | ||
345 | } | ||
346 | |||
347 | case BOOT_TRIPLE: | ||
348 | load_idt((const struct desc_ptr *)&no_idt); | ||
349 | __asm__ __volatile__("int3"); | ||
350 | |||
351 | reboot_type = BOOT_KBD; | ||
352 | break; | ||
353 | |||
354 | #ifdef CONFIG_X86_32 | ||
355 | case BOOT_BIOS: | ||
356 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | ||
357 | |||
358 | reboot_type = BOOT_KBD; | ||
359 | break; | ||
360 | #endif | ||
361 | |||
362 | case BOOT_ACPI: | ||
363 | acpi_reboot(); | ||
364 | reboot_type = BOOT_KBD; | ||
365 | break; | ||
366 | |||
367 | |||
368 | case BOOT_EFI: | ||
369 | if (efi_enabled) | ||
370 | efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, | ||
371 | EFI_SUCCESS, 0, NULL); | ||
372 | |||
373 | reboot_type = BOOT_KBD; | ||
374 | break; | ||
375 | } | ||
376 | } | ||
377 | } | ||
378 | |||
379 | void machine_shutdown(void) | ||
297 | { | 380 | { |
381 | /* Stop the cpus and apics */ | ||
298 | #ifdef CONFIG_SMP | 382 | #ifdef CONFIG_SMP |
299 | int reboot_cpu_id; | 383 | int reboot_cpu_id; |
300 | 384 | ||
301 | /* The boot cpu is always logical cpu 0 */ | 385 | /* The boot cpu is always logical cpu 0 */ |
302 | reboot_cpu_id = 0; | 386 | reboot_cpu_id = 0; |
303 | 387 | ||
388 | #ifdef CONFIG_X86_32 | ||
304 | /* See if there has been given a command line override */ | 389 | /* See if there has been given a command line override */ |
305 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && | 390 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && |
306 | cpu_isset(reboot_cpu, cpu_online_map)) { | 391 | cpu_isset(reboot_cpu, cpu_online_map)) |
307 | reboot_cpu_id = reboot_cpu; | 392 | reboot_cpu_id = reboot_cpu; |
308 | } | 393 | #endif |
309 | 394 | ||
310 | /* Make certain the cpu I'm rebooting on is online */ | 395 | /* Make certain the cpu I'm about to reboot on is online */ |
311 | if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { | 396 | if (!cpu_isset(reboot_cpu_id, cpu_online_map)) |
312 | reboot_cpu_id = smp_processor_id(); | 397 | reboot_cpu_id = smp_processor_id(); |
313 | } | ||
314 | 398 | ||
315 | /* Make certain I only run on the appropriate processor */ | 399 | /* Make certain I only run on the appropriate processor */ |
316 | set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); | 400 | set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); |
317 | 401 | ||
318 | /* O.K. Now that I'm on the appropriate processor, stop | 402 | /* O.K Now that I'm on the appropriate processor, |
319 | * all of the others, and disable their local APICs. | 403 | * stop all of the others. |
320 | */ | 404 | */ |
321 | |||
322 | smp_send_stop(); | 405 | smp_send_stop(); |
323 | #endif /* CONFIG_SMP */ | 406 | #endif |
324 | 407 | ||
325 | lapic_shutdown(); | 408 | lapic_shutdown(); |
326 | 409 | ||
327 | #ifdef CONFIG_X86_IO_APIC | 410 | #ifdef CONFIG_X86_IO_APIC |
328 | disable_IO_APIC(); | 411 | disable_IO_APIC(); |
329 | #endif | 412 | #endif |
413 | |||
330 | #ifdef CONFIG_HPET_TIMER | 414 | #ifdef CONFIG_HPET_TIMER |
331 | hpet_disable(); | 415 | hpet_disable(); |
332 | #endif | 416 | #endif |
333 | } | ||
334 | 417 | ||
335 | void __attribute__((weak)) mach_reboot_fixups(void) | 418 | #ifdef CONFIG_X86_64 |
336 | { | 419 | pci_iommu_shutdown(); |
420 | #endif | ||
337 | } | 421 | } |
338 | 422 | ||
339 | static void native_machine_emergency_restart(void) | 423 | void machine_restart(char *__unused) |
340 | { | 424 | { |
341 | if (!reboot_thru_bios) { | 425 | printk("machine restart\n"); |
342 | if (efi_enabled) { | ||
343 | efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); | ||
344 | load_idt(&no_idt); | ||
345 | __asm__ __volatile__("int3"); | ||
346 | } | ||
347 | /* rebooting needs to touch the page at absolute addr 0 */ | ||
348 | *((unsigned short *)__va(0x472)) = reboot_mode; | ||
349 | for (;;) { | ||
350 | mach_reboot_fixups(); /* for board specific fixups */ | ||
351 | mach_reboot(); | ||
352 | /* That didn't work - force a triple fault.. */ | ||
353 | load_idt(&no_idt); | ||
354 | __asm__ __volatile__("int3"); | ||
355 | } | ||
356 | } | ||
357 | if (efi_enabled) | ||
358 | efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL); | ||
359 | 426 | ||
360 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | 427 | if (!reboot_force) |
361 | } | 428 | machine_shutdown(); |
362 | |||
363 | static void native_machine_restart(char * __unused) | ||
364 | { | ||
365 | machine_shutdown(); | ||
366 | machine_emergency_restart(); | 429 | machine_emergency_restart(); |
367 | } | 430 | } |
368 | 431 | ||
369 | static void native_machine_halt(void) | 432 | void machine_halt(void) |
370 | { | 433 | { |
371 | } | 434 | } |
372 | 435 | ||
373 | static void native_machine_power_off(void) | 436 | void machine_power_off(void) |
374 | { | 437 | { |
375 | if (pm_power_off) { | 438 | if (pm_power_off) { |
376 | machine_shutdown(); | 439 | if (!reboot_force) |
440 | machine_shutdown(); | ||
377 | pm_power_off(); | 441 | pm_power_off(); |
378 | } | 442 | } |
379 | } | 443 | } |
380 | 444 | ||
381 | |||
382 | struct machine_ops machine_ops = { | 445 | struct machine_ops machine_ops = { |
383 | .power_off = native_machine_power_off, | 446 | .power_off = machine_power_off, |
384 | .shutdown = native_machine_shutdown, | 447 | .shutdown = machine_shutdown, |
385 | .emergency_restart = native_machine_emergency_restart, | 448 | .emergency_restart = machine_emergency_restart, |
386 | .restart = native_machine_restart, | 449 | .restart = machine_restart, |
387 | .halt = native_machine_halt, | 450 | .halt = machine_halt |
388 | }; | 451 | }; |
389 | |||
390 | void machine_power_off(void) | ||
391 | { | ||
392 | machine_ops.power_off(); | ||
393 | } | ||
394 | |||
395 | void machine_shutdown(void) | ||
396 | { | ||
397 | machine_ops.shutdown(); | ||
398 | } | ||
399 | |||
400 | void machine_emergency_restart(void) | ||
401 | { | ||
402 | machine_ops.emergency_restart(); | ||
403 | } | ||
404 | |||
405 | void machine_restart(char *cmd) | ||
406 | { | ||
407 | machine_ops.restart(cmd); | ||
408 | } | ||
409 | |||
410 | void machine_halt(void) | ||
411 | { | ||
412 | machine_ops.halt(); | ||
413 | } | ||
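The unified reboot.c keeps the exported machine_*() entry points as thin wrappers that dispatch through the machine_ops function-pointer table, so an alternative implementation (for example a paravirtualized kernel) can take over by repointing the table entries while callers keep using the same symbols. A generic sketch of that indirection pattern, with purely illustrative names:

    /* Sketch: ops-table indirection behind stable exported wrappers. */
    struct demo_ops {
    	void (*restart)(char *cmd);
    	void (*halt)(void);
    };

    static void demo_native_restart(char *cmd) { /* native implementation */ }
    static void demo_native_halt(void) { }

    static struct demo_ops demo_ops = {
    	.restart = demo_native_restart,
    	.halt    = demo_native_halt,
    };

    /* callers use these; the table can be replaced early at boot */
    void demo_restart(char *cmd) { demo_ops.restart(cmd); }
    void demo_halt(void) { demo_ops.halt(); }
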
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c deleted file mode 100644 index 53620a92a8fd..000000000000 --- a/arch/x86/kernel/reboot_64.c +++ /dev/null | |||
@@ -1,176 +0,0 @@ | |||
1 | /* Various gunk just to reboot the machine. */ | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/reboot.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/smp.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/ctype.h> | ||
8 | #include <linux/string.h> | ||
9 | #include <linux/pm.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <asm/io.h> | ||
13 | #include <asm/delay.h> | ||
14 | #include <asm/desc.h> | ||
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/system.h> | ||
17 | #include <asm/pgtable.h> | ||
18 | #include <asm/tlbflush.h> | ||
19 | #include <asm/apic.h> | ||
20 | #include <asm/hpet.h> | ||
21 | #include <asm/gart.h> | ||
22 | |||
23 | /* | ||
24 | * Power off function, if any | ||
25 | */ | ||
26 | void (*pm_power_off)(void); | ||
27 | EXPORT_SYMBOL(pm_power_off); | ||
28 | |||
29 | static long no_idt[3]; | ||
30 | static enum { | ||
31 | BOOT_TRIPLE = 't', | ||
32 | BOOT_KBD = 'k' | ||
33 | } reboot_type = BOOT_KBD; | ||
34 | static int reboot_mode = 0; | ||
35 | int reboot_force; | ||
36 | |||
37 | /* reboot=t[riple] | k[bd] [, [w]arm | [c]old] | ||
38 | warm Don't set the cold reboot flag | ||
39 | cold Set the cold reboot flag | ||
40 | triple Force a triple fault (init) | ||
41 | kbd Use the keyboard controller. cold reset (default) | ||
42 | force Avoid anything that could hang. | ||
43 | */ | ||
44 | static int __init reboot_setup(char *str) | ||
45 | { | ||
46 | for (;;) { | ||
47 | switch (*str) { | ||
48 | case 'w': | ||
49 | reboot_mode = 0x1234; | ||
50 | break; | ||
51 | |||
52 | case 'c': | ||
53 | reboot_mode = 0; | ||
54 | break; | ||
55 | |||
56 | case 't': | ||
57 | case 'b': | ||
58 | case 'k': | ||
59 | reboot_type = *str; | ||
60 | break; | ||
61 | case 'f': | ||
62 | reboot_force = 1; | ||
63 | break; | ||
64 | } | ||
65 | if((str = strchr(str,',')) != NULL) | ||
66 | str++; | ||
67 | else | ||
68 | break; | ||
69 | } | ||
70 | return 1; | ||
71 | } | ||
72 | |||
73 | __setup("reboot=", reboot_setup); | ||
74 | |||
75 | static inline void kb_wait(void) | ||
76 | { | ||
77 | int i; | ||
78 | |||
79 | for (i=0; i<0x10000; i++) | ||
80 | if ((inb_p(0x64) & 0x02) == 0) | ||
81 | break; | ||
82 | } | ||
83 | |||
84 | void machine_shutdown(void) | ||
85 | { | ||
86 | unsigned long flags; | ||
87 | |||
88 | /* Stop the cpus and apics */ | ||
89 | #ifdef CONFIG_SMP | ||
90 | int reboot_cpu_id; | ||
91 | |||
92 | /* The boot cpu is always logical cpu 0 */ | ||
93 | reboot_cpu_id = 0; | ||
94 | |||
95 | /* Make certain the cpu I'm about to reboot on is online */ | ||
96 | if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { | ||
97 | reboot_cpu_id = smp_processor_id(); | ||
98 | } | ||
99 | |||
100 | /* Make certain I only run on the appropriate processor */ | ||
101 | set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); | ||
102 | |||
103 | /* O.K Now that I'm on the appropriate processor, | ||
104 | * stop all of the others. | ||
105 | */ | ||
106 | smp_send_stop(); | ||
107 | #endif | ||
108 | |||
109 | local_irq_save(flags); | ||
110 | |||
111 | #ifndef CONFIG_SMP | ||
112 | disable_local_APIC(); | ||
113 | #endif | ||
114 | |||
115 | disable_IO_APIC(); | ||
116 | |||
117 | #ifdef CONFIG_HPET_TIMER | ||
118 | hpet_disable(); | ||
119 | #endif | ||
120 | local_irq_restore(flags); | ||
121 | |||
122 | pci_iommu_shutdown(); | ||
123 | } | ||
124 | |||
125 | void machine_emergency_restart(void) | ||
126 | { | ||
127 | int i; | ||
128 | |||
129 | /* Tell the BIOS if we want cold or warm reboot */ | ||
130 | *((unsigned short *)__va(0x472)) = reboot_mode; | ||
131 | |||
132 | for (;;) { | ||
133 | /* Could also try the reset bit in the Hammer NB */ | ||
134 | switch (reboot_type) { | ||
135 | case BOOT_KBD: | ||
136 | for (i=0; i<10; i++) { | ||
137 | kb_wait(); | ||
138 | udelay(50); | ||
139 | outb(0xfe,0x64); /* pulse reset low */ | ||
140 | udelay(50); | ||
141 | } | ||
142 | |||
143 | case BOOT_TRIPLE: | ||
144 | load_idt((const struct desc_ptr *)&no_idt); | ||
145 | __asm__ __volatile__("int3"); | ||
146 | |||
147 | reboot_type = BOOT_KBD; | ||
148 | break; | ||
149 | } | ||
150 | } | ||
151 | } | ||
152 | |||
153 | void machine_restart(char * __unused) | ||
154 | { | ||
155 | printk("machine restart\n"); | ||
156 | |||
157 | if (!reboot_force) { | ||
158 | machine_shutdown(); | ||
159 | } | ||
160 | machine_emergency_restart(); | ||
161 | } | ||
162 | |||
163 | void machine_halt(void) | ||
164 | { | ||
165 | } | ||
166 | |||
167 | void machine_power_off(void) | ||
168 | { | ||
169 | if (pm_power_off) { | ||
170 | if (!reboot_force) { | ||
171 | machine_shutdown(); | ||
172 | } | ||
173 | pm_power_off(); | ||
174 | } | ||
175 | } | ||
176 | |||
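The removed 64-bit file documents the reboot= parameter in its header comment; reboot_setup() simply walks the comma-separated list and keys off the first character of each token. A condensed sketch of that walk, with illustrative demo_ names (the in-kernel version is the one shown above):

    #include <string.h>

    /* Sketch: parse "reboot=w|c,t|k|b,f" the way reboot_setup() does. */
    static int demo_reboot_mode, demo_reboot_force;
    static int demo_reboot_type = 'k';		/* keyboard-controller reset by default */

    static void demo_parse_reboot(char *str)
    {
    	for (;;) {
    		switch (*str) {
    		case 'w': demo_reboot_mode = 0x1234; break;	/* warm-boot flag, written to 0x472 */
    		case 'c': demo_reboot_mode = 0;      break;	/* cold reboot */
    		case 't': case 'b': case 'k': demo_reboot_type = *str; break;
    		case 'f': demo_reboot_force = 1;     break;	/* skip machine_shutdown() */
    		}
    		str = strchr(str, ',');
    		if (!str)
    			break;
    		str++;
    	}
    }
    /* e.g. demo_parse_reboot("t,w") selects a warm triple-fault reboot */
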
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index f452726c0fe2..dec0b5ec25c2 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci_dev *dev) | |||
30 | udelay(50); /* shouldn't get here but be safe and spin a while */ | 30 | udelay(50); /* shouldn't get here but be safe and spin a while */ |
31 | } | 31 | } |
32 | 32 | ||
33 | static void rdc321x_reset(struct pci_dev *dev) | ||
34 | { | ||
35 | unsigned i; | ||
36 | /* Voluntarily reset the watchdog timer */ | ||
37 | outl(0x80003840, 0xCF8); | ||
38 | /* Generate a CPU reset on next tick */ | ||
39 | i = inl(0xCFC); | ||
40 | /* Use the minimum timer resolution */ | ||
41 | i |= 0x1600; | ||
42 | outl(i, 0xCFC); | ||
43 | outb(1, 0x92); | ||
44 | } | ||
45 | |||
33 | struct device_fixup { | 46 | struct device_fixup { |
34 | unsigned int vendor; | 47 | unsigned int vendor; |
35 | unsigned int device; | 48 | unsigned int device; |
@@ -40,6 +53,7 @@ static struct device_fixup fixups_table[] = { | |||
40 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, | 53 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, |
41 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, | 54 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, |
42 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, | 55 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, |
56 | { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, | ||
43 | }; | 57 | }; |
44 | 58 | ||
45 | /* | 59 | /* |
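The new rdc321x_reset() fixup above talks to the chipset watchdog through the legacy type-1 PCI configuration mechanism: it writes the config address 0x80003840 to port 0xCF8, read-modify-writes the data register at 0xCFC, then strobes port 0x92 (system control port A) to trigger the reset. A hedged sketch of a generic type-1 config read-modify-write; the address encoding below is the standard mechanism, and only the specific register value comes from the code above:

    /* Sketch: legacy type-1 PCI config access (ports 0xCF8/0xCFC). */
    static unsigned int demo_pci_conf1_addr(unsigned bus, unsigned dev,
    					    unsigned fn, unsigned reg)
    {
    	/* enable bit | bus | device | function | dword-aligned register */
    	return 0x80000000u | (bus << 16) | (dev << 11) | (fn << 8) | (reg & 0xfc);
    }

    static void demo_conf1_set_bits(unsigned int addr, unsigned int set_bits)
    {
    	unsigned int val;

    	outl(addr, 0xCF8);		/* select the config register */
    	val = inl(0xCFC);		/* read current value */
    	outl(val | set_bits, 0xCFC);	/* write it back with extra bits set */
    }
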
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c new file mode 100644 index 000000000000..eb9b1a198f5e --- /dev/null +++ b/arch/x86/kernel/rtc.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /* | ||
2 | * RTC related functions | ||
3 | */ | ||
4 | #include <linux/acpi.h> | ||
5 | #include <linux/bcd.h> | ||
6 | #include <linux/mc146818rtc.h> | ||
7 | |||
8 | #include <asm/time.h> | ||
9 | #include <asm/vsyscall.h> | ||
10 | |||
11 | #ifdef CONFIG_X86_32 | ||
12 | # define CMOS_YEARS_OFFS 1900 | ||
13 | /* | ||
14 | * This is a special lock that is owned by the CPU and holds the index | ||
15 | * register we are working with. It is required for NMI access to the | ||
16 | * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. | ||
17 | */ | ||
18 | volatile unsigned long cmos_lock = 0; | ||
19 | EXPORT_SYMBOL(cmos_lock); | ||
20 | #else | ||
21 | /* | ||
22 | * x86-64 systems have only existed since 2002. | ||
23 | * This will work up to Dec 31, 2100 | ||
24 | */ | ||
25 | # define CMOS_YEARS_OFFS 2000 | ||
26 | #endif | ||
27 | |||
28 | DEFINE_SPINLOCK(rtc_lock); | ||
29 | EXPORT_SYMBOL(rtc_lock); | ||
30 | |||
31 | /* | ||
32 | * In order to set the CMOS clock precisely, set_rtc_mmss has to be | ||
33 | * called 500 ms after the second nowtime has started, because when | ||
34 | * nowtime is written into the registers of the CMOS clock, it will | ||
35 | * jump to the next second precisely 500 ms later. Check the Motorola | ||
36 | * MC146818A or Dallas DS12887 data sheet for details. | ||
37 | * | ||
38 | * BUG: This routine does not handle hour overflow properly; it just | ||
39 | * sets the minutes. Usually you'll only notice that after reboot! | ||
40 | */ | ||
41 | int mach_set_rtc_mmss(unsigned long nowtime) | ||
42 | { | ||
43 | int retval = 0; | ||
44 | int real_seconds, real_minutes, cmos_minutes; | ||
45 | unsigned char save_control, save_freq_select; | ||
46 | |||
47 | /* tell the clock it's being set */ | ||
48 | save_control = CMOS_READ(RTC_CONTROL); | ||
49 | CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); | ||
50 | |||
51 | /* stop and reset prescaler */ | ||
52 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); | ||
53 | CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); | ||
54 | |||
55 | cmos_minutes = CMOS_READ(RTC_MINUTES); | ||
56 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) | ||
57 | BCD_TO_BIN(cmos_minutes); | ||
58 | |||
59 | /* | ||
60 | * since we're only adjusting minutes and seconds, | ||
61 | * don't interfere with hour overflow. This avoids | ||
62 | * messing with unknown time zones but requires your | ||
63 | * RTC not to be off by more than 15 minutes | ||
64 | */ | ||
65 | real_seconds = nowtime % 60; | ||
66 | real_minutes = nowtime / 60; | ||
67 | /* correct for half hour time zone */ | ||
68 | if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) | ||
69 | real_minutes += 30; | ||
70 | real_minutes %= 60; | ||
71 | |||
72 | if (abs(real_minutes - cmos_minutes) < 30) { | ||
73 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { | ||
74 | BIN_TO_BCD(real_seconds); | ||
75 | BIN_TO_BCD(real_minutes); | ||
76 | } | ||
77 | CMOS_WRITE(real_seconds,RTC_SECONDS); | ||
78 | CMOS_WRITE(real_minutes,RTC_MINUTES); | ||
79 | } else { | ||
80 | printk(KERN_WARNING | ||
81 | "set_rtc_mmss: can't update from %d to %d\n", | ||
82 | cmos_minutes, real_minutes); | ||
83 | retval = -1; | ||
84 | } | ||
85 | |||
86 | /* The following flags have to be released exactly in this order, | ||
87 | * otherwise the DS12887 (popular MC146818A clone with integrated | ||
88 | * battery and quartz) will not reset the oscillator and will not | ||
89 | * update precisely 500 ms later. You won't find this mentioned in | ||
90 | * the Dallas Semiconductor data sheets, but who believes data | ||
91 | * sheets anyway ... -- Markus Kuhn | ||
92 | */ | ||
93 | CMOS_WRITE(save_control, RTC_CONTROL); | ||
94 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | ||
95 | |||
96 | return retval; | ||
97 | } | ||
98 | |||
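The CMOS clock stores most fields as binary-coded decimal unless RTC_DM_BINARY is set, which is why mach_set_rtc_mmss() converts with BCD_TO_BIN/BIN_TO_BCD above. The conversion itself is plain nibble arithmetic; a minimal sketch with illustrative helpers (the kernel uses its own macros):

    /* Sketch: BCD <-> binary for one byte. 0x59 in BCD is 59 decimal, and back. */
    static unsigned char demo_bcd2bin(unsigned char v)
    {
    	return (v & 0x0f) + (v >> 4) * 10;
    }

    static unsigned char demo_bin2bcd(unsigned char v)
    {
    	return ((v / 10) << 4) | (v % 10);
    }
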
99 | unsigned long mach_get_cmos_time(void) | ||
100 | { | ||
101 | unsigned int year, mon, day, hour, min, sec, century = 0; | ||
102 | |||
103 | /* | ||
104 | * If UIP is clear, then we have >= 244 microseconds before | ||
105 | * RTC registers will be updated. Spec sheet says that this | ||
106 | * is the reliable way to read RTC - registers. If UIP is set | ||
107 | * then the register access might be invalid. | ||
108 | */ | ||
109 | while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) | ||
110 | cpu_relax(); | ||
111 | |||
112 | sec = CMOS_READ(RTC_SECONDS); | ||
113 | min = CMOS_READ(RTC_MINUTES); | ||
114 | hour = CMOS_READ(RTC_HOURS); | ||
115 | day = CMOS_READ(RTC_DAY_OF_MONTH); | ||
116 | mon = CMOS_READ(RTC_MONTH); | ||
117 | year = CMOS_READ(RTC_YEAR); | ||
118 | |||
119 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_64) | ||
120 | /* CHECKME: Is this really 64bit only ??? */ | ||
121 | if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && | ||
122 | acpi_gbl_FADT.century) | ||
123 | century = CMOS_READ(acpi_gbl_FADT.century); | ||
124 | #endif | ||
125 | |||
126 | if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) { | ||
127 | BCD_TO_BIN(sec); | ||
128 | BCD_TO_BIN(min); | ||
129 | BCD_TO_BIN(hour); | ||
130 | BCD_TO_BIN(day); | ||
131 | BCD_TO_BIN(mon); | ||
132 | BCD_TO_BIN(year); | ||
133 | } | ||
134 | |||
135 | if (century) { | ||
136 | BCD_TO_BIN(century); | ||
137 | year += century * 100; | ||
138 | printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); | ||
139 | } else { | ||
140 | year += CMOS_YEARS_OFFS; | ||
141 | if (year < 1970) | ||
142 | year += 100; | ||
143 | } | ||
144 | |||
145 | return mktime(year, mon, day, hour, min, sec); | ||
146 | } | ||
147 | |||
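mach_get_cmos_time() above combines the two-digit CMOS year with either the ACPI century register (when the FADT advertises one) or the per-arch CMOS_YEARS_OFFS defined at the top of the file; without a century register it also bumps pre-1970 results by 100 so a reading of "02" on a 32-bit box still maps to 2002. A small sketch of that decision using the same constant:

    /* Sketch: turn the raw CMOS year (0..99) into a full year, as above. */
    static unsigned int demo_full_year(unsigned int cmos_year, unsigned int century)
    {
    	if (century)				/* ACPI century register, already binary */
    		return century * 100 + cmos_year;

    	cmos_year += CMOS_YEARS_OFFS;		/* 1900 on 32-bit, 2000 on 64-bit */
    	if (cmos_year < 1970)			/* 32-bit: years 00..69 mean 2000..2069 */
    		cmos_year += 100;
    	return cmos_year;
    }
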
148 | /* Routines for accessing the CMOS RAM/RTC. */ | ||
149 | unsigned char rtc_cmos_read(unsigned char addr) | ||
150 | { | ||
151 | unsigned char val; | ||
152 | |||
153 | lock_cmos_prefix(addr); | ||
154 | outb_p(addr, RTC_PORT(0)); | ||
155 | val = inb_p(RTC_PORT(1)); | ||
156 | lock_cmos_suffix(addr); | ||
157 | return val; | ||
158 | } | ||
159 | EXPORT_SYMBOL(rtc_cmos_read); | ||
160 | |||
161 | void rtc_cmos_write(unsigned char val, unsigned char addr) | ||
162 | { | ||
163 | lock_cmos_prefix(addr); | ||
164 | outb_p(addr, RTC_PORT(0)); | ||
165 | outb_p(val, RTC_PORT(1)); | ||
166 | lock_cmos_suffix(addr); | ||
167 | } | ||
168 | EXPORT_SYMBOL(rtc_cmos_write); | ||
169 | |||
170 | static int set_rtc_mmss(unsigned long nowtime) | ||
171 | { | ||
172 | int retval; | ||
173 | unsigned long flags; | ||
174 | |||
175 | spin_lock_irqsave(&rtc_lock, flags); | ||
176 | retval = set_wallclock(nowtime); | ||
177 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
178 | |||
179 | return retval; | ||
180 | } | ||
181 | |||
182 | /* not static: needed by APM */ | ||
183 | unsigned long read_persistent_clock(void) | ||
184 | { | ||
185 | unsigned long retval, flags; | ||
186 | |||
187 | spin_lock_irqsave(&rtc_lock, flags); | ||
188 | retval = get_wallclock(); | ||
189 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
190 | |||
191 | return retval; | ||
192 | } | ||
193 | |||
194 | int update_persistent_clock(struct timespec now) | ||
195 | { | ||
196 | return set_rtc_mmss(now.tv_sec); | ||
197 | } | ||
198 | |||
199 | unsigned long long native_read_tsc(void) | ||
200 | { | ||
201 | return __native_read_tsc(); | ||
202 | } | ||
203 | EXPORT_SYMBOL(native_read_tsc); | ||
204 | |||
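native_read_tsc() above is a thin wrapper around __native_read_tsc(), which ultimately executes the RDTSC instruction; RDTSC returns the 64-bit time-stamp counter split across EDX:EAX. A hedged sketch of reading it directly with inline assembly (illustrative, not the kernel's helper):

    /* Sketch: read the time-stamp counter via RDTSC (result in EDX:EAX). */
    static inline unsigned long long demo_rdtsc(void)
    {
    	unsigned int lo, hi;

    	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
    	return ((unsigned long long)hi << 32) | lo;
    }
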
diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c index 87bc159d29df..7e004acbe526 100644 --- a/arch/x86/kernel/scx200_32.c +++ b/arch/x86/kernel/scx200_32.c | |||
@@ -65,7 +65,7 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ | |||
65 | base = pci_resource_start(pdev, 0); | 65 | base = pci_resource_start(pdev, 0); |
66 | printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); | 66 | printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); |
67 | 67 | ||
68 | if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) { | 68 | if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) { |
69 | printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); | 69 | printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); |
70 | return -EBUSY; | 70 | return -EBUSY; |
71 | } | 71 | } |
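request_region() returns a struct resource pointer (NULL on failure), so the cleaned-up scx200 check above tests it with ! rather than comparing against 0. A short usage sketch of the request/release pairing; the region name below is a placeholder:

    /* Sketch: claim an I/O port range and back out cleanly on failure. */
    if (!request_region(base, SCx200_GPIO_SIZE, "example-driver")) {
    	/* somebody else already owns these ports */
    	return -EBUSY;
    }
    /* ... access the range with inb()/outb() ... */
    release_region(base, SCx200_GPIO_SIZE);
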
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 3558ac78c926..309366f8f603 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c | |||
@@ -24,7 +24,11 @@ | |||
24 | #include <asm/sections.h> | 24 | #include <asm/sections.h> |
25 | #include <asm/setup.h> | 25 | #include <asm/setup.h> |
26 | 26 | ||
27 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
27 | struct boot_params __initdata boot_params; | 28 | struct boot_params __initdata boot_params; |
29 | #else | ||
30 | struct boot_params boot_params; | ||
31 | #endif | ||
28 | 32 | ||
29 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 33 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
30 | 34 | ||
@@ -37,6 +41,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |||
37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | 41 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); |
38 | 42 | ||
39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 43 | unsigned long __supported_pte_mask __read_mostly = ~0UL; |
44 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | ||
45 | |||
40 | static int do_not_nx __cpuinitdata = 0; | 46 | static int do_not_nx __cpuinitdata = 0; |
41 | 47 | ||
42 | /* noexec=on|off | 48 | /* noexec=on|off |
@@ -80,6 +86,43 @@ static int __init nonx32_setup(char *str) | |||
80 | __setup("noexec32=", nonx32_setup); | 86 | __setup("noexec32=", nonx32_setup); |
81 | 87 | ||
82 | /* | 88 | /* |
89 | * Copy data used in early init routines from the initial arrays to the | ||
90 | * per cpu data areas. These arrays then become expendable and the | ||
91 | * *_early_ptr's are zeroed indicating that the static arrays are gone. | ||
92 | */ | ||
93 | static void __init setup_per_cpu_maps(void) | ||
94 | { | ||
95 | int cpu; | ||
96 | |||
97 | for_each_possible_cpu(cpu) { | ||
98 | #ifdef CONFIG_SMP | ||
99 | if (per_cpu_offset(cpu)) { | ||
100 | #endif | ||
101 | per_cpu(x86_cpu_to_apicid, cpu) = | ||
102 | x86_cpu_to_apicid_init[cpu]; | ||
103 | per_cpu(x86_bios_cpu_apicid, cpu) = | ||
104 | x86_bios_cpu_apicid_init[cpu]; | ||
105 | #ifdef CONFIG_NUMA | ||
106 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
107 | x86_cpu_to_node_map_init[cpu]; | ||
108 | #endif | ||
109 | #ifdef CONFIG_SMP | ||
110 | } | ||
111 | else | ||
112 | printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n", | ||
113 | cpu); | ||
114 | #endif | ||
115 | } | ||
116 | |||
117 | /* indicate the early static arrays will soon be gone */ | ||
118 | x86_cpu_to_apicid_early_ptr = NULL; | ||
119 | x86_bios_cpu_apicid_early_ptr = NULL; | ||
120 | #ifdef CONFIG_NUMA | ||
121 | x86_cpu_to_node_map_early_ptr = NULL; | ||
122 | #endif | ||
123 | } | ||
124 | |||
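setup_per_cpu_maps() above copies boot-time static arrays (filled in before the per-cpu areas exist) into the real per-cpu variables and then NULLs the *_early_ptr globals, signalling that later code must use the per-cpu copies. A generic sketch of that hand-over pattern, with illustrative names:

    /* Sketch: hand over from an early static table to per-cpu storage. */
    static int demo_table_init[NR_CPUS] __initdata;	/* filled before per-cpu areas exist */
    int *demo_table_early_ptr = demo_table_init;	/* consumers use this while non-NULL */

    DEFINE_PER_CPU(int, demo_value);

    static void __init demo_setup_per_cpu(void)
    {
    	int cpu;

    	for_each_possible_cpu(cpu)
    		per_cpu(demo_value, cpu) = demo_table_init[cpu];

    	demo_table_early_ptr = NULL;	/* the static array can now go away with .init */
    }
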
125 | /* | ||
83 | * Great future plan: | 126 | * Great future plan: |
84 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | 127 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. |
85 | * Always point %gs to its beginning | 128 | * Always point %gs to its beginning |
@@ -100,18 +143,21 @@ void __init setup_per_cpu_areas(void) | |||
100 | for_each_cpu_mask (i, cpu_possible_map) { | 143 | for_each_cpu_mask (i, cpu_possible_map) { |
101 | char *ptr; | 144 | char *ptr; |
102 | 145 | ||
103 | if (!NODE_DATA(cpu_to_node(i))) { | 146 | if (!NODE_DATA(early_cpu_to_node(i))) { |
104 | printk("cpu with no node %d, num_online_nodes %d\n", | 147 | printk("cpu with no node %d, num_online_nodes %d\n", |
105 | i, num_online_nodes()); | 148 | i, num_online_nodes()); |
106 | ptr = alloc_bootmem_pages(size); | 149 | ptr = alloc_bootmem_pages(size); |
107 | } else { | 150 | } else { |
108 | ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); | 151 | ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size); |
109 | } | 152 | } |
110 | if (!ptr) | 153 | if (!ptr) |
111 | panic("Cannot allocate cpu data for CPU %d\n", i); | 154 | panic("Cannot allocate cpu data for CPU %d\n", i); |
112 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; | 155 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; |
113 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 156 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
114 | } | 157 | } |
158 | |||
159 | /* setup percpu data maps early */ | ||
160 | setup_per_cpu_maps(); | ||
115 | } | 161 | } |
116 | 162 | ||
117 | void pda_init(int cpu) | 163 | void pda_init(int cpu) |
@@ -169,7 +215,8 @@ void syscall_init(void) | |||
169 | #endif | 215 | #endif |
170 | 216 | ||
171 | /* Flags to clear on syscall */ | 217 | /* Flags to clear on syscall */ |
172 | wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); | 218 | wrmsrl(MSR_SYSCALL_MASK, |
219 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | ||
173 | } | 220 | } |
174 | 221 | ||
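The syscall_init() change above replaces magic numbers with the X86_EFLAGS_* names; MSR_SYSCALL_MASK tells the CPU which RFLAGS bits to clear on SYSCALL entry. With TF = 0x100, IF = 0x200, DF = 0x400 and IOPL = 0x3000, the combined mask is 0x3700, the same value the old EF_TF|EF_DF|EF_IE|0x3000 expression produced:

    /* Sketch: RFLAGS bits cleared on SYSCALL entry (architectural bit values). */
    #define DEMO_SYSCALL_MASK \
    	(X86_EFLAGS_TF /* 0x0100 */ | X86_EFLAGS_IF /* 0x0200 */ | \
    	 X86_EFLAGS_DF /* 0x0400 */ | X86_EFLAGS_IOPL /* 0x3000 */)	/* == 0x3700 */
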
175 | void __cpuinit check_efer(void) | 222 | void __cpuinit check_efer(void) |
@@ -227,7 +274,7 @@ void __cpuinit cpu_init (void) | |||
227 | * and set up the GDT descriptor: | 274 | * and set up the GDT descriptor: |
228 | */ | 275 | */ |
229 | if (cpu) | 276 | if (cpu) |
230 | memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); | 277 | memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); |
231 | 278 | ||
232 | cpu_gdt_descr[cpu].size = GDT_SIZE; | 279 | cpu_gdt_descr[cpu].size = GDT_SIZE; |
233 | load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); | 280 | load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); |
@@ -257,10 +304,10 @@ void __cpuinit cpu_init (void) | |||
257 | v, cpu); | 304 | v, cpu); |
258 | } | 305 | } |
259 | estacks += PAGE_SIZE << order[v]; | 306 | estacks += PAGE_SIZE << order[v]; |
260 | orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; | 307 | orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; |
261 | } | 308 | } |
262 | 309 | ||
263 | t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 310 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
264 | /* | 311 | /* |
265 | * <= is required because the CPU will access up to | 312 | * <= is required because the CPU will access up to |
266 | * 8 bits beyond the end of the IO permission bitmap. | 313 | * 8 bits beyond the end of the IO permission bitmap. |
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 9c24b45b513c..62adc5f20be5 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -44,9 +44,12 @@ | |||
44 | #include <linux/crash_dump.h> | 44 | #include <linux/crash_dump.h> |
45 | #include <linux/dmi.h> | 45 | #include <linux/dmi.h> |
46 | #include <linux/pfn.h> | 46 | #include <linux/pfn.h> |
47 | #include <linux/pci.h> | ||
48 | #include <linux/init_ohci1394_dma.h> | ||
47 | 49 | ||
48 | #include <video/edid.h> | 50 | #include <video/edid.h> |
49 | 51 | ||
52 | #include <asm/mtrr.h> | ||
50 | #include <asm/apic.h> | 53 | #include <asm/apic.h> |
51 | #include <asm/e820.h> | 54 | #include <asm/e820.h> |
52 | #include <asm/mpspec.h> | 55 | #include <asm/mpspec.h> |
@@ -67,14 +70,83 @@ | |||
67 | address, and must not be in the .bss segment! */ | 70 | address, and must not be in the .bss segment! */ |
68 | unsigned long init_pg_tables_end __initdata = ~0UL; | 71 | unsigned long init_pg_tables_end __initdata = ~0UL; |
69 | 72 | ||
70 | int disable_pse __cpuinitdata = 0; | ||
71 | |||
72 | /* | 73 | /* |
73 | * Machine setup.. | 74 | * Machine setup.. |
74 | */ | 75 | */ |
75 | extern struct resource code_resource; | 76 | static struct resource data_resource = { |
76 | extern struct resource data_resource; | 77 | .name = "Kernel data", |
77 | extern struct resource bss_resource; | 78 | .start = 0, |
79 | .end = 0, | ||
80 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
81 | }; | ||
82 | |||
83 | static struct resource code_resource = { | ||
84 | .name = "Kernel code", | ||
85 | .start = 0, | ||
86 | .end = 0, | ||
87 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
88 | }; | ||
89 | |||
90 | static struct resource bss_resource = { | ||
91 | .name = "Kernel bss", | ||
92 | .start = 0, | ||
93 | .end = 0, | ||
94 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
95 | }; | ||
96 | |||
97 | static struct resource video_ram_resource = { | ||
98 | .name = "Video RAM area", | ||
99 | .start = 0xa0000, | ||
100 | .end = 0xbffff, | ||
101 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
102 | }; | ||
103 | |||
104 | static struct resource standard_io_resources[] = { { | ||
105 | .name = "dma1", | ||
106 | .start = 0x0000, | ||
107 | .end = 0x001f, | ||
108 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
109 | }, { | ||
110 | .name = "pic1", | ||
111 | .start = 0x0020, | ||
112 | .end = 0x0021, | ||
113 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
114 | }, { | ||
115 | .name = "timer0", | ||
116 | .start = 0x0040, | ||
117 | .end = 0x0043, | ||
118 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
119 | }, { | ||
120 | .name = "timer1", | ||
121 | .start = 0x0050, | ||
122 | .end = 0x0053, | ||
123 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
124 | }, { | ||
125 | .name = "keyboard", | ||
126 | .start = 0x0060, | ||
127 | .end = 0x006f, | ||
128 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
129 | }, { | ||
130 | .name = "dma page reg", | ||
131 | .start = 0x0080, | ||
132 | .end = 0x008f, | ||
133 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
134 | }, { | ||
135 | .name = "pic2", | ||
136 | .start = 0x00a0, | ||
137 | .end = 0x00a1, | ||
138 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
139 | }, { | ||
140 | .name = "dma2", | ||
141 | .start = 0x00c0, | ||
142 | .end = 0x00df, | ||
143 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
144 | }, { | ||
145 | .name = "fpu", | ||
146 | .start = 0x00f0, | ||
147 | .end = 0x00ff, | ||
148 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
149 | } }; | ||
78 | 150 | ||
79 | /* cpu data as detected by the assembly code in head.S */ | 151 | /* cpu data as detected by the assembly code in head.S */ |
80 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | 152 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
@@ -116,13 +188,17 @@ extern int root_mountflags; | |||
116 | 188 | ||
117 | unsigned long saved_videomode; | 189 | unsigned long saved_videomode; |
118 | 190 | ||
119 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 191 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
120 | #define RAMDISK_PROMPT_FLAG 0x8000 | 192 | #define RAMDISK_PROMPT_FLAG 0x8000 |
121 | #define RAMDISK_LOAD_FLAG 0x4000 | 193 | #define RAMDISK_LOAD_FLAG 0x4000 |
122 | 194 | ||
123 | static char __initdata command_line[COMMAND_LINE_SIZE]; | 195 | static char __initdata command_line[COMMAND_LINE_SIZE]; |
124 | 196 | ||
197 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
125 | struct boot_params __initdata boot_params; | 198 | struct boot_params __initdata boot_params; |
199 | #else | ||
200 | struct boot_params boot_params; | ||
201 | #endif | ||
126 | 202 | ||
127 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 203 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
128 | struct edd edd; | 204 | struct edd edd; |
@@ -166,8 +242,7 @@ static int __init parse_mem(char *arg) | |||
166 | return -EINVAL; | 242 | return -EINVAL; |
167 | 243 | ||
168 | if (strcmp(arg, "nopentium") == 0) { | 244 | if (strcmp(arg, "nopentium") == 0) { |
169 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | 245 | setup_clear_cpu_cap(X86_FEATURE_PSE); |
170 | disable_pse = 1; | ||
171 | } else { | 246 | } else { |
172 | /* If the user specifies memory size, we | 247 | /* If the user specifies memory size, we |
173 | * limit the BIOS-provided memory map to | 248 | * limit the BIOS-provided memory map to |
@@ -176,7 +251,7 @@ static int __init parse_mem(char *arg) | |||
176 | * trim the existing memory map. | 251 | * trim the existing memory map. |
177 | */ | 252 | */ |
178 | unsigned long long mem_size; | 253 | unsigned long long mem_size; |
179 | 254 | ||
180 | mem_size = memparse(arg, &arg); | 255 | mem_size = memparse(arg, &arg); |
181 | limit_regions(mem_size); | 256 | limit_regions(mem_size); |
182 | user_defined_memmap = 1; | 257 | user_defined_memmap = 1; |
@@ -315,7 +390,7 @@ static void __init reserve_ebda_region(void) | |||
315 | unsigned int addr; | 390 | unsigned int addr; |
316 | addr = get_bios_ebda(); | 391 | addr = get_bios_ebda(); |
317 | if (addr) | 392 | if (addr) |
318 | reserve_bootmem(addr, PAGE_SIZE); | 393 | reserve_bootmem(addr, PAGE_SIZE); |
319 | } | 394 | } |
320 | 395 | ||
321 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 396 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
@@ -420,6 +495,100 @@ static inline void __init reserve_crashkernel(void) | |||
420 | {} | 495 | {} |
421 | #endif | 496 | #endif |
422 | 497 | ||
498 | #ifdef CONFIG_BLK_DEV_INITRD | ||
499 | |||
500 | static bool do_relocate_initrd = false; | ||
501 | |||
502 | static void __init reserve_initrd(void) | ||
503 | { | ||
504 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
505 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
506 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | ||
507 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | ||
508 | unsigned long ramdisk_here; | ||
509 | |||
510 | initrd_start = 0; | ||
511 | |||
512 | if (!boot_params.hdr.type_of_loader || | ||
513 | !ramdisk_image || !ramdisk_size) | ||
514 | return; /* No initrd provided by bootloader */ | ||
515 | |||
516 | if (ramdisk_end < ramdisk_image) { | ||
517 | printk(KERN_ERR "initrd wraps around end of memory, " | ||
518 | "disabling initrd\n"); | ||
519 | return; | ||
520 | } | ||
521 | if (ramdisk_size >= end_of_lowmem/2) { | ||
522 | printk(KERN_ERR "initrd too large to handle, " | ||
523 | "disabling initrd\n"); | ||
524 | return; | ||
525 | } | ||
526 | if (ramdisk_end <= end_of_lowmem) { | ||
527 | /* All in lowmem, easy case */ | ||
528 | reserve_bootmem(ramdisk_image, ramdisk_size); | ||
529 | initrd_start = ramdisk_image + PAGE_OFFSET; | ||
530 | initrd_end = initrd_start+ramdisk_size; | ||
531 | return; | ||
532 | } | ||
533 | |||
534 | /* We need to move the initrd down into lowmem */ | ||
535 | ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; | ||
536 | |||
537 | /* Note: this includes all the lowmem currently occupied by | ||
538 | the initrd, we rely on that fact to keep the data intact. */ | ||
539 | reserve_bootmem(ramdisk_here, ramdisk_size); | ||
540 | initrd_start = ramdisk_here + PAGE_OFFSET; | ||
541 | initrd_end = initrd_start + ramdisk_size; | ||
542 | |||
543 | do_relocate_initrd = true; | ||
544 | } | ||
545 | |||
546 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | ||
547 | |||
548 | static void __init relocate_initrd(void) | ||
549 | { | ||
550 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
551 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
552 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | ||
553 | unsigned long ramdisk_here; | ||
554 | unsigned long slop, clen, mapaddr; | ||
555 | char *p, *q; | ||
556 | |||
557 | if (!do_relocate_initrd) | ||
558 | return; | ||
559 | |||
560 | ramdisk_here = initrd_start - PAGE_OFFSET; | ||
561 | |||
562 | q = (char *)initrd_start; | ||
563 | |||
564 | /* Copy any lowmem portion of the initrd */ | ||
565 | if (ramdisk_image < end_of_lowmem) { | ||
566 | clen = end_of_lowmem - ramdisk_image; | ||
567 | p = (char *)__va(ramdisk_image); | ||
568 | memcpy(q, p, clen); | ||
569 | q += clen; | ||
570 | ramdisk_image += clen; | ||
571 | ramdisk_size -= clen; | ||
572 | } | ||
573 | |||
574 | /* Copy the highmem portion of the initrd */ | ||
575 | while (ramdisk_size) { | ||
576 | slop = ramdisk_image & ~PAGE_MASK; | ||
577 | clen = ramdisk_size; | ||
578 | if (clen > MAX_MAP_CHUNK-slop) | ||
579 | clen = MAX_MAP_CHUNK-slop; | ||
580 | mapaddr = ramdisk_image & PAGE_MASK; | ||
581 | p = early_ioremap(mapaddr, clen+slop); | ||
582 | memcpy(q, p+slop, clen); | ||
583 | early_iounmap(p, clen+slop); | ||
584 | q += clen; | ||
585 | ramdisk_image += clen; | ||
586 | ramdisk_size -= clen; | ||
587 | } | ||
588 | } | ||
589 | |||
590 | #endif /* CONFIG_BLK_DEV_INITRD */ | ||
591 | |||
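relocate_initrd() above can memcpy the lowmem part of the image directly, but the highmem remainder has to be copied in chunks of at most MAX_MAP_CHUNK, each temporarily mapped with early_ioremap(). A condensed sketch of that chunked copy loop, simplified from the code above (error handling omitted, demo_ name illustrative):

    /* Sketch: copy a physical range into lowmem via temporary early mappings. */
    static void __init demo_copy_phys(char *dst, unsigned long phys, unsigned long size)
    {
    	while (size) {
    		unsigned long slop = phys & ~PAGE_MASK;	/* offset within the first page */
    		unsigned long clen = min(size, (unsigned long)MAX_MAP_CHUNK - slop);
    		char *p = early_ioremap(phys & PAGE_MASK, clen + slop);

    		memcpy(dst, p + slop, clen);
    		early_iounmap(p, clen + slop);
    		dst  += clen;
    		phys += clen;
    		size -= clen;
    	}
    }
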
423 | void __init setup_bootmem_allocator(void) | 592 | void __init setup_bootmem_allocator(void) |
424 | { | 593 | { |
425 | unsigned long bootmap_size; | 594 | unsigned long bootmap_size; |
@@ -475,26 +644,10 @@ void __init setup_bootmem_allocator(void) | |||
475 | */ | 644 | */ |
476 | find_smp_config(); | 645 | find_smp_config(); |
477 | #endif | 646 | #endif |
478 | numa_kva_reserve(); | ||
479 | #ifdef CONFIG_BLK_DEV_INITRD | 647 | #ifdef CONFIG_BLK_DEV_INITRD |
480 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 648 | reserve_initrd(); |
481 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
482 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
483 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | ||
484 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | ||
485 | |||
486 | if (ramdisk_end <= end_of_lowmem) { | ||
487 | reserve_bootmem(ramdisk_image, ramdisk_size); | ||
488 | initrd_start = ramdisk_image + PAGE_OFFSET; | ||
489 | initrd_end = initrd_start+ramdisk_size; | ||
490 | } else { | ||
491 | printk(KERN_ERR "initrd extends beyond end of memory " | ||
492 | "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | ||
493 | ramdisk_end, end_of_lowmem); | ||
494 | initrd_start = 0; | ||
495 | } | ||
496 | } | ||
497 | #endif | 649 | #endif |
650 | numa_kva_reserve(); | ||
498 | reserve_crashkernel(); | 651 | reserve_crashkernel(); |
499 | } | 652 | } |
500 | 653 | ||
@@ -545,17 +698,11 @@ void __init setup_arch(char **cmdline_p) | |||
545 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 698 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
546 | pre_setup_arch_hook(); | 699 | pre_setup_arch_hook(); |
547 | early_cpu_init(); | 700 | early_cpu_init(); |
701 | early_ioremap_init(); | ||
548 | 702 | ||
549 | /* | ||
550 | * FIXME: This isn't an official loader_type right | ||
551 | * now but does currently work with elilo. | ||
552 | * If we were configured as an EFI kernel, check to make | ||
553 | * sure that we were loaded correctly from elilo and that | ||
554 | * the system table is valid. If not, then initialize normally. | ||
555 | */ | ||
556 | #ifdef CONFIG_EFI | 703 | #ifdef CONFIG_EFI |
557 | if ((boot_params.hdr.type_of_loader == 0x50) && | 704 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
558 | boot_params.efi_info.efi_systab) | 705 | "EL32", 4)) |
559 | efi_enabled = 1; | 706 | efi_enabled = 1; |
560 | #endif | 707 | #endif |
561 | 708 | ||
@@ -579,12 +726,9 @@ void __init setup_arch(char **cmdline_p) | |||
579 | rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | 726 | rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); |
580 | #endif | 727 | #endif |
581 | ARCH_SETUP | 728 | ARCH_SETUP |
582 | if (efi_enabled) | 729 | |
583 | efi_init(); | 730 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
584 | else { | 731 | print_memory_map(memory_setup()); |
585 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | ||
586 | print_memory_map(memory_setup()); | ||
587 | } | ||
588 | 732 | ||
589 | copy_edd(); | 733 | copy_edd(); |
590 | 734 | ||
@@ -612,8 +756,16 @@ void __init setup_arch(char **cmdline_p) | |||
612 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 756 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
613 | *cmdline_p = command_line; | 757 | *cmdline_p = command_line; |
614 | 758 | ||
759 | if (efi_enabled) | ||
760 | efi_init(); | ||
761 | |||
615 | max_low_pfn = setup_memory(); | 762 | max_low_pfn = setup_memory(); |
616 | 763 | ||
764 | /* update e820 for memory not covered by WB MTRRs */ | ||
765 | mtrr_bp_init(); | ||
766 | if (mtrr_trim_uncached_memory(max_pfn)) | ||
767 | max_low_pfn = setup_memory(); | ||
768 | |||
617 | #ifdef CONFIG_VMI | 769 | #ifdef CONFIG_VMI |
618 | /* | 770 | /* |
619 | * Must be after max_low_pfn is determined, and before kernel | 771 | * Must be after max_low_pfn is determined, and before kernel |
@@ -636,6 +788,16 @@ void __init setup_arch(char **cmdline_p) | |||
636 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | 788 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ |
637 | #endif | 789 | #endif |
638 | paging_init(); | 790 | paging_init(); |
791 | |||
792 | /* | ||
793 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | ||
794 | */ | ||
795 | |||
796 | #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | ||
797 | if (init_ohci1394_dma_early) | ||
798 | init_ohci1394_dma_on_all_controllers(); | ||
799 | #endif | ||
800 | |||
639 | remapped_pgdat_init(); | 801 | remapped_pgdat_init(); |
640 | sparse_init(); | 802 | sparse_init(); |
641 | zone_sizes_init(); | 803 | zone_sizes_init(); |
@@ -644,15 +806,19 @@ void __init setup_arch(char **cmdline_p) | |||
644 | * NOTE: at this point the bootmem allocator is fully available. | 806 | * NOTE: at this point the bootmem allocator is fully available. |
645 | */ | 807 | */ |
646 | 808 | ||
809 | #ifdef CONFIG_BLK_DEV_INITRD | ||
810 | relocate_initrd(); | ||
811 | #endif | ||
812 | |||
647 | paravirt_post_allocator_init(); | 813 | paravirt_post_allocator_init(); |
648 | 814 | ||
649 | dmi_scan_machine(); | 815 | dmi_scan_machine(); |
650 | 816 | ||
817 | io_delay_init(); | ||
818 | |||
651 | #ifdef CONFIG_X86_GENERICARCH | 819 | #ifdef CONFIG_X86_GENERICARCH |
652 | generic_apic_probe(); | 820 | generic_apic_probe(); |
653 | #endif | 821 | #endif |
654 | if (efi_enabled) | ||
655 | efi_map_memmap(); | ||
656 | 822 | ||
657 | #ifdef CONFIG_ACPI | 823 | #ifdef CONFIG_ACPI |
658 | /* | 824 | /* |
@@ -661,9 +827,7 @@ void __init setup_arch(char **cmdline_p) | |||
661 | acpi_boot_table_init(); | 827 | acpi_boot_table_init(); |
662 | #endif | 828 | #endif |
663 | 829 | ||
664 | #ifdef CONFIG_PCI | ||
665 | early_quirks(); | 830 | early_quirks(); |
666 | #endif | ||
667 | 831 | ||
668 | #ifdef CONFIG_ACPI | 832 | #ifdef CONFIG_ACPI |
669 | acpi_boot_init(); | 833 | acpi_boot_init(); |
@@ -692,3 +856,26 @@ void __init setup_arch(char **cmdline_p) | |||
692 | #endif | 856 | #endif |
693 | #endif | 857 | #endif |
694 | } | 858 | } |
859 | |||
860 | /* | ||
861 | * Request address space for all standard resources | ||
862 | * | ||
863 | * This is called just before pcibios_init(), which is also a | ||
864 | * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | ||
865 | */ | ||
866 | static int __init request_standard_resources(void) | ||
867 | { | ||
868 | int i; | ||
869 | |||
870 | printk(KERN_INFO "Setting up standard PCI resources\n"); | ||
871 | init_iomem_resources(&code_resource, &data_resource, &bss_resource); | ||
872 | |||
873 | request_resource(&iomem_resource, &video_ram_resource); | ||
874 | |||
875 | /* request I/O space for devices used on all i[345]86 PCs */ | ||
876 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | ||
877 | request_resource(&ioport_resource, &standard_io_resources[i]); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | subsys_initcall(request_standard_resources); | ||
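Both setup_32.c above and setup_64.c below now detect an EFI boot by checking the loader signature that the EFI loader places in boot_params ("EL32" on 32-bit, "EL64" on 64-bit) instead of the old type_of_loader == 0x50 heuristic. A minimal sketch of the check, mirroring those lines:

    /* Sketch: EFI detection via the loader signature in boot_params. */
    #ifdef CONFIG_X86_32
    # define DEMO_EFI_SIG "EL32"
    #else
    # define DEMO_EFI_SIG "EL64"
    #endif

    	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
    		     DEMO_EFI_SIG, 4))
    		efi_enabled = 1;
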
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 30d94d1d5f5f..c8939dfddfba 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/crash_dump.h> | 30 | #include <linux/crash_dump.h> |
31 | #include <linux/root_dev.h> | 31 | #include <linux/root_dev.h> |
32 | #include <linux/pci.h> | 32 | #include <linux/pci.h> |
33 | #include <linux/efi.h> | ||
33 | #include <linux/acpi.h> | 34 | #include <linux/acpi.h> |
34 | #include <linux/kallsyms.h> | 35 | #include <linux/kallsyms.h> |
35 | #include <linux/edd.h> | 36 | #include <linux/edd.h> |
@@ -39,10 +40,13 @@ | |||
39 | #include <linux/dmi.h> | 40 | #include <linux/dmi.h> |
40 | #include <linux/dma-mapping.h> | 41 | #include <linux/dma-mapping.h> |
41 | #include <linux/ctype.h> | 42 | #include <linux/ctype.h> |
43 | #include <linux/uaccess.h> | ||
44 | #include <linux/init_ohci1394_dma.h> | ||
42 | 45 | ||
43 | #include <asm/mtrr.h> | 46 | #include <asm/mtrr.h> |
44 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
45 | #include <asm/system.h> | 48 | #include <asm/system.h> |
49 | #include <asm/vsyscall.h> | ||
46 | #include <asm/io.h> | 50 | #include <asm/io.h> |
47 | #include <asm/smp.h> | 51 | #include <asm/smp.h> |
48 | #include <asm/msr.h> | 52 | #include <asm/msr.h> |
@@ -50,6 +54,7 @@ | |||
50 | #include <video/edid.h> | 54 | #include <video/edid.h> |
51 | #include <asm/e820.h> | 55 | #include <asm/e820.h> |
52 | #include <asm/dma.h> | 56 | #include <asm/dma.h> |
57 | #include <asm/gart.h> | ||
53 | #include <asm/mpspec.h> | 58 | #include <asm/mpspec.h> |
54 | #include <asm/mmu_context.h> | 59 | #include <asm/mmu_context.h> |
55 | #include <asm/proto.h> | 60 | #include <asm/proto.h> |
@@ -59,6 +64,15 @@ | |||
59 | #include <asm/sections.h> | 64 | #include <asm/sections.h> |
60 | #include <asm/dmi.h> | 65 | #include <asm/dmi.h> |
61 | #include <asm/cacheflush.h> | 66 | #include <asm/cacheflush.h> |
67 | #include <asm/mce.h> | ||
68 | #include <asm/ds.h> | ||
69 | #include <asm/topology.h> | ||
70 | |||
71 | #ifdef CONFIG_PARAVIRT | ||
72 | #include <asm/paravirt.h> | ||
73 | #else | ||
74 | #define ARCH_SETUP | ||
75 | #endif | ||
62 | 76 | ||
63 | /* | 77 | /* |
64 | * Machine setup.. | 78 | * Machine setup.. |
@@ -67,6 +81,8 @@ | |||
67 | struct cpuinfo_x86 boot_cpu_data __read_mostly; | 81 | struct cpuinfo_x86 boot_cpu_data __read_mostly; |
68 | EXPORT_SYMBOL(boot_cpu_data); | 82 | EXPORT_SYMBOL(boot_cpu_data); |
69 | 83 | ||
84 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | ||
85 | |||
70 | unsigned long mmu_cr4_features; | 86 | unsigned long mmu_cr4_features; |
71 | 87 | ||
72 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | 88 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ |
@@ -76,7 +92,7 @@ unsigned long saved_video_mode; | |||
76 | 92 | ||
77 | int force_mwait __cpuinitdata; | 93 | int force_mwait __cpuinitdata; |
78 | 94 | ||
79 | /* | 95 | /* |
80 | * Early DMI memory | 96 | * Early DMI memory |
81 | */ | 97 | */ |
82 | int dmi_alloc_index; | 98 | int dmi_alloc_index; |
@@ -122,25 +138,27 @@ struct resource standard_io_resources[] = { | |||
122 | 138 | ||
123 | #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) | 139 | #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) |
124 | 140 | ||
125 | struct resource data_resource = { | 141 | static struct resource data_resource = { |
126 | .name = "Kernel data", | 142 | .name = "Kernel data", |
127 | .start = 0, | 143 | .start = 0, |
128 | .end = 0, | 144 | .end = 0, |
129 | .flags = IORESOURCE_RAM, | 145 | .flags = IORESOURCE_RAM, |
130 | }; | 146 | }; |
131 | struct resource code_resource = { | 147 | static struct resource code_resource = { |
132 | .name = "Kernel code", | 148 | .name = "Kernel code", |
133 | .start = 0, | 149 | .start = 0, |
134 | .end = 0, | 150 | .end = 0, |
135 | .flags = IORESOURCE_RAM, | 151 | .flags = IORESOURCE_RAM, |
136 | }; | 152 | }; |
137 | struct resource bss_resource = { | 153 | static struct resource bss_resource = { |
138 | .name = "Kernel bss", | 154 | .name = "Kernel bss", |
139 | .start = 0, | 155 | .start = 0, |
140 | .end = 0, | 156 | .end = 0, |
141 | .flags = IORESOURCE_RAM, | 157 | .flags = IORESOURCE_RAM, |
142 | }; | 158 | }; |
143 | 159 | ||
160 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | ||
161 | |||
144 | #ifdef CONFIG_PROC_VMCORE | 162 | #ifdef CONFIG_PROC_VMCORE |
145 | /* elfcorehdr= specifies the location of elf core header | 163 | /* elfcorehdr= specifies the location of elf core header |
146 | * stored by the crashed kernel. This option will be passed | 164 | * stored by the crashed kernel. This option will be passed |
@@ -164,14 +182,15 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
164 | unsigned long bootmap_size, bootmap; | 182 | unsigned long bootmap_size, bootmap; |
165 | 183 | ||
166 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 184 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
167 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); | 185 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, |
186 | PAGE_SIZE); | ||
168 | if (bootmap == -1L) | 187 | if (bootmap == -1L) |
169 | panic("Cannot find bootmem map of size %ld\n",bootmap_size); | 188 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
170 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); | 189 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); |
171 | e820_register_active_regions(0, start_pfn, end_pfn); | 190 | e820_register_active_regions(0, start_pfn, end_pfn); |
172 | free_bootmem_with_active_regions(0, end_pfn); | 191 | free_bootmem_with_active_regions(0, end_pfn); |
173 | reserve_bootmem(bootmap, bootmap_size); | 192 | reserve_bootmem(bootmap, bootmap_size); |
174 | } | 193 | } |
175 | #endif | 194 | #endif |
176 | 195 | ||
177 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 196 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
@@ -205,7 +224,8 @@ static void __init reserve_crashkernel(void) | |||
205 | unsigned long long crash_size, crash_base; | 224 | unsigned long long crash_size, crash_base; |
206 | int ret; | 225 | int ret; |
207 | 226 | ||
208 | free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; | 227 | free_mem = |
228 | ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; | ||
209 | 229 | ||
210 | ret = parse_crashkernel(boot_command_line, free_mem, | 230 | ret = parse_crashkernel(boot_command_line, free_mem, |
211 | &crash_size, &crash_base); | 231 | &crash_size, &crash_base); |
@@ -229,33 +249,21 @@ static inline void __init reserve_crashkernel(void) | |||
229 | {} | 249 | {} |
230 | #endif | 250 | #endif |
231 | 251 | ||
232 | #define EBDA_ADDR_POINTER 0x40E | 252 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ |
233 | 253 | void __attribute__((weak)) __init memory_setup(void) | |
234 | unsigned __initdata ebda_addr; | ||
235 | unsigned __initdata ebda_size; | ||
236 | |||
237 | static void discover_ebda(void) | ||
238 | { | 254 | { |
239 | /* | 255 | machine_specific_memory_setup(); |
240 | * there is a real-mode segmented pointer pointing to the | ||
241 | * 4K EBDA area at 0x40E | ||
242 | */ | ||
243 | ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); | ||
244 | ebda_addr <<= 4; | ||
245 | |||
246 | ebda_size = *(unsigned short *)__va(ebda_addr); | ||
247 | |||
248 | /* Round EBDA up to pages */ | ||
249 | if (ebda_size == 0) | ||
250 | ebda_size = 1; | ||
251 | ebda_size <<= 10; | ||
252 | ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); | ||
253 | if (ebda_size > 64*1024) | ||
254 | ebda_size = 64*1024; | ||
255 | } | 256 | } |
256 | 257 | ||
258 | /* | ||
259 | * setup_arch - architecture-specific boot-time initializations | ||
260 | * | ||
261 | * Note: On x86_64, fixmaps are ready for use even before this is called. | ||
262 | */ | ||
257 | void __init setup_arch(char **cmdline_p) | 263 | void __init setup_arch(char **cmdline_p) |
258 | { | 264 | { |
265 | unsigned i; | ||
266 | |||
259 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 267 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
260 | 268 | ||
261 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 269 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
@@ -269,7 +277,15 @@ void __init setup_arch(char **cmdline_p) | |||
269 | rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); | 277 | rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); |
270 | rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); | 278 | rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); |
271 | #endif | 279 | #endif |
272 | setup_memory_region(); | 280 | #ifdef CONFIG_EFI |
281 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | ||
282 | "EL64", 4)) | ||
283 | efi_enabled = 1; | ||
284 | #endif | ||
285 | |||
286 | ARCH_SETUP | ||
287 | |||
288 | memory_setup(); | ||
273 | copy_edd(); | 289 | copy_edd(); |
274 | 290 | ||
275 | if (!boot_params.hdr.root_flags) | 291 | if (!boot_params.hdr.root_flags) |
@@ -293,27 +309,47 @@ void __init setup_arch(char **cmdline_p) | |||
293 | 309 | ||
294 | parse_early_param(); | 310 | parse_early_param(); |
295 | 311 | ||
312 | #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT | ||
313 | if (init_ohci1394_dma_early) | ||
314 | init_ohci1394_dma_on_all_controllers(); | ||
315 | #endif | ||
316 | |||
296 | finish_e820_parsing(); | 317 | finish_e820_parsing(); |
297 | 318 | ||
319 | early_gart_iommu_check(); | ||
320 | |||
298 | e820_register_active_regions(0, 0, -1UL); | 321 | e820_register_active_regions(0, 0, -1UL); |
299 | /* | 322 | /* |
300 | * partially used pages are not usable - thus | 323 | * partially used pages are not usable - thus |
301 | * we are rounding upwards: | 324 | * we are rounding upwards: |
302 | */ | 325 | */ |
303 | end_pfn = e820_end_of_ram(); | 326 | end_pfn = e820_end_of_ram(); |
327 | /* update e820 for memory not covered by WB MTRRs */ | ||
328 | mtrr_bp_init(); | ||
329 | if (mtrr_trim_uncached_memory(end_pfn)) { | ||
330 | e820_register_active_regions(0, 0, -1UL); | ||
331 | end_pfn = e820_end_of_ram(); | ||
332 | } | ||
333 | |||
304 | num_physpages = end_pfn; | 334 | num_physpages = end_pfn; |
305 | 335 | ||
306 | check_efer(); | 336 | check_efer(); |
307 | 337 | ||
308 | discover_ebda(); | ||
309 | |||
310 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); | 338 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); |
339 | if (efi_enabled) | ||
340 | efi_init(); | ||
311 | 341 | ||
312 | dmi_scan_machine(); | 342 | dmi_scan_machine(); |
313 | 343 | ||
344 | io_delay_init(); | ||
345 | |||
314 | #ifdef CONFIG_SMP | 346 | #ifdef CONFIG_SMP |
315 | /* setup to use the static apicid table during kernel startup */ | 347 | /* setup to use the early static init tables during kernel startup */ |
316 | x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; | 348 | x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; |
349 | x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; | ||
350 | #ifdef CONFIG_NUMA | ||
351 | x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; | ||
352 | #endif | ||
317 | #endif | 353 | #endif |
318 | 354 | ||
319 | #ifdef CONFIG_ACPI | 355 | #ifdef CONFIG_ACPI |
@@ -340,48 +376,26 @@ void __init setup_arch(char **cmdline_p) | |||
340 | #endif | 376 | #endif |
341 | 377 | ||
342 | #ifdef CONFIG_NUMA | 378 | #ifdef CONFIG_NUMA |
343 | numa_initmem_init(0, end_pfn); | 379 | numa_initmem_init(0, end_pfn); |
344 | #else | 380 | #else |
345 | contig_initmem_init(0, end_pfn); | 381 | contig_initmem_init(0, end_pfn); |
346 | #endif | 382 | #endif |
347 | 383 | ||
348 | /* Reserve direct mapping */ | 384 | early_res_to_bootmem(); |
349 | reserve_bootmem_generic(table_start << PAGE_SHIFT, | ||
350 | (table_end - table_start) << PAGE_SHIFT); | ||
351 | |||
352 | /* reserve kernel */ | ||
353 | reserve_bootmem_generic(__pa_symbol(&_text), | ||
354 | __pa_symbol(&_end) - __pa_symbol(&_text)); | ||
355 | 385 | ||
386 | #ifdef CONFIG_ACPI_SLEEP | ||
356 | /* | 387 | /* |
357 | * reserve physical page 0 - it's a special BIOS page on many boxes, | 388 | * Reserve low memory region for sleep support. |
358 | * enabling clean reboots, SMP operation, laptop functions. | ||
359 | */ | 389 | */ |
360 | reserve_bootmem_generic(0, PAGE_SIZE); | 390 | acpi_reserve_bootmem(); |
361 | |||
362 | /* reserve ebda region */ | ||
363 | if (ebda_addr) | ||
364 | reserve_bootmem_generic(ebda_addr, ebda_size); | ||
365 | #ifdef CONFIG_NUMA | ||
366 | /* reserve nodemap region */ | ||
367 | if (nodemap_addr) | ||
368 | reserve_bootmem_generic(nodemap_addr, nodemap_size); | ||
369 | #endif | 391 | #endif |
370 | 392 | ||
371 | #ifdef CONFIG_SMP | 393 | if (efi_enabled) |
372 | /* Reserve SMP trampoline */ | 394 | efi_reserve_bootmem(); |
373 | reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); | ||
374 | #endif | ||
375 | 395 | ||
376 | #ifdef CONFIG_ACPI_SLEEP | ||
377 | /* | 396 | /* |
378 | * Reserve low memory region for sleep support. | 397 | * Find and reserve possible boot-time SMP configuration: |
379 | */ | 398 | */ |
380 | acpi_reserve_bootmem(); | ||
381 | #endif | ||
382 | /* | ||
383 | * Find and reserve possible boot-time SMP configuration: | ||
384 | */ | ||
385 | find_smp_config(); | 399 | find_smp_config(); |
386 | #ifdef CONFIG_BLK_DEV_INITRD | 400 | #ifdef CONFIG_BLK_DEV_INITRD |
387 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 401 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
@@ -395,6 +409,8 @@ void __init setup_arch(char **cmdline_p) | |||
395 | initrd_start = ramdisk_image + PAGE_OFFSET; | 409 | initrd_start = ramdisk_image + PAGE_OFFSET; |
396 | initrd_end = initrd_start+ramdisk_size; | 410 | initrd_end = initrd_start+ramdisk_size; |
397 | } else { | 411 | } else { |
412 | /* Assumes everything on node 0 */ | ||
413 | free_bootmem(ramdisk_image, ramdisk_size); | ||
398 | printk(KERN_ERR "initrd extends beyond end of memory " | 414 | printk(KERN_ERR "initrd extends beyond end of memory " |
399 | "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | 415 | "(0x%08lx > 0x%08lx)\ndisabling initrd\n", |
400 | ramdisk_end, end_of_mem); | 416 | ramdisk_end, end_of_mem); |
@@ -404,17 +420,10 @@ void __init setup_arch(char **cmdline_p) | |||
404 | #endif | 420 | #endif |
405 | reserve_crashkernel(); | 421 | reserve_crashkernel(); |
406 | paging_init(); | 422 | paging_init(); |
423 | map_vsyscall(); | ||
407 | 424 | ||
408 | #ifdef CONFIG_PCI | ||
409 | early_quirks(); | 425 | early_quirks(); |
410 | #endif | ||
411 | 426 | ||
412 | /* | ||
413 | * set this early, so we dont allocate cpu0 | ||
414 | * if MADT list doesnt list BSP first | ||
415 | * mpparse.c/MP_processor_info() allocates logical cpu numbers. | ||
416 | */ | ||
417 | cpu_set(0, cpu_present_map); | ||
418 | #ifdef CONFIG_ACPI | 427 | #ifdef CONFIG_ACPI |
419 | /* | 428 | /* |
420 | * Read APIC and some other early information from ACPI tables. | 429 | * Read APIC and some other early information from ACPI tables. |
@@ -430,25 +439,24 @@ void __init setup_arch(char **cmdline_p) | |||
430 | if (smp_found_config) | 439 | if (smp_found_config) |
431 | get_smp_config(); | 440 | get_smp_config(); |
432 | init_apic_mappings(); | 441 | init_apic_mappings(); |
442 | ioapic_init_mappings(); | ||
433 | 443 | ||
434 | /* | 444 | /* |
435 | * We trust e820 completely. No explicit ROM probing in memory. | 445 | * We trust e820 completely. No explicit ROM probing in memory. |
436 | */ | 446 | */ |
437 | e820_reserve_resources(); | 447 | e820_reserve_resources(&code_resource, &data_resource, &bss_resource); |
438 | e820_mark_nosave_regions(); | 448 | e820_mark_nosave_regions(); |
439 | 449 | ||
440 | { | ||
441 | unsigned i; | ||
442 | /* request I/O space for devices used on all i[345]86 PCs */ | 450 | /* request I/O space for devices used on all i[345]86 PCs */ |
443 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | 451 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
444 | request_resource(&ioport_resource, &standard_io_resources[i]); | 452 | request_resource(&ioport_resource, &standard_io_resources[i]); |
445 | } | ||
446 | 453 | ||
447 | e820_setup_gap(); | 454 | e820_setup_gap(); |
448 | 455 | ||
449 | #ifdef CONFIG_VT | 456 | #ifdef CONFIG_VT |
450 | #if defined(CONFIG_VGA_CONSOLE) | 457 | #if defined(CONFIG_VGA_CONSOLE) |
451 | conswitchp = &vga_con; | 458 | if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) |
459 | conswitchp = &vga_con; | ||
452 | #elif defined(CONFIG_DUMMY_CONSOLE) | 460 | #elif defined(CONFIG_DUMMY_CONSOLE) |
453 | conswitchp = &dummy_con; | 461 | conswitchp = &dummy_con; |
454 | #endif | 462 | #endif |
@@ -479,9 +487,10 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
479 | 487 | ||
480 | if (n >= 0x80000005) { | 488 | if (n >= 0x80000005) { |
481 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | 489 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
482 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | 490 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " |
483 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | 491 | "D cache %dK (%d bytes/line)\n", |
484 | c->x86_cache_size=(ecx>>24)+(edx>>24); | 492 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); |
493 | c->x86_cache_size = (ecx>>24) + (edx>>24); | ||
485 | /* On K8 L1 TLB is inclusive, so don't count it */ | 494 | /* On K8 L1 TLB is inclusive, so don't count it */ |
486 | c->x86_tlbsize = 0; | 495 | c->x86_tlbsize = 0; |
487 | } | 496 | } |
@@ -495,11 +504,8 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
495 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | 504 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", |
496 | c->x86_cache_size, ecx & 0xFF); | 505 | c->x86_cache_size, ecx & 0xFF); |
497 | } | 506 | } |
498 | |||
499 | if (n >= 0x80000007) | ||
500 | cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power); | ||
501 | if (n >= 0x80000008) { | 507 | if (n >= 0x80000008) { |
502 | cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); | 508 | cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); |
503 | c->x86_virt_bits = (eax >> 8) & 0xff; | 509 | c->x86_virt_bits = (eax >> 8) & 0xff; |
504 | c->x86_phys_bits = eax & 0xff; | 510 | c->x86_phys_bits = eax & 0xff; |
505 | } | 511 | } |
@@ -508,14 +514,15 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
508 | #ifdef CONFIG_NUMA | 514 | #ifdef CONFIG_NUMA |
509 | static int nearby_node(int apicid) | 515 | static int nearby_node(int apicid) |
510 | { | 516 | { |
511 | int i; | 517 | int i, node; |
518 | |||
512 | for (i = apicid - 1; i >= 0; i--) { | 519 | for (i = apicid - 1; i >= 0; i--) { |
513 | int node = apicid_to_node[i]; | 520 | node = apicid_to_node[i]; |
514 | if (node != NUMA_NO_NODE && node_online(node)) | 521 | if (node != NUMA_NO_NODE && node_online(node)) |
515 | return node; | 522 | return node; |
516 | } | 523 | } |
517 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | 524 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { |
518 | int node = apicid_to_node[i]; | 525 | node = apicid_to_node[i]; |
519 | if (node != NUMA_NO_NODE && node_online(node)) | 526 | if (node != NUMA_NO_NODE && node_online(node)) |
520 | return node; | 527 | return node; |
521 | } | 528 | } |
@@ -527,7 +534,7 @@ static int nearby_node(int apicid) | |||
527 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | 534 |
528 | * Assumes number of cores is a power of two. | 535 | * Assumes number of cores is a power of two. |
529 | */ | 536 | */ |
530 | static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | 537 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) |
531 | { | 538 | { |
532 | #ifdef CONFIG_SMP | 539 | #ifdef CONFIG_SMP |
533 | unsigned bits; | 540 | unsigned bits; |
@@ -536,7 +543,54 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
536 | int node = 0; | 543 | int node = 0; |
537 | unsigned apicid = hard_smp_processor_id(); | 544 | unsigned apicid = hard_smp_processor_id(); |
538 | #endif | 545 | #endif |
539 | unsigned ecx = cpuid_ecx(0x80000008); | 546 | bits = c->x86_coreid_bits; |
547 | |||
548 | /* Low order bits define the core id (index of core in socket) */ | ||
549 | c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); | ||
550 | /* Convert the APIC ID into the socket ID */ | ||
551 | c->phys_proc_id = phys_pkg_id(bits); | ||
552 | |||
553 | #ifdef CONFIG_NUMA | ||
554 | node = c->phys_proc_id; | ||
555 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
556 | node = apicid_to_node[apicid]; | ||
557 | if (!node_online(node)) { | ||
558 | /* Two possibilities here: | ||
559 | - The CPU is missing memory and no node was created. | ||
560 | In that case try picking one from a nearby CPU | ||
561 | - The APIC IDs differ from the HyperTransport node IDs | ||
562 | which the K8 northbridge parsing fills in. | ||
563 | Assume they are all increased by a constant offset, | ||
564 | but in the same order as the HT nodeids. | ||
565 | If that doesn't result in a usable node fall back to the | ||
566 | path for the previous case. */ | ||
567 | |||
568 | int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); | ||
569 | |||
570 | if (ht_nodeid >= 0 && | ||
571 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
572 | node = apicid_to_node[ht_nodeid]; | ||
573 | /* Pick a nearby node */ | ||
574 | if (!node_online(node)) | ||
575 | node = nearby_node(apicid); | ||
576 | } | ||
577 | numa_set_node(cpu, node); | ||
578 | |||
579 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
580 | #endif | ||
581 | #endif | ||
582 | } | ||
583 | |||
584 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
585 | { | ||
586 | #ifdef CONFIG_SMP | ||
587 | unsigned bits, ecx; | ||
588 | |||
589 | /* Multi core CPU? */ | ||
590 | if (c->extended_cpuid_level < 0x80000008) | ||
591 | return; | ||
592 | |||
593 | ecx = cpuid_ecx(0x80000008); | ||
540 | 594 | ||
541 | c->x86_max_cores = (ecx & 0xff) + 1; | 595 | c->x86_max_cores = (ecx & 0xff) + 1; |
542 | 596 | ||
@@ -549,37 +603,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
549 | bits++; | 603 | bits++; |
550 | } | 604 | } |
551 | 605 | ||
552 | /* Low order bits define the core id (index of core in socket) */ | 606 | c->x86_coreid_bits = bits; |
553 | c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); | ||
554 | /* Convert the APIC ID into the socket ID */ | ||
555 | c->phys_proc_id = phys_pkg_id(bits); | ||
556 | |||
557 | #ifdef CONFIG_NUMA | ||
558 | node = c->phys_proc_id; | ||
559 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
560 | node = apicid_to_node[apicid]; | ||
561 | if (!node_online(node)) { | ||
562 | /* Two possibilities here: | ||
563 | - The CPU is missing memory and no node was created. | ||
564 | In that case try picking one from a nearby CPU | ||
565 | - The APIC IDs differ from the HyperTransport node IDs | ||
566 | which the K8 northbridge parsing fills in. | ||
567 | Assume they are all increased by a constant offset, | ||
568 | but in the same order as the HT nodeids. | ||
569 | If that doesn't result in a usable node fall back to the | ||
570 | path for the previous case. */ | ||
571 | int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); | ||
572 | if (ht_nodeid >= 0 && | ||
573 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
574 | node = apicid_to_node[ht_nodeid]; | ||
575 | /* Pick a nearby node */ | ||
576 | if (!node_online(node)) | ||
577 | node = nearby_node(apicid); | ||
578 | } | ||
579 | numa_set_node(cpu, node); | ||
580 | 607 | ||
581 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
582 | #endif | ||
583 | #endif | 608 | #endif |
584 | } | 609 | } |
585 | 610 | ||
@@ -595,8 +620,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
595 | /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ | 620 | /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ |
596 | static __cpuinit int amd_apic_timer_broken(void) | 621 | static __cpuinit int amd_apic_timer_broken(void) |
597 | { | 622 | { |
598 | u32 lo, hi; | 623 | u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); |
599 | u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | 624 | |
600 | switch (eax & CPUID_XFAM) { | 625 | switch (eax & CPUID_XFAM) { |
601 | case CPUID_XFAM_K8: | 626 | case CPUID_XFAM_K8: |
602 | if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) | 627 | if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) |
@@ -614,6 +639,15 @@ static __cpuinit int amd_apic_timer_broken(void) | |||
614 | return 0; | 639 | return 0; |
615 | } | 640 | } |
616 | 641 | ||
642 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
643 | { | ||
644 | early_init_amd_mc(c); | ||
645 | |||
646 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
647 | if (c->x86_power & (1<<8)) | ||
648 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
649 | } | ||
650 | |||
617 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 651 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
618 | { | 652 | { |
619 | unsigned level; | 653 | unsigned level; |
@@ -624,7 +658,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
624 | /* | 658 | /* |
625 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | 659 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 |
626 | * bit 6 of msr C001_0015 | 660 | * bit 6 of msr C001_0015 |
627 | * | 661 | * |
628 | * Errata 63 for SH-B3 steppings | 662 | * Errata 63 for SH-B3 steppings |
629 | * Errata 122 for all steppings (F+ have it disabled by default) | 663 | * Errata 122 for all steppings (F+ have it disabled by default) |
630 | */ | 664 | */ |
@@ -637,35 +671,32 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
637 | 671 | ||
638 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | 672 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; |
639 | 3DNow is identified by bit 31 in extended CPUID (1*32+31) anyway */ | 673 |
640 | clear_bit(0*32+31, &c->x86_capability); | 674 | clear_bit(0*32+31, (unsigned long *)&c->x86_capability); |
641 | 675 | ||
642 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | 676 | /* On C+ stepping K8 rep microcode works well for copy/memset */ |
643 | level = cpuid_eax(1); | 677 | level = cpuid_eax(1); |
644 | if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) | 678 | if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || |
645 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | 679 | level >= 0x0f58)) |
680 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
646 | if (c->x86 == 0x10 || c->x86 == 0x11) | 681 | if (c->x86 == 0x10 || c->x86 == 0x11) |
647 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | 682 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
648 | 683 | ||
649 | /* Enable workaround for FXSAVE leak */ | 684 | /* Enable workaround for FXSAVE leak */ |
650 | if (c->x86 >= 6) | 685 | if (c->x86 >= 6) |
651 | set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); | 686 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); |
652 | 687 | ||
653 | level = get_model_name(c); | 688 | level = get_model_name(c); |
654 | if (!level) { | 689 | if (!level) { |
655 | switch (c->x86) { | 690 | switch (c->x86) { |
656 | case 15: | 691 | case 15: |
657 | /* Should distinguish Models here, but this is only | 692 | /* Should distinguish Models here, but this is only |
658 | a fallback anyway. */ | 693 |
659 | strcpy(c->x86_model_id, "Hammer"); | 694 | strcpy(c->x86_model_id, "Hammer"); |
660 | break; | 695 | break; |
661 | } | 696 | } |
662 | } | 697 | } |
663 | display_cacheinfo(c); | 698 | display_cacheinfo(c); |
664 | 699 | ||
665 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
666 | if (c->x86_power & (1<<8)) | ||
667 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | ||
668 | |||
669 | /* Multi core CPU? */ | 700 | /* Multi core CPU? */ |
670 | if (c->extended_cpuid_level >= 0x80000008) | 701 | if (c->extended_cpuid_level >= 0x80000008) |
671 | amd_detect_cmp(c); | 702 | amd_detect_cmp(c); |
@@ -677,41 +708,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
677 | num_cache_leaves = 3; | 708 | num_cache_leaves = 3; |
678 | 709 | ||
679 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) | 710 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) |
680 | set_bit(X86_FEATURE_K8, &c->x86_capability); | 711 | set_cpu_cap(c, X86_FEATURE_K8); |
681 | |||
682 | /* RDTSC can be speculated around */ | ||
683 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
684 | 712 | ||
685 | /* Family 10 doesn't support C states in MWAIT so don't use it */ | 713 | /* MFENCE stops RDTSC speculation */ |
686 | if (c->x86 == 0x10 && !force_mwait) | 714 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); |
687 | clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); | ||
688 | 715 | ||
689 | if (amd_apic_timer_broken()) | 716 | if (amd_apic_timer_broken()) |
690 | disable_apic_timer = 1; | 717 | disable_apic_timer = 1; |
691 | } | 718 | } |
692 | 719 | ||
693 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 720 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
694 | { | 721 | { |
695 | #ifdef CONFIG_SMP | 722 | #ifdef CONFIG_SMP |
696 | u32 eax, ebx, ecx, edx; | 723 | u32 eax, ebx, ecx, edx; |
697 | int index_msb, core_bits; | 724 | int index_msb, core_bits; |
698 | 725 | ||
699 | cpuid(1, &eax, &ebx, &ecx, &edx); | 726 | cpuid(1, &eax, &ebx, &ecx, &edx); |
700 | 727 | ||
701 | 728 | ||
702 | if (!cpu_has(c, X86_FEATURE_HT)) | 729 | if (!cpu_has(c, X86_FEATURE_HT)) |
703 | return; | 730 | return; |
704 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 731 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
705 | goto out; | 732 | goto out; |
706 | 733 | ||
707 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 734 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
708 | 735 | ||
709 | if (smp_num_siblings == 1) { | 736 | if (smp_num_siblings == 1) { |
710 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 737 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
711 | } else if (smp_num_siblings > 1 ) { | 738 | } else if (smp_num_siblings > 1) { |
712 | 739 | ||
713 | if (smp_num_siblings > NR_CPUS) { | 740 | if (smp_num_siblings > NR_CPUS) { |
714 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 741 | printk(KERN_WARNING "CPU: Unsupported number of " |
742 | "siblings %d", smp_num_siblings); | ||
715 | smp_num_siblings = 1; | 743 | smp_num_siblings = 1; |
716 | return; | 744 | return; |
717 | } | 745 | } |
@@ -721,7 +749,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
721 | 749 | ||
722 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 750 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
723 | 751 | ||
724 | index_msb = get_count_order(smp_num_siblings) ; | 752 | index_msb = get_count_order(smp_num_siblings); |
725 | 753 | ||
726 | core_bits = get_count_order(c->x86_max_cores); | 754 | core_bits = get_count_order(c->x86_max_cores); |
727 | 755 | ||
@@ -730,8 +758,10 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
730 | } | 758 | } |
731 | out: | 759 | out: |
732 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | 760 | if ((c->x86_max_cores * smp_num_siblings) > 1) { |
733 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); | 761 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
734 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); | 762 | c->phys_proc_id); |
763 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
764 | c->cpu_core_id); | ||
735 | } | 765 | } |
736 | 766 | ||
737 | #endif | 767 | #endif |
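detect_ht() derives its counts from CPUID leaf 1: EBX[23:16] reports logical CPUs per package, dividing by the core count gives threads per core, and get_count_order() turns each count into the bit width of the corresponding APIC-ID field. A simplified standalone model with example values follows; the count_order() helper is a local stand-in for the kernel's get_count_order(), and the CPUID value is invented.

/* Simplified model of the sibling/core-bit arithmetic in detect_ht(). */
#include <stdio.h>

/* ceil(log2(x)) for x >= 1 */
static int count_order(unsigned int x)
{
	int order = 0;

	while ((1u << order) < x)
		order++;
	return order;
}

int main(void)
{
	unsigned int ebx = 8u << 16;	/* CPUID leaf 1: EBX[23:16] = logical CPUs/pkg */
	unsigned int max_cores = 4;	/* from the vendor-specific core count */

	unsigned int logical  = (ebx & 0xff0000) >> 16;	/* 8 logical CPUs */
	unsigned int siblings = logical / max_cores;	/* 2 threads per core */
	int index_msb = count_order(siblings);		/* bits for the thread ID */
	int core_bits = count_order(max_cores);		/* bits for the core ID */

	printf("logical=%u siblings/core=%u thread bits=%d core bits=%d\n",
	       logical, siblings, index_msb, core_bits);
	return 0;
}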
@@ -773,28 +803,39 @@ static void srat_detect_node(void) | |||
773 | #endif | 803 | #endif |
774 | } | 804 | } |
775 | 805 | ||
806 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | ||
807 | { | ||
808 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
809 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
810 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | ||
811 | } | ||
812 | |||
776 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | 813 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) |
777 | { | 814 | { |
778 | /* Cache sizes */ | 815 | /* Cache sizes */ |
779 | unsigned n; | 816 | unsigned n; |
780 | 817 | ||
781 | init_intel_cacheinfo(c); | 818 | init_intel_cacheinfo(c); |
782 | if (c->cpuid_level > 9 ) { | 819 | if (c->cpuid_level > 9) { |
783 | unsigned eax = cpuid_eax(10); | 820 | unsigned eax = cpuid_eax(10); |
784 | /* Check for version and the number of counters */ | 821 | /* Check for version and the number of counters */ |
785 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | 822 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) |
786 | set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); | 823 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
787 | } | 824 | } |
788 | 825 | ||
789 | if (cpu_has_ds) { | 826 | if (cpu_has_ds) { |
790 | unsigned int l1, l2; | 827 | unsigned int l1, l2; |
791 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | 828 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); |
792 | if (!(l1 & (1<<11))) | 829 | if (!(l1 & (1<<11))) |
793 | set_bit(X86_FEATURE_BTS, c->x86_capability); | 830 | set_cpu_cap(c, X86_FEATURE_BTS); |
794 | if (!(l1 & (1<<12))) | 831 | if (!(l1 & (1<<12))) |
795 | set_bit(X86_FEATURE_PEBS, c->x86_capability); | 832 | set_cpu_cap(c, X86_FEATURE_PEBS); |
796 | } | 833 | } |
797 | 834 | ||
835 | |||
836 | if (cpu_has_bts) | ||
837 | ds_init_intel(c); | ||
838 | |||
798 | n = c->extended_cpuid_level; | 839 | n = c->extended_cpuid_level; |
799 | if (n >= 0x80000008) { | 840 | if (n >= 0x80000008) { |
800 | unsigned eax = cpuid_eax(0x80000008); | 841 | unsigned eax = cpuid_eax(0x80000008); |
@@ -811,14 +852,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
811 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 852 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
812 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 853 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
813 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 854 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
814 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 855 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
815 | if (c->x86 == 6) | 856 | if (c->x86 == 6) |
816 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | 857 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
817 | if (c->x86 == 15) | 858 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
818 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | 859 | c->x86_max_cores = intel_num_cpu_cores(c); |
819 | else | ||
820 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
821 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
822 | 860 | ||
823 | srat_detect_node(); | 861 | srat_detect_node(); |
824 | } | 862 | } |
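The architectural-perfmon test in init_intel() reads CPUID leaf 0xA, where EAX[7:0] is the perfmon version and EAX[15:8] the number of general-purpose counters; the capability is only set when the version is non-zero and more than one counter is present. A small worked example with an invented leaf value:

/* Worked example of the CPUID leaf 0xA check; the value is made up. */
#include <stdio.h>

int main(void)
{
	unsigned int eax = (2u << 8) | 2u;	/* pretend: version 2, 2 counters */
	unsigned int version  = eax & 0xff;
	unsigned int counters = (eax >> 8) & 0xff;

	if (version && counters > 1)
		printf("arch_perfmon usable: v%u, %u counters\n", version, counters);
	else
		printf("arch_perfmon not advertised\n");
	return 0;
}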
@@ -835,18 +873,12 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |||
835 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 873 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
836 | } | 874 | } |
837 | 875 | ||
838 | struct cpu_model_info { | ||
839 | int vendor; | ||
840 | int family; | ||
841 | char *model_names[16]; | ||
842 | }; | ||
843 | |||
844 | /* Do some early cpuid on the boot CPU to get some parameters that are | 876 |
845 | needed before check_bugs. Everything advanced is in identify_cpu | 877 | needed before check_bugs. Everything advanced is in identify_cpu |
846 | below. */ | 878 | below. */ |
847 | void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | 879 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) |
848 | { | 880 | { |
849 | u32 tfms; | 881 | u32 tfms, xlvl; |
850 | 882 | ||
851 | c->loops_per_jiffy = loops_per_jiffy; | 883 | c->loops_per_jiffy = loops_per_jiffy; |
852 | c->x86_cache_size = -1; | 884 | c->x86_cache_size = -1; |
@@ -857,6 +889,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
857 | c->x86_clflush_size = 64; | 889 | c->x86_clflush_size = 64; |
858 | c->x86_cache_alignment = c->x86_clflush_size; | 890 | c->x86_cache_alignment = c->x86_clflush_size; |
859 | c->x86_max_cores = 1; | 891 | c->x86_max_cores = 1; |
892 | c->x86_coreid_bits = 0; | ||
860 | c->extended_cpuid_level = 0; | 893 | c->extended_cpuid_level = 0; |
861 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 894 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
862 | 895 | ||
@@ -865,7 +898,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
865 | (unsigned int *)&c->x86_vendor_id[0], | 898 | (unsigned int *)&c->x86_vendor_id[0], |
866 | (unsigned int *)&c->x86_vendor_id[8], | 899 | (unsigned int *)&c->x86_vendor_id[8], |
867 | (unsigned int *)&c->x86_vendor_id[4]); | 900 | (unsigned int *)&c->x86_vendor_id[4]); |
868 | 901 | ||
869 | get_cpu_vendor(c); | 902 | get_cpu_vendor(c); |
870 | 903 | ||
871 | /* Initialize the standard set of capabilities */ | 904 | /* Initialize the standard set of capabilities */ |
@@ -883,7 +916,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
883 | c->x86 += (tfms >> 20) & 0xff; | 916 | c->x86 += (tfms >> 20) & 0xff; |
884 | if (c->x86 >= 0x6) | 917 | if (c->x86 >= 0x6) |
885 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 918 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
886 | if (c->x86_capability[0] & (1<<19)) | 919 | if (c->x86_capability[0] & (1<<19)) |
887 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 920 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
888 | } else { | 921 | } else { |
889 | /* Have CPUID level 0 only - unheard of */ | 922 | /* Have CPUID level 0 only - unheard of */ |
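early_identify_cpu() decodes CPUID leaf 1: EAX (tfms) packs stepping, model and family, with the extended family added only for family 0xf and the extended model folded in for family 6 and above, while EBX[15:8] gives the CLFLUSH line size in 8-byte units. A worked decode of a plausible, purely illustrative signature:

/* Worked CPUID leaf 1 decode; the sample EAX/EBX values are illustrative. */
#include <stdio.h>

int main(void)
{
	unsigned int tfms = 0x00010676;	/* e.g. a family 6, model 0x17 signature */
	unsigned int misc = 0x00000800;	/* EBX[15:8] = 8 -> 8 * 8 = 64-byte line */

	unsigned int family   = (tfms >> 8) & 0xf;
	unsigned int model    = (tfms >> 4) & 0xf;
	unsigned int stepping = tfms & 0xf;

	if (family == 0xf)			/* extended family only for family 0xf */
		family += (tfms >> 20) & 0xff;
	if (family >= 0x6)			/* extended model for family 6 and up */
		model += ((tfms >> 16) & 0xf) << 4;

	unsigned int clflush = ((misc >> 8) & 0xff) * 8;

	printf("family %#x model %#x stepping %u, clflush %u bytes\n",
	       family, model, stepping, clflush);
	return 0;
}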
@@ -893,18 +926,6 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
893 | #ifdef CONFIG_SMP | 926 | #ifdef CONFIG_SMP |
894 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; | 927 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; |
895 | #endif | 928 | #endif |
896 | } | ||
897 | |||
898 | /* | ||
899 | * This does the hard work of actually picking apart the CPU stuff... | ||
900 | */ | ||
901 | void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
902 | { | ||
903 | int i; | ||
904 | u32 xlvl; | ||
905 | |||
906 | early_identify_cpu(c); | ||
907 | |||
908 | /* AMD-defined flags: level 0x80000001 */ | 929 | /* AMD-defined flags: level 0x80000001 */ |
909 | xlvl = cpuid_eax(0x80000000); | 930 | xlvl = cpuid_eax(0x80000000); |
910 | c->extended_cpuid_level = xlvl; | 931 | c->extended_cpuid_level = xlvl; |
@@ -925,6 +946,30 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
925 | c->x86_capability[2] = cpuid_edx(0x80860001); | 946 | c->x86_capability[2] = cpuid_edx(0x80860001); |
926 | } | 947 | } |
927 | 948 | ||
949 | c->extended_cpuid_level = cpuid_eax(0x80000000); | ||
950 | if (c->extended_cpuid_level >= 0x80000007) | ||
951 | c->x86_power = cpuid_edx(0x80000007); | ||
952 | |||
953 | switch (c->x86_vendor) { | ||
954 | case X86_VENDOR_AMD: | ||
955 | early_init_amd(c); | ||
956 | break; | ||
957 | case X86_VENDOR_INTEL: | ||
958 | early_init_intel(c); | ||
959 | break; | ||
960 | } | ||
961 | |||
962 | } | ||
963 | |||
964 | /* | ||
965 | * This does the hard work of actually picking apart the CPU stuff... | ||
966 | */ | ||
967 | void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
968 | { | ||
969 | int i; | ||
970 | |||
971 | early_identify_cpu(c); | ||
972 | |||
928 | init_scattered_cpuid_features(c); | 973 | init_scattered_cpuid_features(c); |
929 | 974 | ||
930 | c->apicid = phys_pkg_id(0); | 975 | c->apicid = phys_pkg_id(0); |
@@ -954,8 +999,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
954 | break; | 999 | break; |
955 | } | 1000 | } |
956 | 1001 | ||
957 | select_idle_routine(c); | 1002 | detect_ht(c); |
958 | detect_ht(c); | ||
959 | 1003 | ||
960 | /* | 1004 | /* |
961 | * On SMP, boot_cpu_data holds the common feature set between | 1005 | * On SMP, boot_cpu_data holds the common feature set between |
@@ -965,31 +1009,55 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
965 | */ | 1009 | */ |
966 | if (c != &boot_cpu_data) { | 1010 | if (c != &boot_cpu_data) { |
967 | /* AND the already accumulated flags with these */ | 1011 | /* AND the already accumulated flags with these */ |
968 | for (i = 0 ; i < NCAPINTS ; i++) | 1012 | for (i = 0; i < NCAPINTS; i++) |
969 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 1013 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
970 | } | 1014 | } |
971 | 1015 | ||
1016 | /* Clear all flags overridden by options */ | ||
1017 | for (i = 0; i < NCAPINTS; i++) | ||
1018 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | ||
1019 | |||
972 | #ifdef CONFIG_X86_MCE | 1020 | #ifdef CONFIG_X86_MCE |
973 | mcheck_init(c); | 1021 | mcheck_init(c); |
974 | #endif | 1022 | #endif |
1023 | select_idle_routine(c); | ||
1024 | |||
975 | if (c != &boot_cpu_data) | 1025 | if (c != &boot_cpu_data) |
976 | mtrr_ap_init(); | 1026 | mtrr_ap_init(); |
977 | #ifdef CONFIG_NUMA | 1027 | #ifdef CONFIG_NUMA |
978 | numa_add_cpu(smp_processor_id()); | 1028 | numa_add_cpu(smp_processor_id()); |
979 | #endif | 1029 | #endif |
1030 | |||
980 | } | 1031 | } |
981 | 1032 | ||
1033 | static __init int setup_noclflush(char *arg) | ||
1034 | { | ||
1035 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | ||
1036 | return 1; | ||
1037 | } | ||
1038 | __setup("noclflush", setup_noclflush); | ||
982 | 1039 | ||
983 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | 1040 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) |
984 | { | 1041 | { |
985 | if (c->x86_model_id[0]) | 1042 | if (c->x86_model_id[0]) |
986 | printk("%s", c->x86_model_id); | 1043 | printk(KERN_INFO "%s", c->x86_model_id); |
987 | 1044 | ||
988 | if (c->x86_mask || c->cpuid_level >= 0) | 1045 | if (c->x86_mask || c->cpuid_level >= 0) |
989 | printk(" stepping %02x\n", c->x86_mask); | 1046 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); |
990 | else | 1047 | else |
991 | printk("\n"); | 1048 | printk(KERN_CONT "\n"); |
1049 | } | ||
1050 | |||
1051 | static __init int setup_disablecpuid(char *arg) | ||
1052 | { | ||
1053 | int bit; | ||
1054 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | ||
1055 | setup_clear_cpu_cap(bit); | ||
1056 | else | ||
1057 | return 0; | ||
1058 | return 1; | ||
992 | } | 1059 | } |
1060 | __setup("clearcpuid=", setup_disablecpuid); | ||
993 | 1061 | ||
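The new noclflush and clearcpuid= options record feature bits to drop, and identify_cpu() then strips those bits from every CPU's capability words (the cleared_cpu_caps loop added above). Below is a simplified model of that record-and-apply scheme; NCAPINTS and the helper are local stand-ins, and the mask is applied with a plain and-not rather than the kernel's exact code.

/* Simplified sketch of recording cleared capability bits and applying them. */
#include <stdio.h>

#define NCAPINTS 8			/* capability words in this sketch */

static unsigned int cleared_caps[NCAPINTS];

static void clear_cpu_cap_sketch(int bit)
{
	if (bit >= 0 && bit < NCAPINTS * 32)
		cleared_caps[bit / 32] |= 1u << (bit % 32);
}

int main(void)
{
	unsigned int caps[NCAPINTS] = { 0xffffffff };	/* pretend word 0 is all set */

	clear_cpu_cap_sketch(19);	/* e.g. the CLFLUSH feature bit (word 0, bit 19) */

	/* applied per CPU after the feature words have been read */
	for (int i = 0; i < NCAPINTS; i++)
		caps[i] &= ~cleared_caps[i];

	printf("word 0 after clearing bit 19: %#010x\n", caps[0]);	/* 0xfff7ffff */
	return 0;
}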
994 | /* | 1062 | /* |
995 | * Get CPU information for use by the procfs. | 1063 | * Get CPU information for use by the procfs. |
@@ -998,116 +1066,41 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
998 | static int show_cpuinfo(struct seq_file *m, void *v) | 1066 | static int show_cpuinfo(struct seq_file *m, void *v) |
999 | { | 1067 | { |
1000 | struct cpuinfo_x86 *c = v; | 1068 | struct cpuinfo_x86 *c = v; |
1001 | int cpu = 0; | 1069 | int cpu = 0, i; |
1002 | |||
1003 | /* | ||
1004 | * These flag bits must match the definitions in <asm/cpufeature.h>. | ||
1005 | * NULL means this bit is undefined or reserved; either way it doesn't | ||
1006 | * have meaning as far as Linux is concerned. Note that it's important | ||
1007 | * to realize there is a difference between this table and CPUID -- if | ||
1008 | * applications want to get the raw CPUID data, they should access | ||
1009 | * /dev/cpu/<cpu_nr>/cpuid instead. | ||
1010 | */ | ||
1011 | static const char *const x86_cap_flags[] = { | ||
1012 | /* Intel-defined */ | ||
1013 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | ||
1014 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | ||
1015 | "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | ||
1016 | "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | ||
1017 | |||
1018 | /* AMD-defined */ | ||
1019 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1020 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | ||
1021 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, | ||
1022 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", | ||
1023 | "3dnowext", "3dnow", | ||
1024 | |||
1025 | /* Transmeta-defined */ | ||
1026 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | ||
1027 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1028 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1029 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1030 | |||
1031 | /* Other (Linux-defined) */ | ||
1032 | "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | ||
1033 | NULL, NULL, NULL, NULL, | ||
1034 | "constant_tsc", "up", NULL, "arch_perfmon", | ||
1035 | "pebs", "bts", NULL, "sync_rdtsc", | ||
1036 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1037 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1038 | |||
1039 | /* Intel-defined (#2) */ | ||
1040 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | ||
1041 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | ||
1042 | NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", | ||
1043 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1044 | |||
1045 | /* VIA/Cyrix/Centaur-defined */ | ||
1046 | NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | ||
1047 | "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, | ||
1048 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1049 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1050 | |||
1051 | /* AMD-defined (#2) */ | ||
1052 | "lahf_lm", "cmp_legacy", "svm", "extapic", | ||
1053 | "cr8_legacy", "abm", "sse4a", "misalignsse", | ||
1054 | "3dnowprefetch", "osvw", "ibs", "sse5", | ||
1055 | "skinit", "wdt", NULL, NULL, | ||
1056 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1057 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1058 | |||
1059 | /* Auxiliary (Linux-defined) */ | ||
1060 | "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1061 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1062 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1063 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
1064 | }; | ||
1065 | static const char *const x86_power_flags[] = { | ||
1066 | "ts", /* temperature sensor */ | ||
1067 | "fid", /* frequency id control */ | ||
1068 | "vid", /* voltage id control */ | ||
1069 | "ttp", /* thermal trip */ | ||
1070 | "tm", | ||
1071 | "stc", | ||
1072 | "100mhzsteps", | ||
1073 | "hwpstate", | ||
1074 | "", /* tsc invariant mapped to constant_tsc */ | ||
1075 | /* nothing */ | ||
1076 | }; | ||
1077 | |||
1078 | 1070 | ||
1079 | #ifdef CONFIG_SMP | 1071 | #ifdef CONFIG_SMP |
1080 | cpu = c->cpu_index; | 1072 | cpu = c->cpu_index; |
1081 | #endif | 1073 | #endif |
1082 | 1074 | ||
1083 | seq_printf(m,"processor\t: %u\n" | 1075 | seq_printf(m, "processor\t: %u\n" |
1084 | "vendor_id\t: %s\n" | 1076 | "vendor_id\t: %s\n" |
1085 | "cpu family\t: %d\n" | 1077 | "cpu family\t: %d\n" |
1086 | "model\t\t: %d\n" | 1078 | "model\t\t: %d\n" |
1087 | "model name\t: %s\n", | 1079 | "model name\t: %s\n", |
1088 | (unsigned)cpu, | 1080 | (unsigned)cpu, |
1089 | c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", | 1081 | c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", |
1090 | c->x86, | 1082 | c->x86, |
1091 | (int)c->x86_model, | 1083 | (int)c->x86_model, |
1092 | c->x86_model_id[0] ? c->x86_model_id : "unknown"); | 1084 | c->x86_model_id[0] ? c->x86_model_id : "unknown"); |
1093 | 1085 | ||
1094 | if (c->x86_mask || c->cpuid_level >= 0) | 1086 | if (c->x86_mask || c->cpuid_level >= 0) |
1095 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); | 1087 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); |
1096 | else | 1088 | else |
1097 | seq_printf(m, "stepping\t: unknown\n"); | 1089 | seq_printf(m, "stepping\t: unknown\n"); |
1098 | 1090 | ||
1099 | if (cpu_has(c,X86_FEATURE_TSC)) { | 1091 | if (cpu_has(c, X86_FEATURE_TSC)) { |
1100 | unsigned int freq = cpufreq_quick_get((unsigned)cpu); | 1092 | unsigned int freq = cpufreq_quick_get((unsigned)cpu); |
1093 | |||
1101 | if (!freq) | 1094 | if (!freq) |
1102 | freq = cpu_khz; | 1095 | freq = cpu_khz; |
1103 | seq_printf(m, "cpu MHz\t\t: %u.%03u\n", | 1096 | seq_printf(m, "cpu MHz\t\t: %u.%03u\n", |
1104 | freq / 1000, (freq % 1000)); | 1097 | freq / 1000, (freq % 1000)); |
1105 | } | 1098 | } |
1106 | 1099 | ||
1107 | /* Cache size */ | 1100 | /* Cache size */ |
1108 | if (c->x86_cache_size >= 0) | 1101 | if (c->x86_cache_size >= 0) |
1109 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 1102 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
1110 | 1103 | ||
1111 | #ifdef CONFIG_SMP | 1104 | #ifdef CONFIG_SMP |
1112 | if (smp_num_siblings * c->x86_max_cores > 1) { | 1105 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1113 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | 1106 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); |
@@ -1116,48 +1109,43 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1116 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | 1109 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); |
1117 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | 1110 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1118 | } | 1111 | } |
1119 | #endif | 1112 | #endif |
1120 | 1113 | ||
1121 | seq_printf(m, | 1114 | seq_printf(m, |
1122 | "fpu\t\t: yes\n" | 1115 | "fpu\t\t: yes\n" |
1123 | "fpu_exception\t: yes\n" | 1116 | "fpu_exception\t: yes\n" |
1124 | "cpuid level\t: %d\n" | 1117 | "cpuid level\t: %d\n" |
1125 | "wp\t\t: yes\n" | 1118 | "wp\t\t: yes\n" |
1126 | "flags\t\t:", | 1119 | "flags\t\t:", |
1127 | c->cpuid_level); | 1120 | c->cpuid_level); |
1128 | 1121 | ||
1129 | { | 1122 | for (i = 0; i < 32*NCAPINTS; i++) |
1130 | int i; | 1123 | if (cpu_has(c, i) && x86_cap_flags[i] != NULL) |
1131 | for ( i = 0 ; i < 32*NCAPINTS ; i++ ) | 1124 | seq_printf(m, " %s", x86_cap_flags[i]); |
1132 | if (cpu_has(c, i) && x86_cap_flags[i] != NULL) | 1125 | |
1133 | seq_printf(m, " %s", x86_cap_flags[i]); | ||
1134 | } | ||
1135 | |||
1136 | seq_printf(m, "\nbogomips\t: %lu.%02lu\n", | 1126 | seq_printf(m, "\nbogomips\t: %lu.%02lu\n", |
1137 | c->loops_per_jiffy/(500000/HZ), | 1127 | c->loops_per_jiffy/(500000/HZ), |
1138 | (c->loops_per_jiffy/(5000/HZ)) % 100); | 1128 | (c->loops_per_jiffy/(5000/HZ)) % 100); |
1139 | 1129 | ||
1140 | if (c->x86_tlbsize > 0) | 1130 | if (c->x86_tlbsize > 0) |
1141 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); | 1131 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); |
1142 | seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); | 1132 | seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size); |
1143 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); | 1133 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); |
1144 | 1134 | ||
1145 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | 1135 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", |
1146 | c->x86_phys_bits, c->x86_virt_bits); | 1136 | c->x86_phys_bits, c->x86_virt_bits); |
1147 | 1137 | ||
1148 | seq_printf(m, "power management:"); | 1138 | seq_printf(m, "power management:"); |
1149 | { | 1139 | for (i = 0; i < 32; i++) { |
1150 | unsigned i; | 1140 | if (c->x86_power & (1 << i)) { |
1151 | for (i = 0; i < 32; i++) | 1141 | if (i < ARRAY_SIZE(x86_power_flags) && |
1152 | if (c->x86_power & (1 << i)) { | 1142 | x86_power_flags[i]) |
1153 | if (i < ARRAY_SIZE(x86_power_flags) && | 1143 | seq_printf(m, "%s%s", |
1154 | x86_power_flags[i]) | 1144 | x86_power_flags[i][0]?" ":"", |
1155 | seq_printf(m, "%s%s", | 1145 | x86_power_flags[i]); |
1156 | x86_power_flags[i][0]?" ":"", | 1146 | else |
1157 | x86_power_flags[i]); | 1147 | seq_printf(m, " [%d]", i); |
1158 | else | 1148 | } |
1159 | seq_printf(m, " [%d]", i); | ||
1160 | } | ||
1161 | } | 1149 | } |
1162 | 1150 | ||
1163 | seq_printf(m, "\n\n"); | 1151 | seq_printf(m, "\n\n"); |
@@ -1184,8 +1172,8 @@ static void c_stop(struct seq_file *m, void *v) | |||
1184 | { | 1172 | { |
1185 | } | 1173 | } |
1186 | 1174 | ||
1187 | struct seq_operations cpuinfo_op = { | 1175 | const struct seq_operations cpuinfo_op = { |
1188 | .start =c_start, | 1176 | .start = c_start, |
1189 | .next = c_next, | 1177 | .next = c_next, |
1190 | .stop = c_stop, | 1178 | .stop = c_stop, |
1191 | .show = show_cpuinfo, | 1179 | .show = show_cpuinfo, |
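For reference, the bogomips line printed by show_cpuinfo() is just loops_per_jiffy rescaled into a fixed-point "%lu.%02lu" figure. A tiny example with invented HZ and calibration values:

/* Worked example of the BogoMIPS formatting; numbers are made up. */
#include <stdio.h>

#define HZ 250

int main(void)
{
	unsigned long loops_per_jiffy = 4800000UL;	/* example calibration result */

	unsigned long whole = loops_per_jiffy / (500000 / HZ);
	unsigned long frac  = (loops_per_jiffy / (5000 / HZ)) % 100;

	printf("bogomips\t: %lu.%02lu\n", whole, frac);	/* -> 2400.00 */
	return 0;
}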
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 9bdd83022f5f..caee1f002fed 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/ucontext.h> | 23 | #include <asm/ucontext.h> |
24 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
25 | #include <asm/i387.h> | 25 | #include <asm/i387.h> |
26 | #include <asm/vdso.h> | ||
26 | #include "sigframe_32.h" | 27 | #include "sigframe_32.h" |
27 | 28 | ||
28 | #define DEBUG_SIG 0 | 29 | #define DEBUG_SIG 0 |
@@ -81,14 +82,14 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
81 | } | 82 | } |
82 | 83 | ||
83 | asmlinkage int | 84 | asmlinkage int |
84 | sys_sigaltstack(unsigned long ebx) | 85 | sys_sigaltstack(unsigned long bx) |
85 | { | 86 | { |
86 | /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */ | 87 | /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */ |
87 | struct pt_regs *regs = (struct pt_regs *)&ebx; | 88 | struct pt_regs *regs = (struct pt_regs *)&bx; |
88 | const stack_t __user *uss = (const stack_t __user *)ebx; | 89 | const stack_t __user *uss = (const stack_t __user *)bx; |
89 | stack_t __user *uoss = (stack_t __user *)regs->ecx; | 90 | stack_t __user *uoss = (stack_t __user *)regs->cx; |
90 | 91 | ||
91 | return do_sigaltstack(uss, uoss, regs->esp); | 92 | return do_sigaltstack(uss, uoss, regs->sp); |
92 | } | 93 | } |
93 | 94 | ||
94 | 95 | ||
@@ -109,12 +110,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
109 | #define COPY_SEG(seg) \ | 110 | #define COPY_SEG(seg) \ |
110 | { unsigned short tmp; \ | 111 | { unsigned short tmp; \ |
111 | err |= __get_user(tmp, &sc->seg); \ | 112 | err |= __get_user(tmp, &sc->seg); \ |
112 | regs->x##seg = tmp; } | 113 | regs->seg = tmp; } |
113 | 114 | ||
114 | #define COPY_SEG_STRICT(seg) \ | 115 | #define COPY_SEG_STRICT(seg) \ |
115 | { unsigned short tmp; \ | 116 | { unsigned short tmp; \ |
116 | err |= __get_user(tmp, &sc->seg); \ | 117 | err |= __get_user(tmp, &sc->seg); \ |
117 | regs->x##seg = tmp|3; } | 118 | regs->seg = tmp|3; } |
118 | 119 | ||
119 | #define GET_SEG(seg) \ | 120 | #define GET_SEG(seg) \ |
120 | { unsigned short tmp; \ | 121 | { unsigned short tmp; \ |
@@ -130,22 +131,22 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
130 | COPY_SEG(fs); | 131 | COPY_SEG(fs); |
131 | COPY_SEG(es); | 132 | COPY_SEG(es); |
132 | COPY_SEG(ds); | 133 | COPY_SEG(ds); |
133 | COPY(edi); | 134 | COPY(di); |
134 | COPY(esi); | 135 | COPY(si); |
135 | COPY(ebp); | 136 | COPY(bp); |
136 | COPY(esp); | 137 | COPY(sp); |
137 | COPY(ebx); | 138 | COPY(bx); |
138 | COPY(edx); | 139 | COPY(dx); |
139 | COPY(ecx); | 140 | COPY(cx); |
140 | COPY(eip); | 141 | COPY(ip); |
141 | COPY_SEG_STRICT(cs); | 142 | COPY_SEG_STRICT(cs); |
142 | COPY_SEG_STRICT(ss); | 143 | COPY_SEG_STRICT(ss); |
143 | 144 | ||
144 | { | 145 | { |
145 | unsigned int tmpflags; | 146 | unsigned int tmpflags; |
146 | err |= __get_user(tmpflags, &sc->eflags); | 147 | err |= __get_user(tmpflags, &sc->flags); |
147 | regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 148 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
148 | regs->orig_eax = -1; /* disable syscall checks */ | 149 | regs->orig_ax = -1; /* disable syscall checks */ |
149 | } | 150 | } |
150 | 151 | ||
151 | { | 152 | { |
@@ -164,7 +165,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
164 | } | 165 | } |
165 | } | 166 | } |
166 | 167 | ||
167 | err |= __get_user(*peax, &sc->eax); | 168 | err |= __get_user(*peax, &sc->ax); |
168 | return err; | 169 | return err; |
169 | 170 | ||
170 | badframe: | 171 | badframe: |
@@ -174,9 +175,9 @@ badframe: | |||
174 | asmlinkage int sys_sigreturn(unsigned long __unused) | 175 | asmlinkage int sys_sigreturn(unsigned long __unused) |
175 | { | 176 | { |
176 | struct pt_regs *regs = (struct pt_regs *) &__unused; | 177 | struct pt_regs *regs = (struct pt_regs *) &__unused; |
177 | struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8); | 178 | struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8); |
178 | sigset_t set; | 179 | sigset_t set; |
179 | int eax; | 180 | int ax; |
180 | 181 | ||
181 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 182 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
182 | goto badframe; | 183 | goto badframe; |
@@ -192,17 +193,20 @@ asmlinkage int sys_sigreturn(unsigned long __unused) | |||
192 | recalc_sigpending(); | 193 | recalc_sigpending(); |
193 | spin_unlock_irq(¤t->sighand->siglock); | 194 | spin_unlock_irq(¤t->sighand->siglock); |
194 | 195 | ||
195 | if (restore_sigcontext(regs, &frame->sc, &eax)) | 196 | if (restore_sigcontext(regs, &frame->sc, &ax)) |
196 | goto badframe; | 197 | goto badframe; |
197 | return eax; | 198 | return ax; |
198 | 199 | ||
199 | badframe: | 200 | badframe: |
200 | if (show_unhandled_signals && printk_ratelimit()) | 201 | if (show_unhandled_signals && printk_ratelimit()) { |
201 | printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" | 202 | printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx" |
202 | " esp:%lx oeax:%lx\n", | 203 | " sp:%lx oeax:%lx", |
203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | 204 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, |
204 | current->comm, task_pid_nr(current), frame, regs->eip, | 205 | current->comm, task_pid_nr(current), frame, regs->ip, |
205 | regs->esp, regs->orig_eax); | 206 | regs->sp, regs->orig_ax); |
207 | print_vma_addr(" in ", regs->ip); | ||
208 | printk("\n"); | ||
209 | } | ||
206 | 210 | ||
207 | force_sig(SIGSEGV, current); | 211 | force_sig(SIGSEGV, current); |
208 | return 0; | 212 | return 0; |
@@ -211,9 +215,9 @@ badframe: | |||
211 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | 215 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) |
212 | { | 216 | { |
213 | struct pt_regs *regs = (struct pt_regs *) &__unused; | 217 | struct pt_regs *regs = (struct pt_regs *) &__unused; |
214 | struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4); | 218 | struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp - 4); |
215 | sigset_t set; | 219 | sigset_t set; |
216 | int eax; | 220 | int ax; |
217 | 221 | ||
218 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 222 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
219 | goto badframe; | 223 | goto badframe; |
@@ -226,13 +230,13 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) | |||
226 | recalc_sigpending(); | 230 | recalc_sigpending(); |
227 | spin_unlock_irq(¤t->sighand->siglock); | 231 | spin_unlock_irq(¤t->sighand->siglock); |
228 | 232 | ||
229 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) | 233 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
230 | goto badframe; | 234 | goto badframe; |
231 | 235 | ||
232 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT) | 236 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) |
233 | goto badframe; | 237 | goto badframe; |
234 | 238 | ||
235 | return eax; | 239 | return ax; |
236 | 240 | ||
237 | badframe: | 241 | badframe: |
238 | force_sig(SIGSEGV, current); | 242 | force_sig(SIGSEGV, current); |
@@ -249,27 +253,27 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
249 | { | 253 | { |
250 | int tmp, err = 0; | 254 | int tmp, err = 0; |
251 | 255 | ||
252 | err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs); | 256 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); |
253 | savesegment(gs, tmp); | 257 | savesegment(gs, tmp); |
254 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 258 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); |
255 | 259 | ||
256 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); | 260 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); |
257 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); | 261 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); |
258 | err |= __put_user(regs->edi, &sc->edi); | 262 | err |= __put_user(regs->di, &sc->di); |
259 | err |= __put_user(regs->esi, &sc->esi); | 263 | err |= __put_user(regs->si, &sc->si); |
260 | err |= __put_user(regs->ebp, &sc->ebp); | 264 | err |= __put_user(regs->bp, &sc->bp); |
261 | err |= __put_user(regs->esp, &sc->esp); | 265 | err |= __put_user(regs->sp, &sc->sp); |
262 | err |= __put_user(regs->ebx, &sc->ebx); | 266 | err |= __put_user(regs->bx, &sc->bx); |
263 | err |= __put_user(regs->edx, &sc->edx); | 267 | err |= __put_user(regs->dx, &sc->dx); |
264 | err |= __put_user(regs->ecx, &sc->ecx); | 268 | err |= __put_user(regs->cx, &sc->cx); |
265 | err |= __put_user(regs->eax, &sc->eax); | 269 | err |= __put_user(regs->ax, &sc->ax); |
266 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 270 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
267 | err |= __put_user(current->thread.error_code, &sc->err); | 271 | err |= __put_user(current->thread.error_code, &sc->err); |
268 | err |= __put_user(regs->eip, &sc->eip); | 272 | err |= __put_user(regs->ip, &sc->ip); |
269 | err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); | 273 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); |
270 | err |= __put_user(regs->eflags, &sc->eflags); | 274 | err |= __put_user(regs->flags, &sc->flags); |
271 | err |= __put_user(regs->esp, &sc->esp_at_signal); | 275 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
272 | err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); | 276 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); |
273 | 277 | ||
274 | tmp = save_i387(fpstate); | 278 | tmp = save_i387(fpstate); |
275 | if (tmp < 0) | 279 | if (tmp < 0) |
@@ -290,29 +294,36 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
290 | static inline void __user * | 294 | static inline void __user * |
291 | get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) | 295 | get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) |
292 | { | 296 | { |
293 | unsigned long esp; | 297 | unsigned long sp; |
294 | 298 | ||
295 | /* Default to using normal stack */ | 299 | /* Default to using normal stack */ |
296 | esp = regs->esp; | 300 | sp = regs->sp; |
301 | |||
302 | /* | ||
303 | * If we are on the alternate signal stack and would overflow it, don't. | ||
304 | * Return an always-bogus address instead so we will die with SIGSEGV. | ||
305 | */ | ||
306 | if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) | ||
307 | return (void __user *) -1L; | ||
297 | 308 | ||
298 | /* This is the X/Open sanctioned signal stack switching. */ | 309 | /* This is the X/Open sanctioned signal stack switching. */ |
299 | if (ka->sa.sa_flags & SA_ONSTACK) { | 310 | if (ka->sa.sa_flags & SA_ONSTACK) { |
300 | if (sas_ss_flags(esp) == 0) | 311 | if (sas_ss_flags(sp) == 0) |
301 | esp = current->sas_ss_sp + current->sas_ss_size; | 312 | sp = current->sas_ss_sp + current->sas_ss_size; |
302 | } | 313 | } |
303 | 314 | ||
304 | /* This is the legacy signal stack switching. */ | 315 | /* This is the legacy signal stack switching. */ |
305 | else if ((regs->xss & 0xffff) != __USER_DS && | 316 | else if ((regs->ss & 0xffff) != __USER_DS && |
306 | !(ka->sa.sa_flags & SA_RESTORER) && | 317 | !(ka->sa.sa_flags & SA_RESTORER) && |
307 | ka->sa.sa_restorer) { | 318 | ka->sa.sa_restorer) { |
308 | esp = (unsigned long) ka->sa.sa_restorer; | 319 | sp = (unsigned long) ka->sa.sa_restorer; |
309 | } | 320 | } |
310 | 321 | ||
311 | esp -= frame_size; | 322 | sp -= frame_size; |
312 | /* Align the stack pointer according to the i386 ABI, | 323 | /* Align the stack pointer according to the i386 ABI, |
313 | * i.e. so that on function entry ((sp + 4) & 15) == 0. */ | 324 | * i.e. so that on function entry ((sp + 4) & 15) == 0. */ |
314 | esp = ((esp + 4) & -16ul) - 4; | 325 | sp = ((sp + 4) & -16ul) - 4; |
315 | return (void __user *) esp; | 326 | return (void __user *) sp; |
316 | } | 327 | } |
317 | 328 | ||
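The alignment step in get_sigframe() guarantees that (sp + 4) is 16-byte aligned on entry to the handler, i.e. the stack looks exactly as if a CALL from properly aligned code had just pushed the return address. A quick numeric check with arbitrary values:

/* Numeric check of the i386 signal-frame alignment; inputs are arbitrary. */
#include <stdio.h>

int main(void)
{
	unsigned long sp = 0xbffff123UL;	/* arbitrary user stack pointer */
	unsigned long frame_size = 0x2e0;	/* arbitrary signal frame size */

	sp -= frame_size;
	sp = ((sp + 4) & -16UL) - 4;

	printf("sp = %#lx, (sp + 4) %% 16 = %lu\n", sp, (sp + 4) % 16);
	return 0;
}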
318 | /* These symbols are defined with the addresses in the vsyscall page. | 329 | /* These symbols are defined with the addresses in the vsyscall page. |
@@ -355,9 +366,9 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
355 | } | 366 | } |
356 | 367 | ||
357 | if (current->binfmt->hasvdso) | 368 | if (current->binfmt->hasvdso) |
358 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | 369 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); |
359 | else | 370 | else |
360 | restorer = (void *)&frame->retcode; | 371 | restorer = &frame->retcode; |
361 | if (ka->sa.sa_flags & SA_RESTORER) | 372 | if (ka->sa.sa_flags & SA_RESTORER) |
362 | restorer = ka->sa.sa_restorer; | 373 | restorer = ka->sa.sa_restorer; |
363 | 374 | ||
@@ -379,16 +390,16 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
379 | goto give_sigsegv; | 390 | goto give_sigsegv; |
380 | 391 | ||
381 | /* Set up registers for signal handler */ | 392 | /* Set up registers for signal handler */ |
382 | regs->esp = (unsigned long) frame; | 393 | regs->sp = (unsigned long) frame; |
383 | regs->eip = (unsigned long) ka->sa.sa_handler; | 394 | regs->ip = (unsigned long) ka->sa.sa_handler; |
384 | regs->eax = (unsigned long) sig; | 395 | regs->ax = (unsigned long) sig; |
385 | regs->edx = (unsigned long) 0; | 396 | regs->dx = (unsigned long) 0; |
386 | regs->ecx = (unsigned long) 0; | 397 | regs->cx = (unsigned long) 0; |
387 | 398 | ||
388 | regs->xds = __USER_DS; | 399 | regs->ds = __USER_DS; |
389 | regs->xes = __USER_DS; | 400 | regs->es = __USER_DS; |
390 | regs->xss = __USER_DS; | 401 | regs->ss = __USER_DS; |
391 | regs->xcs = __USER_CS; | 402 | regs->cs = __USER_CS; |
392 | 403 | ||
393 | /* | 404 | /* |
394 | * Clear TF when entering the signal handler, but | 405 | * Clear TF when entering the signal handler, but |
@@ -396,13 +407,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
396 | * The tracer may want to single-step inside the | 407 | * The tracer may want to single-step inside the |
397 | * handler too. | 408 | * handler too. |
398 | */ | 409 | */ |
399 | regs->eflags &= ~TF_MASK; | 410 | regs->flags &= ~TF_MASK; |
400 | if (test_thread_flag(TIF_SINGLESTEP)) | 411 | if (test_thread_flag(TIF_SINGLESTEP)) |
401 | ptrace_notify(SIGTRAP); | 412 | ptrace_notify(SIGTRAP); |
402 | 413 | ||
403 | #if DEBUG_SIG | 414 | #if DEBUG_SIG |
404 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 415 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", |
405 | current->comm, current->pid, frame, regs->eip, frame->pretcode); | 416 | current->comm, current->pid, frame, regs->ip, frame->pretcode); |
406 | #endif | 417 | #endif |
407 | 418 | ||
408 | return 0; | 419 | return 0; |
@@ -442,7 +453,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
442 | err |= __put_user(0, &frame->uc.uc_flags); | 453 | err |= __put_user(0, &frame->uc.uc_flags); |
443 | err |= __put_user(0, &frame->uc.uc_link); | 454 | err |= __put_user(0, &frame->uc.uc_link); |
444 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 455 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
445 | err |= __put_user(sas_ss_flags(regs->esp), | 456 | err |= __put_user(sas_ss_flags(regs->sp), |
446 | &frame->uc.uc_stack.ss_flags); | 457 | &frame->uc.uc_stack.ss_flags); |
447 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | 458 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); |
448 | err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, | 459 | err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, |
@@ -452,13 +463,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
452 | goto give_sigsegv; | 463 | goto give_sigsegv; |
453 | 464 | ||
454 | /* Set up to return from userspace. */ | 465 | /* Set up to return from userspace. */ |
455 | restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); | 466 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
456 | if (ka->sa.sa_flags & SA_RESTORER) | 467 | if (ka->sa.sa_flags & SA_RESTORER) |
457 | restorer = ka->sa.sa_restorer; | 468 | restorer = ka->sa.sa_restorer; |
458 | err |= __put_user(restorer, &frame->pretcode); | 469 | err |= __put_user(restorer, &frame->pretcode); |
459 | 470 | ||
460 | /* | 471 | /* |
461 | * This is movl $,%eax ; int $0x80 | 472 | * This is movl $,%ax ; int $0x80 |
462 | * | 473 | * |
463 | * WE DO NOT USE IT ANY MORE! It's only left here for historical | 474 | * WE DO NOT USE IT ANY MORE! It's only left here for historical |
464 | * reasons and because gdb uses it as a signature to notice | 475 | * reasons and because gdb uses it as a signature to notice |
@@ -472,16 +483,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
472 | goto give_sigsegv; | 483 | goto give_sigsegv; |
473 | 484 | ||
474 | /* Set up registers for signal handler */ | 485 | /* Set up registers for signal handler */ |
475 | regs->esp = (unsigned long) frame; | 486 | regs->sp = (unsigned long) frame; |
476 | regs->eip = (unsigned long) ka->sa.sa_handler; | 487 | regs->ip = (unsigned long) ka->sa.sa_handler; |
477 | regs->eax = (unsigned long) usig; | 488 | regs->ax = (unsigned long) usig; |
478 | regs->edx = (unsigned long) &frame->info; | 489 | regs->dx = (unsigned long) &frame->info; |
479 | regs->ecx = (unsigned long) &frame->uc; | 490 | regs->cx = (unsigned long) &frame->uc; |
480 | 491 | ||
481 | regs->xds = __USER_DS; | 492 | regs->ds = __USER_DS; |
482 | regs->xes = __USER_DS; | 493 | regs->es = __USER_DS; |
483 | regs->xss = __USER_DS; | 494 | regs->ss = __USER_DS; |
484 | regs->xcs = __USER_CS; | 495 | regs->cs = __USER_CS; |
485 | 496 | ||
486 | /* | 497 | /* |
487 | * Clear TF when entering the signal handler, but | 498 | * Clear TF when entering the signal handler, but |
@@ -489,13 +500,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
489 | * The tracer may want to single-step inside the | 500 | * The tracer may want to single-step inside the |
490 | * handler too. | 501 | * handler too. |
491 | */ | 502 | */ |
492 | regs->eflags &= ~TF_MASK; | 503 | regs->flags &= ~TF_MASK; |
493 | if (test_thread_flag(TIF_SINGLESTEP)) | 504 | if (test_thread_flag(TIF_SINGLESTEP)) |
494 | ptrace_notify(SIGTRAP); | 505 | ptrace_notify(SIGTRAP); |
495 | 506 | ||
496 | #if DEBUG_SIG | 507 | #if DEBUG_SIG |
497 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 508 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", |
498 | current->comm, current->pid, frame, regs->eip, frame->pretcode); | 509 | current->comm, current->pid, frame, regs->ip, frame->pretcode); |
499 | #endif | 510 | #endif |
500 | 511 | ||
501 | return 0; | 512 | return 0; |
@@ -516,35 +527,33 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
516 | int ret; | 527 | int ret; |
517 | 528 | ||
518 | /* Are we from a system call? */ | 529 | /* Are we from a system call? */ |
519 | if (regs->orig_eax >= 0) { | 530 | if (regs->orig_ax >= 0) { |
520 | /* If so, check system call restarting.. */ | 531 | /* If so, check system call restarting.. */ |
521 | switch (regs->eax) { | 532 | switch (regs->ax) { |
522 | case -ERESTART_RESTARTBLOCK: | 533 | case -ERESTART_RESTARTBLOCK: |
523 | case -ERESTARTNOHAND: | 534 | case -ERESTARTNOHAND: |
524 | regs->eax = -EINTR; | 535 | regs->ax = -EINTR; |
525 | break; | 536 | break; |
526 | 537 | ||
527 | case -ERESTARTSYS: | 538 | case -ERESTARTSYS: |
528 | if (!(ka->sa.sa_flags & SA_RESTART)) { | 539 | if (!(ka->sa.sa_flags & SA_RESTART)) { |
529 | regs->eax = -EINTR; | 540 | regs->ax = -EINTR; |
530 | break; | 541 | break; |
531 | } | 542 | } |
532 | /* fallthrough */ | 543 | /* fallthrough */ |
533 | case -ERESTARTNOINTR: | 544 | case -ERESTARTNOINTR: |
534 | regs->eax = regs->orig_eax; | 545 | regs->ax = regs->orig_ax; |
535 | regs->eip -= 2; | 546 | regs->ip -= 2; |
536 | } | 547 | } |
537 | } | 548 | } |
538 | 549 | ||
539 | /* | 550 | /* |
540 | * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so | 551 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF |
541 | * that register information in the sigcontext is correct. | 552 | * flag so that register information in the sigcontext is correct. |
542 | */ | 553 | */ |
543 | if (unlikely(regs->eflags & TF_MASK) | 554 | if (unlikely(regs->flags & X86_EFLAGS_TF) && |
544 | && likely(current->ptrace & PT_DTRACE)) { | 555 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
545 | current->ptrace &= ~PT_DTRACE; | 556 | regs->flags &= ~X86_EFLAGS_TF; |
546 | regs->eflags &= ~TF_MASK; | ||
547 | } | ||
548 | 557 | ||
549 | /* Set up the stack frame */ | 558 | /* Set up the stack frame */ |
550 | if (ka->sa.sa_flags & SA_SIGINFO) | 559 | if (ka->sa.sa_flags & SA_SIGINFO) |
@@ -569,7 +578,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
569 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 578 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
570 | * mistake. | 579 | * mistake. |
571 | */ | 580 | */ |
572 | static void fastcall do_signal(struct pt_regs *regs) | 581 | static void do_signal(struct pt_regs *regs) |
573 | { | 582 | { |
574 | siginfo_t info; | 583 | siginfo_t info; |
575 | int signr; | 584 | int signr; |
@@ -599,8 +608,8 @@ static void fastcall do_signal(struct pt_regs *regs) | |||
599 | * have been cleared if the watchpoint triggered | 608 | * have been cleared if the watchpoint triggered |
600 | * inside the kernel. | 609 | * inside the kernel. |
601 | */ | 610 | */ |
602 | if (unlikely(current->thread.debugreg[7])) | 611 | if (unlikely(current->thread.debugreg7)) |
603 | set_debugreg(current->thread.debugreg[7], 7); | 612 | set_debugreg(current->thread.debugreg7, 7); |
604 | 613 | ||
605 | /* Whee! Actually deliver the signal. */ | 614 | /* Whee! Actually deliver the signal. */ |
606 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 615 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
@@ -616,19 +625,19 @@ static void fastcall do_signal(struct pt_regs *regs) | |||
616 | } | 625 | } |
617 | 626 | ||
618 | /* Did we come from a system call? */ | 627 | /* Did we come from a system call? */ |
619 | if (regs->orig_eax >= 0) { | 628 | if (regs->orig_ax >= 0) { |
620 | /* Restart the system call - no handlers present */ | 629 | /* Restart the system call - no handlers present */ |
621 | switch (regs->eax) { | 630 | switch (regs->ax) { |
622 | case -ERESTARTNOHAND: | 631 | case -ERESTARTNOHAND: |
623 | case -ERESTARTSYS: | 632 | case -ERESTARTSYS: |
624 | case -ERESTARTNOINTR: | 633 | case -ERESTARTNOINTR: |
625 | regs->eax = regs->orig_eax; | 634 | regs->ax = regs->orig_ax; |
626 | regs->eip -= 2; | 635 | regs->ip -= 2; |
627 | break; | 636 | break; |
628 | 637 | ||
629 | case -ERESTART_RESTARTBLOCK: | 638 | case -ERESTART_RESTARTBLOCK: |
630 | regs->eax = __NR_restart_syscall; | 639 | regs->ax = __NR_restart_syscall; |
631 | regs->eip -= 2; | 640 | regs->ip -= 2; |
632 | break; | 641 | break; |
633 | } | 642 | } |
634 | } | 643 | } |
@@ -651,13 +660,16 @@ void do_notify_resume(struct pt_regs *regs, void *_unused, | |||
651 | { | 660 | { |
652 | /* Pending single-step? */ | 661 | /* Pending single-step? */ |
653 | if (thread_info_flags & _TIF_SINGLESTEP) { | 662 | if (thread_info_flags & _TIF_SINGLESTEP) { |
654 | regs->eflags |= TF_MASK; | 663 | regs->flags |= TF_MASK; |
655 | clear_thread_flag(TIF_SINGLESTEP); | 664 | clear_thread_flag(TIF_SINGLESTEP); |
656 | } | 665 | } |
657 | 666 | ||
658 | /* deal with pending signal delivery */ | 667 | /* deal with pending signal delivery */ |
659 | if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) | 668 | if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) |
660 | do_signal(regs); | 669 | do_signal(regs); |
670 | |||
671 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
672 | hrtick_resched(); | ||
661 | 673 | ||
662 | clear_thread_flag(TIF_IRET); | 674 | clear_thread_flag(TIF_IRET); |
663 | } | 675 | } |
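
The restart switch above is the user-visible syscall restart contract: with SA_RESTART the kernel reloads ax from orig_ax and backs ip up by two bytes (the size of "int $0x80"/"syscall") so the instruction is re-executed; without it the interrupted call fails with -EINTR. A minimal user-space illustration of that contract (hypothetical helper names, plain POSIX, nothing from this patch):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	(void)sig;			/* exists only to interrupt read() */
}

static void try_read(int flags, const char *label)
{
	struct sigaction sa;
	char buf[64];
	ssize_t n;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_alarm;
	sa.sa_flags = flags;		/* 0 or SA_RESTART */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGALRM, &sa, NULL);

	alarm(1);
	n = read(STDIN_FILENO, buf, sizeof(buf));
	if (n < 0 && errno == EINTR)
		printf("%s: read() -> EINTR\n", label);
	else
		printf("%s: read() -> %zd bytes\n", label, n);
	alarm(0);
}

int main(void)
{
	try_read(0, "without SA_RESTART");	  /* fails with EINTR after ~1s */
	try_read(SA_RESTART, "with SA_RESTART");  /* restarted; blocks until input */
	return 0;
}

Run interactively; in the SA_RESTART case the restarted read() keeps blocking until a line (or Ctrl-D) is typed.
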
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ab086b0357fc..7347bb14e306 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
@@ -39,7 +39,7 @@ asmlinkage long | |||
39 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 39 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
40 | struct pt_regs *regs) | 40 | struct pt_regs *regs) |
41 | { | 41 | { |
42 | return do_sigaltstack(uss, uoss, regs->rsp); | 42 | return do_sigaltstack(uss, uoss, regs->sp); |
43 | } | 43 | } |
44 | 44 | ||
45 | 45 | ||
@@ -64,8 +64,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
64 | 64 | ||
65 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | 65 | #define COPY(x) err |= __get_user(regs->x, &sc->x) |
66 | 66 | ||
67 | COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx); | 67 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
68 | COPY(rdx); COPY(rcx); COPY(rip); | 68 | COPY(dx); COPY(cx); COPY(ip); |
69 | COPY(r8); | 69 | COPY(r8); |
70 | COPY(r9); | 70 | COPY(r9); |
71 | COPY(r10); | 71 | COPY(r10); |
@@ -86,9 +86,9 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
86 | 86 | ||
87 | { | 87 | { |
88 | unsigned int tmpflags; | 88 | unsigned int tmpflags; |
89 | err |= __get_user(tmpflags, &sc->eflags); | 89 | err |= __get_user(tmpflags, &sc->flags); |
90 | regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); | 90 | regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); |
91 | regs->orig_rax = -1; /* disable syscall checks */ | 91 | regs->orig_ax = -1; /* disable syscall checks */ |
92 | } | 92 | } |
93 | 93 | ||
94 | { | 94 | { |
@@ -108,7 +108,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
108 | } | 108 | } |
109 | } | 109 | } |
110 | 110 | ||
111 | err |= __get_user(*prax, &sc->rax); | 111 | err |= __get_user(*prax, &sc->ax); |
112 | return err; | 112 | return err; |
113 | 113 | ||
114 | badframe: | 114 | badframe: |
@@ -119,9 +119,9 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
119 | { | 119 | { |
120 | struct rt_sigframe __user *frame; | 120 | struct rt_sigframe __user *frame; |
121 | sigset_t set; | 121 | sigset_t set; |
122 | unsigned long eax; | 122 | unsigned long ax; |
123 | 123 | ||
124 | frame = (struct rt_sigframe __user *)(regs->rsp - 8); | 124 | frame = (struct rt_sigframe __user *)(regs->sp - 8); |
125 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) { | 125 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) { |
126 | goto badframe; | 126 | goto badframe; |
127 | } | 127 | } |
@@ -135,17 +135,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
135 | recalc_sigpending(); | 135 | recalc_sigpending(); |
136 | spin_unlock_irq(¤t->sighand->siglock); | 136 | spin_unlock_irq(¤t->sighand->siglock); |
137 | 137 | ||
138 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) | 138 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
139 | goto badframe; | 139 | goto badframe; |
140 | 140 | ||
141 | #ifdef DEBUG_SIG | 141 | #ifdef DEBUG_SIG |
142 | printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax); | 142 | printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax); |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) | 145 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) |
146 | goto badframe; | 146 | goto badframe; |
147 | 147 | ||
148 | return eax; | 148 | return ax; |
149 | 149 | ||
150 | badframe: | 150 | badframe: |
151 | signal_fault(regs,frame,"sigreturn"); | 151 | signal_fault(regs,frame,"sigreturn"); |
@@ -165,14 +165,14 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
165 | err |= __put_user(0, &sc->gs); | 165 | err |= __put_user(0, &sc->gs); |
166 | err |= __put_user(0, &sc->fs); | 166 | err |= __put_user(0, &sc->fs); |
167 | 167 | ||
168 | err |= __put_user(regs->rdi, &sc->rdi); | 168 | err |= __put_user(regs->di, &sc->di); |
169 | err |= __put_user(regs->rsi, &sc->rsi); | 169 | err |= __put_user(regs->si, &sc->si); |
170 | err |= __put_user(regs->rbp, &sc->rbp); | 170 | err |= __put_user(regs->bp, &sc->bp); |
171 | err |= __put_user(regs->rsp, &sc->rsp); | 171 | err |= __put_user(regs->sp, &sc->sp); |
172 | err |= __put_user(regs->rbx, &sc->rbx); | 172 | err |= __put_user(regs->bx, &sc->bx); |
173 | err |= __put_user(regs->rdx, &sc->rdx); | 173 | err |= __put_user(regs->dx, &sc->dx); |
174 | err |= __put_user(regs->rcx, &sc->rcx); | 174 | err |= __put_user(regs->cx, &sc->cx); |
175 | err |= __put_user(regs->rax, &sc->rax); | 175 | err |= __put_user(regs->ax, &sc->ax); |
176 | err |= __put_user(regs->r8, &sc->r8); | 176 | err |= __put_user(regs->r8, &sc->r8); |
177 | err |= __put_user(regs->r9, &sc->r9); | 177 | err |= __put_user(regs->r9, &sc->r9); |
178 | err |= __put_user(regs->r10, &sc->r10); | 178 | err |= __put_user(regs->r10, &sc->r10); |
@@ -183,8 +183,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
183 | err |= __put_user(regs->r15, &sc->r15); | 183 | err |= __put_user(regs->r15, &sc->r15); |
184 | err |= __put_user(me->thread.trap_no, &sc->trapno); | 184 | err |= __put_user(me->thread.trap_no, &sc->trapno); |
185 | err |= __put_user(me->thread.error_code, &sc->err); | 185 | err |= __put_user(me->thread.error_code, &sc->err); |
186 | err |= __put_user(regs->rip, &sc->rip); | 186 | err |= __put_user(regs->ip, &sc->ip); |
187 | err |= __put_user(regs->eflags, &sc->eflags); | 187 | err |= __put_user(regs->flags, &sc->flags); |
188 | err |= __put_user(mask, &sc->oldmask); | 188 | err |= __put_user(mask, &sc->oldmask); |
189 | err |= __put_user(me->thread.cr2, &sc->cr2); | 189 | err |= __put_user(me->thread.cr2, &sc->cr2); |
190 | 190 | ||
@@ -198,18 +198,18 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
198 | static void __user * | 198 | static void __user * |
199 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | 199 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) |
200 | { | 200 | { |
201 | unsigned long rsp; | 201 | unsigned long sp; |
202 | 202 | ||
203 | /* Default to using normal stack - redzone*/ | 203 | /* Default to using normal stack - redzone*/ |
204 | rsp = regs->rsp - 128; | 204 | sp = regs->sp - 128; |
205 | 205 | ||
206 | /* This is the X/Open sanctioned signal stack switching. */ | 206 | /* This is the X/Open sanctioned signal stack switching. */ |
207 | if (ka->sa.sa_flags & SA_ONSTACK) { | 207 | if (ka->sa.sa_flags & SA_ONSTACK) { |
208 | if (sas_ss_flags(rsp) == 0) | 208 | if (sas_ss_flags(sp) == 0) |
209 | rsp = current->sas_ss_sp + current->sas_ss_size; | 209 | sp = current->sas_ss_sp + current->sas_ss_size; |
210 | } | 210 | } |
211 | 211 | ||
212 | return (void __user *)round_down(rsp - size, 16); | 212 | return (void __user *)round_down(sp - size, 16); |
213 | } | 213 | } |
214 | 214 | ||
215 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 215 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
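
get_stack() above encodes two ABI details: it skips the 128-byte x86-64 red zone that leaf code may still be using below sp, and it rounds the frame start down to 16 bytes as the ABI requires; SA_ONSTACK (checked via sas_ss_flags()) switches delivery to the sigaltstack() area instead. The user-space half of that mechanism, as a small self-contained example (hypothetical names, not from this patch):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void on_segv(int sig)
{
	/* Runs on the alternate stack installed below, not the faulting one. */
	const char msg[] = "SIGSEGV handled on the alternate stack\n";
	write(STDOUT_FILENO, msg, sizeof(msg) - 1);
	_exit(1);
}

int main(void)
{
	stack_t ss;
	struct sigaction sa;

	ss.ss_sp = malloc(SIGSTKSZ);
	ss.ss_size = SIGSTKSZ;
	ss.ss_flags = 0;
	sigaltstack(&ss, NULL);		/* what sas_ss_sp/sas_ss_size record */

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_segv;
	sa.sa_flags = SA_ONSTACK;	/* ask the kernel to switch stacks */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);

	raise(SIGSEGV);			/* simplest way to trigger delivery here */
	return 0;
}
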
@@ -246,7 +246,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
246 | err |= __put_user(0, &frame->uc.uc_flags); | 246 | err |= __put_user(0, &frame->uc.uc_flags); |
247 | err |= __put_user(0, &frame->uc.uc_link); | 247 | err |= __put_user(0, &frame->uc.uc_link); |
248 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 248 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
249 | err |= __put_user(sas_ss_flags(regs->rsp), | 249 | err |= __put_user(sas_ss_flags(regs->sp), |
250 | &frame->uc.uc_stack.ss_flags); | 250 | &frame->uc.uc_stack.ss_flags); |
251 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 251 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
252 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | 252 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); |
@@ -271,21 +271,21 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
271 | goto give_sigsegv; | 271 | goto give_sigsegv; |
272 | 272 | ||
273 | #ifdef DEBUG_SIG | 273 | #ifdef DEBUG_SIG |
274 | printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax); | 274 | printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax); |
275 | #endif | 275 | #endif |
276 | 276 | ||
277 | /* Set up registers for signal handler */ | 277 | /* Set up registers for signal handler */ |
278 | regs->rdi = sig; | 278 | regs->di = sig; |
279 | /* In case the signal handler was declared without prototypes */ | 279 | /* In case the signal handler was declared without prototypes */ |
280 | regs->rax = 0; | 280 | regs->ax = 0; |
281 | 281 | ||
282 | /* This also works for non SA_SIGINFO handlers because they expect the | 282 | /* This also works for non SA_SIGINFO handlers because they expect the |
283 | next argument after the signal number on the stack. */ | 283 | next argument after the signal number on the stack. */ |
284 | regs->rsi = (unsigned long)&frame->info; | 284 | regs->si = (unsigned long)&frame->info; |
285 | regs->rdx = (unsigned long)&frame->uc; | 285 | regs->dx = (unsigned long)&frame->uc; |
286 | regs->rip = (unsigned long) ka->sa.sa_handler; | 286 | regs->ip = (unsigned long) ka->sa.sa_handler; |
287 | 287 | ||
288 | regs->rsp = (unsigned long)frame; | 288 | regs->sp = (unsigned long)frame; |
289 | 289 | ||
290 | /* Set up the CS register to run signal handlers in 64-bit mode, | 290 | /* Set up the CS register to run signal handlers in 64-bit mode, |
291 | even if the handler happens to be interrupting 32-bit code. */ | 291 | even if the handler happens to be interrupting 32-bit code. */ |
@@ -295,12 +295,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
295 | see include/asm-x86_64/uaccess.h for details. */ | 295 | see include/asm-x86_64/uaccess.h for details. */ |
296 | set_fs(USER_DS); | 296 | set_fs(USER_DS); |
297 | 297 | ||
298 | regs->eflags &= ~TF_MASK; | 298 | regs->flags &= ~X86_EFLAGS_TF; |
299 | if (test_thread_flag(TIF_SINGLESTEP)) | 299 | if (test_thread_flag(TIF_SINGLESTEP)) |
300 | ptrace_notify(SIGTRAP); | 300 | ptrace_notify(SIGTRAP); |
301 | #ifdef DEBUG_SIG | 301 | #ifdef DEBUG_SIG |
302 | printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", | 302 | printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", |
303 | current->comm, current->pid, frame, regs->rip, frame->pretcode); | 303 | current->comm, current->pid, frame, regs->ip, frame->pretcode); |
304 | #endif | 304 | #endif |
305 | 305 | ||
306 | return 0; | 306 | return 0; |
@@ -321,44 +321,40 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
321 | int ret; | 321 | int ret; |
322 | 322 | ||
323 | #ifdef DEBUG_SIG | 323 | #ifdef DEBUG_SIG |
324 | printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", | 324 | printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n", |
325 | current->pid, sig, | 325 | current->pid, sig, |
326 | regs->rip, regs->rsp, regs); | 326 | regs->ip, regs->sp, regs); |
327 | #endif | 327 | #endif |
328 | 328 | ||
329 | /* Are we from a system call? */ | 329 | /* Are we from a system call? */ |
330 | if ((long)regs->orig_rax >= 0) { | 330 | if ((long)regs->orig_ax >= 0) { |
331 | /* If so, check system call restarting.. */ | 331 | /* If so, check system call restarting.. */ |
332 | switch (regs->rax) { | 332 | switch (regs->ax) { |
333 | case -ERESTART_RESTARTBLOCK: | 333 | case -ERESTART_RESTARTBLOCK: |
334 | case -ERESTARTNOHAND: | 334 | case -ERESTARTNOHAND: |
335 | regs->rax = -EINTR; | 335 | regs->ax = -EINTR; |
336 | break; | 336 | break; |
337 | 337 | ||
338 | case -ERESTARTSYS: | 338 | case -ERESTARTSYS: |
339 | if (!(ka->sa.sa_flags & SA_RESTART)) { | 339 | if (!(ka->sa.sa_flags & SA_RESTART)) { |
340 | regs->rax = -EINTR; | 340 | regs->ax = -EINTR; |
341 | break; | 341 | break; |
342 | } | 342 | } |
343 | /* fallthrough */ | 343 | /* fallthrough */ |
344 | case -ERESTARTNOINTR: | 344 | case -ERESTARTNOINTR: |
345 | regs->rax = regs->orig_rax; | 345 | regs->ax = regs->orig_ax; |
346 | regs->rip -= 2; | 346 | regs->ip -= 2; |
347 | break; | 347 | break; |
348 | } | 348 | } |
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * If TF is set due to a debugger (PT_DTRACE), clear the TF | 352 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF |
353 | * flag so that register information in the sigcontext is | 353 | * flag so that register information in the sigcontext is correct. |
354 | * correct. | ||
355 | */ | 354 | */ |
356 | if (unlikely(regs->eflags & TF_MASK)) { | 355 | if (unlikely(regs->flags & X86_EFLAGS_TF) && |
357 | if (likely(current->ptrace & PT_DTRACE)) { | 356 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
358 | current->ptrace &= ~PT_DTRACE; | 357 | regs->flags &= ~X86_EFLAGS_TF; |
359 | regs->eflags &= ~TF_MASK; | ||
360 | } | ||
361 | } | ||
362 | 358 | ||
363 | #ifdef CONFIG_IA32_EMULATION | 359 | #ifdef CONFIG_IA32_EMULATION |
364 | if (test_thread_flag(TIF_IA32)) { | 360 | if (test_thread_flag(TIF_IA32)) { |
@@ -430,21 +426,21 @@ static void do_signal(struct pt_regs *regs) | |||
430 | } | 426 | } |
431 | 427 | ||
432 | /* Did we come from a system call? */ | 428 | /* Did we come from a system call? */ |
433 | if ((long)regs->orig_rax >= 0) { | 429 | if ((long)regs->orig_ax >= 0) { |
434 | /* Restart the system call - no handlers present */ | 430 | /* Restart the system call - no handlers present */ |
435 | long res = regs->rax; | 431 | long res = regs->ax; |
436 | switch (res) { | 432 | switch (res) { |
437 | case -ERESTARTNOHAND: | 433 | case -ERESTARTNOHAND: |
438 | case -ERESTARTSYS: | 434 | case -ERESTARTSYS: |
439 | case -ERESTARTNOINTR: | 435 | case -ERESTARTNOINTR: |
440 | regs->rax = regs->orig_rax; | 436 | regs->ax = regs->orig_ax; |
441 | regs->rip -= 2; | 437 | regs->ip -= 2; |
442 | break; | 438 | break; |
443 | case -ERESTART_RESTARTBLOCK: | 439 | case -ERESTART_RESTARTBLOCK: |
444 | regs->rax = test_thread_flag(TIF_IA32) ? | 440 | regs->ax = test_thread_flag(TIF_IA32) ? |
445 | __NR_ia32_restart_syscall : | 441 | __NR_ia32_restart_syscall : |
446 | __NR_restart_syscall; | 442 | __NR_restart_syscall; |
447 | regs->rip -= 2; | 443 | regs->ip -= 2; |
448 | break; | 444 | break; |
449 | } | 445 | } |
450 | } | 446 | } |
@@ -461,13 +457,13 @@ void | |||
461 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 457 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
462 | { | 458 | { |
463 | #ifdef DEBUG_SIG | 459 | #ifdef DEBUG_SIG |
464 | printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n", | 460 | printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n", |
465 | thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); | 461 | thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current)); |
466 | #endif | 462 | #endif |
467 | 463 | ||
468 | /* Pending single-step? */ | 464 | /* Pending single-step? */ |
469 | if (thread_info_flags & _TIF_SINGLESTEP) { | 465 | if (thread_info_flags & _TIF_SINGLESTEP) { |
470 | regs->eflags |= TF_MASK; | 466 | regs->flags |= X86_EFLAGS_TF; |
471 | clear_thread_flag(TIF_SINGLESTEP); | 467 | clear_thread_flag(TIF_SINGLESTEP); |
472 | } | 468 | } |
473 | 469 | ||
@@ -480,14 +476,20 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
480 | /* deal with pending signal delivery */ | 476 | /* deal with pending signal delivery */ |
481 | if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) | 477 | if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) |
482 | do_signal(regs); | 478 | do_signal(regs); |
479 | |||
480 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
481 | hrtick_resched(); | ||
483 | } | 482 | } |
484 | 483 | ||
485 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 484 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
486 | { | 485 | { |
487 | struct task_struct *me = current; | 486 | struct task_struct *me = current; |
488 | if (show_unhandled_signals && printk_ratelimit()) | 487 | if (show_unhandled_signals && printk_ratelimit()) { |
489 | printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n", | 488 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
490 | me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax); | 489 | me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); |
490 | print_vma_addr(" in ", regs->ip); | ||
491 | printk("\n"); | ||
492 | } | ||
491 | 493 | ||
492 | force_sig(SIGSEGV, me); | 494 | force_sig(SIGSEGV, me); |
493 | } | 495 | } |
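
Both the 32-bit and 64-bit paths above set TF for a pending _TIF_SINGLESTEP and notify the tracer with SIGTRAP, and now clear TF only when the kernel itself forced it (TIF_FORCED_TF), so a user-set TF survives in the sigcontext. The tracer-facing side of this is PTRACE_SINGLESTEP; a rough, self-contained demonstration (hypothetical code, not part of the patch) that steps a child a few instructions at a time:

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* hand control to the tracer */
		for (volatile int i = 0; i < 3; i++)
			;
		_exit(0);
	}

	waitpid(child, NULL, 0);		/* child is stopped at SIGSTOP */

	for (int step = 0; step < 8; step++) {
		struct user_regs_struct regs;
		int status;

		if (ptrace(PTRACE_SINGLESTEP, child, NULL, NULL) == -1)
			break;
		waitpid(child, &status, 0);
		if (WIFEXITED(status))
			break;
		ptrace(PTRACE_GETREGS, child, NULL, &regs);
#ifdef __x86_64__
		printf("step %d: ip = %#llx\n", step, (unsigned long long)regs.rip);
#else
		printf("step %d: ip = %#lx\n", step, (unsigned long)regs.eip);
#endif
	}

	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}
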
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index fcaa026eb807..dc0cde9d16fb 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c | |||
@@ -159,7 +159,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) | |||
159 | apic_write_around(APIC_ICR, cfg); | 159 | apic_write_around(APIC_ICR, cfg); |
160 | } | 160 | } |
161 | 161 | ||
162 | void fastcall send_IPI_self(int vector) | 162 | void send_IPI_self(int vector) |
163 | { | 163 | { |
164 | __send_IPI_shortcut(APIC_DEST_SELF, vector); | 164 | __send_IPI_shortcut(APIC_DEST_SELF, vector); |
165 | } | 165 | } |
@@ -223,7 +223,7 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
223 | */ | 223 | */ |
224 | 224 | ||
225 | local_irq_save(flags); | 225 | local_irq_save(flags); |
226 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { | 226 | for_each_possible_cpu(query_cpu) { |
227 | if (cpu_isset(query_cpu, mask)) { | 227 | if (cpu_isset(query_cpu, mask)) { |
228 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | 228 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), |
229 | vector); | 229 | vector); |
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock); | |||
256 | * We need to reload %cr3 since the page tables may be going | 256 | * We need to reload %cr3 since the page tables may be going |
257 | * away from under us.. | 257 | * away from under us.. |
258 | */ | 258 | */ |
259 | void leave_mm(unsigned long cpu) | 259 | void leave_mm(int cpu) |
260 | { | 260 | { |
261 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 261 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) |
262 | BUG(); | 262 | BUG(); |
263 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | 263 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); |
264 | load_cr3(swapper_pg_dir); | 264 | load_cr3(swapper_pg_dir); |
265 | } | 265 | } |
266 | EXPORT_SYMBOL_GPL(leave_mm); | ||
266 | 267 | ||
267 | /* | 268 | /* |
268 | * | 269 | * |
@@ -310,7 +311,7 @@ void leave_mm(unsigned long cpu) | |||
310 | * 2) Leave the mm if we are in the lazy tlb mode. | 311 | * 2) Leave the mm if we are in the lazy tlb mode. |
311 | */ | 312 | */ |
312 | 313 | ||
313 | fastcall void smp_invalidate_interrupt(struct pt_regs *regs) | 314 | void smp_invalidate_interrupt(struct pt_regs *regs) |
314 | { | 315 | { |
315 | unsigned long cpu; | 316 | unsigned long cpu; |
316 | 317 | ||
@@ -638,13 +639,13 @@ static void native_smp_send_stop(void) | |||
638 | * all the work is done automatically when | 639 | * all the work is done automatically when |
639 | * we return from the interrupt. | 640 | * we return from the interrupt. |
640 | */ | 641 | */ |
641 | fastcall void smp_reschedule_interrupt(struct pt_regs *regs) | 642 | void smp_reschedule_interrupt(struct pt_regs *regs) |
642 | { | 643 | { |
643 | ack_APIC_irq(); | 644 | ack_APIC_irq(); |
644 | __get_cpu_var(irq_stat).irq_resched_count++; | 645 | __get_cpu_var(irq_stat).irq_resched_count++; |
645 | } | 646 | } |
646 | 647 | ||
647 | fastcall void smp_call_function_interrupt(struct pt_regs *regs) | 648 | void smp_call_function_interrupt(struct pt_regs *regs) |
648 | { | 649 | { |
649 | void (*func) (void *info) = call_data->func; | 650 | void (*func) (void *info) = call_data->func; |
650 | void *info = call_data->info; | 651 | void *info = call_data->info; |
@@ -675,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id) | |||
675 | { | 676 | { |
676 | int i; | 677 | int i; |
677 | 678 | ||
678 | for (i = 0; i < NR_CPUS; i++) { | 679 | for_each_possible_cpu(i) { |
679 | if (per_cpu(x86_cpu_to_apicid, i) == apic_id) | 680 | if (per_cpu(x86_cpu_to_apicid, i) == apic_id) |
680 | return i; | 681 | return i; |
681 | } | 682 | } |
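
The loops above switch from scanning all NR_CPUS slots to for_each_possible_cpu(), which walks only cpu_possible_map and therefore skips per-cpu slots that can never come online. A minimal kernel-side sketch of the idiom (hypothetical per-cpu counter, not from this patch):

#include <linux/cpumask.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_counter);

static unsigned long sum_counters(void)
{
	unsigned long sum = 0;
	int cpu;

	/* Visits only CPUs in cpu_possible_map, not every NR_CPUS slot. */
	for_each_possible_cpu(cpu)
		sum += per_cpu(my_counter, cpu);

	return sum;
}
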
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 03fa6ed559c6..2fd74b06db67 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/idle.h> | 29 | #include <asm/idle.h> |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Smarter SMP flushing macros. | 32 | * Smarter SMP flushing macros. |
33 | * c/o Linus Torvalds. | 33 | * c/o Linus Torvalds. |
34 | * | 34 | * |
35 | * These mean you can really definitely utterly forget about | 35 | * These mean you can really definitely utterly forget about |
@@ -37,15 +37,15 @@ | |||
37 | * | 37 | * |
38 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | 38 | * Optimizations Manfred Spraul <manfred@colorfullife.com> |
39 | * | 39 | * |
40 | * More scalable flush, from Andi Kleen | 40 | * More scalable flush, from Andi Kleen |
41 | * | 41 | * |
42 | * To avoid global state use 8 different call vectors. | 42 | * To avoid global state use 8 different call vectors. |
43 | * Each CPU uses a specific vector to trigger flushes on other | 43 | * Each CPU uses a specific vector to trigger flushes on other |
44 | * CPUs. Depending on the received vector the target CPUs look into | 44 | * CPUs. Depending on the received vector the target CPUs look into |
45 | * the right per cpu variable for the flush data. | 45 | * the right per cpu variable for the flush data. |
46 | * | 46 | * |
47 | * With more than 8 CPUs they are hashed to the 8 available | 47 | * With more than 8 CPUs they are hashed to the 8 available |
48 | * vectors. The limited global vector space forces us to this right now. | 48 | * vectors. The limited global vector space forces us to this right now. |
49 | * In future when interrupts are split into per CPU domains this could be | 49 | * In future when interrupts are split into per CPU domains this could be |
50 | * fixed, at the cost of triggering multiple IPIs in some cases. | 50 | * fixed, at the cost of triggering multiple IPIs in some cases. |
51 | */ | 51 | */ |
@@ -55,7 +55,6 @@ union smp_flush_state { | |||
55 | cpumask_t flush_cpumask; | 55 | cpumask_t flush_cpumask; |
56 | struct mm_struct *flush_mm; | 56 | struct mm_struct *flush_mm; |
57 | unsigned long flush_va; | 57 | unsigned long flush_va; |
58 | #define FLUSH_ALL -1ULL | ||
59 | spinlock_t tlbstate_lock; | 58 | spinlock_t tlbstate_lock; |
60 | }; | 59 | }; |
61 | char pad[SMP_CACHE_BYTES]; | 60 | char pad[SMP_CACHE_BYTES]; |
@@ -67,16 +66,17 @@ union smp_flush_state { | |||
67 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | 66 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); |
68 | 67 | ||
69 | /* | 68 | /* |
70 | * We cannot call mmdrop() because we are in interrupt context, | 69 | * We cannot call mmdrop() because we are in interrupt context, |
71 | * instead update mm->cpu_vm_mask. | 70 | * instead update mm->cpu_vm_mask. |
72 | */ | 71 | */ |
73 | static inline void leave_mm(int cpu) | 72 | void leave_mm(int cpu) |
74 | { | 73 | { |
75 | if (read_pda(mmu_state) == TLBSTATE_OK) | 74 | if (read_pda(mmu_state) == TLBSTATE_OK) |
76 | BUG(); | 75 | BUG(); |
77 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | 76 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); |
78 | load_cr3(swapper_pg_dir); | 77 | load_cr3(swapper_pg_dir); |
79 | } | 78 | } |
79 | EXPORT_SYMBOL_GPL(leave_mm); | ||
80 | 80 | ||
81 | /* | 81 | /* |
82 | * | 82 | * |
@@ -85,25 +85,25 @@ static inline void leave_mm(int cpu) | |||
85 | * 1) switch_mm() either 1a) or 1b) | 85 | * 1) switch_mm() either 1a) or 1b) |
86 | * 1a) thread switch to a different mm | 86 | * 1a) thread switch to a different mm |
87 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | 87 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); |
88 | * Stop ipi delivery for the old mm. This is not synchronized with | 88 | * Stop ipi delivery for the old mm. This is not synchronized with |
89 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis | 89 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis |
90 | * for the wrong mm, and in the worst case we perform a superfluous | 90 | * for the wrong mm, and in the worst case we perform a superfluous |
91 | * tlb flush. | 91 | * tlb flush. |
92 | * 1a2) set cpu mmu_state to TLBSTATE_OK | 92 | * 1a2) set cpu mmu_state to TLBSTATE_OK |
93 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | 93 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 |
94 | * was in lazy tlb mode. | 94 | * was in lazy tlb mode. |
95 | * 1a3) update cpu active_mm | 95 | * 1a3) update cpu active_mm |
96 | * Now cpu0 accepts tlb flushes for the new mm. | 96 | * Now cpu0 accepts tlb flushes for the new mm. |
97 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | 97 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); |
98 | * Now the other cpus will send tlb flush ipis. | 98 | * Now the other cpus will send tlb flush ipis. |
99 | * 1a4) change cr3. | 99 | * 1a4) change cr3. |
100 | * 1b) thread switch without mm change | 100 | * 1b) thread switch without mm change |
101 | * cpu active_mm is correct, cpu0 already handles | 101 | * cpu active_mm is correct, cpu0 already handles |
102 | * flush ipis. | 102 | * flush ipis. |
103 | * 1b1) set cpu mmu_state to TLBSTATE_OK | 103 | * 1b1) set cpu mmu_state to TLBSTATE_OK |
104 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | 104 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. |
105 | * Atomically set the bit [other cpus will start sending flush ipis], | 105 | * Atomically set the bit [other cpus will start sending flush ipis], |
106 | * and test the bit. | 106 | * and test the bit. |
107 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | 107 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. |
108 | * 2) switch %%esp, ie current | 108 | * 2) switch %%esp, ie current |
109 | * | 109 | * |
@@ -137,12 +137,12 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
137 | * orig_rax contains the negated interrupt vector. | 137 | * orig_rax contains the negated interrupt vector. |
138 | * Use that to determine where the sender put the data. | 138 | * Use that to determine where the sender put the data. |
139 | */ | 139 | */ |
140 | sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; | 140 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; |
141 | f = &per_cpu(flush_state, sender); | 141 | f = &per_cpu(flush_state, sender); |
142 | 142 | ||
143 | if (!cpu_isset(cpu, f->flush_cpumask)) | 143 | if (!cpu_isset(cpu, f->flush_cpumask)) |
144 | goto out; | 144 | goto out; |
145 | /* | 145 | /* |
146 | * This was a BUG() but until someone can quote me the | 146 | * This was a BUG() but until someone can quote me the |
147 | * line from the intel manual that guarantees an IPI to | 147 | * line from the intel manual that guarantees an IPI to |
148 | * multiple CPUs is retried _only_ on the erroring CPUs | 148 | * multiple CPUs is retried _only_ on the erroring CPUs |
@@ -150,10 +150,10 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
150 | * | 150 | * |
151 | * BUG(); | 151 | * BUG(); |
152 | */ | 152 | */ |
153 | 153 | ||
154 | if (f->flush_mm == read_pda(active_mm)) { | 154 | if (f->flush_mm == read_pda(active_mm)) { |
155 | if (read_pda(mmu_state) == TLBSTATE_OK) { | 155 | if (read_pda(mmu_state) == TLBSTATE_OK) { |
156 | if (f->flush_va == FLUSH_ALL) | 156 | if (f->flush_va == TLB_FLUSH_ALL) |
157 | local_flush_tlb(); | 157 | local_flush_tlb(); |
158 | else | 158 | else |
159 | __flush_tlb_one(f->flush_va); | 159 | __flush_tlb_one(f->flush_va); |
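
The rename to TLB_FLUSH_ALL aside, the flush-IPI plumbing above relies on a simple vector hash: the sender picks slot cpu % NUM_INVALIDATE_TLB_VECTORS, and the handler recovers that slot from the complemented vector number the entry stubs leave in orig_ax. A small user-space check of that arithmetic (the vector base value below is illustrative only, not taken from this kernel):

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS	8
#define INVALIDATE_TLB_VECTOR_START	0xf0	/* illustrative value only */

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 16; cpu++) {
		int slot = cpu % NUM_INVALIDATE_TLB_VECTORS;
		int vector = INVALIDATE_TLB_VECTOR_START + slot;
		long orig_ax = ~(long)vector;	/* what the entry stub stores */
		int recovered = ~orig_ax - INVALIDATE_TLB_VECTOR_START;

		printf("cpu %2d -> slot %d, vector 0x%02x, recovered %d\n",
		       cpu, slot, vector, recovered);
	}
	return 0;
}
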
@@ -166,19 +166,22 @@ out: | |||
166 | add_pda(irq_tlb_count, 1); | 166 | add_pda(irq_tlb_count, 1); |
167 | } | 167 | } |
168 | 168 | ||
169 | static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | 169 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
170 | unsigned long va) | 170 | unsigned long va) |
171 | { | 171 | { |
172 | int sender; | 172 | int sender; |
173 | union smp_flush_state *f; | 173 | union smp_flush_state *f; |
174 | cpumask_t cpumask = *cpumaskp; | ||
174 | 175 | ||
175 | /* Caller has disabled preemption */ | 176 | /* Caller has disabled preemption */ |
176 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | 177 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; |
177 | f = &per_cpu(flush_state, sender); | 178 | f = &per_cpu(flush_state, sender); |
178 | 179 | ||
179 | /* Could avoid this lock when | 180 | /* |
180 | num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | 181 | * Could avoid this lock when |
181 | probably not worth checking this for a cache-hot lock. */ | 182 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is |
183 | * probably not worth checking this for a cache-hot lock. | ||
184 | */ | ||
182 | spin_lock(&f->tlbstate_lock); | 185 | spin_lock(&f->tlbstate_lock); |
183 | 186 | ||
184 | f->flush_mm = mm; | 187 | f->flush_mm = mm; |
@@ -202,14 +205,14 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
202 | int __cpuinit init_smp_flush(void) | 205 | int __cpuinit init_smp_flush(void) |
203 | { | 206 | { |
204 | int i; | 207 | int i; |
208 | |||
205 | for_each_cpu_mask(i, cpu_possible_map) { | 209 | for_each_cpu_mask(i, cpu_possible_map) { |
206 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | 210 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); |
207 | } | 211 | } |
208 | return 0; | 212 | return 0; |
209 | } | 213 | } |
210 | |||
211 | core_initcall(init_smp_flush); | 214 | core_initcall(init_smp_flush); |
212 | 215 | ||
213 | void flush_tlb_current_task(void) | 216 | void flush_tlb_current_task(void) |
214 | { | 217 | { |
215 | struct mm_struct *mm = current->mm; | 218 | struct mm_struct *mm = current->mm; |
@@ -221,10 +224,9 @@ void flush_tlb_current_task(void) | |||
221 | 224 | ||
222 | local_flush_tlb(); | 225 | local_flush_tlb(); |
223 | if (!cpus_empty(cpu_mask)) | 226 | if (!cpus_empty(cpu_mask)) |
224 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 227 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
225 | preempt_enable(); | 228 | preempt_enable(); |
226 | } | 229 | } |
227 | EXPORT_SYMBOL(flush_tlb_current_task); | ||
228 | 230 | ||
229 | void flush_tlb_mm (struct mm_struct * mm) | 231 | void flush_tlb_mm (struct mm_struct * mm) |
230 | { | 232 | { |
@@ -241,11 +243,10 @@ void flush_tlb_mm (struct mm_struct * mm) | |||
241 | leave_mm(smp_processor_id()); | 243 | leave_mm(smp_processor_id()); |
242 | } | 244 | } |
243 | if (!cpus_empty(cpu_mask)) | 245 | if (!cpus_empty(cpu_mask)) |
244 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 246 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
245 | 247 | ||
246 | preempt_enable(); | 248 | preempt_enable(); |
247 | } | 249 | } |
248 | EXPORT_SYMBOL(flush_tlb_mm); | ||
249 | 250 | ||
250 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | 251 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) |
251 | { | 252 | { |
@@ -259,8 +260,8 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | |||
259 | if (current->active_mm == mm) { | 260 | if (current->active_mm == mm) { |
260 | if(current->mm) | 261 | if(current->mm) |
261 | __flush_tlb_one(va); | 262 | __flush_tlb_one(va); |
262 | else | 263 | else |
263 | leave_mm(smp_processor_id()); | 264 | leave_mm(smp_processor_id()); |
264 | } | 265 | } |
265 | 266 | ||
266 | if (!cpus_empty(cpu_mask)) | 267 | if (!cpus_empty(cpu_mask)) |
@@ -268,7 +269,6 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | |||
268 | 269 | ||
269 | preempt_enable(); | 270 | preempt_enable(); |
270 | } | 271 | } |
271 | EXPORT_SYMBOL(flush_tlb_page); | ||
272 | 272 | ||
273 | static void do_flush_tlb_all(void* info) | 273 | static void do_flush_tlb_all(void* info) |
274 | { | 274 | { |
@@ -325,11 +325,9 @@ void unlock_ipi_call_lock(void) | |||
325 | * this function sends a 'generic call function' IPI to all other CPU | 325 | * this function sends a 'generic call function' IPI to all other CPU |
326 | * of the system defined in the mask. | 326 | * of the system defined in the mask. |
327 | */ | 327 | */ |
328 | 328 | static int __smp_call_function_mask(cpumask_t mask, | |
329 | static int | 329 | void (*func)(void *), void *info, |
330 | __smp_call_function_mask(cpumask_t mask, | 330 | int wait) |
331 | void (*func)(void *), void *info, | ||
332 | int wait) | ||
333 | { | 331 | { |
334 | struct call_data_struct data; | 332 | struct call_data_struct data; |
335 | cpumask_t allbutself; | 333 | cpumask_t allbutself; |
@@ -417,11 +415,10 @@ EXPORT_SYMBOL(smp_call_function_mask); | |||
417 | */ | 415 | */ |
418 | 416 | ||
419 | int smp_call_function_single (int cpu, void (*func) (void *info), void *info, | 417 | int smp_call_function_single (int cpu, void (*func) (void *info), void *info, |
420 | int nonatomic, int wait) | 418 | int nonatomic, int wait) |
421 | { | 419 | { |
422 | /* prevent preemption and reschedule on another processor */ | 420 | /* prevent preemption and reschedule on another processor */ |
423 | int ret; | 421 | int ret, me = get_cpu(); |
424 | int me = get_cpu(); | ||
425 | 422 | ||
426 | /* Can deadlock when called with interrupts disabled */ | 423 | /* Can deadlock when called with interrupts disabled */ |
427 | WARN_ON(irqs_disabled()); | 424 | WARN_ON(irqs_disabled()); |
@@ -471,9 +468,9 @@ static void stop_this_cpu(void *dummy) | |||
471 | */ | 468 | */ |
472 | cpu_clear(smp_processor_id(), cpu_online_map); | 469 | cpu_clear(smp_processor_id(), cpu_online_map); |
473 | disable_local_APIC(); | 470 | disable_local_APIC(); |
474 | for (;;) | 471 | for (;;) |
475 | halt(); | 472 | halt(); |
476 | } | 473 | } |
477 | 474 | ||
478 | void smp_send_stop(void) | 475 | void smp_send_stop(void) |
479 | { | 476 | { |
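
smp_call_function_single() still takes the nonatomic argument in this series and, per the WARN_ON above, must not be called with interrupts disabled. A hypothetical module sketch of the call (assumes a second online CPU; helper names invented for illustration):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>

static void say_hello(void *info)
{
	/* info is unused in this sketch */
	printk(KERN_INFO "hello from CPU %d\n", smp_processor_id());
}

static int __init hello_ipi_init(void)
{
	/* Run say_hello() on CPU 1 and wait for it to finish. */
	smp_call_function_single(1, say_hello, NULL, 0, 1);
	return 0;
}

static void __exit hello_ipi_exit(void)
{
}

module_init(hello_ipi_init);
module_exit(hello_ipi_exit);
MODULE_LICENSE("GPL");
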
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 4ea80cbe52e5..579b9b740c7c 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c | |||
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(cpu_online_map); | |||
83 | 83 | ||
84 | cpumask_t cpu_callin_map; | 84 | cpumask_t cpu_callin_map; |
85 | cpumask_t cpu_callout_map; | 85 | cpumask_t cpu_callout_map; |
86 | EXPORT_SYMBOL(cpu_callout_map); | ||
87 | cpumask_t cpu_possible_map; | 86 | cpumask_t cpu_possible_map; |
88 | EXPORT_SYMBOL(cpu_possible_map); | 87 | EXPORT_SYMBOL(cpu_possible_map); |
89 | static cpumask_t smp_commenced_mask; | 88 | static cpumask_t smp_commenced_mask; |
@@ -92,15 +91,10 @@ static cpumask_t smp_commenced_mask; | |||
92 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 91 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
93 | EXPORT_PER_CPU_SYMBOL(cpu_info); | 92 | EXPORT_PER_CPU_SYMBOL(cpu_info); |
94 | 93 | ||
95 | /* | 94 | /* which logical CPU number maps to which CPU (physical APIC ID) */ |
96 | * The following static array is used during kernel startup | ||
97 | * and the x86_cpu_to_apicid_ptr contains the address of the | ||
98 | * array during this time. Is it zeroed when the per_cpu | ||
99 | * data area is removed. | ||
100 | */ | ||
101 | u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = | 95 | u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = |
102 | { [0 ... NR_CPUS-1] = BAD_APICID }; | 96 | { [0 ... NR_CPUS-1] = BAD_APICID }; |
103 | void *x86_cpu_to_apicid_ptr; | 97 | void *x86_cpu_to_apicid_early_ptr; |
104 | DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; | 98 | DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; |
105 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | 99 | EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); |
106 | 100 | ||
@@ -113,7 +107,6 @@ u8 apicid_2_node[MAX_APICID]; | |||
113 | extern const unsigned char trampoline_data []; | 107 | extern const unsigned char trampoline_data []; |
114 | extern const unsigned char trampoline_end []; | 108 | extern const unsigned char trampoline_end []; |
115 | static unsigned char *trampoline_base; | 109 | static unsigned char *trampoline_base; |
116 | static int trampoline_exec; | ||
117 | 110 | ||
118 | static void map_cpu_to_logical_apicid(void); | 111 | static void map_cpu_to_logical_apicid(void); |
119 | 112 | ||
@@ -138,17 +131,13 @@ static unsigned long __cpuinit setup_trampoline(void) | |||
138 | */ | 131 | */ |
139 | void __init smp_alloc_memory(void) | 132 | void __init smp_alloc_memory(void) |
140 | { | 133 | { |
141 | trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); | 134 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); |
142 | /* | 135 | /* |
143 | * Has to be in very low memory so we can execute | 136 | * Has to be in very low memory so we can execute |
144 | * real-mode AP code. | 137 | * real-mode AP code. |
145 | */ | 138 | */ |
146 | if (__pa(trampoline_base) >= 0x9F000) | 139 | if (__pa(trampoline_base) >= 0x9F000) |
147 | BUG(); | 140 | BUG(); |
148 | /* | ||
149 | * Make the SMP trampoline executable: | ||
150 | */ | ||
151 | trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); | ||
152 | } | 141 | } |
153 | 142 | ||
154 | /* | 143 | /* |
@@ -213,8 +202,6 @@ valid_k7: | |||
213 | ; | 202 | ; |
214 | } | 203 | } |
215 | 204 | ||
216 | extern void calibrate_delay(void); | ||
217 | |||
218 | static atomic_t init_deasserted; | 205 | static atomic_t init_deasserted; |
219 | 206 | ||
220 | static void __cpuinit smp_callin(void) | 207 | static void __cpuinit smp_callin(void) |
@@ -405,7 +392,7 @@ static void __cpuinit start_secondary(void *unused) | |||
405 | setup_secondary_clock(); | 392 | setup_secondary_clock(); |
406 | if (nmi_watchdog == NMI_IO_APIC) { | 393 | if (nmi_watchdog == NMI_IO_APIC) { |
407 | disable_8259A_irq(0); | 394 | disable_8259A_irq(0); |
408 | enable_NMI_through_LVT0(NULL); | 395 | enable_NMI_through_LVT0(); |
409 | enable_8259A_irq(0); | 396 | enable_8259A_irq(0); |
410 | } | 397 | } |
411 | /* | 398 | /* |
@@ -448,38 +435,38 @@ void __devinit initialize_secondary(void) | |||
448 | { | 435 | { |
449 | /* | 436 | /* |
450 | * We don't actually need to load the full TSS, | 437 | * We don't actually need to load the full TSS, |
451 | * basically just the stack pointer and the eip. | 438 | * basically just the stack pointer and the ip. |
452 | */ | 439 | */ |
453 | 440 | ||
454 | asm volatile( | 441 | asm volatile( |
455 | "movl %0,%%esp\n\t" | 442 | "movl %0,%%esp\n\t" |
456 | "jmp *%1" | 443 | "jmp *%1" |
457 | : | 444 | : |
458 | :"m" (current->thread.esp),"m" (current->thread.eip)); | 445 | :"m" (current->thread.sp),"m" (current->thread.ip)); |
459 | } | 446 | } |
460 | 447 | ||
461 | /* Static state in head.S used to set up a CPU */ | 448 | /* Static state in head.S used to set up a CPU */ |
462 | extern struct { | 449 | extern struct { |
463 | void * esp; | 450 | void * sp; |
464 | unsigned short ss; | 451 | unsigned short ss; |
465 | } stack_start; | 452 | } stack_start; |
466 | 453 | ||
467 | #ifdef CONFIG_NUMA | 454 | #ifdef CONFIG_NUMA |
468 | 455 | ||
469 | /* which logical CPUs are on which nodes */ | 456 | /* which logical CPUs are on which nodes */ |
470 | cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = | 457 | cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = |
471 | { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; | 458 | { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; |
472 | EXPORT_SYMBOL(node_2_cpu_mask); | 459 | EXPORT_SYMBOL(node_to_cpumask_map); |
473 | /* which node each logical CPU is on */ | 460 | /* which node each logical CPU is on */ |
474 | int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; | 461 | int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; |
475 | EXPORT_SYMBOL(cpu_2_node); | 462 | EXPORT_SYMBOL(cpu_to_node_map); |
476 | 463 | ||
477 | /* set up a mapping between cpu and node. */ | 464 | /* set up a mapping between cpu and node. */ |
478 | static inline void map_cpu_to_node(int cpu, int node) | 465 | static inline void map_cpu_to_node(int cpu, int node) |
479 | { | 466 | { |
480 | printk("Mapping cpu %d to node %d\n", cpu, node); | 467 | printk("Mapping cpu %d to node %d\n", cpu, node); |
481 | cpu_set(cpu, node_2_cpu_mask[node]); | 468 | cpu_set(cpu, node_to_cpumask_map[node]); |
482 | cpu_2_node[cpu] = node; | 469 | cpu_to_node_map[cpu] = node; |
483 | } | 470 | } |
484 | 471 | ||
485 | /* undo a mapping between cpu and node. */ | 472 | /* undo a mapping between cpu and node. */ |
@@ -489,8 +476,8 @@ static inline void unmap_cpu_to_node(int cpu) | |||
489 | 476 | ||
490 | printk("Unmapping cpu %d from all nodes\n", cpu); | 477 | printk("Unmapping cpu %d from all nodes\n", cpu); |
491 | for (node = 0; node < MAX_NUMNODES; node ++) | 478 | for (node = 0; node < MAX_NUMNODES; node ++) |
492 | cpu_clear(cpu, node_2_cpu_mask[node]); | 479 | cpu_clear(cpu, node_to_cpumask_map[node]); |
493 | cpu_2_node[cpu] = 0; | 480 | cpu_to_node_map[cpu] = 0; |
494 | } | 481 | } |
495 | #else /* !CONFIG_NUMA */ | 482 | #else /* !CONFIG_NUMA */ |
496 | 483 | ||
@@ -668,7 +655,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
668 | * target processor state. | 655 | * target processor state. |
669 | */ | 656 | */ |
670 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | 657 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, |
671 | (unsigned long) stack_start.esp); | 658 | (unsigned long) stack_start.sp); |
672 | 659 | ||
673 | /* | 660 | /* |
674 | * Run STARTUP IPI loop. | 661 | * Run STARTUP IPI loop. |
@@ -754,7 +741,7 @@ static inline struct task_struct * __cpuinit alloc_idle_task(int cpu) | |||
754 | /* initialize thread_struct. we really want to avoid destroy | 741 | /* initialize thread_struct. we really want to avoid destroy |
755 | * idle tread | 742 | * idle tread |
756 | */ | 743 | */ |
757 | idle->thread.esp = (unsigned long)task_pt_regs(idle); | 744 | idle->thread.sp = (unsigned long)task_pt_regs(idle); |
758 | init_idle(idle, cpu); | 745 | init_idle(idle, cpu); |
759 | return idle; | 746 | return idle; |
760 | } | 747 | } |
@@ -799,7 +786,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
799 | per_cpu(current_task, cpu) = idle; | 786 | per_cpu(current_task, cpu) = idle; |
800 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 787 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
801 | 788 | ||
802 | idle->thread.eip = (unsigned long) start_secondary; | 789 | idle->thread.ip = (unsigned long) start_secondary; |
803 | /* start_eip had better be page-aligned! */ | 790 | /* start_eip had better be page-aligned! */ |
804 | start_eip = setup_trampoline(); | 791 | start_eip = setup_trampoline(); |
805 | 792 | ||
@@ -807,9 +794,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
807 | alternatives_smp_switch(1); | 794 | alternatives_smp_switch(1); |
808 | 795 | ||
809 | /* So we see what's up */ | 796 | /* So we see what's up */ |
810 | printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); | 797 | printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip); |
811 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 798 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
812 | stack_start.esp = (void *) idle->thread.esp; | 799 | stack_start.sp = (void *) idle->thread.sp; |
813 | 800 | ||
814 | irq_ctx_init(cpu); | 801 | irq_ctx_init(cpu); |
815 | 802 | ||
@@ -1091,7 +1078,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1091 | * Allow the user to impress friends. | 1078 | * Allow the user to impress friends. |
1092 | */ | 1079 | */ |
1093 | Dprintk("Before bogomips.\n"); | 1080 | Dprintk("Before bogomips.\n"); |
1094 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 1081 | for_each_possible_cpu(cpu) |
1095 | if (cpu_isset(cpu, cpu_callout_map)) | 1082 | if (cpu_isset(cpu, cpu_callout_map)) |
1096 | bogosum += cpu_data(cpu).loops_per_jiffy; | 1083 | bogosum += cpu_data(cpu).loops_per_jiffy; |
1097 | printk(KERN_INFO | 1084 | printk(KERN_INFO |
@@ -1122,7 +1109,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1122 | * construct cpu_sibling_map, so that we can tell sibling CPUs | 1109 | * construct cpu_sibling_map, so that we can tell sibling CPUs |
1123 | * efficiently. | 1110 | * efficiently. |
1124 | */ | 1111 | */ |
1125 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1112 | for_each_possible_cpu(cpu) { |
1126 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | 1113 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
1127 | cpus_clear(per_cpu(cpu_core_map, cpu)); | 1114 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
1128 | } | 1115 | } |
@@ -1296,12 +1283,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1296 | setup_ioapic_dest(); | 1283 | setup_ioapic_dest(); |
1297 | #endif | 1284 | #endif |
1298 | zap_low_mappings(); | 1285 | zap_low_mappings(); |
1299 | #ifndef CONFIG_HOTPLUG_CPU | ||
1300 | /* | ||
1301 | * Disable executability of the SMP trampoline: | ||
1302 | */ | ||
1303 | set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); | ||
1304 | #endif | ||
1305 | } | 1286 | } |
1306 | 1287 | ||
1307 | void __init smp_intr_init(void) | 1288 | void __init smp_intr_init(void) |
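
initialize_secondary() above brings the AP up by loading thread.sp into %esp and jumping to thread.ip: a context is essentially a stack pointer plus an instruction pointer (plus whatever head.S staged in stack_start). As a loose user-space analogy only, not the kernel mechanism, ucontext shows the same idea of handing control to a fresh sp/ip pair:

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t main_ctx, secondary_ctx;

static void secondary_entry(void)
{
	puts("running on the secondary stack");
	/* returning resumes main_ctx via uc_link */
}

int main(void)
{
	enum { STACK_SIZE = 64 * 1024 };
	char *stack = malloc(STACK_SIZE);

	getcontext(&secondary_ctx);
	secondary_ctx.uc_stack.ss_sp = stack;
	secondary_ctx.uc_stack.ss_size = STACK_SIZE;
	secondary_ctx.uc_link = &main_ctx;
	makecontext(&secondary_ctx, secondary_entry, 0);

	puts("switching to a fresh sp/ip pair");
	swapcontext(&main_ctx, &secondary_ctx);
	puts("back on the original stack");

	free(stack);
	return 0;
}
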
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index aaf4e1291217..d53bd6fcb428 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c | |||
@@ -65,7 +65,7 @@ int smp_num_siblings = 1; | |||
65 | EXPORT_SYMBOL(smp_num_siblings); | 65 | EXPORT_SYMBOL(smp_num_siblings); |
66 | 66 | ||
67 | /* Last level cache ID of each logical CPU */ | 67 | /* Last level cache ID of each logical CPU */ |
68 | DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; | 68 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
69 | 69 | ||
70 | /* Bitmask of currently online CPUs */ | 70 | /* Bitmask of currently online CPUs */ |
71 | cpumask_t cpu_online_map __read_mostly; | 71 | cpumask_t cpu_online_map __read_mostly; |
@@ -78,8 +78,6 @@ EXPORT_SYMBOL(cpu_online_map); | |||
78 | */ | 78 | */ |
79 | cpumask_t cpu_callin_map; | 79 | cpumask_t cpu_callin_map; |
80 | cpumask_t cpu_callout_map; | 80 | cpumask_t cpu_callout_map; |
81 | EXPORT_SYMBOL(cpu_callout_map); | ||
82 | |||
83 | cpumask_t cpu_possible_map; | 81 | cpumask_t cpu_possible_map; |
84 | EXPORT_SYMBOL(cpu_possible_map); | 82 | EXPORT_SYMBOL(cpu_possible_map); |
85 | 83 | ||
@@ -113,10 +111,20 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; | |||
113 | * a new thread. Also avoids complicated thread destroy functionality | 111 | * a new thread. Also avoids complicated thread destroy functionality |
114 | * for idle threads. | 112 | * for idle threads. |
115 | */ | 113 | */ |
114 | #ifdef CONFIG_HOTPLUG_CPU | ||
115 | /* | ||
116 | * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is | ||
117 | * removed after init for !CONFIG_HOTPLUG_CPU. | ||
118 | */ | ||
119 | static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); | ||
120 | #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) | ||
121 | #define set_idle_for_cpu(x,p) (per_cpu(idle_thread_array, x) = (p)) | ||
122 | #else | ||
116 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; | 123 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; |
117 | |||
118 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) | 124 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) |
119 | #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) | 125 | #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) |
126 | #endif | ||
127 | |||
120 | 128 | ||
121 | /* | 129 | /* |
122 | * Currently trivial. Write the real->protected mode | 130 | * Currently trivial. Write the real->protected mode |
@@ -212,6 +220,7 @@ void __cpuinit smp_callin(void) | |||
212 | 220 | ||
213 | Dprintk("CALLIN, before setup_local_APIC().\n"); | 221 | Dprintk("CALLIN, before setup_local_APIC().\n"); |
214 | setup_local_APIC(); | 222 | setup_local_APIC(); |
223 | end_local_APIC_setup(); | ||
215 | 224 | ||
216 | /* | 225 | /* |
217 | * Get our bogomips. | 226 | * Get our bogomips. |
@@ -338,7 +347,7 @@ void __cpuinit start_secondary(void) | |||
338 | 347 | ||
339 | if (nmi_watchdog == NMI_IO_APIC) { | 348 | if (nmi_watchdog == NMI_IO_APIC) { |
340 | disable_8259A_irq(0); | 349 | disable_8259A_irq(0); |
341 | enable_NMI_through_LVT0(NULL); | 350 | enable_NMI_through_LVT0(); |
342 | enable_8259A_irq(0); | 351 | enable_8259A_irq(0); |
343 | } | 352 | } |
344 | 353 | ||
@@ -370,7 +379,7 @@ void __cpuinit start_secondary(void) | |||
370 | 379 | ||
371 | unlock_ipi_call_lock(); | 380 | unlock_ipi_call_lock(); |
372 | 381 | ||
373 | setup_secondary_APIC_clock(); | 382 | setup_secondary_clock(); |
374 | 383 | ||
375 | cpu_idle(); | 384 | cpu_idle(); |
376 | } | 385 | } |
@@ -384,19 +393,20 @@ static void inquire_remote_apic(int apicid) | |||
384 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 393 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
385 | char *names[] = { "ID", "VERSION", "SPIV" }; | 394 | char *names[] = { "ID", "VERSION", "SPIV" }; |
386 | int timeout; | 395 | int timeout; |
387 | unsigned int status; | 396 | u32 status; |
388 | 397 | ||
389 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); | 398 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); |
390 | 399 | ||
391 | for (i = 0; i < ARRAY_SIZE(regs); i++) { | 400 | for (i = 0; i < ARRAY_SIZE(regs); i++) { |
392 | printk("... APIC #%d %s: ", apicid, names[i]); | 401 | printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); |
393 | 402 | ||
394 | /* | 403 | /* |
395 | * Wait for idle. | 404 | * Wait for idle. |
396 | */ | 405 | */ |
397 | status = safe_apic_wait_icr_idle(); | 406 | status = safe_apic_wait_icr_idle(); |
398 | if (status) | 407 | if (status) |
399 | printk("a previous APIC delivery may have failed\n"); | 408 | printk(KERN_CONT |
409 | "a previous APIC delivery may have failed\n"); | ||
400 | 410 | ||
401 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 411 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
402 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); | 412 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); |
@@ -410,10 +420,10 @@ static void inquire_remote_apic(int apicid) | |||
410 | switch (status) { | 420 | switch (status) { |
411 | case APIC_ICR_RR_VALID: | 421 | case APIC_ICR_RR_VALID: |
412 | status = apic_read(APIC_RRR); | 422 | status = apic_read(APIC_RRR); |
413 | printk("%08x\n", status); | 423 | printk(KERN_CONT "%08x\n", status); |
414 | break; | 424 | break; |
415 | default: | 425 | default: |
416 | printk("failed\n"); | 426 | printk(KERN_CONT "failed\n"); |
417 | } | 427 | } |
418 | } | 428 | } |
419 | } | 429 | } |
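A note on the printk() changes in this hunk: when one logical message is assembled from several printk() calls, only the first call carries a log level and the continuations use KERN_CONT, so level filtering cannot split the line. A minimal sketch of the pattern (the buffer, loop bounds and variable names are only illustrative):

	/* sketch: one logical log line built from several printk() calls */
	printk(KERN_INFO "APIC register dump:");
	for (i = 0; i < n; i++)
		printk(KERN_CONT " %08x", regs[i]);
	printk(KERN_CONT "\n");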
@@ -466,7 +476,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
466 | */ | 476 | */ |
467 | Dprintk("#startup loops: %d.\n", num_starts); | 477 | Dprintk("#startup loops: %d.\n", num_starts); |
468 | 478 | ||
469 | maxlvt = get_maxlvt(); | 479 | maxlvt = lapic_get_maxlvt(); |
470 | 480 | ||
471 | for (j = 1; j <= num_starts; j++) { | 481 | for (j = 1; j <= num_starts; j++) { |
472 | Dprintk("Sending STARTUP #%d.\n",j); | 482 | Dprintk("Sending STARTUP #%d.\n",j); |
@@ -577,7 +587,7 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
577 | c_idle.idle = get_idle_for_cpu(cpu); | 587 | c_idle.idle = get_idle_for_cpu(cpu); |
578 | 588 | ||
579 | if (c_idle.idle) { | 589 | if (c_idle.idle) { |
580 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) | 590 | c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) |
581 | (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); | 591 | (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); |
582 | init_idle(c_idle.idle, cpu); | 592 | init_idle(c_idle.idle, cpu); |
583 | goto do_rest; | 593 | goto do_rest; |
@@ -613,8 +623,8 @@ do_rest: | |||
613 | 623 | ||
614 | start_rip = setup_trampoline(); | 624 | start_rip = setup_trampoline(); |
615 | 625 | ||
616 | init_rsp = c_idle.idle->thread.rsp; | 626 | init_rsp = c_idle.idle->thread.sp; |
617 | per_cpu(init_tss,cpu).rsp0 = init_rsp; | 627 | load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); |
618 | initial_code = start_secondary; | 628 | initial_code = start_secondary; |
619 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 629 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
620 | 630 | ||
@@ -691,7 +701,7 @@ do_rest: | |||
691 | } | 701 | } |
692 | if (boot_error) { | 702 | if (boot_error) { |
693 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ | 703 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ |
694 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | 704 | clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ |
695 | clear_node_cpumask(cpu); /* was set by numa_add_cpu */ | 705 | clear_node_cpumask(cpu); /* was set by numa_add_cpu */ |
696 | cpu_clear(cpu, cpu_present_map); | 706 | cpu_clear(cpu, cpu_present_map); |
697 | cpu_clear(cpu, cpu_possible_map); | 707 | cpu_clear(cpu, cpu_possible_map); |
@@ -841,24 +851,16 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
841 | return 0; | 851 | return 0; |
842 | } | 852 | } |
843 | 853 | ||
844 | /* | 854 | static void __init smp_cpu_index_default(void) |
845 | * Copy apicid's found by MP_processor_info from initial array to the per cpu | ||
846 | * data area. The x86_cpu_to_apicid_init array is then expendable and the | ||
847 | * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no | ||
848 | * longer available. | ||
849 | */ | ||
850 | void __init smp_set_apicids(void) | ||
851 | { | 855 | { |
852 | int cpu; | 856 | int i; |
857 | struct cpuinfo_x86 *c; | ||
853 | 858 | ||
854 | for_each_cpu_mask(cpu, cpu_possible_map) { | 859 | for_each_cpu_mask(i, cpu_possible_map) { |
855 | if (per_cpu_offset(cpu)) | 860 | c = &cpu_data(i); |
856 | per_cpu(x86_cpu_to_apicid, cpu) = | 861 | /* mark all to hotplug */ |
857 | x86_cpu_to_apicid_init[cpu]; | 862 | c->cpu_index = NR_CPUS; |
858 | } | 863 | } |
859 | |||
860 | /* indicate the static array will be going away soon */ | ||
861 | x86_cpu_to_apicid_ptr = NULL; | ||
862 | } | 864 | } |
863 | 865 | ||
864 | /* | 866 | /* |
@@ -868,9 +870,9 @@ void __init smp_set_apicids(void) | |||
868 | void __init smp_prepare_cpus(unsigned int max_cpus) | 870 | void __init smp_prepare_cpus(unsigned int max_cpus) |
869 | { | 871 | { |
870 | nmi_watchdog_default(); | 872 | nmi_watchdog_default(); |
873 | smp_cpu_index_default(); | ||
871 | current_cpu_data = boot_cpu_data; | 874 | current_cpu_data = boot_cpu_data; |
872 | current_thread_info()->cpu = 0; /* needed? */ | 875 | current_thread_info()->cpu = 0; /* needed? */ |
873 | smp_set_apicids(); | ||
874 | set_cpu_sibling_map(0); | 876 | set_cpu_sibling_map(0); |
875 | 877 | ||
876 | if (smp_sanity_check(max_cpus) < 0) { | 878 | if (smp_sanity_check(max_cpus) < 0) { |
@@ -885,6 +887,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
885 | */ | 887 | */ |
886 | setup_local_APIC(); | 888 | setup_local_APIC(); |
887 | 889 | ||
890 | /* | ||
891 | * Enable IO APIC before setting up error vector | ||
892 | */ | ||
893 | if (!skip_ioapic_setup && nr_ioapics) | ||
894 | enable_IO_APIC(); | ||
895 | end_local_APIC_setup(); | ||
896 | |||
888 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { | 897 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { |
889 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | 898 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", |
890 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); | 899 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); |
@@ -903,7 +912,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
903 | * Set up local APIC timer on boot CPU. | 912 | * Set up local APIC timer on boot CPU. |
904 | */ | 913 | */ |
905 | 914 | ||
906 | setup_boot_APIC_clock(); | 915 | setup_boot_clock(); |
907 | } | 916 | } |
908 | 917 | ||
909 | /* | 918 | /* |
@@ -912,7 +921,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
912 | void __init smp_prepare_boot_cpu(void) | 921 | void __init smp_prepare_boot_cpu(void) |
913 | { | 922 | { |
914 | int me = smp_processor_id(); | 923 | int me = smp_processor_id(); |
915 | cpu_set(me, cpu_online_map); | 924 | /* already set me in cpu_online_map in boot_cpu_init() */ |
916 | cpu_set(me, cpu_callout_map); | 925 | cpu_set(me, cpu_callout_map); |
917 | per_cpu(cpu_state, me) = CPU_ONLINE; | 926 | per_cpu(cpu_state, me) = CPU_ONLINE; |
918 | } | 927 | } |
@@ -1010,13 +1019,13 @@ static void remove_siblinginfo(int cpu) | |||
1010 | cpu_clear(cpu, cpu_sibling_setup_map); | 1019 | cpu_clear(cpu, cpu_sibling_setup_map); |
1011 | } | 1020 | } |
1012 | 1021 | ||
1013 | void remove_cpu_from_maps(void) | 1022 | static void __ref remove_cpu_from_maps(void) |
1014 | { | 1023 | { |
1015 | int cpu = smp_processor_id(); | 1024 | int cpu = smp_processor_id(); |
1016 | 1025 | ||
1017 | cpu_clear(cpu, cpu_callout_map); | 1026 | cpu_clear(cpu, cpu_callout_map); |
1018 | cpu_clear(cpu, cpu_callin_map); | 1027 | cpu_clear(cpu, cpu_callin_map); |
1019 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | 1028 | clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ |
1020 | clear_node_cpumask(cpu); | 1029 | clear_node_cpumask(cpu); |
1021 | } | 1030 | } |
1022 | 1031 | ||
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c index bbfe85a0f699..8bc38af29aef 100644 --- a/arch/x86/kernel/smpcommon_32.c +++ b/arch/x86/kernel/smpcommon_32.c | |||
@@ -14,10 +14,11 @@ __cpuinit void init_gdt(int cpu) | |||
14 | { | 14 | { |
15 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 15 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); |
16 | 16 | ||
17 | pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, | 17 | pack_descriptor(&gdt[GDT_ENTRY_PERCPU], |
18 | (u32 *)&gdt[GDT_ENTRY_PERCPU].b, | ||
19 | __per_cpu_offset[cpu], 0xFFFFF, | 18 | __per_cpu_offset[cpu], 0xFFFFF, |
20 | 0x80 | DESCTYPE_S | 0x2, 0x8); | 19 | 0x2 | DESCTYPE_S, 0x8); |
20 | |||
21 | gdt[GDT_ENTRY_PERCPU].s = 1; | ||
21 | 22 | ||
22 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | 23 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; |
23 | per_cpu(cpu_number, cpu) = cpu; | 24 | per_cpu(cpu_number, cpu) = cpu; |
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index 2a8713ec0f9a..b72e61359c36 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c | |||
@@ -57,8 +57,6 @@ static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; | |||
57 | static int num_memory_chunks; /* total number of memory chunks */ | 57 | static int num_memory_chunks; /* total number of memory chunks */ |
58 | static u8 __initdata apicid_to_pxm[MAX_APICID]; | 58 | static u8 __initdata apicid_to_pxm[MAX_APICID]; |
59 | 59 | ||
60 | extern void * boot_ioremap(unsigned long, unsigned long); | ||
61 | |||
62 | /* Identify CPU proximity domains */ | 60 | /* Identify CPU proximity domains */ |
63 | static void __init parse_cpu_affinity_structure(char *p) | 61 | static void __init parse_cpu_affinity_structure(char *p) |
64 | { | 62 | { |
@@ -276,7 +274,7 @@ int __init get_memcfg_from_srat(void) | |||
276 | int tables = 0; | 274 | int tables = 0; |
277 | int i = 0; | 275 | int i = 0; |
278 | 276 | ||
279 | rsdp_address = acpi_find_rsdp(); | 277 | rsdp_address = acpi_os_get_root_pointer(); |
280 | if (!rsdp_address) { | 278 | if (!rsdp_address) { |
281 | printk("%s: System description tables not found\n", | 279 | printk("%s: System description tables not found\n", |
282 | __FUNCTION__); | 280 | __FUNCTION__); |
@@ -299,7 +297,7 @@ int __init get_memcfg_from_srat(void) | |||
299 | } | 297 | } |
300 | 298 | ||
301 | rsdt = (struct acpi_table_rsdt *) | 299 | rsdt = (struct acpi_table_rsdt *) |
302 | boot_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); | 300 | early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); |
303 | 301 | ||
304 | if (!rsdt) { | 302 | if (!rsdt) { |
305 | printk(KERN_WARNING | 303 | printk(KERN_WARNING |
@@ -339,11 +337,11 @@ int __init get_memcfg_from_srat(void) | |||
339 | for (i = 0; i < tables; i++) { | 337 | for (i = 0; i < tables; i++) { |
340 | /* Map in header, then map in full table length. */ | 338 | /* Map in header, then map in full table length. */ |
341 | header = (struct acpi_table_header *) | 339 | header = (struct acpi_table_header *) |
342 | boot_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); | 340 | early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); |
343 | if (!header) | 341 | if (!header) |
344 | break; | 342 | break; |
345 | header = (struct acpi_table_header *) | 343 | header = (struct acpi_table_header *) |
346 | boot_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); | 344 | early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); |
347 | if (!header) | 345 | if (!header) |
348 | break; | 346 | break; |
349 | 347 | ||
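The switch from the private boot_ioremap() to early_ioremap() above uses the generic fixmap-backed early mapping interface, which provides a handful of temporary mappings before the normal ioremap()/vmalloc machinery is up. A hedged usage sketch (the physical address is a placeholder, and the early_iounmap() pairing is shown for completeness even though this hunk does not display the unmap side):

	/* sketch: temporarily map an ACPI table header during early boot */
	static int __init peek_table(unsigned long table_phys)
	{
		struct acpi_table_header *hdr;

		hdr = early_ioremap(table_phys, sizeof(*hdr));
		if (!hdr)
			return -ENOMEM;
		/* ... inspect hdr->length, remap with the full length if needed ... */
		early_iounmap(hdr, sizeof(*hdr));
		return 0;
	}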
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6fa6cf036c70..02f0f61f5b11 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -22,9 +22,23 @@ static int save_stack_stack(void *data, char *name) | |||
22 | return -1; | 22 | return -1; |
23 | } | 23 | } |
24 | 24 | ||
25 | static void save_stack_address(void *data, unsigned long addr) | 25 | static void save_stack_address(void *data, unsigned long addr, int reliable) |
26 | { | ||
27 | struct stack_trace *trace = data; | ||
28 | if (trace->skip > 0) { | ||
29 | trace->skip--; | ||
30 | return; | ||
31 | } | ||
32 | if (trace->nr_entries < trace->max_entries) | ||
33 | trace->entries[trace->nr_entries++] = addr; | ||
34 | } | ||
35 | |||
36 | static void | ||
37 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | ||
26 | { | 38 | { |
27 | struct stack_trace *trace = (struct stack_trace *)data; | 39 | struct stack_trace *trace = (struct stack_trace *)data; |
40 | if (in_sched_functions(addr)) | ||
41 | return; | ||
28 | if (trace->skip > 0) { | 42 | if (trace->skip > 0) { |
29 | trace->skip--; | 43 | trace->skip--; |
30 | return; | 44 | return; |
@@ -40,13 +54,26 @@ static const struct stacktrace_ops save_stack_ops = { | |||
40 | .address = save_stack_address, | 54 | .address = save_stack_address, |
41 | }; | 55 | }; |
42 | 56 | ||
57 | static const struct stacktrace_ops save_stack_ops_nosched = { | ||
58 | .warning = save_stack_warning, | ||
59 | .warning_symbol = save_stack_warning_symbol, | ||
60 | .stack = save_stack_stack, | ||
61 | .address = save_stack_address_nosched, | ||
62 | }; | ||
63 | |||
43 | /* | 64 | /* |
44 | * Save stack-backtrace addresses into a stack_trace buffer. | 65 | * Save stack-backtrace addresses into a stack_trace buffer. |
45 | */ | 66 | */ |
46 | void save_stack_trace(struct stack_trace *trace) | 67 | void save_stack_trace(struct stack_trace *trace) |
47 | { | 68 | { |
48 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); | 69 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); |
70 | if (trace->nr_entries < trace->max_entries) | ||
71 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
72 | } | ||
73 | |||
74 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | ||
75 | { | ||
76 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | ||
49 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
50 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
51 | } | 79 | } |
52 | EXPORT_SYMBOL(save_stack_trace); | ||
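The new save_stack_trace_tsk() fills a caller-supplied struct stack_trace just like save_stack_trace(), but filters out scheduler-internal frames so a blocked task's trace starts at the point that actually went to sleep. A hedged usage sketch (the buffer size is arbitrary and print_stack_trace() from kernel/stacktrace.c is assumed as the printing helper):

	/* sketch: capture and print the current backtrace */
	static unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 1,	/* drop the capture site itself */
	};

	save_stack_trace(&trace);
	print_stack_trace(&trace, 0);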
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c new file mode 100644 index 000000000000..2ef1a5f8d675 --- /dev/null +++ b/arch/x86/kernel/step.c | |||
@@ -0,0 +1,203 @@ | |||
1 | /* | ||
2 | * x86 single-step support code, common to 32-bit and 64-bit. | ||
3 | */ | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/ptrace.h> | ||
7 | |||
8 | unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) | ||
9 | { | ||
10 | unsigned long addr, seg; | ||
11 | |||
12 | addr = regs->ip; | ||
13 | seg = regs->cs & 0xffff; | ||
14 | if (v8086_mode(regs)) { | ||
15 | addr = (addr & 0xffff) + (seg << 4); | ||
16 | return addr; | ||
17 | } | ||
18 | |||
19 | /* | ||
20 | * We'll assume that the code segments in the GDT | ||
21 | * are all zero-based. That is largely true: the | ||
22 | * TLS segments are used for data, and the PNPBIOS | ||
23 | * and APM bios ones we just ignore here. | ||
24 | */ | ||
25 | if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { | ||
26 | u32 *desc; | ||
27 | unsigned long base; | ||
28 | |||
29 | seg &= ~7UL; | ||
30 | |||
31 | mutex_lock(&child->mm->context.lock); | ||
32 | if (unlikely((seg >> 3) >= child->mm->context.size)) | ||
33 | addr = -1L; /* bogus selector, access would fault */ | ||
34 | else { | ||
35 | desc = child->mm->context.ldt + seg; | ||
36 | base = ((desc[0] >> 16) | | ||
37 | ((desc[1] & 0xff) << 16) | | ||
38 | (desc[1] & 0xff000000)); | ||
39 | |||
40 | /* 16-bit code segment? */ | ||
41 | if (!((desc[1] >> 22) & 1)) | ||
42 | addr &= 0xffff; | ||
43 | addr += base; | ||
44 | } | ||
45 | mutex_unlock(&child->mm->context.lock); | ||
46 | } | ||
47 | |||
48 | return addr; | ||
49 | } | ||
50 | |||
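The LDT branch above reassembles a 32-bit segment base that the descriptor stores in three pieces: bits 31..16 of the first word hold base[15:0], the low byte of the second word holds base[23:16], and its top byte holds base[31:24]. A small worked check of that extraction (the descriptor value is invented for illustration):

	/* descriptor for base 0x11223344, limit 0xffff, typical access/flags bytes */
	u32 desc[2] = { 0x3344ffff, 0x11cf9a22 };
	unsigned long base;

	base = (desc[0] >> 16) |
	       ((desc[1] & 0xff) << 16) |
	       (desc[1] & 0xff000000);
	/* base == 0x11223344 */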
51 | static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | ||
52 | { | ||
53 | int i, copied; | ||
54 | unsigned char opcode[15]; | ||
55 | unsigned long addr = convert_ip_to_linear(child, regs); | ||
56 | |||
57 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | ||
58 | for (i = 0; i < copied; i++) { | ||
59 | switch (opcode[i]) { | ||
60 | /* popf and iret */ | ||
61 | case 0x9d: case 0xcf: | ||
62 | return 1; | ||
63 | |||
64 | /* CHECKME: 64 65 */ | ||
65 | |||
66 | /* opcode and address size prefixes */ | ||
67 | case 0x66: case 0x67: | ||
68 | continue; | ||
69 | /* irrelevant prefixes (segment overrides and repeats) */ | ||
70 | case 0x26: case 0x2e: | ||
71 | case 0x36: case 0x3e: | ||
72 | case 0x64: case 0x65: | ||
73 | case 0xf0: case 0xf2: case 0xf3: | ||
74 | continue; | ||
75 | |||
76 | #ifdef CONFIG_X86_64 | ||
77 | case 0x40 ... 0x4f: | ||
78 | if (regs->cs != __USER_CS) | ||
79 | /* 32-bit mode: register increment */ | ||
80 | return 0; | ||
81 | /* 64-bit mode: REX prefix */ | ||
82 | continue; | ||
83 | #endif | ||
84 | |||
85 | /* CHECKME: f2, f3 */ | ||
86 | |||
87 | /* | ||
88 | * pushf: NOTE! We should probably not let | ||
89 | * the user see the TF bit being set. But | ||
90 | * it's more pain than it's worth to avoid | ||
91 | * it, and a debugger could emulate this | ||
92 | * all in user space if it _really_ cares. | ||
93 | */ | ||
94 | case 0x9c: | ||
95 | default: | ||
96 | return 0; | ||
97 | } | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Enable single-stepping. Return nonzero if user mode is not using TF itself. | ||
104 | */ | ||
105 | static int enable_single_step(struct task_struct *child) | ||
106 | { | ||
107 | struct pt_regs *regs = task_pt_regs(child); | ||
108 | |||
109 | /* | ||
110 | * Always set TIF_SINGLESTEP - this guarantees that | ||
111 | * we single-step system calls etc.. This will also | ||
112 | * cause us to set TF when returning to user mode. | ||
113 | */ | ||
114 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
115 | |||
116 | /* | ||
117 | * If TF was already set, don't do anything else | ||
118 | */ | ||
119 | if (regs->flags & X86_EFLAGS_TF) | ||
120 | return 0; | ||
121 | |||
122 | /* Set TF on the kernel stack.. */ | ||
123 | regs->flags |= X86_EFLAGS_TF; | ||
124 | |||
125 | /* | ||
126 | * ..but if TF is changed by the instruction we will trace, | ||
127 | * don't mark it as being "us" that set it, so that we | ||
128 | * won't clear it by hand later. | ||
129 | */ | ||
130 | if (is_setting_trap_flag(child, regs)) | ||
131 | return 0; | ||
132 | |||
133 | set_tsk_thread_flag(child, TIF_FORCED_TF); | ||
134 | |||
135 | return 1; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. | ||
140 | */ | ||
141 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | ||
142 | { | ||
143 | child->thread.debugctlmsr = val; | ||
144 | |||
145 | if (child != current) | ||
146 | return; | ||
147 | |||
148 | wrmsrl(MSR_IA32_DEBUGCTLMSR, val); | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * Enable single or block step. | ||
153 | */ | ||
154 | static void enable_step(struct task_struct *child, bool block) | ||
155 | { | ||
156 | /* | ||
157 | * Make sure block stepping (BTF) is not enabled unless it should be. | ||
158 | * Note that we don't try to worry about any is_setting_trap_flag() | ||
159 | * instructions after the first when using block stepping. | ||
160 | * So no one should try to use debugger block stepping in a program ||
161 | * that uses user-mode single stepping itself. | ||
162 | */ | ||
163 | if (enable_single_step(child) && block) { | ||
164 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
165 | write_debugctlmsr(child, | ||
166 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | ||
167 | } else { | ||
168 | write_debugctlmsr(child, | ||
169 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); ||
170 | |||
171 | if (!child->thread.debugctlmsr) | ||
172 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | void user_enable_single_step(struct task_struct *child) | ||
177 | { | ||
178 | enable_step(child, 0); | ||
179 | } | ||
180 | |||
181 | void user_enable_block_step(struct task_struct *child) | ||
182 | { | ||
183 | enable_step(child, 1); | ||
184 | } | ||
185 | |||
186 | void user_disable_single_step(struct task_struct *child) | ||
187 | { | ||
188 | /* | ||
189 | * Make sure block stepping (BTF) is disabled. | ||
190 | */ | ||
191 | write_debugctlmsr(child, | ||
192 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); ||
193 | |||
194 | if (!child->thread.debugctlmsr) | ||
195 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
196 | |||
197 | /* Always clear TIF_SINGLESTEP... */ | ||
198 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
199 | |||
200 | /* But touch TF only if it was set by us.. */ | ||
201 | if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) | ||
202 | task_pt_regs(child)->flags &= ~X86_EFLAGS_TF; | ||
203 | } | ||
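These user_enable_single_step()/user_enable_block_step()/user_disable_single_step() entry points are what the ptrace requests PTRACE_SINGLESTEP and, on x86, PTRACE_SINGLEBLOCK resolve to. A minimal user-space sketch of driving single-stepping (error handling trimmed; the traced program is just a placeholder):

#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int i;
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execl("/bin/true", "true", (char *)NULL);
		_exit(1);
	}
	waitpid(pid, NULL, 0);		/* child stopped at exec */
	for (i = 0; i < 5; i++) {
		/* each request reaches the arch single-step enable path */
		ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);
		waitpid(pid, NULL, 0);
	}
	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}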
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 2e5efaaf8800..7ac7130022f1 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c | |||
@@ -17,9 +17,26 @@ | |||
17 | /* References to section boundaries */ | 17 | /* References to section boundaries */ |
18 | extern const void __nosave_begin, __nosave_end; | 18 | extern const void __nosave_begin, __nosave_end; |
19 | 19 | ||
20 | static void fix_processor_context(void); | ||
21 | |||
20 | struct saved_context saved_context; | 22 | struct saved_context saved_context; |
21 | 23 | ||
22 | void __save_processor_state(struct saved_context *ctxt) | 24 | /** |
25 | * __save_processor_state - save CPU registers before creating a | ||
26 | * hibernation image and before restoring the memory state from it | ||
27 | * @ctxt - structure to store the registers contents in | ||
28 | * | ||
29 | * NOTE: If there is a CPU register the modification of which by the | ||
30 | * boot kernel (ie. the kernel used for loading the hibernation image) | ||
31 | * might affect the operations of the restored target kernel (ie. the one | ||
32 | * saved in the hibernation image), then its contents must be saved by this | ||
33 | * function. In other words, if kernel A is hibernated and different | ||
34 | * kernel B is used for loading the hibernation image into memory, the | ||
35 | * kernel A's __save_processor_state() function must save all registers | ||
36 | * needed by kernel A, so that it can operate correctly after the resume | ||
37 | * regardless of what kernel B does in the meantime. | ||
38 | */ | ||
39 | static void __save_processor_state(struct saved_context *ctxt) | ||
23 | { | 40 | { |
24 | kernel_fpu_begin(); | 41 | kernel_fpu_begin(); |
25 | 42 | ||
@@ -69,7 +86,12 @@ static void do_fpu_end(void) | |||
69 | kernel_fpu_end(); | 86 | kernel_fpu_end(); |
70 | } | 87 | } |
71 | 88 | ||
72 | void __restore_processor_state(struct saved_context *ctxt) | 89 | /** |
90 | * __restore_processor_state - restore the contents of CPU registers saved | ||
91 | * by __save_processor_state() | ||
92 | * @ctxt - structure to load the registers contents from | ||
93 | */ | ||
94 | static void __restore_processor_state(struct saved_context *ctxt) | ||
73 | { | 95 | { |
74 | /* | 96 | /* |
75 | * control registers | 97 | * control registers |
@@ -113,14 +135,19 @@ void restore_processor_state(void) | |||
113 | __restore_processor_state(&saved_context); | 135 | __restore_processor_state(&saved_context); |
114 | } | 136 | } |
115 | 137 | ||
116 | void fix_processor_context(void) | 138 | static void fix_processor_context(void) |
117 | { | 139 | { |
118 | int cpu = smp_processor_id(); | 140 | int cpu = smp_processor_id(); |
119 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 141 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
120 | 142 | ||
121 | set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ | 143 | /* |
144 | * This just modifies memory; should not be necessary. But... This | ||
145 | * is necessary, because 386 hardware has concept of busy TSS or some | ||
146 | * similar stupidity. | ||
147 | */ | ||
148 | set_tss_desc(cpu, t); | ||
122 | 149 | ||
123 | cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9; | 150 | get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; |
124 | 151 | ||
125 | syscall_init(); /* This sets MSR_*STAR and related */ | 152 | syscall_init(); /* This sets MSR_*STAR and related */ |
126 | load_TR_desc(); /* This does ltr */ | 153 | load_TR_desc(); /* This does ltr */ |
@@ -138,7 +165,6 @@ void fix_processor_context(void) | |||
138 | loaddebug(¤t->thread, 6); | 165 | loaddebug(¤t->thread, 6); |
139 | loaddebug(¤t->thread, 7); | 166 | loaddebug(¤t->thread, 7); |
140 | } | 167 | } |
141 | |||
142 | } | 168 | } |
143 | 169 | ||
144 | #ifdef CONFIG_HIBERNATION | 170 | #ifdef CONFIG_HIBERNATION |
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S index 72f952103e50..aeb9a4d7681e 100644 --- a/arch/x86/kernel/suspend_asm_64.S +++ b/arch/x86/kernel/suspend_asm_64.S | |||
@@ -18,13 +18,13 @@ | |||
18 | 18 | ||
19 | ENTRY(swsusp_arch_suspend) | 19 | ENTRY(swsusp_arch_suspend) |
20 | movq $saved_context, %rax | 20 | movq $saved_context, %rax |
21 | movq %rsp, pt_regs_rsp(%rax) | 21 | movq %rsp, pt_regs_sp(%rax) |
22 | movq %rbp, pt_regs_rbp(%rax) | 22 | movq %rbp, pt_regs_bp(%rax) |
23 | movq %rsi, pt_regs_rsi(%rax) | 23 | movq %rsi, pt_regs_si(%rax) |
24 | movq %rdi, pt_regs_rdi(%rax) | 24 | movq %rdi, pt_regs_di(%rax) |
25 | movq %rbx, pt_regs_rbx(%rax) | 25 | movq %rbx, pt_regs_bx(%rax) |
26 | movq %rcx, pt_regs_rcx(%rax) | 26 | movq %rcx, pt_regs_cx(%rax) |
27 | movq %rdx, pt_regs_rdx(%rax) | 27 | movq %rdx, pt_regs_dx(%rax) |
28 | movq %r8, pt_regs_r8(%rax) | 28 | movq %r8, pt_regs_r8(%rax) |
29 | movq %r9, pt_regs_r9(%rax) | 29 | movq %r9, pt_regs_r9(%rax) |
30 | movq %r10, pt_regs_r10(%rax) | 30 | movq %r10, pt_regs_r10(%rax) |
@@ -34,7 +34,7 @@ ENTRY(swsusp_arch_suspend) | |||
34 | movq %r14, pt_regs_r14(%rax) | 34 | movq %r14, pt_regs_r14(%rax) |
35 | movq %r15, pt_regs_r15(%rax) | 35 | movq %r15, pt_regs_r15(%rax) |
36 | pushfq | 36 | pushfq |
37 | popq pt_regs_eflags(%rax) | 37 | popq pt_regs_flags(%rax) |
38 | 38 | ||
39 | /* save the address of restore_registers */ | 39 | /* save the address of restore_registers */ |
40 | movq $restore_registers, %rax | 40 | movq $restore_registers, %rax |
@@ -115,13 +115,13 @@ ENTRY(restore_registers) | |||
115 | 115 | ||
116 | /* We don't restore %rax, it must be 0 anyway */ | 116 | /* We don't restore %rax, it must be 0 anyway */ |
117 | movq $saved_context, %rax | 117 | movq $saved_context, %rax |
118 | movq pt_regs_rsp(%rax), %rsp | 118 | movq pt_regs_sp(%rax), %rsp |
119 | movq pt_regs_rbp(%rax), %rbp | 119 | movq pt_regs_bp(%rax), %rbp |
120 | movq pt_regs_rsi(%rax), %rsi | 120 | movq pt_regs_si(%rax), %rsi |
121 | movq pt_regs_rdi(%rax), %rdi | 121 | movq pt_regs_di(%rax), %rdi |
122 | movq pt_regs_rbx(%rax), %rbx | 122 | movq pt_regs_bx(%rax), %rbx |
123 | movq pt_regs_rcx(%rax), %rcx | 123 | movq pt_regs_cx(%rax), %rcx |
124 | movq pt_regs_rdx(%rax), %rdx | 124 | movq pt_regs_dx(%rax), %rdx |
125 | movq pt_regs_r8(%rax), %r8 | 125 | movq pt_regs_r8(%rax), %r8 |
126 | movq pt_regs_r9(%rax), %r9 | 126 | movq pt_regs_r9(%rax), %r9 |
127 | movq pt_regs_r10(%rax), %r10 | 127 | movq pt_regs_r10(%rax), %r10 |
@@ -130,7 +130,7 @@ ENTRY(restore_registers) | |||
130 | movq pt_regs_r13(%rax), %r13 | 130 | movq pt_regs_r13(%rax), %r13 |
131 | movq pt_regs_r14(%rax), %r14 | 131 | movq pt_regs_r14(%rax), %r14 |
132 | movq pt_regs_r15(%rax), %r15 | 132 | movq pt_regs_r15(%rax), %r15 |
133 | pushq pt_regs_eflags(%rax) | 133 | pushq pt_regs_flags(%rax) |
134 | popfq | 134 | popfq |
135 | 135 | ||
136 | xorq %rax, %rax | 136 | xorq %rax, %rax |
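The pt_regs_sp/pt_regs_bp/pt_regs_flags symbols used in this file are not hand-maintained constants; they are struct pt_regs field offsets emitted at build time so the assembly can index the saved register area by name, and the rename here simply tracks the unified field names. A rough sketch of the usual asm-offsets idiom (the helper macro is spelled out locally because the exact helper used by this tree is an assumption):

	#include <stddef.h>
	#include <asm/ptrace.h>

	/* Lines of the form "->sym value" are extracted into a generated asm header. */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

	void asm_offsets(void)
	{
		DEFINE(pt_regs_sp,    offsetof(struct pt_regs, sp));
		DEFINE(pt_regs_bp,    offsetof(struct pt_regs, bp));
		DEFINE(pt_regs_flags, offsetof(struct pt_regs, flags));
	}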
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 907942ee6e76..bd802a5e1aa3 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/file.h> | 12 | #include <linux/file.h> |
13 | #include <linux/utsname.h> | 13 | #include <linux/utsname.h> |
14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
15 | #include <linux/random.h> | ||
15 | 16 | ||
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
17 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
@@ -65,6 +66,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
65 | unsigned long *end) | 66 | unsigned long *end) |
66 | { | 67 | { |
67 | if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { | 68 | if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { |
69 | unsigned long new_begin; | ||
68 | /* This is usually needed to map code in the small | 70 | /* This is usually needed to map code in the small |
69 | model, so it needs to be in the first 31bit. Limit | 71 | model, so it needs to be in the first 31bit. Limit |
70 | it to that. This means we need to move the | 72 | it to that. This means we need to move the |
71 | unmapped base down for this case. This can give | 73 | unmapped base down for this case. This can give |
72 | conflicts with the heap, but we assume that glibc | 74 | conflicts with the heap, but we assume that glibc |
73 | malloc knows how to fall back to mmap. Give it 1GB | 75 | malloc knows how to fall back to mmap. Give it 1GB |
74 | of playground for now. -AK */ | 76 | of playground for now. -AK */ |
75 | *begin = 0x40000000; | 77 | *begin = 0x40000000; |
76 | *end = 0x80000000; | 78 | *end = 0x80000000; |
79 | if (current->flags & PF_RANDOMIZE) { | ||
80 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); | ||
81 | if (new_begin) | ||
82 | *begin = new_begin; | ||
83 | } | ||
77 | } else { | 84 | } else { |
78 | *begin = TASK_UNMAPPED_BASE; | 85 | *begin = TASK_UNMAPPED_BASE; |
79 | *end = TASK_SIZE; | 86 | *end = TASK_SIZE; |
@@ -143,6 +150,97 @@ full_search: | |||
143 | } | 150 | } |
144 | } | 151 | } |
145 | 152 | ||
153 | |||
154 | unsigned long | ||
155 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | ||
156 | const unsigned long len, const unsigned long pgoff, | ||
157 | const unsigned long flags) | ||
158 | { | ||
159 | struct vm_area_struct *vma; | ||
160 | struct mm_struct *mm = current->mm; | ||
161 | unsigned long addr = addr0; | ||
162 | |||
163 | /* requested length too big for entire address space */ | ||
164 | if (len > TASK_SIZE) | ||
165 | return -ENOMEM; | ||
166 | |||
167 | if (flags & MAP_FIXED) | ||
168 | return addr; | ||
169 | |||
170 | /* for MAP_32BIT mappings we force the legacy mmap base */ ||
171 | if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) | ||
172 | goto bottomup; | ||
173 | |||
174 | /* requesting a specific address */ | ||
175 | if (addr) { | ||
176 | addr = PAGE_ALIGN(addr); | ||
177 | vma = find_vma(mm, addr); | ||
178 | if (TASK_SIZE - len >= addr && | ||
179 | (!vma || addr + len <= vma->vm_start)) | ||
180 | return addr; | ||
181 | } | ||
182 | |||
183 | /* check if free_area_cache is useful for us */ | ||
184 | if (len <= mm->cached_hole_size) { | ||
185 | mm->cached_hole_size = 0; | ||
186 | mm->free_area_cache = mm->mmap_base; | ||
187 | } | ||
188 | |||
189 | /* either no address requested or can't fit in requested address hole */ | ||
190 | addr = mm->free_area_cache; | ||
191 | |||
192 | /* make sure it can fit in the remaining address space */ | ||
193 | if (addr > len) { | ||
194 | vma = find_vma(mm, addr-len); | ||
195 | if (!vma || addr <= vma->vm_start) | ||
196 | /* remember the address as a hint for next time */ | ||
197 | return (mm->free_area_cache = addr-len); | ||
198 | } | ||
199 | |||
200 | if (mm->mmap_base < len) | ||
201 | goto bottomup; | ||
202 | |||
203 | addr = mm->mmap_base-len; | ||
204 | |||
205 | do { | ||
206 | /* | ||
207 | * Lookup failure means no vma is above this address, | ||
208 | * else if new region fits below vma->vm_start, | ||
209 | * return with success: | ||
210 | */ | ||
211 | vma = find_vma(mm, addr); | ||
212 | if (!vma || addr+len <= vma->vm_start) | ||
213 | /* remember the address as a hint for next time */ | ||
214 | return (mm->free_area_cache = addr); | ||
215 | |||
216 | /* remember the largest hole we saw so far */ | ||
217 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
218 | mm->cached_hole_size = vma->vm_start - addr; | ||
219 | |||
220 | /* try just below the current vma->vm_start */ | ||
221 | addr = vma->vm_start-len; | ||
222 | } while (len < vma->vm_start); | ||
223 | |||
224 | bottomup: | ||
225 | /* | ||
226 | * A failed mmap() very likely causes application failure, | ||
227 | * so fall back to the bottom-up function here. This scenario | ||
228 | * can happen with large stack limits and large mmap() | ||
229 | * allocations. | ||
230 | */ | ||
231 | mm->cached_hole_size = ~0UL; | ||
232 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
233 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); | ||
234 | /* | ||
235 | * Restore the topdown base: | ||
236 | */ | ||
237 | mm->free_area_cache = mm->mmap_base; | ||
238 | mm->cached_hole_size = ~0UL; | ||
239 | |||
240 | return addr; | ||
241 | } | ||
242 | |||
243 | |||
146 | asmlinkage long sys_uname(struct new_utsname __user * name) | 244 | asmlinkage long sys_uname(struct new_utsname __user * name) |
147 | { | 245 | { |
148 | int err; | 246 | int err; |
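Two behavioural points from this file are easy to see from user space: MAP_32BIT requests are placed in the 1 GB window starting at 0x40000000 (now shifted by up to 32 MB when address-space randomization is enabled), and they are served bottom-up even when the process otherwise uses the new top-down allocator. A small sketch (mapping size arbitrary):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* MAP_32BIT: ask for an address in the low 31-bit window */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* on 64-bit tasks this typically lands in [0x40000000, 0x80000000) */
	printf("mapped at %p\n", p);
	munmap(p, 4096);
	return 0;
}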
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8344c70adf61..adff5562f5fd 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -321,6 +321,8 @@ ENTRY(sys_call_table) | |||
321 | .long sys_epoll_pwait | 321 | .long sys_epoll_pwait |
322 | .long sys_utimensat /* 320 */ | 322 | .long sys_utimensat /* 320 */ |
323 | .long sys_signalfd | 323 | .long sys_signalfd |
324 | .long sys_timerfd | 324 | .long sys_timerfd_create |
325 | .long sys_eventfd | 325 | .long sys_eventfd |
326 | .long sys_fallocate | 326 | .long sys_fallocate |
327 | .long sys_timerfd_settime /* 325 */ | ||
328 | .long sys_timerfd_gettime | ||
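The table change above replaces the old single sys_timerfd entry with the timerfd_create/timerfd_settime/timerfd_gettime trio. From user space the resulting interface looks like the following sketch (glibc wrappers assumed to be available; the one-second interval is arbitrary):

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/timerfd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1 },	/* first expiry after 1s */
		.it_interval = { .tv_sec = 1 },	/* then every second */
	};
	uint64_t expirations;
	int fd = timerfd_create(CLOCK_MONOTONIC, 0);

	if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
		return 1;
	if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
		printf("timer expired %llu time(s)\n",
		       (unsigned long long)expirations);
	close(fd);
	return 0;
}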
diff --git a/arch/x86/kernel/sysenter_32.c b/arch/x86/kernel/sysenter_32.c deleted file mode 100644 index 5a2d951e2608..000000000000 --- a/arch/x86/kernel/sysenter_32.c +++ /dev/null | |||
@@ -1,346 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright 2002 Linus Torvalds | ||
3 | * Portions based on the vdso-randomization code from exec-shield: | ||
4 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | ||
6 | * This file contains the needed initializations to support sysenter. | ||
7 | */ | ||
8 | |||
9 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/thread_info.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/gfp.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/elf.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/err.h> | ||
18 | #include <linux/module.h> | ||
19 | |||
20 | #include <asm/cpufeature.h> | ||
21 | #include <asm/msr.h> | ||
22 | #include <asm/pgtable.h> | ||
23 | #include <asm/unistd.h> | ||
24 | #include <asm/elf.h> | ||
25 | #include <asm/tlbflush.h> | ||
26 | |||
27 | enum { | ||
28 | VDSO_DISABLED = 0, | ||
29 | VDSO_ENABLED = 1, | ||
30 | VDSO_COMPAT = 2, | ||
31 | }; | ||
32 | |||
33 | #ifdef CONFIG_COMPAT_VDSO | ||
34 | #define VDSO_DEFAULT VDSO_COMPAT | ||
35 | #else | ||
36 | #define VDSO_DEFAULT VDSO_ENABLED | ||
37 | #endif | ||
38 | |||
39 | /* | ||
40 | * Should the kernel map a VDSO page into processes and pass its | ||
41 | * address down to glibc upon exec()? | ||
42 | */ | ||
43 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; | ||
44 | |||
45 | EXPORT_SYMBOL_GPL(vdso_enabled); | ||
46 | |||
47 | static int __init vdso_setup(char *s) | ||
48 | { | ||
49 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
50 | |||
51 | return 1; | ||
52 | } | ||
53 | |||
54 | __setup("vdso=", vdso_setup); | ||
55 | |||
56 | extern asmlinkage void sysenter_entry(void); | ||
57 | |||
58 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | ||
59 | unsigned offset, unsigned size) | ||
60 | { | ||
61 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
62 | unsigned nsym = size / sizeof(*sym); | ||
63 | unsigned i; | ||
64 | |||
65 | for(i = 0; i < nsym; i++, sym++) { | ||
66 | if (sym->st_shndx == SHN_UNDEF || | ||
67 | sym->st_shndx == SHN_ABS) | ||
68 | continue; /* skip */ | ||
69 | |||
70 | if (sym->st_shndx > SHN_LORESERVE) { | ||
71 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
72 | sym->st_shndx); | ||
73 | continue; | ||
74 | } | ||
75 | |||
76 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
77 | case STT_OBJECT: | ||
78 | case STT_FUNC: | ||
79 | case STT_SECTION: | ||
80 | case STT_FILE: | ||
81 | sym->st_value += VDSO_HIGH_BASE; | ||
82 | } | ||
83 | } | ||
84 | } | ||
85 | |||
86 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
87 | { | ||
88 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
89 | |||
90 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
91 | switch(dyn->d_tag) { | ||
92 | case DT_PLTGOT: | ||
93 | case DT_HASH: | ||
94 | case DT_STRTAB: | ||
95 | case DT_SYMTAB: | ||
96 | case DT_RELA: | ||
97 | case DT_INIT: | ||
98 | case DT_FINI: | ||
99 | case DT_REL: | ||
100 | case DT_DEBUG: | ||
101 | case DT_JMPREL: | ||
102 | case DT_VERSYM: | ||
103 | case DT_VERDEF: | ||
104 | case DT_VERNEED: | ||
105 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
106 | /* definitely pointers needing relocation */ | ||
107 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
108 | break; | ||
109 | |||
110 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
111 | case DT_LOOS ... DT_HIOS-1: | ||
112 | /* Tags above DT_ENCODING are pointers if | ||
113 | they're even */ | ||
114 | if (dyn->d_tag >= DT_ENCODING && | ||
115 | (dyn->d_tag & 1) == 0) | ||
116 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
117 | break; | ||
118 | |||
119 | case DT_VERDEFNUM: | ||
120 | case DT_VERNEEDNUM: | ||
121 | case DT_FLAGS_1: | ||
122 | case DT_RELACOUNT: | ||
123 | case DT_RELCOUNT: | ||
124 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
125 | /* definitely not pointers */ | ||
126 | break; | ||
127 | |||
128 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
129 | case DT_HIOS ... DT_VALRNGLO-1: | ||
130 | default: | ||
131 | if (dyn->d_tag > DT_ENCODING) | ||
132 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
133 | dyn->d_tag); | ||
134 | break; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
139 | { | ||
140 | Elf32_Phdr *phdr; | ||
141 | Elf32_Shdr *shdr; | ||
142 | int i; | ||
143 | |||
144 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | ||
145 | !elf_check_arch(ehdr) || | ||
146 | ehdr->e_type != ET_DYN); | ||
147 | |||
148 | ehdr->e_entry += VDSO_HIGH_BASE; | ||
149 | |||
150 | /* rebase phdrs */ | ||
151 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
152 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
153 | phdr[i].p_vaddr += VDSO_HIGH_BASE; | ||
154 | |||
155 | /* relocate dynamic stuff */ | ||
156 | if (phdr[i].p_type == PT_DYNAMIC) | ||
157 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
158 | } | ||
159 | |||
160 | /* rebase sections */ | ||
161 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
162 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
163 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
164 | continue; | ||
165 | |||
166 | shdr[i].sh_addr += VDSO_HIGH_BASE; | ||
167 | |||
168 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
169 | shdr[i].sh_type == SHT_DYNSYM) | ||
170 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
171 | shdr[i].sh_size); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | void enable_sep_cpu(void) | ||
176 | { | ||
177 | int cpu = get_cpu(); | ||
178 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
179 | |||
180 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | ||
181 | put_cpu(); | ||
182 | return; | ||
183 | } | ||
184 | |||
185 | tss->x86_tss.ss1 = __KERNEL_CS; | ||
186 | tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; | ||
187 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | ||
188 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); | ||
189 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); | ||
190 | put_cpu(); | ||
191 | } | ||
192 | |||
193 | static struct vm_area_struct gate_vma; | ||
194 | |||
195 | static int __init gate_vma_init(void) | ||
196 | { | ||
197 | gate_vma.vm_mm = NULL; | ||
198 | gate_vma.vm_start = FIXADDR_USER_START; | ||
199 | gate_vma.vm_end = FIXADDR_USER_END; | ||
200 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
201 | gate_vma.vm_page_prot = __P101; | ||
202 | /* | ||
203 | * Make sure the vDSO gets into every core dump. | ||
204 | * Dumping its contents makes post-mortem fully interpretable later | ||
205 | * without matching up the same kernel and hardware config to see | ||
206 | * what PC values meant. | ||
207 | */ | ||
208 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * These symbols are defined by vsyscall.o to mark the bounds | ||
214 | * of the ELF DSO images included therein. | ||
215 | */ | ||
216 | extern const char vsyscall_int80_start, vsyscall_int80_end; | ||
217 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | ||
218 | static struct page *syscall_pages[1]; | ||
219 | |||
220 | static void map_compat_vdso(int map) | ||
221 | { | ||
222 | static int vdso_mapped; | ||
223 | |||
224 | if (map == vdso_mapped) | ||
225 | return; | ||
226 | |||
227 | vdso_mapped = map; | ||
228 | |||
229 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, | ||
230 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
231 | |||
232 | /* flush stray tlbs */ | ||
233 | flush_tlb_all(); | ||
234 | } | ||
235 | |||
236 | int __init sysenter_setup(void) | ||
237 | { | ||
238 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | ||
239 | const void *vsyscall; | ||
240 | size_t vsyscall_len; | ||
241 | |||
242 | syscall_pages[0] = virt_to_page(syscall_page); | ||
243 | |||
244 | gate_vma_init(); | ||
245 | |||
246 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
247 | |||
248 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | ||
249 | vsyscall = &vsyscall_int80_start; | ||
250 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; | ||
251 | } else { | ||
252 | vsyscall = &vsyscall_sysenter_start; | ||
253 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; | ||
254 | } | ||
255 | |||
256 | memcpy(syscall_page, vsyscall, vsyscall_len); | ||
257 | relocate_vdso(syscall_page); | ||
258 | |||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | /* Defined in vsyscall-sysenter.S */ | ||
263 | extern void SYSENTER_RETURN; | ||
264 | |||
265 | /* Setup a VMA at program startup for the vsyscall page */ | ||
266 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
267 | { | ||
268 | struct mm_struct *mm = current->mm; | ||
269 | unsigned long addr; | ||
270 | int ret = 0; | ||
271 | bool compat; | ||
272 | |||
273 | down_write(&mm->mmap_sem); | ||
274 | |||
275 | /* Test compat mode once here, in case someone | ||
276 | changes it via sysctl */ | ||
277 | compat = (vdso_enabled == VDSO_COMPAT); | ||
278 | |||
279 | map_compat_vdso(compat); | ||
280 | |||
281 | if (compat) | ||
282 | addr = VDSO_HIGH_BASE; | ||
283 | else { | ||
284 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
285 | if (IS_ERR_VALUE(addr)) { | ||
286 | ret = addr; | ||
287 | goto up_fail; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
292 | * | ||
293 | * Make sure the vDSO gets into every core dump. | ||
294 | * Dumping its contents makes post-mortem fully | ||
295 | * interpretable later without matching up the same | ||
296 | * kernel and hardware config to see what PC values | ||
297 | * meant. | ||
298 | */ | ||
299 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | ||
300 | VM_READ|VM_EXEC| | ||
301 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
302 | VM_ALWAYSDUMP, | ||
303 | syscall_pages); | ||
304 | |||
305 | if (ret) | ||
306 | goto up_fail; | ||
307 | } | ||
308 | |||
309 | current->mm->context.vdso = (void *)addr; | ||
310 | current_thread_info()->sysenter_return = | ||
311 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
312 | |||
313 | up_fail: | ||
314 | up_write(&mm->mmap_sem); | ||
315 | |||
316 | return ret; | ||
317 | } | ||
318 | |||
319 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
320 | { | ||
321 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
322 | return "[vdso]"; | ||
323 | return NULL; | ||
324 | } | ||
325 | |||
326 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
327 | { | ||
328 | struct mm_struct *mm = tsk->mm; | ||
329 | |||
330 | /* Check to see if this task was created in compat vdso mode */ | ||
331 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
332 | return &gate_vma; | ||
333 | return NULL; | ||
334 | } | ||
335 | |||
336 | int in_gate_area(struct task_struct *task, unsigned long addr) | ||
337 | { | ||
338 | const struct vm_area_struct *vma = get_gate_vma(task); | ||
339 | |||
340 | return vma && addr >= vma->vm_start && addr < vma->vm_end; | ||
341 | } | ||
342 | |||
343 | int in_gate_area_no_task(unsigned long addr) | ||
344 | { | ||
345 | return 0; | ||
346 | } | ||
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c new file mode 100644 index 000000000000..10b8a6f69f84 --- /dev/null +++ b/arch/x86/kernel/test_nx.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* | ||
2 | * test_nx.c: functional test for NX functionality | ||
3 | * | ||
4 | * (C) Copyright 2008 Intel Corporation | ||
5 | * Author: Arjan van de Ven <arjan@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/sort.h> | ||
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/asm.h> | ||
16 | |||
17 | extern int rodata_test_data; | ||
18 | |||
19 | /* | ||
20 | * This file checks 4 things: | ||
21 | * 1) Check if the stack is not executable | ||
22 | * 2) Check if kmalloc memory is not executable | ||
23 | * 3) Check if the .rodata section is not executable | ||
24 | * 4) Check if the .data section of a module is not executable | ||
25 | * | ||
26 | * To do this, the test code tries to execute memory in stack/kmalloc/etc, | ||
27 | * and then checks if the expected trap happens. | ||
28 | * | ||
29 | * Sadly, this implies having a dynamic exception handling table entry. | ||
30 | * ... which can be done (and will make Rusty cry)... but it can only | ||
31 | * be done in a stand-alone module with only 1 entry total. | ||
32 | * (otherwise we'd have to sort and that's just too messy) | ||
33 | */ | ||
34 | |||
35 | |||
36 | |||
37 | /* | ||
38 | * We want to set up an exception handling point on our stack, | ||
39 | * which means a variable value. This function is rather dirty | ||
40 | * and walks the exception table of the module, looking for a magic | ||
41 | * marker and replaces it with a specific function. | ||
42 | */ | ||
43 | static void fudze_exception_table(void *marker, void *new) | ||
44 | { | ||
45 | struct module *mod = THIS_MODULE; | ||
46 | struct exception_table_entry *extable; | ||
47 | |||
48 | /* | ||
49 | * Note: This module has only 1 exception table entry, | ||
50 | * so searching and sorting is not needed. If that changes, | ||
51 | * this would be the place to search and re-sort the exception | ||
52 | * table. | ||
53 | */ | ||
54 | if (mod->num_exentries > 1) { | ||
55 | printk(KERN_ERR "test_nx: too many exception table entries!\n"); | ||
56 | printk(KERN_ERR "test_nx: test results are not reliable.\n"); | ||
57 | return; | ||
58 | } | ||
59 | extable = (struct exception_table_entry *)mod->extable; | ||
60 | extable[0].insn = (unsigned long)new; | ||
61 | } | ||
62 | |||
63 | |||
64 | /* | ||
65 | * exception tables get their symbols translated so we need | ||
66 | * to use a fake function to put in there, which we can then | ||
67 | * replace at runtime. | ||
68 | */ | ||
69 | void foo_label(void); | ||
70 | |||
71 | /* | ||
72 | * returns 0 for not-executable, negative for executable | ||
73 | * | ||
74 | * Note: we cannot allow this function to be inlined, because | ||
75 | * that would give us more than 1 exception table entry. | ||
76 | * This in turn would break the assumptions above. | ||
77 | */ | ||
78 | static noinline int test_address(void *address) | ||
79 | { | ||
80 | unsigned long result; | ||
81 | |||
82 | /* Set up an exception table entry for our address */ | ||
83 | fudze_exception_table(&foo_label, address); | ||
84 | result = 1; | ||
85 | asm volatile( | ||
86 | "foo_label:\n" | ||
87 | "0: call *%[fake_code]\n" | ||
88 | "1:\n" | ||
89 | ".section .fixup,\"ax\"\n" | ||
90 | "2: mov %[zero], %[rslt]\n" | ||
91 | " ret\n" | ||
92 | ".previous\n" | ||
93 | _ASM_EXTABLE(0b,2b) | ||
94 | : [rslt] "=r" (result) | ||
95 | : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) | ||
96 | ); | ||
97 | /* change the exception table back for the next round */ | ||
98 | fudze_exception_table(address, &foo_label); | ||
99 | |||
100 | if (result) | ||
101 | return -ENODEV; | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */ | ||
106 | |||
107 | static int test_NX(void) | ||
108 | { | ||
109 | int ret = 0; | ||
110 | /* 0xC3 is the opcode for "ret" */ | ||
111 | char stackcode[] = {0xC3, 0x90, 0 }; | ||
112 | char *heap; | ||
113 | |||
114 | test_data = 0xC3; | ||
115 | |||
116 | printk(KERN_INFO "Testing NX protection\n"); | ||
117 | |||
118 | /* Test 1: check if the stack is not executable */ | ||
119 | if (test_address(&stackcode)) { | ||
120 | printk(KERN_ERR "test_nx: stack was executable\n"); | ||
121 | ret = -ENODEV; | ||
122 | } | ||
123 | |||
124 | |||
125 | /* Test 2: Check if the heap is executable */ | ||
126 | heap = kmalloc(64, GFP_KERNEL); | ||
127 | if (!heap) | ||
128 | return -ENOMEM; | ||
129 | heap[0] = 0xC3; /* opcode for "ret" */ | ||
130 | |||
131 | if (test_address(heap)) { | ||
132 | printk(KERN_ERR "test_nx: heap was executable\n"); | ||
133 | ret = -ENODEV; | ||
134 | } | ||
135 | kfree(heap); | ||
136 | |||
137 | /* | ||
138 | * The following 2 tests currently fail, this needs to get fixed | ||
139 | * Until then, don't run them to avoid too many people getting scared | ||
140 | * by the error message | ||
141 | */ | ||
142 | |||
143 | #ifdef CONFIG_DEBUG_RODATA | ||
144 | /* Test 3: Check if the .rodata section is executable */ | ||
145 | if (rodata_test_data != 0xC3) { | ||
146 | printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); | ||
147 | ret = -ENODEV; | ||
148 | } else if (test_address(&rodata_test_data)) { | ||
149 | printk(KERN_ERR "test_nx: .rodata section is executable\n"); | ||
150 | ret = -ENODEV; | ||
151 | } | ||
152 | #endif | ||
153 | |||
154 | #if 0 | ||
155 | /* Test 4: Check if the .data section of a module is executable */ | ||
156 | if (test_address(&test_data)) { | ||
157 | printk(KERN_ERR "test_nx: .data section is executable\n"); | ||
158 | ret = -ENODEV; | ||
159 | } | ||
160 | |||
161 | #endif | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void test_exit(void) | ||
166 | { | ||
167 | } | ||
168 | |||
169 | module_init(test_NX); | ||
170 | module_exit(test_exit); | ||
171 | MODULE_LICENSE("GPL"); | ||
172 | MODULE_DESCRIPTION("Testcase for the NX infrastructure"); | ||
173 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | ||
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c new file mode 100644 index 000000000000..4c163772000e --- /dev/null +++ b/arch/x86/kernel/test_rodata.c | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * test_rodata.c: functional test for mark_rodata_ro function | ||
3 | * | ||
4 | * (C) Copyright 2008 Intel Corporation | ||
5 | * Author: Arjan van de Ven <arjan@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <asm/sections.h> | ||
14 | extern int rodata_test_data; | ||
15 | |||
16 | int rodata_test(void) | ||
17 | { | ||
18 | unsigned long result; | ||
19 | unsigned long start, end; | ||
20 | |||
21 | /* test 1: read the value */ | ||
22 | /* If this test fails, some previous testrun has clobbered the state */ | ||
23 | if (!rodata_test_data) { | ||
24 | printk(KERN_ERR "rodata_test: test 1 fails (start data)\n"); | ||
25 | return -ENODEV; | ||
26 | } | ||
27 | |||
28 | /* test 2: write to the variable; this should fault */ | ||
29 | /* | ||
30 | * If this test fails, we managed to overwrite the data | ||
31 | * | ||
32 | * This is written in assembly to be able to catch the | ||
33 | * exception that is supposed to happen in the correct | ||
34 | * case | ||
35 | */ | ||
36 | |||
37 | result = 1; | ||
38 | asm volatile( | ||
39 | "0: mov %[zero],(%[rodata_test])\n" | ||
40 | " mov %[zero], %[rslt]\n" | ||
41 | "1:\n" | ||
42 | ".section .fixup,\"ax\"\n" | ||
43 | "2: jmp 1b\n" | ||
44 | ".previous\n" | ||
45 | ".section __ex_table,\"a\"\n" | ||
46 | " .align 16\n" | ||
47 | #ifdef CONFIG_X86_32 | ||
48 | " .long 0b,2b\n" | ||
49 | #else | ||
50 | " .quad 0b,2b\n" | ||
51 | #endif | ||
52 | ".previous" | ||
53 | : [rslt] "=r" (result) | ||
54 | : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) | ||
55 | ); | ||
56 | |||
57 | |||
58 | if (!result) { | ||
59 | printk(KERN_ERR "rodata_test: test data was not read only\n"); | ||
60 | return -ENODEV; | ||
61 | } | ||
62 | |||
63 | /* test 3: check the value hasn't changed */ | ||
64 | /* If this test fails, we managed to overwrite the data */ | ||
65 | if (!rodata_test_data) { | ||
66 | printk(KERN_ERR "rodata_test: test 3 fails (end data)\n"); ||
67 | return -ENODEV; | ||
68 | } | ||
69 | /* test 4: check if the rodata section is 4Kb aligned */ | ||
70 | start = (unsigned long)__start_rodata; | ||
71 | end = (unsigned long)__end_rodata; | ||
72 | if (start & (PAGE_SIZE - 1)) { | ||
73 | printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n"); | ||
74 | return -ENODEV; | ||
75 | } | ||
76 | if (end & (PAGE_SIZE - 1)) { | ||
77 | printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n"); | ||
78 | return -ENODEV; | ||
79 | } | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | MODULE_LICENSE("GPL"); | ||
85 | MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); | ||
86 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | ||
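Both test modules depend on the same mechanism: the probing instruction gets an exception-table entry so that a fault is redirected to a fixup stub instead of oopsing. A stripped-down sketch of that pattern using the _ASM_EXTABLE helper that test_nx.c already pulls in from <asm/asm.h> (the function name and error convention here are only for illustration):

	/* sketch: read *addr, returning -EFAULT instead of oopsing on a fault;
	 * *val is undefined when the return value is nonzero */
	static int probe_read(const unsigned long *addr, unsigned long *val)
	{
		int ret = 0;

		asm volatile("1:	mov (%[src]), %[v]\n"
			     "2:\n"
			     ".section .fixup,\"ax\"\n"
			     "3:	mov %[err], %[r]\n"
			     "	jmp 2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b, 3b)
			     : [r] "+r" (ret), [v] "=r" (*val)
			     : [src] "r" (addr), [err] "i" (-EFAULT));
		return ret;
	}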
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 8a322c96bc23..1a89e93f3f1c 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -28,98 +28,20 @@ | |||
28 | * serialize accesses to xtime/lost_ticks). | 28 | * serialize accesses to xtime/lost_ticks). |
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/errno.h> | 31 | #include <linux/init.h> |
32 | #include <linux/sched.h> | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/param.h> | ||
35 | #include <linux/string.h> | ||
36 | #include <linux/mm.h> | ||
37 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
38 | #include <linux/time.h> | 33 | #include <linux/time.h> |
39 | #include <linux/delay.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/smp.h> | ||
42 | #include <linux/module.h> | ||
43 | #include <linux/sysdev.h> | ||
44 | #include <linux/bcd.h> | ||
45 | #include <linux/efi.h> | ||
46 | #include <linux/mca.h> | 34 | #include <linux/mca.h> |
47 | 35 | ||
48 | #include <asm/io.h> | ||
49 | #include <asm/smp.h> | ||
50 | #include <asm/irq.h> | ||
51 | #include <asm/msr.h> | ||
52 | #include <asm/delay.h> | ||
53 | #include <asm/mpspec.h> | ||
54 | #include <asm/uaccess.h> | ||
55 | #include <asm/processor.h> | ||
56 | #include <asm/timer.h> | ||
57 | #include <asm/time.h> | ||
58 | |||
59 | #include "mach_time.h" | ||
60 | |||
61 | #include <linux/timex.h> | ||
62 | |||
63 | #include <asm/hpet.h> | ||
64 | |||
65 | #include <asm/arch_hooks.h> | 36 | #include <asm/arch_hooks.h> |
66 | 37 | #include <asm/hpet.h> | |
67 | #include "io_ports.h" | 38 | #include <asm/time.h> |
68 | |||
69 | #include <asm/i8259.h> | ||
70 | 39 | ||
71 | #include "do_timer.h" | 40 | #include "do_timer.h" |
72 | 41 | ||
73 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ | 42 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ |
74 | EXPORT_SYMBOL(cpu_khz); | 43 | EXPORT_SYMBOL(cpu_khz); |
75 | 44 | ||
76 | DEFINE_SPINLOCK(rtc_lock); | ||
77 | EXPORT_SYMBOL(rtc_lock); | ||
78 | |||
79 | /* | ||
80 | * This is a special lock that is owned by the CPU and holds the index | ||
81 | * register we are working with. It is required for NMI access to the | ||
82 | * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. | ||
83 | */ | ||
84 | volatile unsigned long cmos_lock = 0; | ||
85 | EXPORT_SYMBOL(cmos_lock); | ||
86 | |||
87 | /* Routines for accessing the CMOS RAM/RTC. */ | ||
88 | unsigned char rtc_cmos_read(unsigned char addr) | ||
89 | { | ||
90 | unsigned char val; | ||
91 | lock_cmos_prefix(addr); | ||
92 | outb_p(addr, RTC_PORT(0)); | ||
93 | val = inb_p(RTC_PORT(1)); | ||
94 | lock_cmos_suffix(addr); | ||
95 | return val; | ||
96 | } | ||
97 | EXPORT_SYMBOL(rtc_cmos_read); | ||
98 | |||
99 | void rtc_cmos_write(unsigned char val, unsigned char addr) | ||
100 | { | ||
101 | lock_cmos_prefix(addr); | ||
102 | outb_p(addr, RTC_PORT(0)); | ||
103 | outb_p(val, RTC_PORT(1)); | ||
104 | lock_cmos_suffix(addr); | ||
105 | } | ||
106 | EXPORT_SYMBOL(rtc_cmos_write); | ||
107 | |||
108 | static int set_rtc_mmss(unsigned long nowtime) | ||
109 | { | ||
110 | int retval; | ||
111 | unsigned long flags; | ||
112 | |||
113 | /* gets recalled with irq locally disabled */ | ||
114 | /* XXX - does irqsave resolve this? -johnstul */ | ||
115 | spin_lock_irqsave(&rtc_lock, flags); | ||
116 | retval = set_wallclock(nowtime); | ||
117 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
118 | |||
119 | return retval; | ||
120 | } | ||
121 | |||
122 | |||
123 | int timer_ack; | 45 | int timer_ack; |
124 | 46 | ||
125 | unsigned long profile_pc(struct pt_regs *regs) | 47 | unsigned long profile_pc(struct pt_regs *regs) |
@@ -127,17 +49,17 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
127 | unsigned long pc = instruction_pointer(regs); | 49 | unsigned long pc = instruction_pointer(regs); |
128 | 50 | ||
129 | #ifdef CONFIG_SMP | 51 | #ifdef CONFIG_SMP |
130 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) && | 52 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && |
131 | in_lock_functions(pc)) { | 53 | in_lock_functions(pc)) { |
132 | #ifdef CONFIG_FRAME_POINTER | 54 | #ifdef CONFIG_FRAME_POINTER |
133 | return *(unsigned long *)(regs->ebp + 4); | 55 | return *(unsigned long *)(regs->bp + 4); |
134 | #else | 56 | #else |
135 | unsigned long *sp = (unsigned long *)®s->esp; | 57 | unsigned long *sp = (unsigned long *)®s->sp; |
136 | 58 | ||
137 | /* Return address is either directly at stack pointer | 59 | /* Return address is either directly at stack pointer |
138 | or above a saved eflags. Eflags has bits 22-31 zero, | 60 | or above saved flags. Eflags has bits 22-31 zero,
139 | kernel addresses don't. */ | 61 | kernel addresses don't. */ |
140 | if (sp[0] >> 22) | 62 | if (sp[0] >> 22) |
141 | return sp[0]; | 63 | return sp[0]; |
142 | if (sp[1] >> 22) | 64 | if (sp[1] >> 22) |
143 | return sp[1]; | 65 | return sp[1]; |
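The SMP branch of profile_pc() shown above has to decide whether the word at the stack pointer is a return address or an EFLAGS image pushed by the spinlock code. On i386, EFLAGS always has bits 22-31 clear, while kernel text lives at PAGE_OFFSET (0xc0000000 with the default 3G/1G split) and above, so "word >> 22" is non-zero only for a code address. A small standalone sketch of that test, with illustrative values:

/* Sketch of the "saved EFLAGS or kernel address?" heuristic used above.
 * The PAGE_OFFSET assumption is the default i386 3G/1G split. */
#include <stdio.h>

static int looks_like_kernel_address(unsigned long word)
{
        /* EFLAGS has bits 22-31 architecturally zero; kernel text at
         * 0xc0000000 and above never has those bits all clear. */
        return (word >> 22) != 0;
}

int main(void)
{
        unsigned long saved_eflags = 0x00000246UL;  /* typical EFLAGS image */
        unsigned long return_addr  = 0xc01234abUL;  /* hypothetical kernel PC */

        printf("%d %d\n", looks_like_kernel_address(saved_eflags),   /* 0 */
                          looks_like_kernel_address(return_addr));   /* 1 */
        return 0;
}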
@@ -193,26 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
193 | return IRQ_HANDLED; | 115 | return IRQ_HANDLED; |
194 | } | 116 | } |
195 | 117 | ||
196 | /* not static: needed by APM */ | ||
197 | unsigned long read_persistent_clock(void) | ||
198 | { | ||
199 | unsigned long retval; | ||
200 | unsigned long flags; | ||
201 | |||
202 | spin_lock_irqsave(&rtc_lock, flags); | ||
203 | |||
204 | retval = get_wallclock(); | ||
205 | |||
206 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
207 | |||
208 | return retval; | ||
209 | } | ||
210 | |||
211 | int update_persistent_clock(struct timespec now) | ||
212 | { | ||
213 | return set_rtc_mmss(now.tv_sec); | ||
214 | } | ||
215 | |||
216 | extern void (*late_time_init)(void); | 118 | extern void (*late_time_init)(void); |
217 | /* Duplicate of time_init() below, with hpet_enable part added */ | 119 | /* Duplicate of time_init() below, with hpet_enable part added */ |
218 | void __init hpet_time_init(void) | 120 | void __init hpet_time_init(void) |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 368b1942b39a..0380795121a6 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -11,43 +11,18 @@ | |||
11 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c | 11 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/kernel.h> | 14 | #include <linux/clockchips.h> |
15 | #include <linux/sched.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/init.h> | 15 | #include <linux/init.h> |
18 | #include <linux/mc146818rtc.h> | 16 | #include <linux/interrupt.h> |
19 | #include <linux/time.h> | ||
20 | #include <linux/ioport.h> | ||
21 | #include <linux/module.h> | 17 | #include <linux/module.h> |
22 | #include <linux/device.h> | 18 | #include <linux/time.h> |
23 | #include <linux/sysdev.h> | ||
24 | #include <linux/bcd.h> | ||
25 | #include <linux/notifier.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/kallsyms.h> | ||
28 | #include <linux/acpi.h> | ||
29 | #include <linux/clockchips.h> | ||
30 | 19 | ||
31 | #ifdef CONFIG_ACPI | ||
32 | #include <acpi/achware.h> /* for PM timer frequency */ | ||
33 | #include <acpi/acpi_bus.h> | ||
34 | #endif | ||
35 | #include <asm/i8253.h> | 20 | #include <asm/i8253.h> |
36 | #include <asm/pgtable.h> | ||
37 | #include <asm/vsyscall.h> | ||
38 | #include <asm/timex.h> | ||
39 | #include <asm/proto.h> | ||
40 | #include <asm/hpet.h> | ||
41 | #include <asm/sections.h> | ||
42 | #include <linux/hpet.h> | ||
43 | #include <asm/apic.h> | ||
44 | #include <asm/hpet.h> | 21 | #include <asm/hpet.h> |
45 | #include <asm/mpspec.h> | ||
46 | #include <asm/nmi.h> | 22 | #include <asm/nmi.h> |
47 | #include <asm/vgtod.h> | 23 | #include <asm/vgtod.h> |
48 | 24 | #include <asm/time.h> | |
49 | DEFINE_SPINLOCK(rtc_lock); | 25 | #include <asm/timer.h> |
50 | EXPORT_SYMBOL(rtc_lock); | ||
51 | 26 | ||
52 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 27 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
53 | 28 | ||
@@ -56,10 +31,10 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
56 | unsigned long pc = instruction_pointer(regs); | 31 | unsigned long pc = instruction_pointer(regs); |
57 | 32 | ||
58 | /* Assume the lock function has either no stack frame or a copy | 33 | /* Assume the lock function has either no stack frame or a copy |
59 | of eflags from PUSHF | 34 | of flags from PUSHF |
60 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ | 35 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ |
61 | if (!user_mode(regs) && in_lock_functions(pc)) { | 36 | if (!user_mode(regs) && in_lock_functions(pc)) { |
62 | unsigned long *sp = (unsigned long *)regs->rsp; | 37 | unsigned long *sp = (unsigned long *)regs->sp; |
63 | if (sp[0] >> 22) | 38 | if (sp[0] >> 22) |
64 | return sp[0]; | 39 | return sp[0]; |
65 | if (sp[1] >> 22) | 40 | if (sp[1] >> 22) |
@@ -69,82 +44,6 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
69 | } | 44 | } |
70 | EXPORT_SYMBOL(profile_pc); | 45 | EXPORT_SYMBOL(profile_pc); |
71 | 46 | ||
72 | /* | ||
73 | * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500 | ||
74 | * ms after the second nowtime has started, because when nowtime is written | ||
75 | * into the registers of the CMOS clock, it will jump to the next second | ||
76 | * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data | ||
77 | * sheet for details. | ||
78 | */ | ||
79 | |||
80 | static int set_rtc_mmss(unsigned long nowtime) | ||
81 | { | ||
82 | int retval = 0; | ||
83 | int real_seconds, real_minutes, cmos_minutes; | ||
84 | unsigned char control, freq_select; | ||
85 | unsigned long flags; | ||
86 | |||
87 | /* | ||
88 | * set_rtc_mmss is called when irqs are enabled, so disable irqs here | ||
89 | */ | ||
90 | spin_lock_irqsave(&rtc_lock, flags); | ||
91 | /* | ||
92 | * Tell the clock it's being set and stop it. | ||
93 | */ | ||
94 | control = CMOS_READ(RTC_CONTROL); | ||
95 | CMOS_WRITE(control | RTC_SET, RTC_CONTROL); | ||
96 | |||
97 | freq_select = CMOS_READ(RTC_FREQ_SELECT); | ||
98 | CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT); | ||
99 | |||
100 | cmos_minutes = CMOS_READ(RTC_MINUTES); | ||
101 | BCD_TO_BIN(cmos_minutes); | ||
102 | |||
103 | /* | ||
104 | * since we're only adjusting minutes and seconds, don't interfere with hour | ||
105 | * overflow. This avoids messing with unknown time zones but requires your RTC | ||
106 | * not to be off by more than 15 minutes. Since we're calling it only when | ||
107 | * our clock is externally synchronized using NTP, this shouldn't be a problem. | ||
108 | */ | ||
109 | |||
110 | real_seconds = nowtime % 60; | ||
111 | real_minutes = nowtime / 60; | ||
112 | if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) | ||
113 | real_minutes += 30; /* correct for half hour time zone */ | ||
114 | real_minutes %= 60; | ||
115 | |||
116 | if (abs(real_minutes - cmos_minutes) >= 30) { | ||
117 | printk(KERN_WARNING "time.c: can't update CMOS clock " | ||
118 | "from %d to %d\n", cmos_minutes, real_minutes); | ||
119 | retval = -1; | ||
120 | } else { | ||
121 | BIN_TO_BCD(real_seconds); | ||
122 | BIN_TO_BCD(real_minutes); | ||
123 | CMOS_WRITE(real_seconds, RTC_SECONDS); | ||
124 | CMOS_WRITE(real_minutes, RTC_MINUTES); | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * The following flags have to be released exactly in this order, otherwise the | ||
129 | * DS12887 (popular MC146818A clone with integrated battery and quartz) will | ||
130 | * not reset the oscillator and will not update precisely 500 ms later. You | ||
131 | * won't find this mentioned in the Dallas Semiconductor data sheets, but who | ||
132 | * believes data sheets anyway ... -- Markus Kuhn | ||
133 | */ | ||
134 | |||
135 | CMOS_WRITE(control, RTC_CONTROL); | ||
136 | CMOS_WRITE(freq_select, RTC_FREQ_SELECT); | ||
137 | |||
138 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
139 | |||
140 | return retval; | ||
141 | } | ||
142 | |||
143 | int update_persistent_clock(struct timespec now) | ||
144 | { | ||
145 | return set_rtc_mmss(now.tv_sec); | ||
146 | } | ||
147 | |||
148 | static irqreturn_t timer_event_interrupt(int irq, void *dev_id) | 47 | static irqreturn_t timer_event_interrupt(int irq, void *dev_id) |
149 | { | 48 | { |
150 | add_pda(irq0_irqs, 1); | 49 | add_pda(irq0_irqs, 1); |
@@ -154,67 +53,10 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) | |||
154 | return IRQ_HANDLED; | 53 | return IRQ_HANDLED; |
155 | } | 54 | } |
156 | 55 | ||
157 | unsigned long read_persistent_clock(void) | ||
158 | { | ||
159 | unsigned int year, mon, day, hour, min, sec; | ||
160 | unsigned long flags; | ||
161 | unsigned century = 0; | ||
162 | |||
163 | spin_lock_irqsave(&rtc_lock, flags); | ||
164 | /* | ||
165 | * if UIP is clear, then we have >= 244 microseconds before RTC | ||
166 | * registers will be updated. Spec sheet says that this is the | ||
167 | * reliable way to read RTC - registers invalid (off bus) during update | ||
168 | */ | ||
169 | while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) | ||
170 | cpu_relax(); | ||
171 | |||
172 | |||
173 | /* now read all RTC registers while stable with interrupts disabled */ | ||
174 | sec = CMOS_READ(RTC_SECONDS); | ||
175 | min = CMOS_READ(RTC_MINUTES); | ||
176 | hour = CMOS_READ(RTC_HOURS); | ||
177 | day = CMOS_READ(RTC_DAY_OF_MONTH); | ||
178 | mon = CMOS_READ(RTC_MONTH); | ||
179 | year = CMOS_READ(RTC_YEAR); | ||
180 | #ifdef CONFIG_ACPI | ||
181 | if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && | ||
182 | acpi_gbl_FADT.century) | ||
183 | century = CMOS_READ(acpi_gbl_FADT.century); | ||
184 | #endif | ||
185 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
186 | |||
187 | /* | ||
188 | * We know that x86-64 always uses BCD format, no need to check the | ||
189 | * config register. | ||
190 | */ | ||
191 | |||
192 | BCD_TO_BIN(sec); | ||
193 | BCD_TO_BIN(min); | ||
194 | BCD_TO_BIN(hour); | ||
195 | BCD_TO_BIN(day); | ||
196 | BCD_TO_BIN(mon); | ||
197 | BCD_TO_BIN(year); | ||
198 | |||
199 | if (century) { | ||
200 | BCD_TO_BIN(century); | ||
201 | year += century * 100; | ||
202 | printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); | ||
203 | } else { | ||
204 | /* | ||
205 | * x86-64 systems only exists since 2002. | ||
206 | * This will work up to Dec 31, 2100 | ||
207 | */ | ||
208 | year += 2000; | ||
209 | } | ||
210 | |||
211 | return mktime(year, mon, day, hour, min, sec); | ||
212 | } | ||
213 | |||
214 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine | 56 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine |
215 | * processor frequency */ | 57 | * processor frequency */ |
216 | #define TICK_COUNT 100000000 | 58 | #define TICK_COUNT 100000000 |
217 | static unsigned int __init tsc_calibrate_cpu_khz(void) | 59 | unsigned long __init native_calculate_cpu_khz(void) |
218 | { | 60 | { |
219 | int tsc_start, tsc_now; | 61 | int tsc_start, tsc_now; |
220 | int i, no_ctr_free; | 62 | int i, no_ctr_free; |
@@ -241,7 +83,7 @@ static unsigned int __init tsc_calibrate_cpu_khz(void) | |||
241 | rdtscl(tsc_start); | 83 | rdtscl(tsc_start); |
242 | do { | 84 | do { |
243 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | 85 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); |
244 | tsc_now = get_cycles_sync(); | 86 | tsc_now = get_cycles(); |
245 | } while ((tsc_now - tsc_start) < TICK_COUNT); | 87 | } while ((tsc_now - tsc_start) < TICK_COUNT); |
246 | 88 | ||
247 | local_irq_restore(flags); | 89 | local_irq_restore(flags); |
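The calibration loop above lets a K7 performance counter programmed for event 0x76 ("CPU clocks not halted") run alongside the TSC for roughly 10^8 TSC ticks; the core frequency then falls out of the ratio of the two deltas scaled by the already-calibrated tsc_khz. The tail of the function is elided from this hunk, so the following is only a sketch of the arithmetic with hypothetical names:

/* Illustrative arithmetic only; names are hypothetical, not the kernel's.
 * 64-bit operands avoid overflow of the intermediate product. */
static unsigned long long cpu_khz_from_ratio(unsigned long long pmc_delta, /* core cycles counted   */
                                             unsigned long long tsc_delta, /* TSC ticks, same window */
                                             unsigned long long tsc_khz)   /* known TSC rate in kHz  */
{
        return pmc_delta * tsc_khz / tsc_delta;
}
/* Example: 230000000 core cycles over 100000000 TSC ticks with tsc_khz = 1000000
 * (a 1 GHz TSC) gives 2300000 kHz, i.e. a 2.3 GHz core clock. */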
@@ -264,20 +106,22 @@ static struct irqaction irq0 = { | |||
264 | .name = "timer" | 106 | .name = "timer" |
265 | }; | 107 | }; |
266 | 108 | ||
267 | void __init time_init(void) | 109 | void __init hpet_time_init(void) |
268 | { | 110 | { |
269 | if (!hpet_enable()) | 111 | if (!hpet_enable()) |
270 | setup_pit_timer(); | 112 | setup_pit_timer(); |
271 | 113 | ||
272 | setup_irq(0, &irq0); | 114 | setup_irq(0, &irq0); |
115 | } | ||
273 | 116 | ||
117 | void __init time_init(void) | ||
118 | { | ||
274 | tsc_calibrate(); | 119 | tsc_calibrate(); |
275 | 120 | ||
276 | cpu_khz = tsc_khz; | 121 | cpu_khz = tsc_khz; |
277 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | 122 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && |
278 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 123 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) |
279 | boot_cpu_data.x86 == 16) | 124 | cpu_khz = calculate_cpu_khz(); |
280 | cpu_khz = tsc_calibrate_cpu_khz(); | ||
281 | 125 | ||
282 | if (unsynchronized_tsc()) | 126 | if (unsynchronized_tsc()) |
283 | mark_tsc_unstable("TSCs unsynchronized"); | 127 | mark_tsc_unstable("TSCs unsynchronized"); |
@@ -290,4 +134,5 @@ void __init time_init(void) | |||
290 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | 134 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", |
291 | cpu_khz / 1000, cpu_khz % 1000); | 135 | cpu_khz / 1000, cpu_khz % 1000); |
292 | init_tsc_clocksource(); | 136 | init_tsc_clocksource(); |
137 | late_time_init = choose_time_init(); | ||
293 | } | 138 | } |
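Note the last added line: time_init() no longer wires up the timer interrupt itself; hpet_time_init() is deferred through the late_time_init hook, which start_kernel() invokes once the allocators and IRQ infrastructure are ready. A minimal standalone sketch of that deferral pattern (not the kernel's exact code; choose_time_init() is assumed to resolve to hpet_time_init here):

/* Minimal sketch of the late_time_init deferral used above. */
#include <stdio.h>

static void (*late_time_init)(void);            /* recorded early, called later */

static void hpet_time_init(void) { puts("timer IRQ wired up"); }

static void time_init(void)                     /* early: only record the hook */
{
        late_time_init = hpet_time_init;
}

int main(void)                                  /* stands in for start_kernel() */
{
        time_init();
        /* ... allocators, IRQ subsystem, console come up here ... */
        if (late_time_init)
                late_time_init();
        return 0;
}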
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c new file mode 100644 index 000000000000..6dfd4e76661a --- /dev/null +++ b/arch/x86/kernel/tls.c | |||
@@ -0,0 +1,213 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/user.h> | ||
5 | #include <linux/regset.h> | ||
6 | |||
7 | #include <asm/uaccess.h> | ||
8 | #include <asm/desc.h> | ||
9 | #include <asm/system.h> | ||
10 | #include <asm/ldt.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/proto.h> | ||
13 | |||
14 | #include "tls.h" | ||
15 | |||
16 | /* | ||
17 | * sys_alloc_thread_area: get a yet unused TLS descriptor index. | ||
18 | */ | ||
19 | static int get_free_idx(void) | ||
20 | { | ||
21 | struct thread_struct *t = ¤t->thread; | ||
22 | int idx; | ||
23 | |||
24 | for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) | ||
25 | if (desc_empty(&t->tls_array[idx])) | ||
26 | return idx + GDT_ENTRY_TLS_MIN; | ||
27 | return -ESRCH; | ||
28 | } | ||
29 | |||
30 | static void set_tls_desc(struct task_struct *p, int idx, | ||
31 | const struct user_desc *info, int n) | ||
32 | { | ||
33 | struct thread_struct *t = &p->thread; | ||
34 | struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN]; | ||
35 | int cpu; | ||
36 | |||
37 | /* | ||
38 | * We must not get preempted while modifying the TLS. | ||
39 | */ | ||
40 | cpu = get_cpu(); | ||
41 | |||
42 | while (n-- > 0) { | ||
43 | if (LDT_empty(info)) | ||
44 | desc->a = desc->b = 0; | ||
45 | else | ||
46 | fill_ldt(desc, info); | ||
47 | ++info; | ||
48 | ++desc; | ||
49 | } | ||
50 | |||
51 | if (t == ¤t->thread) | ||
52 | load_TLS(t, cpu); | ||
53 | |||
54 | put_cpu(); | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * Set a given TLS descriptor: | ||
59 | */ | ||
60 | int do_set_thread_area(struct task_struct *p, int idx, | ||
61 | struct user_desc __user *u_info, | ||
62 | int can_allocate) | ||
63 | { | ||
64 | struct user_desc info; | ||
65 | |||
66 | if (copy_from_user(&info, u_info, sizeof(info))) | ||
67 | return -EFAULT; | ||
68 | |||
69 | if (idx == -1) | ||
70 | idx = info.entry_number; | ||
71 | |||
72 | /* | ||
73 | * index -1 means the kernel should try to find and | ||
74 | * allocate an empty descriptor: | ||
75 | */ | ||
76 | if (idx == -1 && can_allocate) { | ||
77 | idx = get_free_idx(); | ||
78 | if (idx < 0) | ||
79 | return idx; | ||
80 | if (put_user(idx, &u_info->entry_number)) | ||
81 | return -EFAULT; | ||
82 | } | ||
83 | |||
84 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
85 | return -EINVAL; | ||
86 | |||
87 | set_tls_desc(p, idx, &info, 1); | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | ||
93 | { | ||
94 | return do_set_thread_area(current, -1, u_info, 1); | ||
95 | } | ||
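For context, user space reaches do_set_thread_area() through the set_thread_area system call, typically passing entry_number = -1 so the kernel picks a free GDT slot via get_free_idx() and writes the chosen index back. A small userspace sketch; it assumes a 32-bit x86 build, and the TLS block and limits are arbitrary:

/* i386 userspace sketch: allocate a TLS GDT entry via set_thread_area(2). */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>            /* struct user_desc */

int main(void)
{
        static char tls_block[256];
        struct user_desc desc;

        memset(&desc, 0, sizeof(desc));
        desc.entry_number = -1;                 /* ask the kernel for a free slot */
        desc.base_addr    = (unsigned long)tls_block;
        desc.limit        = sizeof(tls_block) - 1;
        desc.seg_32bit    = 1;
        desc.useable      = 1;

        if (syscall(SYS_set_thread_area, &desc) != 0) {
                perror("set_thread_area");
                return 1;
        }
        /* On success the kernel wrote the allocated slot back. */
        printf("TLS descriptor installed in GDT entry %u\n", desc.entry_number);
        return 0;
}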
96 | |||
97 | |||
98 | /* | ||
99 | * Get the current Thread-Local Storage area: | ||
100 | */ | ||
101 | |||
102 | static void fill_user_desc(struct user_desc *info, int idx, | ||
103 | const struct desc_struct *desc) | ||
104 | |||
105 | { | ||
106 | memset(info, 0, sizeof(*info)); | ||
107 | info->entry_number = idx; | ||
108 | info->base_addr = get_desc_base(desc); | ||
109 | info->limit = get_desc_limit(desc); | ||
110 | info->seg_32bit = desc->d; | ||
111 | info->contents = desc->type >> 2; | ||
112 | info->read_exec_only = !(desc->type & 2); | ||
113 | info->limit_in_pages = desc->g; | ||
114 | info->seg_not_present = !desc->p; | ||
115 | info->useable = desc->avl; | ||
116 | #ifdef CONFIG_X86_64 | ||
117 | info->lm = desc->l; | ||
118 | #endif | ||
119 | } | ||
120 | |||
121 | int do_get_thread_area(struct task_struct *p, int idx, | ||
122 | struct user_desc __user *u_info) | ||
123 | { | ||
124 | struct user_desc info; | ||
125 | |||
126 | if (idx == -1 && get_user(idx, &u_info->entry_number)) | ||
127 | return -EFAULT; | ||
128 | |||
129 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
130 | return -EINVAL; | ||
131 | |||
132 | fill_user_desc(&info, idx, | ||
133 | &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]); | ||
134 | |||
135 | if (copy_to_user(u_info, &info, sizeof(info))) | ||
136 | return -EFAULT; | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | ||
141 | { | ||
142 | return do_get_thread_area(current, -1, u_info); | ||
143 | } | ||
144 | |||
145 | int regset_tls_active(struct task_struct *target, | ||
146 | const struct user_regset *regset) | ||
147 | { | ||
148 | struct thread_struct *t = &target->thread; | ||
149 | int n = GDT_ENTRY_TLS_ENTRIES; | ||
150 | while (n > 0 && desc_empty(&t->tls_array[n - 1])) | ||
151 | --n; | ||
152 | return n; | ||
153 | } | ||
154 | |||
155 | int regset_tls_get(struct task_struct *target, const struct user_regset *regset, | ||
156 | unsigned int pos, unsigned int count, | ||
157 | void *kbuf, void __user *ubuf) | ||
158 | { | ||
159 | const struct desc_struct *tls; | ||
160 | |||
161 | if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || | ||
162 | (pos % sizeof(struct user_desc)) != 0 || | ||
163 | (count % sizeof(struct user_desc)) != 0) | ||
164 | return -EINVAL; | ||
165 | |||
166 | pos /= sizeof(struct user_desc); | ||
167 | count /= sizeof(struct user_desc); | ||
168 | |||
169 | tls = &target->thread.tls_array[pos]; | ||
170 | |||
171 | if (kbuf) { | ||
172 | struct user_desc *info = kbuf; | ||
173 | while (count-- > 0) | ||
174 | fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++, | ||
175 | tls++); | ||
176 | } else { | ||
177 | struct user_desc __user *u_info = ubuf; | ||
178 | while (count-- > 0) { | ||
179 | struct user_desc info; | ||
180 | fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++); | ||
181 | if (__copy_to_user(u_info++, &info, sizeof(info))) | ||
182 | return -EFAULT; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | int regset_tls_set(struct task_struct *target, const struct user_regset *regset, | ||
190 | unsigned int pos, unsigned int count, | ||
191 | const void *kbuf, const void __user *ubuf) | ||
192 | { | ||
193 | struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; | ||
194 | const struct user_desc *info; | ||
195 | |||
196 | if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || | ||
197 | (pos % sizeof(struct user_desc)) != 0 || | ||
198 | (count % sizeof(struct user_desc)) != 0) | ||
199 | return -EINVAL; | ||
200 | |||
201 | if (kbuf) | ||
202 | info = kbuf; | ||
203 | else if (__copy_from_user(infobuf, ubuf, count)) | ||
204 | return -EFAULT; | ||
205 | else | ||
206 | info = infobuf; | ||
207 | |||
208 | set_tls_desc(target, | ||
209 | GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), | ||
210 | info, count / sizeof(struct user_desc)); | ||
211 | |||
212 | return 0; | ||
213 | } | ||
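The regset_tls_active/get/set trio above is meant to be referenced from a struct user_regset entry, so ptrace and core dumps can expose the TLS slots as an array of struct user_desc. Roughly what such an entry looks like — a sketch only, since the real table lives in the ptrace code and the exact field values here are assumptions:

/* Sketch only: how the TLS regset might be described to the regset core. */
static const struct user_regset tls_regset_sketch = {
        .core_note_type = NT_386_TLS,                   /* ELF note type for TLS */
        .n              = GDT_ENTRY_TLS_ENTRIES,        /* number of slots */
        .size           = sizeof(struct user_desc),     /* one element per slot */
        .align          = sizeof(struct user_desc),
        .active         = regset_tls_active,            /* how many slots are in use */
        .get            = regset_tls_get,
        .set            = regset_tls_set,
};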
diff --git a/arch/x86/kernel/tls.h b/arch/x86/kernel/tls.h new file mode 100644 index 000000000000..2f083a2fe216 --- /dev/null +++ b/arch/x86/kernel/tls.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Internal declarations for x86 TLS implementation functions. | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc. All rights reserved. | ||
5 | * | ||
6 | * This copyrighted material is made available to anyone wishing to use, | ||
7 | * modify, copy, or redistribute it subject to the terms and conditions | ||
8 | * of the GNU General Public License v.2. | ||
9 | * | ||
10 | * Red Hat Author: Roland McGrath. | ||
11 | */ | ||
12 | |||
13 | #ifndef _ARCH_X86_KERNEL_TLS_H | ||
14 | |||
15 | #include <linux/regset.h> | ||
16 | |||
17 | extern user_regset_active_fn regset_tls_active; | ||
18 | extern user_regset_get_fn regset_tls_get; | ||
19 | extern user_regset_set_fn regset_tls_set; | ||
20 | |||
21 | #endif /* _ARCH_X86_KERNEL_TLS_H */ | ||
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 7e16d675eb85..e6757aaa202b 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c | |||
@@ -31,9 +31,10 @@ | |||
31 | #include <linux/mmzone.h> | 31 | #include <linux/mmzone.h> |
32 | #include <asm/cpu.h> | 32 | #include <asm/cpu.h> |
33 | 33 | ||
34 | static struct i386_cpu cpu_devices[NR_CPUS]; | 34 | static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); |
35 | 35 | ||
36 | int __cpuinit arch_register_cpu(int num) | 36 | #ifdef CONFIG_HOTPLUG_CPU |
37 | int arch_register_cpu(int num) | ||
37 | { | 38 | { |
38 | /* | 39 | /* |
39 | * CPU0 cannot be offlined due to several | 40 | * CPU0 cannot be offlined due to several |
@@ -44,21 +45,22 @@ int __cpuinit arch_register_cpu(int num) | |||
44 | * Also certain PCI quirks require not to enable hotplug control | 45 | * Also certain PCI quirks require not to enable hotplug control |
45 | * for all CPU's. | 46 | * for all CPU's. |
46 | */ | 47 | */ |
47 | #ifdef CONFIG_HOTPLUG_CPU | ||
48 | if (num) | 48 | if (num) |
49 | cpu_devices[num].cpu.hotpluggable = 1; | 49 | per_cpu(cpu_devices, num).cpu.hotpluggable = 1; |
50 | #endif | 50 | return register_cpu(&per_cpu(cpu_devices, num).cpu, num); |
51 | |||
52 | return register_cpu(&cpu_devices[num].cpu, num); | ||
53 | } | 51 | } |
52 | EXPORT_SYMBOL(arch_register_cpu); | ||
54 | 53 | ||
55 | #ifdef CONFIG_HOTPLUG_CPU | ||
56 | void arch_unregister_cpu(int num) | 54 | void arch_unregister_cpu(int num) |
57 | { | 55 | { |
58 | return unregister_cpu(&cpu_devices[num].cpu); | 56 | return unregister_cpu(&per_cpu(cpu_devices, num).cpu); |
59 | } | 57 | } |
60 | EXPORT_SYMBOL(arch_register_cpu); | ||
61 | EXPORT_SYMBOL(arch_unregister_cpu); | 58 | EXPORT_SYMBOL(arch_unregister_cpu); |
59 | #else | ||
60 | static int __init arch_register_cpu(int num) | ||
61 | { | ||
62 | return register_cpu(&per_cpu(cpu_devices, num).cpu, num); | ||
63 | } | ||
62 | #endif /*CONFIG_HOTPLUG_CPU*/ | 64 | #endif /*CONFIG_HOTPLUG_CPU*/ |
63 | 65 | ||
64 | static int __init topology_init(void) | 66 | static int __init topology_init(void) |
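The conversion above swaps a static NR_CPUS-sized array for a per-CPU variable, so each CPU's struct x86_cpu lives in that CPU's own per-CPU area (and, on NUMA, on its own node) rather than in one flat table. The access pattern changes only syntactically — a short sketch:

/* Before: one flat, statically sized table indexed by CPU number. */
static struct x86_cpu cpu_devices[NR_CPUS];
        /* ... register_cpu(&cpu_devices[num].cpu, num); */

/* After: per-CPU storage; per_cpu(var, cpu) names the copy owned by 'cpu'. */
static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
        /* ... register_cpu(&per_cpu(cpu_devices, num).cpu, num); */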
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 9bcc1c6aca3d..64580679861e 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S | |||
@@ -11,12 +11,7 @@ | |||
11 | * trampoline page to make our stack and everything else | 11 | * trampoline page to make our stack and everything else |
12 | * is a mystery. | 12 | * is a mystery. |
13 | * | 13 | * |
14 | * In fact we don't actually need a stack so we don't | 14 | * We jump into arch/x86/kernel/head_32.S. |
15 | * set one up. | ||
16 | * | ||
17 | * We jump into the boot/compressed/head.S code. So you'd | ||
18 | * better be running a compressed kernel image or you | ||
19 | * won't get very far. | ||
20 | * | 15 | * |
21 | * On entry to trampoline_data, the processor is in real mode | 16 | * On entry to trampoline_data, the processor is in real mode |
22 | * with 16-bit addressing and 16-bit data. CS has some value | 17 | * with 16-bit addressing and 16-bit data. CS has some value |
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index e30b67c6a9f5..4aedd0bcee4c 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -10,9 +10,6 @@ | |||
10 | * trampoline page to make our stack and everything else | 10 | * trampoline page to make our stack and everything else |
11 | * is a mystery. | 11 | * is a mystery. |
12 | * | 12 | * |
13 | * In fact we don't actually need a stack so we don't | ||
14 | * set one up. | ||
15 | * | ||
16 | * On entry to trampoline_data, the processor is in real mode | 13 | * On entry to trampoline_data, the processor is in real mode |
17 | * with 16-bit addressing and 16-bit data. CS has some value | 14 | * with 16-bit addressing and 16-bit data. CS has some value |
18 | * and IP is zero. Thus, data addresses need to be absolute | 15 | * and IP is zero. Thus, data addresses need to be absolute |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index c88bbffcaa03..b22c01e05a18 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -76,7 +76,8 @@ char ignore_fpu_irq = 0; | |||
76 | * F0 0F bug workaround.. We have a special link segment | 76 | * F0 0F bug workaround.. We have a special link segment |
77 | * for this. | 77 | * for this. |
78 | */ | 78 | */ |
79 | struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; | 79 | gate_desc idt_table[256] |
80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | ||
80 | 81 | ||
81 | asmlinkage void divide_error(void); | 82 | asmlinkage void divide_error(void); |
82 | asmlinkage void debug(void); | 83 | asmlinkage void debug(void); |
@@ -101,6 +102,34 @@ asmlinkage void machine_check(void); | |||
101 | int kstack_depth_to_print = 24; | 102 | int kstack_depth_to_print = 24; |
102 | static unsigned int code_bytes = 64; | 103 | static unsigned int code_bytes = 64; |
103 | 104 | ||
105 | void printk_address(unsigned long address, int reliable) | ||
106 | { | ||
107 | #ifdef CONFIG_KALLSYMS | ||
108 | unsigned long offset = 0, symsize; | ||
109 | const char *symname; | ||
110 | char *modname; | ||
111 | char *delim = ":"; | ||
112 | char namebuf[128]; | ||
113 | char reliab[4] = ""; | ||
114 | |||
115 | symname = kallsyms_lookup(address, &symsize, &offset, | ||
116 | &modname, namebuf); | ||
117 | if (!symname) { | ||
118 | printk(" [<%08lx>]\n", address); | ||
119 | return; | ||
120 | } | ||
121 | if (!reliable) | ||
122 | strcpy(reliab, "? "); | ||
123 | |||
124 | if (!modname) | ||
125 | modname = delim = ""; | ||
126 | printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | ||
127 | address, reliab, delim, modname, delim, symname, offset, symsize); | ||
128 | #else | ||
129 | printk(" [<%08lx>]\n", address); | ||
130 | #endif | ||
131 | } | ||
132 | |||
104 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) | 133 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) |
105 | { | 134 | { |
106 | return p > (void *)tinfo && | 135 | return p > (void *)tinfo && |
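printk_address(), added in the hunk above, turns a raw text address into a symbolized line via kallsyms_lookup(), prefixing "? " when the caller flags the frame as unreliable. Illustrative calls and output (address and symbol are invented):

/* Illustrative only; addresses and symbol names are made up. */
printk_address(0xc01234ab, 1);
        /* ->  " [<c01234ab>] do_something+0x2b/0x90"      (reliable)   */
printk_address(0xc01234ab, 0);
        /* ->  " [<c01234ab>] ? do_something+0x2b/0x90"    (unreliable) */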
@@ -114,48 +143,35 @@ struct stack_frame { | |||
114 | }; | 143 | }; |
115 | 144 | ||
116 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | 145 | static inline unsigned long print_context_stack(struct thread_info *tinfo, |
117 | unsigned long *stack, unsigned long ebp, | 146 | unsigned long *stack, unsigned long bp, |
118 | const struct stacktrace_ops *ops, void *data) | 147 | const struct stacktrace_ops *ops, void *data) |
119 | { | 148 | { |
120 | #ifdef CONFIG_FRAME_POINTER | 149 | struct stack_frame *frame = (struct stack_frame *)bp; |
121 | struct stack_frame *frame = (struct stack_frame *)ebp; | ||
122 | while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) { | ||
123 | struct stack_frame *next; | ||
124 | unsigned long addr; | ||
125 | 150 | ||
126 | addr = frame->return_address; | ||
127 | ops->address(data, addr); | ||
128 | /* | ||
129 | * break out of recursive entries (such as | ||
130 | * end_of_stack_stop_unwind_function). Also, | ||
131 | * we can never allow a frame pointer to | ||
132 | * move downwards! | ||
133 | */ | ||
134 | next = frame->next_frame; | ||
135 | if (next <= frame) | ||
136 | break; | ||
137 | frame = next; | ||
138 | } | ||
139 | #else | ||
140 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { | 151 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { |
141 | unsigned long addr; | 152 | unsigned long addr; |
142 | 153 | ||
143 | addr = *stack++; | 154 | addr = *stack; |
144 | if (__kernel_text_address(addr)) | 155 | if (__kernel_text_address(addr)) { |
145 | ops->address(data, addr); | 156 | if ((unsigned long) stack == bp + 4) { |
157 | ops->address(data, addr, 1); | ||
158 | frame = frame->next_frame; | ||
159 | bp = (unsigned long) frame; | ||
160 | } else { | ||
161 | ops->address(data, addr, bp == 0); | ||
162 | } | ||
163 | } | ||
164 | stack++; | ||
146 | } | 165 | } |
147 | #endif | 166 | return bp; |
148 | return ebp; | ||
149 | } | 167 | } |
150 | 168 | ||
151 | #define MSG(msg) ops->warning(data, msg) | 169 | #define MSG(msg) ops->warning(data, msg) |
152 | 170 | ||
153 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 171 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
154 | unsigned long *stack, | 172 | unsigned long *stack, unsigned long bp, |
155 | const struct stacktrace_ops *ops, void *data) | 173 | const struct stacktrace_ops *ops, void *data) |
156 | { | 174 | { |
157 | unsigned long ebp = 0; | ||
158 | |||
159 | if (!task) | 175 | if (!task) |
160 | task = current; | 176 | task = current; |
161 | 177 | ||
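The rewritten print_context_stack() above no longer needs separate frame-pointer and scan-everything variants: it scans every word on the stack, but only marks an address reliable when it sits exactly one word above the current frame pointer, i.e. where CALL pushed the return address. The i386 frame layout it relies on (frame pointers enabled):

/* i386 stack frame layout assumed by the walker above:
 *
 *      ...                          higher addresses
 *      [ebp + 4]  return address    <-- the word treated as "reliable"
 *      [ebp + 0]  saved caller ebp  <-- frame->next_frame
 *      ...        locals            lower addresses
 */
struct stack_frame {
        struct stack_frame *next_frame;     /* saved EBP of the caller */
        unsigned long return_address;       /* pushed by the CALL instruction */
};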
@@ -163,17 +179,17 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
163 | unsigned long dummy; | 179 | unsigned long dummy; |
164 | stack = &dummy; | 180 | stack = &dummy; |
165 | if (task != current) | 181 | if (task != current) |
166 | stack = (unsigned long *)task->thread.esp; | 182 | stack = (unsigned long *)task->thread.sp; |
167 | } | 183 | } |
168 | 184 | ||
169 | #ifdef CONFIG_FRAME_POINTER | 185 | #ifdef CONFIG_FRAME_POINTER |
170 | if (!ebp) { | 186 | if (!bp) { |
171 | if (task == current) { | 187 | if (task == current) { |
172 | /* Grab ebp right from our regs */ | 188 | /* Grab bp right from our regs */ |
173 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); | 189 | asm ("movl %%ebp, %0" : "=r" (bp) : ); |
174 | } else { | 190 | } else { |
175 | /* ebp is the last reg pushed by switch_to */ | 191 | /* bp is the last reg pushed by switch_to */ |
176 | ebp = *(unsigned long *) task->thread.esp; | 192 | bp = *(unsigned long *) task->thread.sp; |
177 | } | 193 | } |
178 | } | 194 | } |
179 | #endif | 195 | #endif |
@@ -182,7 +198,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
182 | struct thread_info *context; | 198 | struct thread_info *context; |
183 | context = (struct thread_info *) | 199 | context = (struct thread_info *) |
184 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 200 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
185 | ebp = print_context_stack(context, stack, ebp, ops, data); | 201 | bp = print_context_stack(context, stack, bp, ops, data); |
186 | /* Should be after the line below, but somewhere | 202 | /* Should be after the line below, but somewhere |
187 | in early boot context comes out corrupted and we | 203 | in early boot context comes out corrupted and we |
188 | can't reference it -AK */ | 204 | can't reference it -AK */ |
@@ -217,9 +233,11 @@ static int print_trace_stack(void *data, char *name) | |||
217 | /* | 233 | /* |
218 | * Print one address/symbol entries per line. | 234 | * Print one address/symbol entries per line. |
219 | */ | 235 | */ |
220 | static void print_trace_address(void *data, unsigned long addr) | 236 | static void print_trace_address(void *data, unsigned long addr, int reliable) |
221 | { | 237 | { |
222 | printk("%s [<%08lx>] ", (char *)data, addr); | 238 | printk("%s [<%08lx>] ", (char *)data, addr); |
239 | if (!reliable) | ||
240 | printk("? "); | ||
223 | print_symbol("%s\n", addr); | 241 | print_symbol("%s\n", addr); |
224 | touch_nmi_watchdog(); | 242 | touch_nmi_watchdog(); |
225 | } | 243 | } |
@@ -233,32 +251,32 @@ static const struct stacktrace_ops print_trace_ops = { | |||
233 | 251 | ||
234 | static void | 252 | static void |
235 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 253 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
236 | unsigned long * stack, char *log_lvl) | 254 | unsigned long *stack, unsigned long bp, char *log_lvl) |
237 | { | 255 | { |
238 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); | 256 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
239 | printk("%s =======================\n", log_lvl); | 257 | printk("%s =======================\n", log_lvl); |
240 | } | 258 | } |
241 | 259 | ||
242 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 260 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
243 | unsigned long * stack) | 261 | unsigned long *stack, unsigned long bp) |
244 | { | 262 | { |
245 | show_trace_log_lvl(task, regs, stack, ""); | 263 | show_trace_log_lvl(task, regs, stack, bp, ""); |
246 | } | 264 | } |
247 | 265 | ||
248 | static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 266 | static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
249 | unsigned long *esp, char *log_lvl) | 267 | unsigned long *sp, unsigned long bp, char *log_lvl) |
250 | { | 268 | { |
251 | unsigned long *stack; | 269 | unsigned long *stack; |
252 | int i; | 270 | int i; |
253 | 271 | ||
254 | if (esp == NULL) { | 272 | if (sp == NULL) { |
255 | if (task) | 273 | if (task) |
256 | esp = (unsigned long*)task->thread.esp; | 274 | sp = (unsigned long*)task->thread.sp; |
257 | else | 275 | else |
258 | esp = (unsigned long *)&esp; | 276 | sp = (unsigned long *)&sp; |
259 | } | 277 | } |
260 | 278 | ||
261 | stack = esp; | 279 | stack = sp; |
262 | for(i = 0; i < kstack_depth_to_print; i++) { | 280 | for(i = 0; i < kstack_depth_to_print; i++) { |
263 | if (kstack_end(stack)) | 281 | if (kstack_end(stack)) |
264 | break; | 282 | break; |
@@ -267,13 +285,13 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
267 | printk("%08lx ", *stack++); | 285 | printk("%08lx ", *stack++); |
268 | } | 286 | } |
269 | printk("\n%sCall Trace:\n", log_lvl); | 287 | printk("\n%sCall Trace:\n", log_lvl); |
270 | show_trace_log_lvl(task, regs, esp, log_lvl); | 288 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
271 | } | 289 | } |
272 | 290 | ||
273 | void show_stack(struct task_struct *task, unsigned long *esp) | 291 | void show_stack(struct task_struct *task, unsigned long *sp) |
274 | { | 292 | { |
275 | printk(" "); | 293 | printk(" "); |
276 | show_stack_log_lvl(task, NULL, esp, ""); | 294 | show_stack_log_lvl(task, NULL, sp, 0, ""); |
277 | } | 295 | } |
278 | 296 | ||
279 | /* | 297 | /* |
@@ -282,13 +300,19 @@ void show_stack(struct task_struct *task, unsigned long *esp) | |||
282 | void dump_stack(void) | 300 | void dump_stack(void) |
283 | { | 301 | { |
284 | unsigned long stack; | 302 | unsigned long stack; |
303 | unsigned long bp = 0; | ||
304 | |||
305 | #ifdef CONFIG_FRAME_POINTER | ||
306 | if (!bp) | ||
307 | asm("movl %%ebp, %0" : "=r" (bp):); | ||
308 | #endif | ||
285 | 309 | ||
286 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 310 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
287 | current->pid, current->comm, print_tainted(), | 311 | current->pid, current->comm, print_tainted(), |
288 | init_utsname()->release, | 312 | init_utsname()->release, |
289 | (int)strcspn(init_utsname()->version, " "), | 313 | (int)strcspn(init_utsname()->version, " "), |
290 | init_utsname()->version); | 314 | init_utsname()->version); |
291 | show_trace(current, NULL, &stack); | 315 | show_trace(current, NULL, &stack, bp); |
292 | } | 316 | } |
293 | 317 | ||
294 | EXPORT_SYMBOL(dump_stack); | 318 | EXPORT_SYMBOL(dump_stack); |
@@ -307,30 +331,30 @@ void show_registers(struct pt_regs *regs) | |||
307 | * time of the fault.. | 331 | * time of the fault.. |
308 | */ | 332 | */ |
309 | if (!user_mode_vm(regs)) { | 333 | if (!user_mode_vm(regs)) { |
310 | u8 *eip; | 334 | u8 *ip; |
311 | unsigned int code_prologue = code_bytes * 43 / 64; | 335 | unsigned int code_prologue = code_bytes * 43 / 64; |
312 | unsigned int code_len = code_bytes; | 336 | unsigned int code_len = code_bytes; |
313 | unsigned char c; | 337 | unsigned char c; |
314 | 338 | ||
315 | printk("\n" KERN_EMERG "Stack: "); | 339 | printk("\n" KERN_EMERG "Stack: "); |
316 | show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); | 340 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); |
317 | 341 | ||
318 | printk(KERN_EMERG "Code: "); | 342 | printk(KERN_EMERG "Code: "); |
319 | 343 | ||
320 | eip = (u8 *)regs->eip - code_prologue; | 344 | ip = (u8 *)regs->ip - code_prologue; |
321 | if (eip < (u8 *)PAGE_OFFSET || | 345 | if (ip < (u8 *)PAGE_OFFSET || |
322 | probe_kernel_address(eip, c)) { | 346 | probe_kernel_address(ip, c)) { |
323 | /* try starting at EIP */ | 347 | /* try starting at EIP */ |
324 | eip = (u8 *)regs->eip; | 348 | ip = (u8 *)regs->ip; |
325 | code_len = code_len - code_prologue + 1; | 349 | code_len = code_len - code_prologue + 1; |
326 | } | 350 | } |
327 | for (i = 0; i < code_len; i++, eip++) { | 351 | for (i = 0; i < code_len; i++, ip++) { |
328 | if (eip < (u8 *)PAGE_OFFSET || | 352 | if (ip < (u8 *)PAGE_OFFSET || |
329 | probe_kernel_address(eip, c)) { | 353 | probe_kernel_address(ip, c)) { |
330 | printk(" Bad EIP value."); | 354 | printk(" Bad EIP value."); |
331 | break; | 355 | break; |
332 | } | 356 | } |
333 | if (eip == (u8 *)regs->eip) | 357 | if (ip == (u8 *)regs->ip) |
334 | printk("<%02x> ", c); | 358 | printk("<%02x> ", c); |
335 | else | 359 | else |
336 | printk("%02x ", c); | 360 | printk("%02x ", c); |
@@ -339,18 +363,57 @@ void show_registers(struct pt_regs *regs) | |||
339 | printk("\n"); | 363 | printk("\n"); |
340 | } | 364 | } |
341 | 365 | ||
342 | int is_valid_bugaddr(unsigned long eip) | 366 | int is_valid_bugaddr(unsigned long ip) |
343 | { | 367 | { |
344 | unsigned short ud2; | 368 | unsigned short ud2; |
345 | 369 | ||
346 | if (eip < PAGE_OFFSET) | 370 | if (ip < PAGE_OFFSET) |
347 | return 0; | 371 | return 0; |
348 | if (probe_kernel_address((unsigned short *)eip, ud2)) | 372 | if (probe_kernel_address((unsigned short *)ip, ud2)) |
349 | return 0; | 373 | return 0; |
350 | 374 | ||
351 | return ud2 == 0x0b0f; | 375 | return ud2 == 0x0b0f; |
352 | } | 376 | } |
353 | 377 | ||
378 | static int die_counter; | ||
379 | |||
380 | int __kprobes __die(const char * str, struct pt_regs * regs, long err) | ||
381 | { | ||
382 | unsigned long sp; | ||
383 | unsigned short ss; | ||
384 | |||
385 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
386 | #ifdef CONFIG_PREEMPT | ||
387 | printk("PREEMPT "); | ||
388 | #endif | ||
389 | #ifdef CONFIG_SMP | ||
390 | printk("SMP "); | ||
391 | #endif | ||
392 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
393 | printk("DEBUG_PAGEALLOC"); | ||
394 | #endif | ||
395 | printk("\n"); | ||
396 | |||
397 | if (notify_die(DIE_OOPS, str, regs, err, | ||
398 | current->thread.trap_no, SIGSEGV) != | ||
399 | NOTIFY_STOP) { | ||
400 | show_registers(regs); | ||
401 | /* Executive summary in case the oops scrolled away */ | ||
402 | sp = (unsigned long) (®s->sp); | ||
403 | savesegment(ss, ss); | ||
404 | if (user_mode(regs)) { | ||
405 | sp = regs->sp; | ||
406 | ss = regs->ss & 0xffff; | ||
407 | } | ||
408 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
409 | print_symbol("%s", regs->ip); | ||
410 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
411 | return 0; | ||
412 | } else { | ||
413 | return 1; | ||
414 | } | ||
415 | } | ||
416 | |||
354 | /* | 417 | /* |
355 | * This is gone through when something in the kernel has done something bad and | 418 | * This is gone through when something in the kernel has done something bad and |
356 | * is about to be terminated. | 419 | * is about to be terminated. |
@@ -366,7 +429,6 @@ void die(const char * str, struct pt_regs * regs, long err) | |||
366 | .lock_owner = -1, | 429 | .lock_owner = -1, |
367 | .lock_owner_depth = 0 | 430 | .lock_owner_depth = 0 |
368 | }; | 431 | }; |
369 | static int die_counter; | ||
370 | unsigned long flags; | 432 | unsigned long flags; |
371 | 433 | ||
372 | oops_enter(); | 434 | oops_enter(); |
@@ -382,43 +444,13 @@ void die(const char * str, struct pt_regs * regs, long err) | |||
382 | raw_local_irq_save(flags); | 444 | raw_local_irq_save(flags); |
383 | 445 | ||
384 | if (++die.lock_owner_depth < 3) { | 446 | if (++die.lock_owner_depth < 3) { |
385 | unsigned long esp; | 447 | report_bug(regs->ip, regs); |
386 | unsigned short ss; | ||
387 | |||
388 | report_bug(regs->eip, regs); | ||
389 | 448 | ||
390 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, | 449 | if (__die(str, regs, err)) |
391 | ++die_counter); | ||
392 | #ifdef CONFIG_PREEMPT | ||
393 | printk("PREEMPT "); | ||
394 | #endif | ||
395 | #ifdef CONFIG_SMP | ||
396 | printk("SMP "); | ||
397 | #endif | ||
398 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
399 | printk("DEBUG_PAGEALLOC"); | ||
400 | #endif | ||
401 | printk("\n"); | ||
402 | |||
403 | if (notify_die(DIE_OOPS, str, regs, err, | ||
404 | current->thread.trap_no, SIGSEGV) != | ||
405 | NOTIFY_STOP) { | ||
406 | show_registers(regs); | ||
407 | /* Executive summary in case the oops scrolled away */ | ||
408 | esp = (unsigned long) (®s->esp); | ||
409 | savesegment(ss, ss); | ||
410 | if (user_mode(regs)) { | ||
411 | esp = regs->esp; | ||
412 | ss = regs->xss & 0xffff; | ||
413 | } | ||
414 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip); | ||
415 | print_symbol("%s", regs->eip); | ||
416 | printk(" SS:ESP %04x:%08lx\n", ss, esp); | ||
417 | } | ||
418 | else | ||
419 | regs = NULL; | 450 | regs = NULL; |
420 | } else | 451 | } else { |
421 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | 452 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); |
453 | } | ||
422 | 454 | ||
423 | bust_spinlocks(0); | 455 | bust_spinlocks(0); |
424 | die.lock_owner = -1; | 456 | die.lock_owner = -1; |
@@ -454,7 +486,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
454 | { | 486 | { |
455 | struct task_struct *tsk = current; | 487 | struct task_struct *tsk = current; |
456 | 488 | ||
457 | if (regs->eflags & VM_MASK) { | 489 | if (regs->flags & VM_MASK) { |
458 | if (vm86) | 490 | if (vm86) |
459 | goto vm86_trap; | 491 | goto vm86_trap; |
460 | goto trap_signal; | 492 | goto trap_signal; |
@@ -500,7 +532,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
500 | } | 532 | } |
501 | 533 | ||
502 | #define DO_ERROR(trapnr, signr, str, name) \ | 534 | #define DO_ERROR(trapnr, signr, str, name) \ |
503 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | 535 | void do_##name(struct pt_regs * regs, long error_code) \ |
504 | { \ | 536 | { \ |
505 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 537 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
506 | == NOTIFY_STOP) \ | 538 | == NOTIFY_STOP) \ |
@@ -509,7 +541,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |||
509 | } | 541 | } |
510 | 542 | ||
511 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ | 543 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ |
512 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | 544 | void do_##name(struct pt_regs * regs, long error_code) \ |
513 | { \ | 545 | { \ |
514 | siginfo_t info; \ | 546 | siginfo_t info; \ |
515 | if (irq) \ | 547 | if (irq) \ |
@@ -525,7 +557,7 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |||
525 | } | 557 | } |
526 | 558 | ||
527 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ | 559 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ |
528 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | 560 | void do_##name(struct pt_regs * regs, long error_code) \ |
529 | { \ | 561 | { \ |
530 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 562 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
531 | == NOTIFY_STOP) \ | 563 | == NOTIFY_STOP) \ |
@@ -534,26 +566,27 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \ | |||
534 | } | 566 | } |
535 | 567 | ||
536 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 568 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
537 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | 569 | void do_##name(struct pt_regs * regs, long error_code) \ |
538 | { \ | 570 | { \ |
539 | siginfo_t info; \ | 571 | siginfo_t info; \ |
540 | info.si_signo = signr; \ | 572 | info.si_signo = signr; \ |
541 | info.si_errno = 0; \ | 573 | info.si_errno = 0; \ |
542 | info.si_code = sicode; \ | 574 | info.si_code = sicode; \ |
543 | info.si_addr = (void __user *)siaddr; \ | 575 | info.si_addr = (void __user *)siaddr; \ |
576 | trace_hardirqs_fixup(); \ | ||
544 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 577 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
545 | == NOTIFY_STOP) \ | 578 | == NOTIFY_STOP) \ |
546 | return; \ | 579 | return; \ |
547 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | 580 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ |
548 | } | 581 | } |
549 | 582 | ||
550 | DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) | 583 | DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
551 | #ifndef CONFIG_KPROBES | 584 | #ifndef CONFIG_KPROBES |
552 | DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) | 585 | DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) |
553 | #endif | 586 | #endif |
554 | DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) | 587 | DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) |
555 | DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) | 588 | DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) |
556 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0) | 589 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) |
557 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 590 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
558 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 591 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
559 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 592 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
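Each DO_ERROR*() line above stamps out a complete trap handler from the corresponding macro. As an illustration, DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) expands to roughly the following; the final do_trap() line is not visible in the hunks above, so it is an assumption based on the macro's usual shape:

/* Approximate expansion of DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS): */
void do_invalid_TSS(struct pt_regs *regs, long error_code)
{
        if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code, 10, SIGSEGV)
                        == NOTIFY_STOP)
                return;
        do_trap(10, SIGSEGV, "invalid TSS", 0, regs, error_code, NULL);
}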
@@ -561,7 +594,7 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | |||
561 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | 594 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) |
562 | DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) | 595 | DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) |
563 | 596 | ||
564 | fastcall void __kprobes do_general_protection(struct pt_regs * regs, | 597 | void __kprobes do_general_protection(struct pt_regs * regs, |
565 | long error_code) | 598 | long error_code) |
566 | { | 599 | { |
567 | int cpu = get_cpu(); | 600 | int cpu = get_cpu(); |
@@ -595,7 +628,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
595 | } | 628 | } |
596 | put_cpu(); | 629 | put_cpu(); |
597 | 630 | ||
598 | if (regs->eflags & VM_MASK) | 631 | if (regs->flags & VM_MASK) |
599 | goto gp_in_vm86; | 632 | goto gp_in_vm86; |
600 | 633 | ||
601 | if (!user_mode(regs)) | 634 | if (!user_mode(regs)) |
@@ -604,11 +637,14 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
604 | current->thread.error_code = error_code; | 637 | current->thread.error_code = error_code; |
605 | current->thread.trap_no = 13; | 638 | current->thread.trap_no = 13; |
606 | if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && | 639 | if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && |
607 | printk_ratelimit()) | 640 | printk_ratelimit()) { |
608 | printk(KERN_INFO | 641 | printk(KERN_INFO |
609 | "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", | 642 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", |
610 | current->comm, task_pid_nr(current), | 643 | current->comm, task_pid_nr(current), |
611 | regs->eip, regs->esp, error_code); | 644 | regs->ip, regs->sp, error_code); |
645 | print_vma_addr(" in ", regs->ip); | ||
646 | printk("\n"); | ||
647 | } | ||
612 | 648 | ||
613 | force_sig(SIGSEGV, current); | 649 | force_sig(SIGSEGV, current); |
614 | return; | 650 | return; |
@@ -704,8 +740,8 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) | |||
704 | */ | 740 | */ |
705 | bust_spinlocks(1); | 741 | bust_spinlocks(1); |
706 | printk(KERN_EMERG "%s", msg); | 742 | printk(KERN_EMERG "%s", msg); |
707 | printk(" on CPU%d, eip %08lx, registers:\n", | 743 | printk(" on CPU%d, ip %08lx, registers:\n", |
708 | smp_processor_id(), regs->eip); | 744 | smp_processor_id(), regs->ip); |
709 | show_registers(regs); | 745 | show_registers(regs); |
710 | console_silent(); | 746 | console_silent(); |
711 | spin_unlock(&nmi_print_lock); | 747 | spin_unlock(&nmi_print_lock); |
@@ -762,7 +798,7 @@ static __kprobes void default_do_nmi(struct pt_regs * regs) | |||
762 | 798 | ||
763 | static int ignore_nmis; | 799 | static int ignore_nmis; |
764 | 800 | ||
765 | fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) | 801 | __kprobes void do_nmi(struct pt_regs * regs, long error_code) |
766 | { | 802 | { |
767 | int cpu; | 803 | int cpu; |
768 | 804 | ||
@@ -791,7 +827,7 @@ void restart_nmi(void) | |||
791 | } | 827 | } |
792 | 828 | ||
793 | #ifdef CONFIG_KPROBES | 829 | #ifdef CONFIG_KPROBES |
794 | fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) | 830 | void __kprobes do_int3(struct pt_regs *regs, long error_code) |
795 | { | 831 | { |
796 | trace_hardirqs_fixup(); | 832 | trace_hardirqs_fixup(); |
797 | 833 | ||
@@ -827,7 +863,7 @@ fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
827 | * find every occurrence of the TF bit that could be saved away even | 863 | * find every occurrence of the TF bit that could be saved away even |
828 | * by user code) | 864 | * by user code) |
829 | */ | 865 | */ |
830 | fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) | 866 | void __kprobes do_debug(struct pt_regs * regs, long error_code) |
831 | { | 867 | { |
832 | unsigned int condition; | 868 | unsigned int condition; |
833 | struct task_struct *tsk = current; | 869 | struct task_struct *tsk = current; |
@@ -836,24 +872,30 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) | |||
836 | 872 | ||
837 | get_debugreg(condition, 6); | 873 | get_debugreg(condition, 6); |
838 | 874 | ||
875 | /* | ||
876 | * The processor cleared BTF, so don't mark that we need it set. | ||
877 | */ | ||
878 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
879 | tsk->thread.debugctlmsr = 0; | ||
880 | |||
839 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 881 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
840 | SIGTRAP) == NOTIFY_STOP) | 882 | SIGTRAP) == NOTIFY_STOP) |
841 | return; | 883 | return; |
842 | /* It's safe to allow irq's after DR6 has been saved */ | 884 | /* It's safe to allow irq's after DR6 has been saved */ |
843 | if (regs->eflags & X86_EFLAGS_IF) | 885 | if (regs->flags & X86_EFLAGS_IF) |
844 | local_irq_enable(); | 886 | local_irq_enable(); |
845 | 887 | ||
846 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 888 | /* Mask out spurious debug traps due to lazy DR7 setting */ |
847 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 889 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { |
848 | if (!tsk->thread.debugreg[7]) | 890 | if (!tsk->thread.debugreg7) |
849 | goto clear_dr7; | 891 | goto clear_dr7; |
850 | } | 892 | } |
851 | 893 | ||
852 | if (regs->eflags & VM_MASK) | 894 | if (regs->flags & VM_MASK) |
853 | goto debug_vm86; | 895 | goto debug_vm86; |
854 | 896 | ||
855 | /* Save debug status register where ptrace can see it */ | 897 | /* Save debug status register where ptrace can see it */ |
856 | tsk->thread.debugreg[6] = condition; | 898 | tsk->thread.debugreg6 = condition; |
857 | 899 | ||
858 | /* | 900 | /* |
859 | * Single-stepping through TF: make sure we ignore any events in | 901 | * Single-stepping through TF: make sure we ignore any events in |
@@ -885,7 +927,7 @@ debug_vm86: | |||
885 | 927 | ||
886 | clear_TF_reenable: | 928 | clear_TF_reenable: |
887 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 929 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
888 | regs->eflags &= ~TF_MASK; | 930 | regs->flags &= ~TF_MASK; |
889 | return; | 931 | return; |
890 | } | 932 | } |
891 | 933 | ||
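do_debug() above reads the DR6 status into "condition", masks off spurious traps caused by lazy DR7 switching, and now records the status in the renamed tsk->thread.debugreg6. The low four bits of DR6 say which hardware breakpoint fired and bit 14 flags a single-step trap; a small standalone decoding sketch, with bit positions assumed to match asm/debugreg.h:

/* Sketch: decode a DR6 status word the way do_debug() interprets it.
 * DR_TRAP0..DR_TRAP3 are bits 0-3, DR_STEP (single-step) is bit 14. */
#include <stdio.h>

static void decode_dr6(unsigned long condition)
{
        int i;

        for (i = 0; i < 4; i++)
                if (condition & (1UL << i))
                        printf("hardware breakpoint %d hit\n", i);
        if (condition & (1UL << 14))
                printf("single-step (TF) trap\n");
}

int main(void)
{
        decode_dr6(0x4001UL);   /* made-up status: breakpoint 0 + single-step */
        return 0;
}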
@@ -894,7 +936,7 @@ clear_TF_reenable: | |||
894 | * the correct behaviour even in the presence of the asynchronous | 936 | * the correct behaviour even in the presence of the asynchronous |
895 | * IRQ13 behaviour | 937 | * IRQ13 behaviour |
896 | */ | 938 | */ |
897 | void math_error(void __user *eip) | 939 | void math_error(void __user *ip) |
898 | { | 940 | { |
899 | struct task_struct * task; | 941 | struct task_struct * task; |
900 | siginfo_t info; | 942 | siginfo_t info; |
@@ -910,7 +952,7 @@ void math_error(void __user *eip) | |||
910 | info.si_signo = SIGFPE; | 952 | info.si_signo = SIGFPE; |
911 | info.si_errno = 0; | 953 | info.si_errno = 0; |
912 | info.si_code = __SI_FAULT; | 954 | info.si_code = __SI_FAULT; |
913 | info.si_addr = eip; | 955 | info.si_addr = ip; |
914 | /* | 956 | /* |
915 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 957 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
916 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 958 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
@@ -953,13 +995,13 @@ void math_error(void __user *eip) | |||
953 | force_sig_info(SIGFPE, &info, task); | 995 | force_sig_info(SIGFPE, &info, task); |
954 | } | 996 | } |
955 | 997 | ||
956 | fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code) | 998 | void do_coprocessor_error(struct pt_regs * regs, long error_code) |
957 | { | 999 | { |
958 | ignore_fpu_irq = 1; | 1000 | ignore_fpu_irq = 1; |
959 | math_error((void __user *)regs->eip); | 1001 | math_error((void __user *)regs->ip); |
960 | } | 1002 | } |
961 | 1003 | ||
962 | static void simd_math_error(void __user *eip) | 1004 | static void simd_math_error(void __user *ip) |
963 | { | 1005 | { |
964 | struct task_struct * task; | 1006 | struct task_struct * task; |
965 | siginfo_t info; | 1007 | siginfo_t info; |
@@ -975,7 +1017,7 @@ static void simd_math_error(void __user *eip) | |||
975 | info.si_signo = SIGFPE; | 1017 | info.si_signo = SIGFPE; |
976 | info.si_errno = 0; | 1018 | info.si_errno = 0; |
977 | info.si_code = __SI_FAULT; | 1019 | info.si_code = __SI_FAULT; |
978 | info.si_addr = eip; | 1020 | info.si_addr = ip; |
979 | /* | 1021 | /* |
980 | * The SIMD FPU exceptions are handled a little differently, as there | 1022 | * The SIMD FPU exceptions are handled a little differently, as there |
981 | * is only a single status/control register. Thus, to determine which | 1023 | * is only a single status/control register. Thus, to determine which |
@@ -1007,19 +1049,19 @@ static void simd_math_error(void __user *eip) | |||
1007 | force_sig_info(SIGFPE, &info, task); | 1049 | force_sig_info(SIGFPE, &info, task); |
1008 | } | 1050 | } |
1009 | 1051 | ||
1010 | fastcall void do_simd_coprocessor_error(struct pt_regs * regs, | 1052 | void do_simd_coprocessor_error(struct pt_regs * regs, |
1011 | long error_code) | 1053 | long error_code) |
1012 | { | 1054 | { |
1013 | if (cpu_has_xmm) { | 1055 | if (cpu_has_xmm) { |
1014 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | 1056 | /* Handle SIMD FPU exceptions on PIII+ processors. */ |
1015 | ignore_fpu_irq = 1; | 1057 | ignore_fpu_irq = 1; |
1016 | simd_math_error((void __user *)regs->eip); | 1058 | simd_math_error((void __user *)regs->ip); |
1017 | } else { | 1059 | } else { |
1018 | /* | 1060 | /* |
1019 | * Handle strange cache flush from user space exception | 1061 | * Handle strange cache flush from user space exception |
1020 | * in all other cases. This is undocumented behaviour. | 1062 | * in all other cases. This is undocumented behaviour. |
1021 | */ | 1063 | */ |
1022 | if (regs->eflags & VM_MASK) { | 1064 | if (regs->flags & VM_MASK) { |
1023 | handle_vm86_fault((struct kernel_vm86_regs *)regs, | 1065 | handle_vm86_fault((struct kernel_vm86_regs *)regs, |
1024 | error_code); | 1066 | error_code); |
1025 | return; | 1067 | return; |
@@ -1031,7 +1073,7 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs, | |||
1031 | } | 1073 | } |
1032 | } | 1074 | } |
1033 | 1075 | ||
1034 | fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | 1076 | void do_spurious_interrupt_bug(struct pt_regs * regs, |
1035 | long error_code) | 1077 | long error_code) |
1036 | { | 1078 | { |
1037 | #if 0 | 1079 | #if 0 |
@@ -1040,7 +1082,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | |||
1040 | #endif | 1082 | #endif |
1041 | } | 1083 | } |
1042 | 1084 | ||
1043 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, | 1085 | unsigned long patch_espfix_desc(unsigned long uesp, |
1044 | unsigned long kesp) | 1086 | unsigned long kesp) |
1045 | { | 1087 | { |
1046 | struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; | 1088 | struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; |
@@ -1094,51 +1136,17 @@ asmlinkage void math_emulate(long arg) | |||
1094 | 1136 | ||
1095 | #endif /* CONFIG_MATH_EMULATION */ | 1137 | #endif /* CONFIG_MATH_EMULATION */ |
1096 | 1138 | ||
1097 | /* | ||
1098 | * This needs to use 'idt_table' rather than 'idt', and | ||
1099 | * thus use the _nonmapped_ version of the IDT, as the | ||
1100 | * Pentium F0 0F bugfix can have resulted in the mapped | ||
1101 | * IDT being write-protected. | ||
1102 | */ | ||
1103 | void set_intr_gate(unsigned int n, void *addr) | ||
1104 | { | ||
1105 | _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | * This routine sets up an interrupt gate at directory privilege level 3. | ||
1110 | */ | ||
1111 | static inline void set_system_intr_gate(unsigned int n, void *addr) | ||
1112 | { | ||
1113 | _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS); | ||
1114 | } | ||
1115 | |||
1116 | static void __init set_trap_gate(unsigned int n, void *addr) | ||
1117 | { | ||
1118 | _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS); | ||
1119 | } | ||
1120 | |||
1121 | static void __init set_system_gate(unsigned int n, void *addr) | ||
1122 | { | ||
1123 | _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS); | ||
1124 | } | ||
1125 | |||
1126 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) | ||
1127 | { | ||
1128 | _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); | ||
1129 | } | ||
1130 | |||
1131 | 1139 | ||
1132 | void __init trap_init(void) | 1140 | void __init trap_init(void) |
1133 | { | 1141 | { |
1134 | int i; | 1142 | int i; |
1135 | 1143 | ||
1136 | #ifdef CONFIG_EISA | 1144 | #ifdef CONFIG_EISA |
1137 | void __iomem *p = ioremap(0x0FFFD9, 4); | 1145 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
1138 | if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { | 1146 | if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { |
1139 | EISA_bus = 1; | 1147 | EISA_bus = 1; |
1140 | } | 1148 | } |
1141 | iounmap(p); | 1149 | early_iounmap(p, 4); |
1142 | #endif | 1150 | #endif |
1143 | 1151 | ||
1144 | #ifdef CONFIG_X86_LOCAL_APIC | 1152 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -1168,17 +1176,12 @@ void __init trap_init(void) | |||
1168 | #endif | 1176 | #endif |
1169 | set_trap_gate(19,&simd_coprocessor_error); | 1177 | set_trap_gate(19,&simd_coprocessor_error); |
1170 | 1178 | ||
1179 | /* | ||
1180 | * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. | ||
1181 | * Generate a build-time error if the alignment is wrong. | ||
1182 | */ | ||
1183 | BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); | ||
1171 | if (cpu_has_fxsr) { | 1184 | if (cpu_has_fxsr) { |
1172 | /* | ||
1173 | * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. | ||
1174 | * Generates a compile-time "error: zero width for bit-field" if | ||
1175 | * the alignment is wrong. | ||
1176 | */ | ||
1177 | struct fxsrAlignAssert { | ||
1178 | int _:!(offsetof(struct task_struct, | ||
1179 | thread.i387.fxsave) & 15); | ||
1180 | }; | ||
1181 | |||
1182 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | 1185 | printk(KERN_INFO "Enabling fast FPU save and restore... "); |
1183 | set_in_cr4(X86_CR4_OSFXSR); | 1186 | set_in_cr4(X86_CR4_OSFXSR); |
1184 | printk("done.\n"); | 1187 | printk("done.\n"); |
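The trap_init() hunk above trades the old zero-width bit-field trick for a plain BUILD_BUG_ON() when asserting that the fxsave area is 16-byte aligned, and hoists the check out of the cpu_has_fxsr branch so it always fires at build time. A minimal standalone sketch of both idioms, using a made-up struct rather than the kernel's task_struct:

#include <stddef.h>

struct dummy_thread {
	char pad[48];			/* stand-in for the fields before i387 */
	char fxsave[512];		/* stand-in for thread.i387.fxsave */
};

/* Old style: a named bit-field may not have zero width, so this only
 * compiles when the offset is a multiple of 16. */
struct fxsr_align_assert {
	int _:!(offsetof(struct dummy_thread, fxsave) & 15);
};

/* New style: roughly what BUILD_BUG_ON(expr) boils down to, a negative
 * array size whenever the condition is true. */
#define ASSERT_NOT(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

int main(void)
{
	ASSERT_NOT(offsetof(struct dummy_thread, fxsave) & 15);
	return 0;
}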
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index d11525ad81b4..efc66df728b6 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -74,22 +74,24 @@ asmlinkage void alignment_check(void); | |||
74 | asmlinkage void machine_check(void); | 74 | asmlinkage void machine_check(void); |
75 | asmlinkage void spurious_interrupt_bug(void); | 75 | asmlinkage void spurious_interrupt_bug(void); |
76 | 76 | ||
77 | static unsigned int code_bytes = 64; | ||
78 | |||
77 | static inline void conditional_sti(struct pt_regs *regs) | 79 | static inline void conditional_sti(struct pt_regs *regs) |
78 | { | 80 | { |
79 | if (regs->eflags & X86_EFLAGS_IF) | 81 | if (regs->flags & X86_EFLAGS_IF) |
80 | local_irq_enable(); | 82 | local_irq_enable(); |
81 | } | 83 | } |
82 | 84 | ||
83 | static inline void preempt_conditional_sti(struct pt_regs *regs) | 85 | static inline void preempt_conditional_sti(struct pt_regs *regs) |
84 | { | 86 | { |
85 | preempt_disable(); | 87 | preempt_disable(); |
86 | if (regs->eflags & X86_EFLAGS_IF) | 88 | if (regs->flags & X86_EFLAGS_IF) |
87 | local_irq_enable(); | 89 | local_irq_enable(); |
88 | } | 90 | } |
89 | 91 | ||
90 | static inline void preempt_conditional_cli(struct pt_regs *regs) | 92 | static inline void preempt_conditional_cli(struct pt_regs *regs) |
91 | { | 93 | { |
92 | if (regs->eflags & X86_EFLAGS_IF) | 94 | if (regs->flags & X86_EFLAGS_IF) |
93 | local_irq_disable(); | 95 | local_irq_disable(); |
94 | /* Make sure to not schedule here because we could be running | 96 | /* Make sure to not schedule here because we could be running |
95 | on an exception stack. */ | 97 | on an exception stack. */ |
@@ -98,14 +100,15 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
98 | 100 | ||
99 | int kstack_depth_to_print = 12; | 101 | int kstack_depth_to_print = 12; |
100 | 102 | ||
101 | #ifdef CONFIG_KALLSYMS | 103 | void printk_address(unsigned long address, int reliable) |
102 | void printk_address(unsigned long address) | ||
103 | { | 104 | { |
105 | #ifdef CONFIG_KALLSYMS | ||
104 | unsigned long offset = 0, symsize; | 106 | unsigned long offset = 0, symsize; |
105 | const char *symname; | 107 | const char *symname; |
106 | char *modname; | 108 | char *modname; |
107 | char *delim = ":"; | 109 | char *delim = ":"; |
108 | char namebuf[128]; | 110 | char namebuf[KSYM_NAME_LEN]; |
111 | char reliab[4] = ""; | ||
109 | 112 | ||
110 | symname = kallsyms_lookup(address, &symsize, &offset, | 113 | symname = kallsyms_lookup(address, &symsize, &offset, |
111 | &modname, namebuf); | 114 | &modname, namebuf); |
@@ -113,17 +116,17 @@ void printk_address(unsigned long address) | |||
113 | printk(" [<%016lx>]\n", address); | 116 | printk(" [<%016lx>]\n", address); |
114 | return; | 117 | return; |
115 | } | 118 | } |
119 | if (!reliable) | ||
120 | strcpy(reliab, "? "); | ||
121 | |||
116 | if (!modname) | 122 | if (!modname) |
117 | modname = delim = ""; | 123 | modname = delim = ""; |
118 | printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", | 124 | printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n", |
119 | address, delim, modname, delim, symname, offset, symsize); | 125 | address, reliab, delim, modname, delim, symname, offset, symsize); |
120 | } | ||
121 | #else | 126 | #else |
122 | void printk_address(unsigned long address) | ||
123 | { | ||
124 | printk(" [<%016lx>]\n", address); | 127 | printk(" [<%016lx>]\n", address); |
125 | } | ||
126 | #endif | 128 | #endif |
129 | } | ||
127 | 130 | ||
128 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 131 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
129 | unsigned *usedp, char **idp) | 132 | unsigned *usedp, char **idp) |
@@ -208,14 +211,53 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
208 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 211 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
209 | */ | 212 | */ |
210 | 213 | ||
211 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | 214 | static inline int valid_stack_ptr(struct thread_info *tinfo, |
215 | void *p, unsigned int size, void *end) | ||
216 | { | ||
217 | void *t = tinfo; | ||
218 | if (end) { | ||
219 | if (p < end && p >= (end-THREAD_SIZE)) | ||
220 | return 1; | ||
221 | else | ||
222 | return 0; | ||
223 | } | ||
224 | return p > t && p < t + THREAD_SIZE - size; | ||
225 | } | ||
226 | |||
227 | /* The form of the top of the frame on the stack */ | ||
228 | struct stack_frame { | ||
229 | struct stack_frame *next_frame; | ||
230 | unsigned long return_address; | ||
231 | }; | ||
232 | |||
233 | |||
234 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | ||
235 | unsigned long *stack, unsigned long bp, | ||
236 | const struct stacktrace_ops *ops, void *data, | ||
237 | unsigned long *end) | ||
212 | { | 238 | { |
213 | void *t = (void *)tinfo; | 239 | struct stack_frame *frame = (struct stack_frame *)bp; |
214 | return p > t && p < t + THREAD_SIZE - 3; | 240 | |
241 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
242 | unsigned long addr; | ||
243 | |||
244 | addr = *stack; | ||
245 | if (__kernel_text_address(addr)) { | ||
246 | if ((unsigned long) stack == bp + 8) { | ||
247 | ops->address(data, addr, 1); | ||
248 | frame = frame->next_frame; | ||
249 | bp = (unsigned long) frame; | ||
250 | } else { | ||
251 | ops->address(data, addr, bp == 0); | ||
252 | } | ||
253 | } | ||
254 | stack++; | ||
255 | } | ||
256 | return bp; | ||
215 | } | 257 | } |
216 | 258 | ||
217 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 259 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
218 | unsigned long *stack, | 260 | unsigned long *stack, unsigned long bp, |
219 | const struct stacktrace_ops *ops, void *data) | 261 | const struct stacktrace_ops *ops, void *data) |
220 | { | 262 | { |
221 | const unsigned cpu = get_cpu(); | 263 | const unsigned cpu = get_cpu(); |
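print_context_stack() treats a stack slot as a reliable return address only when it sits exactly at bp + 8, i.e. just above the saved %rbp, and then follows frame->next_frame into the caller's frame; every other kernel-text address found while scanning is reported with reliable == 0. A rough userspace sketch of the same walk, assuming x86-64 and a build with frame pointers (-fno-omit-frame-pointer), not actual kernel code:

#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_frame;	/* saved caller %rbp */
	unsigned long return_address;	/* saved return %rip, at %rbp + 8 */
};

static void __attribute__((noinline)) show_callers(void)
{
	struct stack_frame *frame;
	int i;

	asm("movq %%rbp, %0" : "=r" (frame));	/* current frame pointer */

	/* Walk a handful of frames; the kernel bounds this walk with
	 * valid_stack_ptr() against the thread stack instead. */
	for (i = 0; frame && i < 8; i++) {
		printf("caller %d: %#lx\n", i, frame->return_address);
		frame = frame->next_frame;
	}
}

static void __attribute__((noinline)) middle(void)
{
	show_callers();
}

int main(void)
{
	middle();
	return 0;
}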
@@ -225,36 +267,28 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
225 | 267 | ||
226 | if (!tsk) | 268 | if (!tsk) |
227 | tsk = current; | 269 | tsk = current; |
270 | tinfo = task_thread_info(tsk); | ||
228 | 271 | ||
229 | if (!stack) { | 272 | if (!stack) { |
230 | unsigned long dummy; | 273 | unsigned long dummy; |
231 | stack = &dummy; | 274 | stack = &dummy; |
232 | if (tsk && tsk != current) | 275 | if (tsk && tsk != current) |
233 | stack = (unsigned long *)tsk->thread.rsp; | 276 | stack = (unsigned long *)tsk->thread.sp; |
234 | } | 277 | } |
235 | 278 | ||
236 | /* | 279 | #ifdef CONFIG_FRAME_POINTER |
237 | * Print function call entries within a stack. 'cond' is the | 280 | if (!bp) { |
238 | * "end of stackframe" condition, that the 'stack++' | 281 | if (tsk == current) { |
239 | * iteration will eventually trigger. | 282 | /* Grab bp right from our regs */ |
240 | */ | 283 | asm("movq %%rbp, %0" : "=r" (bp):); |
241 | #define HANDLE_STACK(cond) \ | 284 | } else { |
242 | do while (cond) { \ | 285 | /* bp is the last reg pushed by switch_to */ |
243 | unsigned long addr = *stack++; \ | 286 | bp = *(unsigned long *) tsk->thread.sp; |
244 | /* Use unlocked access here because except for NMIs \ | 287 | } |
245 | we should be already protected against module unloads */ \ | 288 | } |
246 | if (__kernel_text_address(addr)) { \ | 289 | #endif |
247 | /* \ | 290 | |
248 | * If the address is either in the text segment of the \ | 291 | |
249 | * kernel, or in the region which contains vmalloc'ed \ | ||
250 | * memory, it *may* be the address of a calling \ | ||
251 | * routine; if so, print it so that someone tracing \ | ||
252 | * down the cause of the crash will be able to figure \ | ||
253 | * out the call path that was taken. \ | ||
254 | */ \ | ||
255 | ops->address(data, addr); \ | ||
256 | } \ | ||
257 | } while (0) | ||
258 | 292 | ||
259 | /* | 293 | /* |
260 | * Print function call entries in all stacks, starting at the | 294 | * Print function call entries in all stacks, starting at the |
@@ -270,7 +304,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
270 | if (estack_end) { | 304 | if (estack_end) { |
271 | if (ops->stack(data, id) < 0) | 305 | if (ops->stack(data, id) < 0) |
272 | break; | 306 | break; |
273 | HANDLE_STACK (stack < estack_end); | 307 | |
308 | bp = print_context_stack(tinfo, stack, bp, ops, | ||
309 | data, estack_end); | ||
274 | ops->stack(data, "<EOE>"); | 310 | ops->stack(data, "<EOE>"); |
275 | /* | 311 | /* |
276 | * We link to the next stack via the | 312 | * We link to the next stack via the |
@@ -288,7 +324,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
288 | if (stack >= irqstack && stack < irqstack_end) { | 324 | if (stack >= irqstack && stack < irqstack_end) { |
289 | if (ops->stack(data, "IRQ") < 0) | 325 | if (ops->stack(data, "IRQ") < 0) |
290 | break; | 326 | break; |
291 | HANDLE_STACK (stack < irqstack_end); | 327 | bp = print_context_stack(tinfo, stack, bp, |
328 | ops, data, irqstack_end); | ||
292 | /* | 329 | /* |
293 | * We link to the next stack (which would be | 330 | * We link to the next stack (which would be |
294 | * the process stack normally) the last | 331 | * the process stack normally) the last |
@@ -306,9 +343,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
306 | /* | 343 | /* |
307 | * This handles the process stack: | 344 | * This handles the process stack: |
308 | */ | 345 | */ |
309 | tinfo = task_thread_info(tsk); | 346 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); |
310 | HANDLE_STACK (valid_stack_ptr(tinfo, stack)); | ||
311 | #undef HANDLE_STACK | ||
312 | put_cpu(); | 347 | put_cpu(); |
313 | } | 348 | } |
314 | EXPORT_SYMBOL(dump_trace); | 349 | EXPORT_SYMBOL(dump_trace); |
@@ -331,10 +366,10 @@ static int print_trace_stack(void *data, char *name) | |||
331 | return 0; | 366 | return 0; |
332 | } | 367 | } |
333 | 368 | ||
334 | static void print_trace_address(void *data, unsigned long addr) | 369 | static void print_trace_address(void *data, unsigned long addr, int reliable) |
335 | { | 370 | { |
336 | touch_nmi_watchdog(); | 371 | touch_nmi_watchdog(); |
337 | printk_address(addr); | 372 | printk_address(addr, reliable); |
338 | } | 373 | } |
339 | 374 | ||
340 | static const struct stacktrace_ops print_trace_ops = { | 375 | static const struct stacktrace_ops print_trace_ops = { |
@@ -345,15 +380,17 @@ static const struct stacktrace_ops print_trace_ops = { | |||
345 | }; | 380 | }; |
346 | 381 | ||
347 | void | 382 | void |
348 | show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) | 383 | show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, |
384 | unsigned long bp) | ||
349 | { | 385 | { |
350 | printk("\nCall Trace:\n"); | 386 | printk("\nCall Trace:\n"); |
351 | dump_trace(tsk, regs, stack, &print_trace_ops, NULL); | 387 | dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); |
352 | printk("\n"); | 388 | printk("\n"); |
353 | } | 389 | } |
354 | 390 | ||
355 | static void | 391 | static void |
356 | _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) | 392 | _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, |
393 | unsigned long bp) | ||
357 | { | 394 | { |
358 | unsigned long *stack; | 395 | unsigned long *stack; |
359 | int i; | 396 | int i; |
@@ -364,14 +401,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) | |||
364 | // debugging aid: "show_stack(NULL, NULL);" prints the | 401 | // debugging aid: "show_stack(NULL, NULL);" prints the |
365 | // back trace for this cpu. | 402 | // back trace for this cpu. |
366 | 403 | ||
367 | if (rsp == NULL) { | 404 | if (sp == NULL) { |
368 | if (tsk) | 405 | if (tsk) |
369 | rsp = (unsigned long *)tsk->thread.rsp; | 406 | sp = (unsigned long *)tsk->thread.sp; |
370 | else | 407 | else |
371 | rsp = (unsigned long *)&rsp; | 408 | sp = (unsigned long *)&sp; |
372 | } | 409 | } |
373 | 410 | ||
374 | stack = rsp; | 411 | stack = sp; |
375 | for(i=0; i < kstack_depth_to_print; i++) { | 412 | for(i=0; i < kstack_depth_to_print; i++) { |
376 | if (stack >= irqstack && stack <= irqstack_end) { | 413 | if (stack >= irqstack && stack <= irqstack_end) { |
377 | if (stack == irqstack_end) { | 414 | if (stack == irqstack_end) { |
@@ -387,12 +424,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) | |||
387 | printk(" %016lx", *stack++); | 424 | printk(" %016lx", *stack++); |
388 | touch_nmi_watchdog(); | 425 | touch_nmi_watchdog(); |
389 | } | 426 | } |
390 | show_trace(tsk, regs, rsp); | 427 | show_trace(tsk, regs, sp, bp); |
391 | } | 428 | } |
392 | 429 | ||
393 | void show_stack(struct task_struct *tsk, unsigned long * rsp) | 430 | void show_stack(struct task_struct *tsk, unsigned long * sp) |
394 | { | 431 | { |
395 | _show_stack(tsk, NULL, rsp); | 432 | _show_stack(tsk, NULL, sp, 0); |
396 | } | 433 | } |
397 | 434 | ||
398 | /* | 435 | /* |
@@ -401,13 +438,19 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) | |||
401 | void dump_stack(void) | 438 | void dump_stack(void) |
402 | { | 439 | { |
403 | unsigned long dummy; | 440 | unsigned long dummy; |
441 | unsigned long bp = 0; | ||
442 | |||
443 | #ifdef CONFIG_FRAME_POINTER | ||
444 | if (!bp) | ||
445 | asm("movq %%rbp, %0" : "=r" (bp):); | ||
446 | #endif | ||
404 | 447 | ||
405 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 448 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
406 | current->pid, current->comm, print_tainted(), | 449 | current->pid, current->comm, print_tainted(), |
407 | init_utsname()->release, | 450 | init_utsname()->release, |
408 | (int)strcspn(init_utsname()->version, " "), | 451 | (int)strcspn(init_utsname()->version, " "), |
409 | init_utsname()->version); | 452 | init_utsname()->version); |
410 | show_trace(NULL, NULL, &dummy); | 453 | show_trace(NULL, NULL, &dummy, bp); |
411 | } | 454 | } |
412 | 455 | ||
413 | EXPORT_SYMBOL(dump_stack); | 456 | EXPORT_SYMBOL(dump_stack); |
@@ -415,12 +458,15 @@ EXPORT_SYMBOL(dump_stack); | |||
415 | void show_registers(struct pt_regs *regs) | 458 | void show_registers(struct pt_regs *regs) |
416 | { | 459 | { |
417 | int i; | 460 | int i; |
418 | int in_kernel = !user_mode(regs); | 461 | unsigned long sp; |
419 | unsigned long rsp; | ||
420 | const int cpu = smp_processor_id(); | 462 | const int cpu = smp_processor_id(); |
421 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 463 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
464 | u8 *ip; | ||
465 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
466 | unsigned int code_len = code_bytes; | ||
422 | 467 | ||
423 | rsp = regs->rsp; | 468 | sp = regs->sp; |
469 | ip = (u8 *) regs->ip - code_prologue; | ||
424 | printk("CPU %d ", cpu); | 470 | printk("CPU %d ", cpu); |
425 | __show_regs(regs); | 471 | __show_regs(regs); |
426 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 472 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
@@ -430,45 +476,43 @@ void show_registers(struct pt_regs *regs) | |||
430 | * When in-kernel, we also print out the stack and code at the | 476 | * When in-kernel, we also print out the stack and code at the |
431 | * time of the fault.. | 477 | * time of the fault.. |
432 | */ | 478 | */ |
433 | if (in_kernel) { | 479 | if (!user_mode(regs)) { |
480 | unsigned char c; | ||
434 | printk("Stack: "); | 481 | printk("Stack: "); |
435 | _show_stack(NULL, regs, (unsigned long*)rsp); | 482 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); |
436 | 483 | printk("\n"); | |
437 | printk("\nCode: "); | 484 | |
438 | if (regs->rip < PAGE_OFFSET) | 485 | printk(KERN_EMERG "Code: "); |
439 | goto bad; | 486 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
440 | 487 | /* try starting at RIP */ | |
441 | for (i=0; i<20; i++) { | 488 | ip = (u8 *) regs->ip; |
442 | unsigned char c; | 489 | code_len = code_len - code_prologue + 1; |
443 | if (__get_user(c, &((unsigned char*)regs->rip)[i])) { | 490 | } |
444 | bad: | 491 | for (i = 0; i < code_len; i++, ip++) { |
492 | if (ip < (u8 *)PAGE_OFFSET || | ||
493 | probe_kernel_address(ip, c)) { | ||
445 | printk(" Bad RIP value."); | 494 | printk(" Bad RIP value."); |
446 | break; | 495 | break; |
447 | } | 496 | } |
448 | printk("%02x ", c); | 497 | if (ip == (u8 *)regs->ip) |
498 | printk("<%02x> ", c); | ||
499 | else | ||
500 | printk("%02x ", c); | ||
449 | } | 501 | } |
450 | } | 502 | } |
451 | printk("\n"); | 503 | printk("\n"); |
452 | } | 504 | } |
453 | 505 | ||
454 | int is_valid_bugaddr(unsigned long rip) | 506 | int is_valid_bugaddr(unsigned long ip) |
455 | { | 507 | { |
456 | unsigned short ud2; | 508 | unsigned short ud2; |
457 | 509 | ||
458 | if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) | 510 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) |
459 | return 0; | 511 | return 0; |
460 | 512 | ||
461 | return ud2 == 0x0b0f; | 513 | return ud2 == 0x0b0f; |
462 | } | 514 | } |
463 | 515 | ||
464 | #ifdef CONFIG_BUG | ||
465 | void out_of_line_bug(void) | ||
466 | { | ||
467 | BUG(); | ||
468 | } | ||
469 | EXPORT_SYMBOL(out_of_line_bug); | ||
470 | #endif | ||
471 | |||
472 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | 516 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; |
473 | static int die_owner = -1; | 517 | static int die_owner = -1; |
474 | static unsigned int die_nest_count; | 518 | static unsigned int die_nest_count; |
@@ -496,7 +540,7 @@ unsigned __kprobes long oops_begin(void) | |||
496 | return flags; | 540 | return flags; |
497 | } | 541 | } |
498 | 542 | ||
499 | void __kprobes oops_end(unsigned long flags) | 543 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
500 | { | 544 | { |
501 | die_owner = -1; | 545 | die_owner = -1; |
502 | bust_spinlocks(0); | 546 | bust_spinlocks(0); |
@@ -505,12 +549,17 @@ void __kprobes oops_end(unsigned long flags) | |||
505 | /* Nest count reaches zero, release the lock. */ | 549 | /* Nest count reaches zero, release the lock. */ |
506 | __raw_spin_unlock(&die_lock); | 550 | __raw_spin_unlock(&die_lock); |
507 | raw_local_irq_restore(flags); | 551 | raw_local_irq_restore(flags); |
552 | if (!regs) { | ||
553 | oops_exit(); | ||
554 | return; | ||
555 | } | ||
508 | if (panic_on_oops) | 556 | if (panic_on_oops) |
509 | panic("Fatal exception"); | 557 | panic("Fatal exception"); |
510 | oops_exit(); | 558 | oops_exit(); |
559 | do_exit(signr); | ||
511 | } | 560 | } |
512 | 561 | ||
513 | void __kprobes __die(const char * str, struct pt_regs * regs, long err) | 562 | int __kprobes __die(const char * str, struct pt_regs * regs, long err) |
514 | { | 563 | { |
515 | static int die_counter; | 564 | static int die_counter; |
516 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); | 565 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); |
@@ -524,15 +573,17 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err) | |||
524 | printk("DEBUG_PAGEALLOC"); | 573 | printk("DEBUG_PAGEALLOC"); |
525 | #endif | 574 | #endif |
526 | printk("\n"); | 575 | printk("\n"); |
527 | notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); | 576 | if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) |
577 | return 1; | ||
528 | show_registers(regs); | 578 | show_registers(regs); |
529 | add_taint(TAINT_DIE); | 579 | add_taint(TAINT_DIE); |
530 | /* Executive summary in case the oops scrolled away */ | 580 | /* Executive summary in case the oops scrolled away */ |
531 | printk(KERN_ALERT "RIP "); | 581 | printk(KERN_ALERT "RIP "); |
532 | printk_address(regs->rip); | 582 | printk_address(regs->ip, 1); |
533 | printk(" RSP <%016lx>\n", regs->rsp); | 583 | printk(" RSP <%016lx>\n", regs->sp); |
534 | if (kexec_should_crash(current)) | 584 | if (kexec_should_crash(current)) |
535 | crash_kexec(regs); | 585 | crash_kexec(regs); |
586 | return 0; | ||
536 | } | 587 | } |
537 | 588 | ||
538 | void die(const char * str, struct pt_regs * regs, long err) | 589 | void die(const char * str, struct pt_regs * regs, long err) |
@@ -540,11 +591,11 @@ void die(const char * str, struct pt_regs * regs, long err) | |||
540 | unsigned long flags = oops_begin(); | 591 | unsigned long flags = oops_begin(); |
541 | 592 | ||
542 | if (!user_mode(regs)) | 593 | if (!user_mode(regs)) |
543 | report_bug(regs->rip, regs); | 594 | report_bug(regs->ip, regs); |
544 | 595 | ||
545 | __die(str, regs, err); | 596 | if (__die(str, regs, err)) |
546 | oops_end(flags); | 597 | regs = NULL; |
547 | do_exit(SIGSEGV); | 598 | oops_end(flags, regs, SIGSEGV); |
548 | } | 599 | } |
549 | 600 | ||
550 | void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | 601 | void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) |
@@ -561,10 +612,10 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | |||
561 | crash_kexec(regs); | 612 | crash_kexec(regs); |
562 | if (do_panic || panic_on_oops) | 613 | if (do_panic || panic_on_oops) |
563 | panic("Non maskable interrupt"); | 614 | panic("Non maskable interrupt"); |
564 | oops_end(flags); | 615 | oops_end(flags, NULL, SIGBUS); |
565 | nmi_exit(); | 616 | nmi_exit(); |
566 | local_irq_enable(); | 617 | local_irq_enable(); |
567 | do_exit(SIGSEGV); | 618 | do_exit(SIGBUS); |
568 | } | 619 | } |
569 | 620 | ||
570 | static void __kprobes do_trap(int trapnr, int signr, char *str, | 621 | static void __kprobes do_trap(int trapnr, int signr, char *str, |
@@ -588,11 +639,14 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
588 | tsk->thread.trap_no = trapnr; | 639 | tsk->thread.trap_no = trapnr; |
589 | 640 | ||
590 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | 641 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && |
591 | printk_ratelimit()) | 642 | printk_ratelimit()) { |
592 | printk(KERN_INFO | 643 | printk(KERN_INFO |
593 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | 644 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", |
594 | tsk->comm, tsk->pid, str, | 645 | tsk->comm, tsk->pid, str, |
595 | regs->rip, regs->rsp, error_code); | 646 | regs->ip, regs->sp, error_code); |
647 | print_vma_addr(" in ", regs->ip); | ||
648 | printk("\n"); | ||
649 | } | ||
596 | 650 | ||
597 | if (info) | 651 | if (info) |
598 | force_sig_info(signr, info, tsk); | 652 | force_sig_info(signr, info, tsk); |
@@ -602,19 +656,12 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
602 | } | 656 | } |
603 | 657 | ||
604 | 658 | ||
605 | /* kernel trap */ | 659 | if (!fixup_exception(regs)) { |
606 | { | 660 | tsk->thread.error_code = error_code; |
607 | const struct exception_table_entry *fixup; | 661 | tsk->thread.trap_no = trapnr; |
608 | fixup = search_exception_tables(regs->rip); | 662 | die(str, regs, error_code); |
609 | if (fixup) | ||
610 | regs->rip = fixup->fixup; | ||
611 | else { | ||
612 | tsk->thread.error_code = error_code; | ||
613 | tsk->thread.trap_no = trapnr; | ||
614 | die(str, regs, error_code); | ||
615 | } | ||
616 | return; | ||
617 | } | 663 | } |
664 | return; | ||
618 | } | 665 | } |
619 | 666 | ||
620 | #define DO_ERROR(trapnr, signr, str, name) \ | 667 | #define DO_ERROR(trapnr, signr, str, name) \ |
@@ -635,6 +682,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |||
635 | info.si_errno = 0; \ | 682 | info.si_errno = 0; \ |
636 | info.si_code = sicode; \ | 683 | info.si_code = sicode; \ |
637 | info.si_addr = (void __user *)siaddr; \ | 684 | info.si_addr = (void __user *)siaddr; \ |
685 | trace_hardirqs_fixup(); \ | ||
638 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 686 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
639 | == NOTIFY_STOP) \ | 687 | == NOTIFY_STOP) \ |
640 | return; \ | 688 | return; \ |
@@ -642,10 +690,10 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |||
642 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | 690 | do_trap(trapnr, signr, str, regs, error_code, &info); \ |
643 | } | 691 | } |
644 | 692 | ||
645 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) | 693 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
646 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | 694 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) |
647 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | 695 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) |
648 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip) | 696 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) |
649 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | 697 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) |
650 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 698 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
651 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 699 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
@@ -693,32 +741,28 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
693 | tsk->thread.trap_no = 13; | 741 | tsk->thread.trap_no = 13; |
694 | 742 | ||
695 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | 743 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && |
696 | printk_ratelimit()) | 744 | printk_ratelimit()) { |
697 | printk(KERN_INFO | 745 | printk(KERN_INFO |
698 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | 746 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", |
699 | tsk->comm, tsk->pid, | 747 | tsk->comm, tsk->pid, |
700 | regs->rip, regs->rsp, error_code); | 748 | regs->ip, regs->sp, error_code); |
749 | print_vma_addr(" in ", regs->ip); | ||
750 | printk("\n"); | ||
751 | } | ||
701 | 752 | ||
702 | force_sig(SIGSEGV, tsk); | 753 | force_sig(SIGSEGV, tsk); |
703 | return; | 754 | return; |
704 | } | 755 | } |
705 | 756 | ||
706 | /* kernel gp */ | 757 | if (fixup_exception(regs)) |
707 | { | 758 | return; |
708 | const struct exception_table_entry *fixup; | ||
709 | fixup = search_exception_tables(regs->rip); | ||
710 | if (fixup) { | ||
711 | regs->rip = fixup->fixup; | ||
712 | return; | ||
713 | } | ||
714 | 759 | ||
715 | tsk->thread.error_code = error_code; | 760 | tsk->thread.error_code = error_code; |
716 | tsk->thread.trap_no = 13; | 761 | tsk->thread.trap_no = 13; |
717 | if (notify_die(DIE_GPF, "general protection fault", regs, | 762 | if (notify_die(DIE_GPF, "general protection fault", regs, |
718 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 763 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
719 | return; | 764 | return; |
720 | die("general protection fault", regs, error_code); | 765 | die("general protection fault", regs, error_code); |
721 | } | ||
722 | } | 766 | } |
723 | 767 | ||
724 | static __kprobes void | 768 | static __kprobes void |
@@ -831,15 +875,15 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
831 | { | 875 | { |
832 | struct pt_regs *regs = eregs; | 876 | struct pt_regs *regs = eregs; |
833 | /* Did already sync */ | 877 | /* Did already sync */ |
834 | if (eregs == (struct pt_regs *)eregs->rsp) | 878 | if (eregs == (struct pt_regs *)eregs->sp) |
835 | ; | 879 | ; |
836 | /* Exception from user space */ | 880 | /* Exception from user space */ |
837 | else if (user_mode(eregs)) | 881 | else if (user_mode(eregs)) |
838 | regs = task_pt_regs(current); | 882 | regs = task_pt_regs(current); |
839 | /* Exception from kernel and interrupts are enabled. Move to | 883 | /* Exception from kernel and interrupts are enabled. Move to |
840 | kernel process stack. */ | 884 | kernel process stack. */ |
841 | else if (eregs->eflags & X86_EFLAGS_IF) | 885 | else if (eregs->flags & X86_EFLAGS_IF) |
842 | regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs)); | 886 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); |
843 | if (eregs != regs) | 887 | if (eregs != regs) |
844 | *regs = *eregs; | 888 | *regs = *eregs; |
845 | return regs; | 889 | return regs; |
@@ -857,6 +901,12 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
857 | 901 | ||
858 | get_debugreg(condition, 6); | 902 | get_debugreg(condition, 6); |
859 | 903 | ||
904 | /* | ||
905 | * The processor cleared BTF, so don't mark that we need it set. | ||
906 | */ | ||
907 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
908 | tsk->thread.debugctlmsr = 0; | ||
909 | |||
860 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 910 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
861 | SIGTRAP) == NOTIFY_STOP) | 911 | SIGTRAP) == NOTIFY_STOP) |
862 | return; | 912 | return; |
@@ -872,27 +922,14 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
872 | 922 | ||
873 | tsk->thread.debugreg6 = condition; | 923 | tsk->thread.debugreg6 = condition; |
874 | 924 | ||
875 | /* Mask out spurious TF errors due to lazy TF clearing */ | 925 | |
926 | /* | ||
927 | * Single-stepping through TF: make sure we ignore any events in | ||
928 | * kernel space (but re-enable TF when returning to user mode). | ||
929 | */ | ||
876 | if (condition & DR_STEP) { | 930 | if (condition & DR_STEP) { |
877 | /* | ||
878 | * The TF error should be masked out only if the current | ||
879 | * process is not traced and if the TRAP flag has been set | ||
880 | * previously by a tracing process (condition detected by | ||
881 | * the PT_DTRACE flag); remember that the i386 TRAP flag | ||
882 | * can be modified by the process itself in user mode, | ||
883 | * allowing programs to debug themselves without the ptrace() | ||
884 | * interface. | ||
885 | */ | ||
886 | if (!user_mode(regs)) | 931 | if (!user_mode(regs)) |
887 | goto clear_TF_reenable; | 932 | goto clear_TF_reenable; |
888 | /* | ||
889 | * Was the TF flag set by a debugger? If so, clear it now, | ||
890 | * so that register information is correct. | ||
891 | */ | ||
892 | if (tsk->ptrace & PT_DTRACE) { | ||
893 | regs->eflags &= ~TF_MASK; | ||
894 | tsk->ptrace &= ~PT_DTRACE; | ||
895 | } | ||
896 | } | 933 | } |
897 | 934 | ||
898 | /* Ok, finally something we can handle */ | 935 | /* Ok, finally something we can handle */ |
@@ -901,7 +938,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
901 | info.si_signo = SIGTRAP; | 938 | info.si_signo = SIGTRAP; |
902 | info.si_errno = 0; | 939 | info.si_errno = 0; |
903 | info.si_code = TRAP_BRKPT; | 940 | info.si_code = TRAP_BRKPT; |
904 | info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL; | 941 | info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; |
905 | force_sig_info(SIGTRAP, &info, tsk); | 942 | force_sig_info(SIGTRAP, &info, tsk); |
906 | 943 | ||
907 | clear_dr7: | 944 | clear_dr7: |
@@ -911,18 +948,15 @@ clear_dr7: | |||
911 | 948 | ||
912 | clear_TF_reenable: | 949 | clear_TF_reenable: |
913 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 950 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
914 | regs->eflags &= ~TF_MASK; | 951 | regs->flags &= ~X86_EFLAGS_TF; |
915 | preempt_conditional_cli(regs); | 952 | preempt_conditional_cli(regs); |
916 | } | 953 | } |
917 | 954 | ||
918 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | 955 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) |
919 | { | 956 | { |
920 | const struct exception_table_entry *fixup; | 957 | if (fixup_exception(regs)) |
921 | fixup = search_exception_tables(regs->rip); | ||
922 | if (fixup) { | ||
923 | regs->rip = fixup->fixup; | ||
924 | return 1; | 958 | return 1; |
925 | } | 959 | |
926 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | 960 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); |
927 | /* Illegal floating point operation in the kernel */ | 961 | /* Illegal floating point operation in the kernel */ |
928 | current->thread.trap_no = trapnr; | 962 | current->thread.trap_no = trapnr; |
@@ -937,7 +971,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | |||
937 | */ | 971 | */ |
938 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | 972 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) |
939 | { | 973 | { |
940 | void __user *rip = (void __user *)(regs->rip); | 974 | void __user *ip = (void __user *)(regs->ip); |
941 | struct task_struct * task; | 975 | struct task_struct * task; |
942 | siginfo_t info; | 976 | siginfo_t info; |
943 | unsigned short cwd, swd; | 977 | unsigned short cwd, swd; |
@@ -957,7 +991,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |||
957 | info.si_signo = SIGFPE; | 991 | info.si_signo = SIGFPE; |
958 | info.si_errno = 0; | 992 | info.si_errno = 0; |
959 | info.si_code = __SI_FAULT; | 993 | info.si_code = __SI_FAULT; |
960 | info.si_addr = rip; | 994 | info.si_addr = ip; |
961 | /* | 995 | /* |
962 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 996 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
963 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 997 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
@@ -1006,7 +1040,7 @@ asmlinkage void bad_intr(void) | |||
1006 | 1040 | ||
1007 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | 1041 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) |
1008 | { | 1042 | { |
1009 | void __user *rip = (void __user *)(regs->rip); | 1043 | void __user *ip = (void __user *)(regs->ip); |
1010 | struct task_struct * task; | 1044 | struct task_struct * task; |
1011 | siginfo_t info; | 1045 | siginfo_t info; |
1012 | unsigned short mxcsr; | 1046 | unsigned short mxcsr; |
@@ -1026,7 +1060,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
1026 | info.si_signo = SIGFPE; | 1060 | info.si_signo = SIGFPE; |
1027 | info.si_errno = 0; | 1061 | info.si_errno = 0; |
1028 | info.si_code = __SI_FAULT; | 1062 | info.si_code = __SI_FAULT; |
1029 | info.si_addr = rip; | 1063 | info.si_addr = ip; |
1030 | /* | 1064 | /* |
1031 | * The SIMD FPU exceptions are handled a little differently, as there | 1065 | * The SIMD FPU exceptions are handled a little differently, as there |
1032 | * is only a single status/control register. Thus, to determine which | 1066 | * is only a single status/control register. Thus, to determine which |
@@ -1088,6 +1122,7 @@ asmlinkage void math_state_restore(void) | |||
1088 | task_thread_info(me)->status |= TS_USEDFPU; | 1122 | task_thread_info(me)->status |= TS_USEDFPU; |
1089 | me->fpu_counter++; | 1123 | me->fpu_counter++; |
1090 | } | 1124 | } |
1125 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
1091 | 1126 | ||
1092 | void __init trap_init(void) | 1127 | void __init trap_init(void) |
1093 | { | 1128 | { |
@@ -1143,3 +1178,14 @@ static int __init kstack_setup(char *s) | |||
1143 | return 0; | 1178 | return 0; |
1144 | } | 1179 | } |
1145 | early_param("kstack", kstack_setup); | 1180 | early_param("kstack", kstack_setup); |
1181 | |||
1182 | |||
1183 | static int __init code_bytes_setup(char *s) | ||
1184 | { | ||
1185 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1186 | if (code_bytes > 8192) | ||
1187 | code_bytes = 8192; | ||
1188 | |||
1189 | return 1; | ||
1190 | } | ||
1191 | __setup("code_bytes=", code_bytes_setup); | ||
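The new code_bytes= parameter (clamped to 8 KB) controls how much machine code show_registers() dumps around the faulting instruction: code_prologue = code_bytes * 43 / 64 bytes come before regs->ip and the rest after, with the byte at regs->ip bracketed. A toy illustration of just the split and the bracketing, with made-up instruction bytes:

#include <stdio.h>

int main(void)
{
	unsigned int code_bytes = 64;				/* default */
	unsigned int code_prologue = code_bytes * 43 / 64;	/* 43 bytes before "ip" */
	unsigned char code[64];
	unsigned int i;

	for (i = 0; i < code_bytes; i++)
		code[i] = (unsigned char)i;	/* fake opcode bytes */

	printf("Code: ");
	for (i = 0; i < code_bytes; i++) {
		if (i == code_prologue)
			printf("<%02x> ", code[i]);	/* the byte at regs->ip */
		else
			printf("%02x ", code[i]);
	}
	printf("\n");
	return 0;
}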
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 9ebc0dab66b4..43517e324be8 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/jiffies.h> | 5 | #include <linux/jiffies.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/dmi.h> | 7 | #include <linux/dmi.h> |
8 | #include <linux/percpu.h> | ||
8 | 9 | ||
9 | #include <asm/delay.h> | 10 | #include <asm/delay.h> |
10 | #include <asm/tsc.h> | 11 | #include <asm/tsc.h> |
@@ -23,8 +24,6 @@ static int tsc_enabled; | |||
23 | unsigned int tsc_khz; | 24 | unsigned int tsc_khz; |
24 | EXPORT_SYMBOL_GPL(tsc_khz); | 25 | EXPORT_SYMBOL_GPL(tsc_khz); |
25 | 26 | ||
26 | int tsc_disable; | ||
27 | |||
28 | #ifdef CONFIG_X86_TSC | 27 | #ifdef CONFIG_X86_TSC |
29 | static int __init tsc_setup(char *str) | 28 | static int __init tsc_setup(char *str) |
30 | { | 29 | { |
@@ -39,8 +38,7 @@ static int __init tsc_setup(char *str) | |||
39 | */ | 38 | */ |
40 | static int __init tsc_setup(char *str) | 39 | static int __init tsc_setup(char *str) |
41 | { | 40 | { |
42 | tsc_disable = 1; | 41 | setup_clear_cpu_cap(X86_FEATURE_TSC); |
43 | |||
44 | return 1; | 42 | return 1; |
45 | } | 43 | } |
46 | #endif | 44 | #endif |
@@ -80,13 +78,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); | |||
80 | * | 78 | * |
81 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | 79 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" |
82 | */ | 80 | */ |
83 | unsigned long cyc2ns_scale __read_mostly; | ||
84 | 81 | ||
85 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 82 | DEFINE_PER_CPU(unsigned long, cyc2ns); |
86 | 83 | ||
87 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 84 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) |
88 | { | 85 | { |
89 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | 86 | unsigned long flags, prev_scale, *scale; |
87 | unsigned long long tsc_now, ns_now; | ||
88 | |||
89 | local_irq_save(flags); | ||
90 | sched_clock_idle_sleep_event(); | ||
91 | |||
92 | scale = &per_cpu(cyc2ns, cpu); | ||
93 | |||
94 | rdtscll(tsc_now); | ||
95 | ns_now = __cycles_2_ns(tsc_now); | ||
96 | |||
97 | prev_scale = *scale; | ||
98 | if (cpu_khz) | ||
99 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
100 | |||
101 | /* | ||
102 | * Start smoothly with the new frequency: | ||
103 | */ | ||
104 | sched_clock_idle_wakeup_event(0); | ||
105 | local_irq_restore(flags); | ||
90 | } | 106 | } |
91 | 107 | ||
92 | /* | 108 | /* |
@@ -239,7 +255,9 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
239 | ref_freq, freq->new); | 255 | ref_freq, freq->new); |
240 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | 256 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { |
241 | tsc_khz = cpu_khz; | 257 | tsc_khz = cpu_khz; |
242 | set_cyc2ns_scale(cpu_khz); | 258 | preempt_disable(); |
259 | set_cyc2ns_scale(cpu_khz, smp_processor_id()); | ||
260 | preempt_enable(); | ||
243 | /* | 261 | /* |
244 | * TSC based sched_clock turns | 262 | * TSC based sched_clock turns |
245 | * to junk w/ cpufreq | 263 | * to junk w/ cpufreq |
@@ -333,6 +351,11 @@ __cpuinit int unsynchronized_tsc(void) | |||
333 | { | 351 | { |
334 | if (!cpu_has_tsc || tsc_unstable) | 352 | if (!cpu_has_tsc || tsc_unstable) |
335 | return 1; | 353 | return 1; |
354 | |||
355 | /* Anything with constant TSC should be synchronized */ | ||
356 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
357 | return 0; | ||
358 | |||
336 | /* | 359 | /* |
337 | * Intel systems are normally all synchronized. | 360 | * Intel systems are normally all synchronized. |
338 | * Exceptions must mark TSC as unstable: | 361 | * Exceptions must mark TSC as unstable: |
@@ -367,7 +390,9 @@ static inline void check_geode_tsc_reliable(void) { } | |||
367 | 390 | ||
368 | void __init tsc_init(void) | 391 | void __init tsc_init(void) |
369 | { | 392 | { |
370 | if (!cpu_has_tsc || tsc_disable) | 393 | int cpu; |
394 | |||
395 | if (!cpu_has_tsc) | ||
371 | goto out_no_tsc; | 396 | goto out_no_tsc; |
372 | 397 | ||
373 | cpu_khz = calculate_cpu_khz(); | 398 | cpu_khz = calculate_cpu_khz(); |
@@ -380,7 +405,15 @@ void __init tsc_init(void) | |||
380 | (unsigned long)cpu_khz / 1000, | 405 | (unsigned long)cpu_khz / 1000, |
381 | (unsigned long)cpu_khz % 1000); | 406 | (unsigned long)cpu_khz % 1000); |
382 | 407 | ||
383 | set_cyc2ns_scale(cpu_khz); | 408 | /* |
409 | * Secondary CPUs do not run through tsc_init(), so set up | ||
410 | * all the scale factors for all CPUs, assuming the same | ||
411 | * speed as the bootup CPU. (cpufreq notifiers will fix this | ||
412 | * up if their speed diverges) | ||
413 | */ | ||
414 | for_each_possible_cpu(cpu) | ||
415 | set_cyc2ns_scale(cpu_khz, cpu); | ||
416 | |||
384 | use_tsc_delay(); | 417 | use_tsc_delay(); |
385 | 418 | ||
386 | /* Check and install the TSC clocksource */ | 419 | /* Check and install the TSC clocksource */ |
@@ -403,10 +436,5 @@ void __init tsc_init(void) | |||
403 | return; | 436 | return; |
404 | 437 | ||
405 | out_no_tsc: | 438 | out_no_tsc: |
406 | /* | 439 | setup_clear_cpu_cap(X86_FEATURE_TSC); |
407 | * Set the tsc_disable flag if there's no TSC support, this | ||
408 | * makes it a fast flag for the kernel to see whether it | ||
409 | * should be using the TSC. | ||
410 | */ | ||
411 | tsc_disable = 1; | ||
412 | } | 440 | } |
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9c70af45b42b..947554ddabb6 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <asm/hpet.h> | 11 | #include <asm/hpet.h> |
12 | #include <asm/timex.h> | 12 | #include <asm/timex.h> |
13 | #include <asm/timer.h> | ||
13 | 14 | ||
14 | static int notsc __initdata = 0; | 15 | static int notsc __initdata = 0; |
15 | 16 | ||
@@ -18,19 +19,51 @@ EXPORT_SYMBOL(cpu_khz); | |||
18 | unsigned int tsc_khz; | 19 | unsigned int tsc_khz; |
19 | EXPORT_SYMBOL(tsc_khz); | 20 | EXPORT_SYMBOL(tsc_khz); |
20 | 21 | ||
21 | static unsigned int cyc2ns_scale __read_mostly; | 22 | /* Accelerators for sched_clock() |
23 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
24 | * basic equation: | ||
25 | * ns = cycles / (freq / ns_per_sec) | ||
26 | * ns = cycles * (ns_per_sec / freq) | ||
27 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
28 | * ns = cycles * (10^6 / cpu_khz) | ||
29 | * | ||
30 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
31 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
32 | * ns = cycles * cyc2ns_scale / SC | ||
33 | * | ||
34 | * And since SC is a constant power of two, we can convert the div | ||
35 | * into a shift. | ||
36 | * | ||
37 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
38 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
39 | * (mathieu.desnoyers@polymtl.ca) | ||
40 | * | ||
41 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
42 | */ | ||
43 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
22 | 44 | ||
23 | static inline void set_cyc2ns_scale(unsigned long khz) | 45 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) |
24 | { | 46 | { |
25 | cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz; | 47 | unsigned long flags, prev_scale, *scale; |
26 | } | 48 | unsigned long long tsc_now, ns_now; |
27 | 49 | ||
28 | static unsigned long long cycles_2_ns(unsigned long long cyc) | 50 | local_irq_save(flags); |
29 | { | 51 | sched_clock_idle_sleep_event(); |
30 | return (cyc * cyc2ns_scale) >> NS_SCALE; | 52 | |
53 | scale = &per_cpu(cyc2ns, cpu); | ||
54 | |||
55 | rdtscll(tsc_now); | ||
56 | ns_now = __cycles_2_ns(tsc_now); | ||
57 | |||
58 | prev_scale = *scale; | ||
59 | if (cpu_khz) | ||
60 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
61 | |||
62 | sched_clock_idle_wakeup_event(0); | ||
63 | local_irq_restore(flags); | ||
31 | } | 64 | } |
32 | 65 | ||
33 | unsigned long long sched_clock(void) | 66 | unsigned long long native_sched_clock(void) |
34 | { | 67 | { |
35 | unsigned long a = 0; | 68 | unsigned long a = 0; |
36 | 69 | ||
@@ -44,12 +77,27 @@ unsigned long long sched_clock(void) | |||
44 | return cycles_2_ns(a); | 77 | return cycles_2_ns(a); |
45 | } | 78 | } |
46 | 79 | ||
80 | /* We need to define a real function for sched_clock, to override the | ||
81 | weak default version */ | ||
82 | #ifdef CONFIG_PARAVIRT | ||
83 | unsigned long long sched_clock(void) | ||
84 | { | ||
85 | return paravirt_sched_clock(); | ||
86 | } | ||
87 | #else | ||
88 | unsigned long long | ||
89 | sched_clock(void) __attribute__((alias("native_sched_clock"))); | ||
90 | #endif | ||
91 | |||
92 | |||
47 | static int tsc_unstable; | 93 | static int tsc_unstable; |
48 | 94 | ||
49 | inline int check_tsc_unstable(void) | 95 | int check_tsc_unstable(void) |
50 | { | 96 | { |
51 | return tsc_unstable; | 97 | return tsc_unstable; |
52 | } | 98 | } |
99 | EXPORT_SYMBOL_GPL(check_tsc_unstable); | ||
100 | |||
53 | #ifdef CONFIG_CPU_FREQ | 101 | #ifdef CONFIG_CPU_FREQ |
54 | 102 | ||
55 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency | 103 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency |
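The comment block above is the whole story behind set_cyc2ns_scale(): pre-divide 10^6 by cpu_khz with a 2^10 scale factor so the hot path is one multiply and one shift. A quick numeric check of that arithmetic (the 2 GHz cpu_khz value is only an example, not taken from the patch):

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR	10		/* 2^10, as in the kernel */
#define NSEC_PER_MSEC		1000000UL

int main(void)
{
	unsigned long cpu_khz = 2000000;	/* a 2 GHz CPU */
	unsigned long cyc2ns_scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
	unsigned long long cycles = 4000000000ULL;	/* 2 seconds worth of cycles */
	unsigned long long ns = (cycles * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;

	/* Expect scale = 512 and ns = 2000000000, i.e. 0.5 ns per cycle. */
	printf("scale=%lu ns=%llu\n", cyc2ns_scale, ns);
	return 0;
}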
@@ -100,7 +148,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
100 | mark_tsc_unstable("cpufreq changes"); | 148 | mark_tsc_unstable("cpufreq changes"); |
101 | } | 149 | } |
102 | 150 | ||
103 | set_cyc2ns_scale(tsc_khz_ref); | 151 | preempt_disable(); |
152 | set_cyc2ns_scale(tsc_khz_ref, smp_processor_id()); | ||
153 | preempt_enable(); | ||
104 | 154 | ||
105 | return 0; | 155 | return 0; |
106 | } | 156 | } |
@@ -133,12 +183,12 @@ static unsigned long __init tsc_read_refs(unsigned long *pm, | |||
133 | int i; | 183 | int i; |
134 | 184 | ||
135 | for (i = 0; i < MAX_RETRIES; i++) { | 185 | for (i = 0; i < MAX_RETRIES; i++) { |
136 | t1 = get_cycles_sync(); | 186 | t1 = get_cycles(); |
137 | if (hpet) | 187 | if (hpet) |
138 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 188 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; |
139 | else | 189 | else |
140 | *pm = acpi_pm_read_early(); | 190 | *pm = acpi_pm_read_early(); |
141 | t2 = get_cycles_sync(); | 191 | t2 = get_cycles(); |
142 | if ((t2 - t1) < SMI_TRESHOLD) | 192 | if ((t2 - t1) < SMI_TRESHOLD) |
143 | return t2; | 193 | return t2; |
144 | } | 194 | } |
@@ -151,7 +201,7 @@ static unsigned long __init tsc_read_refs(unsigned long *pm, | |||
151 | void __init tsc_calibrate(void) | 201 | void __init tsc_calibrate(void) |
152 | { | 202 | { |
153 | unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; | 203 | unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; |
154 | int hpet = is_hpet_enabled(); | 204 | int hpet = is_hpet_enabled(), cpu; |
155 | 205 | ||
156 | local_irq_save(flags); | 206 | local_irq_save(flags); |
157 | 207 | ||
@@ -162,9 +212,9 @@ void __init tsc_calibrate(void) | |||
162 | outb(0xb0, 0x43); | 212 | outb(0xb0, 0x43); |
163 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 213 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); |
164 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 214 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); |
165 | tr1 = get_cycles_sync(); | 215 | tr1 = get_cycles(); |
166 | while ((inb(0x61) & 0x20) == 0); | 216 | while ((inb(0x61) & 0x20) == 0); |
167 | tr2 = get_cycles_sync(); | 217 | tr2 = get_cycles(); |
168 | 218 | ||
169 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 219 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); |
170 | 220 | ||
@@ -206,7 +256,9 @@ void __init tsc_calibrate(void) | |||
206 | } | 256 | } |
207 | 257 | ||
208 | tsc_khz = tsc2 / tsc1; | 258 | tsc_khz = tsc2 / tsc1; |
209 | set_cyc2ns_scale(tsc_khz); | 259 | |
260 | for_each_possible_cpu(cpu) | ||
261 | set_cyc2ns_scale(tsc_khz, cpu); | ||
210 | } | 262 | } |
211 | 263 | ||
212 | /* | 264 | /* |
@@ -222,17 +274,9 @@ __cpuinit int unsynchronized_tsc(void) | |||
222 | if (apic_is_clustered_box()) | 274 | if (apic_is_clustered_box()) |
223 | return 1; | 275 | return 1; |
224 | #endif | 276 | #endif |
225 | /* Most intel systems have synchronized TSCs except for | 277 | |
226 | multi node systems */ | 278 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
227 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { | ||
228 | #ifdef CONFIG_ACPI | ||
229 | /* But TSC doesn't tick in C3 so don't use it there */ | ||
230 | if (acpi_gbl_FADT.header.length > 0 && | ||
231 | acpi_gbl_FADT.C3latency < 1000) | ||
232 | return 1; | ||
233 | #endif | ||
234 | return 0; | 279 | return 0; |
235 | } | ||
236 | 280 | ||
237 | /* Assume multi socket systems are not synchronized */ | 281 | /* Assume multi socket systems are not synchronized */ |
238 | return num_present_cpus() > 1; | 282 | return num_present_cpus() > 1; |
@@ -250,13 +294,13 @@ __setup("notsc", notsc_setup); | |||
250 | /* clock source code: */ | 294 | /* clock source code: */ |
251 | static cycle_t read_tsc(void) | 295 | static cycle_t read_tsc(void) |
252 | { | 296 | { |
253 | cycle_t ret = (cycle_t)get_cycles_sync(); | 297 | cycle_t ret = (cycle_t)get_cycles(); |
254 | return ret; | 298 | return ret; |
255 | } | 299 | } |
256 | 300 | ||
257 | static cycle_t __vsyscall_fn vread_tsc(void) | 301 | static cycle_t __vsyscall_fn vread_tsc(void) |
258 | { | 302 | { |
259 | cycle_t ret = (cycle_t)get_cycles_sync(); | 303 | cycle_t ret = (cycle_t)vget_cycles(); |
260 | return ret; | 304 | return ret; |
261 | } | 305 | } |
262 | 306 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9125efe66a06..0577825cf89b 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -46,7 +46,7 @@ static __cpuinit void check_tsc_warp(void) | |||
46 | cycles_t start, now, prev, end; | 46 | cycles_t start, now, prev, end; |
47 | int i; | 47 | int i; |
48 | 48 | ||
49 | start = get_cycles_sync(); | 49 | start = get_cycles(); |
50 | /* | 50 | /* |
51 | * The measurement runs for 20 msecs: | 51 | * The measurement runs for 20 msecs: |
52 | */ | 52 | */ |
@@ -61,18 +61,18 @@ static __cpuinit void check_tsc_warp(void) | |||
61 | */ | 61 | */ |
62 | __raw_spin_lock(&sync_lock); | 62 | __raw_spin_lock(&sync_lock); |
63 | prev = last_tsc; | 63 | prev = last_tsc; |
64 | now = get_cycles_sync(); | 64 | now = get_cycles(); |
65 | last_tsc = now; | 65 | last_tsc = now; |
66 | __raw_spin_unlock(&sync_lock); | 66 | __raw_spin_unlock(&sync_lock); |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Be nice every now and then (and also check whether | 69 | * Be nice every now and then (and also check whether |
70 | * measurement is done [we also insert a 100 million | 70 | * measurement is done [we also insert a 10 million |
71 | * loops safety exit, so we dont lock up in case the | 71 | * loops safety exit, so we dont lock up in case the |
72 | * TSC readout is totally broken]): | 72 | * TSC readout is totally broken]): |
73 | */ | 73 | */ |
74 | if (unlikely(!(i & 7))) { | 74 | if (unlikely(!(i & 7))) { |
75 | if (now > end || i > 100000000) | 75 | if (now > end || i > 10000000) |
76 | break; | 76 | break; |
77 | cpu_relax(); | 77 | cpu_relax(); |
78 | touch_nmi_watchdog(); | 78 | touch_nmi_watchdog(); |
@@ -87,7 +87,11 @@ static __cpuinit void check_tsc_warp(void) | |||
87 | nr_warps++; | 87 | nr_warps++; |
88 | __raw_spin_unlock(&sync_lock); | 88 | __raw_spin_unlock(&sync_lock); |
89 | } | 89 | } |
90 | 90 | } | |
91 | if (!(now-start)) { | ||
92 | printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", | ||
93 | now-start, end-start); | ||
94 | WARN_ON(1); | ||
91 | } | 95 | } |
92 | } | 96 | } |
93 | 97 | ||
@@ -129,24 +133,24 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
129 | while (atomic_read(&stop_count) != cpus-1) | 133 | while (atomic_read(&stop_count) != cpus-1) |
130 | cpu_relax(); | 134 | cpu_relax(); |
131 | 135 | ||
132 | /* | ||
133 | * Reset it - just in case we boot another CPU later: | ||
134 | */ | ||
135 | atomic_set(&start_count, 0); | ||
136 | |||
137 | if (nr_warps) { | 136 | if (nr_warps) { |
138 | printk("\n"); | 137 | printk("\n"); |
139 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," | 138 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," |
140 | " turning off TSC clock.\n", max_warp); | 139 | " turning off TSC clock.\n", max_warp); |
141 | mark_tsc_unstable("check_tsc_sync_source failed"); | 140 | mark_tsc_unstable("check_tsc_sync_source failed"); |
142 | nr_warps = 0; | ||
143 | max_warp = 0; | ||
144 | last_tsc = 0; | ||
145 | } else { | 141 | } else { |
146 | printk(" passed.\n"); | 142 | printk(" passed.\n"); |
147 | } | 143 | } |
148 | 144 | ||
149 | /* | 145 | /* |
146 | * Reset it - just in case we boot another CPU later: | ||
147 | */ | ||
148 | atomic_set(&start_count, 0); | ||
149 | nr_warps = 0; | ||
150 | max_warp = 0; | ||
151 | last_tsc = 0; | ||
152 | |||
153 | /* | ||
150 | * Let the target continue with the bootup: | 154 | * Let the target continue with the bootup: |
151 | */ | 155 | */ |
152 | atomic_inc(&stop_count); | 156 | atomic_inc(&stop_count); |
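The tsc_sync.c changes above tweak, but do not alter the shape of, the warp test: every participant publishes its TSC under a lock, and a warp is recorded whenever a later reader sees a smaller value than the previously published one. A hedged user-space analogue with two threads is sketched below (build with -pthread); unlike the kernel code it does not pin the threads to distinct CPUs, so it only demonstrates the bookkeeping, not a real cross-CPU measurement.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tsc;
static uint64_t max_warp;
static int nr_warps;

static void *warp_check(void *arg)
{
        (void)arg;
        for (int i = 0; i < 1000000; i++) {
                pthread_mutex_lock(&sync_lock);
                uint64_t prev = last_tsc;       /* last value anyone published */
                uint64_t now = __rdtsc();
                last_tsc = now;
                pthread_mutex_unlock(&sync_lock);

                if (prev > now) {               /* time appeared to go backwards */
                        pthread_mutex_lock(&sync_lock);
                        if (prev - now > max_warp)
                                max_warp = prev - now;
                        nr_warps++;
                        pthread_mutex_unlock(&sync_lock);
                }
        }
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, warp_check, NULL);
        pthread_create(&t2, NULL, warp_check, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);

        if (nr_warps)
                printf("Measured %llu cycles TSC warp (%d events)\n",
                       (unsigned long long)max_warp, nr_warps);
        else
                printf("passed\n");
        return 0;
}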
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 157e4bedd3c5..738c2104df30 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -70,10 +70,10 @@ | |||
70 | /* | 70 | /* |
71 | * 8- and 16-bit register defines.. | 71 | * 8- and 16-bit register defines.. |
72 | */ | 72 | */ |
73 | #define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0]) | 73 | #define AL(regs) (((unsigned char *)&((regs)->pt.ax))[0]) |
74 | #define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1]) | 74 | #define AH(regs) (((unsigned char *)&((regs)->pt.ax))[1]) |
75 | #define IP(regs) (*(unsigned short *)&((regs)->pt.eip)) | 75 | #define IP(regs) (*(unsigned short *)&((regs)->pt.ip)) |
76 | #define SP(regs) (*(unsigned short *)&((regs)->pt.esp)) | 76 | #define SP(regs) (*(unsigned short *)&((regs)->pt.sp)) |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * virtual flags (16 and 32-bit versions) | 79 | * virtual flags (16 and 32-bit versions) |
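The AL()/AH()/IP()/SP() macros renamed above alias sub-registers of a full register slot saved as a long, relying on x86's little-endian layout. A self-contained sketch of the idiom, where fake_regs is a made-up stand-in for pt_regs:

#include <stdio.h>

struct fake_regs { unsigned long ax, ip, sp; };

/* Low byte, high byte and low word views of the saved registers */
#define AL(r) (((unsigned char *)&(r)->ax)[0])
#define AH(r) (((unsigned char *)&(r)->ax)[1])
#define IP(r) (*(unsigned short *)&(r)->ip)

int main(void)
{
        struct fake_regs regs = { .ax = 0x1234, .ip = 0xdeadbeef, .sp = 0 };

        /* On little-endian x86 this prints AL=0x34 AH=0x12 IP=0xbeef */
        printf("AL=%#x AH=%#x IP=%#x\n", AL(&regs), AH(&regs), IP(&regs));
        return 0;
}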
@@ -93,12 +93,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, | |||
93 | { | 93 | { |
94 | int ret = 0; | 94 | int ret = 0; |
95 | 95 | ||
96 | /* kernel_vm86_regs is missing xgs, so copy everything up to | 96 | /* kernel_vm86_regs is missing gs, so copy everything up to |
97 | (but not including) orig_eax, and then rest including orig_eax. */ | 97 | (but not including) orig_eax, and then rest including orig_eax. */ |
98 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); | 98 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); |
99 | ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax, | 99 | ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, |
100 | sizeof(struct kernel_vm86_regs) - | 100 | sizeof(struct kernel_vm86_regs) - |
101 | offsetof(struct kernel_vm86_regs, pt.orig_eax)); | 101 | offsetof(struct kernel_vm86_regs, pt.orig_ax)); |
102 | 102 | ||
103 | return ret; | 103 | return ret; |
104 | } | 104 | } |
@@ -110,18 +110,17 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, | |||
110 | { | 110 | { |
111 | int ret = 0; | 111 | int ret = 0; |
112 | 112 | ||
113 | /* copy eax-xfs inclusive */ | 113 | /* copy ax-fs inclusive */ |
114 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); | 114 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); |
115 | /* copy orig_eax-__gsh+extra */ | 115 | /* copy orig_ax-__gsh+extra */ |
116 | ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax, | 116 | ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, |
117 | sizeof(struct kernel_vm86_regs) - | 117 | sizeof(struct kernel_vm86_regs) - |
118 | offsetof(struct kernel_vm86_regs, pt.orig_eax) + | 118 | offsetof(struct kernel_vm86_regs, pt.orig_ax) + |
119 | extra); | 119 | extra); |
120 | return ret; | 120 | return ret; |
121 | } | 121 | } |
122 | 122 | ||
123 | struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); | 123 | struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) |
124 | struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | ||
125 | { | 124 | { |
126 | struct tss_struct *tss; | 125 | struct tss_struct *tss; |
127 | struct pt_regs *ret; | 126 | struct pt_regs *ret; |
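The copy_vm86_regs_to_user()/copy_vm86_regs_from_user() hunks above keep the same split-copy idiom under the new field names: copy everything up to a pivot member, then the rest from the pivot onward, with offsetof() marking the boundary. A minimal sketch with an invented struct (the real kernel_vm86_regs has far more fields):

#include <stdio.h>
#include <string.h>
#include <stddef.h>

struct regs { long bx, cx, dx, orig_ax, ip, cs, flags; };

int main(void)
{
        struct regs src = { 1, 2, 3, 4, 5, 6, 7 }, dst = { 0 };
        size_t pivot = offsetof(struct regs, orig_ax);

        memcpy(&dst, &src, pivot);                            /* bx..dx */
        memcpy((char *)&dst + pivot, (char *)&src + pivot,    /* orig_ax..flags */
               sizeof(src) - pivot);

        printf("dst.orig_ax=%ld dst.flags=%ld\n", dst.orig_ax, dst.flags);
        return 0;
}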
@@ -138,7 +137,7 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | |||
138 | printk("no vm86_info: BAD\n"); | 137 | printk("no vm86_info: BAD\n"); |
139 | do_exit(SIGSEGV); | 138 | do_exit(SIGSEGV); |
140 | } | 139 | } |
141 | set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); | 140 | set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask); |
142 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); | 141 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); |
143 | tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); | 142 | tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); |
144 | if (tmp) { | 143 | if (tmp) { |
@@ -147,15 +146,15 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | |||
147 | } | 146 | } |
148 | 147 | ||
149 | tss = &per_cpu(init_tss, get_cpu()); | 148 | tss = &per_cpu(init_tss, get_cpu()); |
150 | current->thread.esp0 = current->thread.saved_esp0; | 149 | current->thread.sp0 = current->thread.saved_sp0; |
151 | current->thread.sysenter_cs = __KERNEL_CS; | 150 | current->thread.sysenter_cs = __KERNEL_CS; |
152 | load_esp0(tss, ¤t->thread); | 151 | load_sp0(tss, ¤t->thread); |
153 | current->thread.saved_esp0 = 0; | 152 | current->thread.saved_sp0 = 0; |
154 | put_cpu(); | 153 | put_cpu(); |
155 | 154 | ||
156 | ret = KVM86->regs32; | 155 | ret = KVM86->regs32; |
157 | 156 | ||
158 | ret->xfs = current->thread.saved_fs; | 157 | ret->fs = current->thread.saved_fs; |
159 | loadsegment(gs, current->thread.saved_gs); | 158 | loadsegment(gs, current->thread.saved_gs); |
160 | 159 | ||
161 | return ret; | 160 | return ret; |
@@ -197,7 +196,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
197 | 196 | ||
198 | asmlinkage int sys_vm86old(struct pt_regs regs) | 197 | asmlinkage int sys_vm86old(struct pt_regs regs) |
199 | { | 198 | { |
200 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; | 199 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; |
201 | struct kernel_vm86_struct info; /* declare this _on top_, | 200 | struct kernel_vm86_struct info; /* declare this _on top_, |
202 | * this avoids wasting of stack space. | 201 | * this avoids wasting of stack space. |
203 | * This remains on the stack until we | 202 | * This remains on the stack until we |
@@ -207,7 +206,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) | |||
207 | int tmp, ret = -EPERM; | 206 | int tmp, ret = -EPERM; |
208 | 207 | ||
209 | tsk = current; | 208 | tsk = current; |
210 | if (tsk->thread.saved_esp0) | 209 | if (tsk->thread.saved_sp0) |
211 | goto out; | 210 | goto out; |
212 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 211 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
213 | offsetof(struct kernel_vm86_struct, vm86plus) - | 212 | offsetof(struct kernel_vm86_struct, vm86plus) - |
@@ -237,12 +236,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
237 | struct vm86plus_struct __user *v86; | 236 | struct vm86plus_struct __user *v86; |
238 | 237 | ||
239 | tsk = current; | 238 | tsk = current; |
240 | switch (regs.ebx) { | 239 | switch (regs.bx) { |
241 | case VM86_REQUEST_IRQ: | 240 | case VM86_REQUEST_IRQ: |
242 | case VM86_FREE_IRQ: | 241 | case VM86_FREE_IRQ: |
243 | case VM86_GET_IRQ_BITS: | 242 | case VM86_GET_IRQ_BITS: |
244 | case VM86_GET_AND_RESET_IRQ: | 243 | case VM86_GET_AND_RESET_IRQ: |
245 | ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); | 244 | ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); |
246 | goto out; | 245 | goto out; |
247 | case VM86_PLUS_INSTALL_CHECK: | 246 | case VM86_PLUS_INSTALL_CHECK: |
248 | /* NOTE: on old vm86 stuff this will return the error | 247 | /* NOTE: on old vm86 stuff this will return the error |
@@ -256,9 +255,9 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
256 | 255 | ||
257 | /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ | 256 | /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ |
258 | ret = -EPERM; | 257 | ret = -EPERM; |
259 | if (tsk->thread.saved_esp0) | 258 | if (tsk->thread.saved_sp0) |
260 | goto out; | 259 | goto out; |
261 | v86 = (struct vm86plus_struct __user *)regs.ecx; | 260 | v86 = (struct vm86plus_struct __user *)regs.cx; |
262 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 261 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
263 | offsetof(struct kernel_vm86_struct, regs32) - | 262 | offsetof(struct kernel_vm86_struct, regs32) - |
264 | sizeof(info.regs)); | 263 | sizeof(info.regs)); |
@@ -281,23 +280,23 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
281 | /* | 280 | /* |
282 | * make sure the vm86() system call doesn't try to do anything silly | 281 | * make sure the vm86() system call doesn't try to do anything silly |
283 | */ | 282 | */ |
284 | info->regs.pt.xds = 0; | 283 | info->regs.pt.ds = 0; |
285 | info->regs.pt.xes = 0; | 284 | info->regs.pt.es = 0; |
286 | info->regs.pt.xfs = 0; | 285 | info->regs.pt.fs = 0; |
287 | 286 | ||
288 | /* we are clearing gs later just before "jmp resume_userspace", | 287 | /* we are clearing gs later just before "jmp resume_userspace", |
289 | * because it is not saved/restored. | 288 | * because it is not saved/restored. |
290 | */ | 289 | */ |
291 | 290 | ||
292 | /* | 291 | /* |
293 | * The eflags register is also special: we cannot trust that the user | 292 | * The flags register is also special: we cannot trust that the user |
294 | * has set it up safely, so this makes sure interrupt etc flags are | 293 | * has set it up safely, so this makes sure interrupt etc flags are |
295 | * inherited from protected mode. | 294 | * inherited from protected mode. |
296 | */ | 295 | */ |
297 | VEFLAGS = info->regs.pt.eflags; | 296 | VEFLAGS = info->regs.pt.flags; |
298 | info->regs.pt.eflags &= SAFE_MASK; | 297 | info->regs.pt.flags &= SAFE_MASK; |
299 | info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK; | 298 | info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; |
300 | info->regs.pt.eflags |= VM_MASK; | 299 | info->regs.pt.flags |= VM_MASK; |
301 | 300 | ||
302 | switch (info->cpu_type) { | 301 | switch (info->cpu_type) { |
303 | case CPU_286: | 302 | case CPU_286: |
@@ -315,18 +314,18 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
315 | } | 314 | } |
316 | 315 | ||
317 | /* | 316 | /* |
318 | * Save old state, set default return value (%eax) to 0 | 317 | * Save old state, set default return value (%ax) to 0 |
319 | */ | 318 | */ |
320 | info->regs32->eax = 0; | 319 | info->regs32->ax = 0; |
321 | tsk->thread.saved_esp0 = tsk->thread.esp0; | 320 | tsk->thread.saved_sp0 = tsk->thread.sp0; |
322 | tsk->thread.saved_fs = info->regs32->xfs; | 321 | tsk->thread.saved_fs = info->regs32->fs; |
323 | savesegment(gs, tsk->thread.saved_gs); | 322 | savesegment(gs, tsk->thread.saved_gs); |
324 | 323 | ||
325 | tss = &per_cpu(init_tss, get_cpu()); | 324 | tss = &per_cpu(init_tss, get_cpu()); |
326 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; | 325 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; |
327 | if (cpu_has_sep) | 326 | if (cpu_has_sep) |
328 | tsk->thread.sysenter_cs = 0; | 327 | tsk->thread.sysenter_cs = 0; |
329 | load_esp0(tss, &tsk->thread); | 328 | load_sp0(tss, &tsk->thread); |
330 | put_cpu(); | 329 | put_cpu(); |
331 | 330 | ||
332 | tsk->thread.screen_bitmap = info->screen_bitmap; | 331 | tsk->thread.screen_bitmap = info->screen_bitmap; |
@@ -352,7 +351,7 @@ static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) | |||
352 | struct pt_regs * regs32; | 351 | struct pt_regs * regs32; |
353 | 352 | ||
354 | regs32 = save_v86_state(regs16); | 353 | regs32 = save_v86_state(regs16); |
355 | regs32->eax = retval; | 354 | regs32->ax = retval; |
356 | __asm__ __volatile__("movl %0,%%esp\n\t" | 355 | __asm__ __volatile__("movl %0,%%esp\n\t" |
357 | "movl %1,%%ebp\n\t" | 356 | "movl %1,%%ebp\n\t" |
358 | "jmp resume_userspace" | 357 | "jmp resume_userspace" |
@@ -373,30 +372,30 @@ static inline void clear_IF(struct kernel_vm86_regs * regs) | |||
373 | 372 | ||
374 | static inline void clear_TF(struct kernel_vm86_regs * regs) | 373 | static inline void clear_TF(struct kernel_vm86_regs * regs) |
375 | { | 374 | { |
376 | regs->pt.eflags &= ~TF_MASK; | 375 | regs->pt.flags &= ~TF_MASK; |
377 | } | 376 | } |
378 | 377 | ||
379 | static inline void clear_AC(struct kernel_vm86_regs * regs) | 378 | static inline void clear_AC(struct kernel_vm86_regs * regs) |
380 | { | 379 | { |
381 | regs->pt.eflags &= ~AC_MASK; | 380 | regs->pt.flags &= ~AC_MASK; |
382 | } | 381 | } |
383 | 382 | ||
384 | /* It is correct to call set_IF(regs) from the set_vflags_* | 383 | /* It is correct to call set_IF(regs) from the set_vflags_* |
385 | * functions. However someone forgot to call clear_IF(regs) | 384 | * functions. However someone forgot to call clear_IF(regs) |
386 | * in the opposite case. | 385 | * in the opposite case. |
387 | * After the command sequence CLI PUSHF STI POPF you should | 386 | * After the command sequence CLI PUSHF STI POPF you should |
388 | * end up with interrups disabled, but you ended up with | 387 | * end up with interrupts disabled, but you ended up with |
389 | * interrupts enabled. | 388 | * interrupts enabled. |
390 | * ( I was testing my own changes, but the only bug I | 389 | * ( I was testing my own changes, but the only bug I |
391 | * could find was in a function I had not changed. ) | 390 | * could find was in a function I had not changed. ) |
392 | * [KD] | 391 | * [KD] |
393 | */ | 392 | */ |
394 | 393 | ||
395 | static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) | 394 | static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs * regs) |
396 | { | 395 | { |
397 | set_flags(VEFLAGS, eflags, current->thread.v86mask); | 396 | set_flags(VEFLAGS, flags, current->thread.v86mask); |
398 | set_flags(regs->pt.eflags, eflags, SAFE_MASK); | 397 | set_flags(regs->pt.flags, flags, SAFE_MASK); |
399 | if (eflags & IF_MASK) | 398 | if (flags & IF_MASK) |
400 | set_IF(regs); | 399 | set_IF(regs); |
401 | else | 400 | else |
402 | clear_IF(regs); | 401 | clear_IF(regs); |
@@ -405,7 +404,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs | |||
405 | static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) | 404 | static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) |
406 | { | 405 | { |
407 | set_flags(VFLAGS, flags, current->thread.v86mask); | 406 | set_flags(VFLAGS, flags, current->thread.v86mask); |
408 | set_flags(regs->pt.eflags, flags, SAFE_MASK); | 407 | set_flags(regs->pt.flags, flags, SAFE_MASK); |
409 | if (flags & IF_MASK) | 408 | if (flags & IF_MASK) |
410 | set_IF(regs); | 409 | set_IF(regs); |
411 | else | 410 | else |
@@ -414,7 +413,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg | |||
414 | 413 | ||
415 | static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) | 414 | static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) |
416 | { | 415 | { |
417 | unsigned long flags = regs->pt.eflags & RETURN_MASK; | 416 | unsigned long flags = regs->pt.flags & RETURN_MASK; |
418 | 417 | ||
419 | if (VEFLAGS & VIF_MASK) | 418 | if (VEFLAGS & VIF_MASK) |
420 | flags |= IF_MASK; | 419 | flags |= IF_MASK; |
@@ -518,7 +517,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, | |||
518 | unsigned long __user *intr_ptr; | 517 | unsigned long __user *intr_ptr; |
519 | unsigned long segoffs; | 518 | unsigned long segoffs; |
520 | 519 | ||
521 | if (regs->pt.xcs == BIOSSEG) | 520 | if (regs->pt.cs == BIOSSEG) |
522 | goto cannot_handle; | 521 | goto cannot_handle; |
523 | if (is_revectored(i, &KVM86->int_revectored)) | 522 | if (is_revectored(i, &KVM86->int_revectored)) |
524 | goto cannot_handle; | 523 | goto cannot_handle; |
@@ -530,9 +529,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i, | |||
530 | if ((segoffs >> 16) == BIOSSEG) | 529 | if ((segoffs >> 16) == BIOSSEG) |
531 | goto cannot_handle; | 530 | goto cannot_handle; |
532 | pushw(ssp, sp, get_vflags(regs), cannot_handle); | 531 | pushw(ssp, sp, get_vflags(regs), cannot_handle); |
533 | pushw(ssp, sp, regs->pt.xcs, cannot_handle); | 532 | pushw(ssp, sp, regs->pt.cs, cannot_handle); |
534 | pushw(ssp, sp, IP(regs), cannot_handle); | 533 | pushw(ssp, sp, IP(regs), cannot_handle); |
535 | regs->pt.xcs = segoffs >> 16; | 534 | regs->pt.cs = segoffs >> 16; |
536 | SP(regs) -= 6; | 535 | SP(regs) -= 6; |
537 | IP(regs) = segoffs & 0xffff; | 536 | IP(regs) = segoffs & 0xffff; |
538 | clear_TF(regs); | 537 | clear_TF(regs); |
@@ -549,7 +548,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno | |||
549 | if (VMPI.is_vm86pus) { | 548 | if (VMPI.is_vm86pus) { |
550 | if ( (trapno==3) || (trapno==1) ) | 549 | if ( (trapno==3) || (trapno==1) ) |
551 | return_to_32bit(regs, VM86_TRAP + (trapno << 8)); | 550 | return_to_32bit(regs, VM86_TRAP + (trapno << 8)); |
552 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs)); | 551 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); |
553 | return 0; | 552 | return 0; |
554 | } | 553 | } |
555 | if (trapno !=1) | 554 | if (trapno !=1) |
@@ -585,10 +584,10 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) | |||
585 | handle_vm86_trap(regs, 0, 1); \ | 584 | handle_vm86_trap(regs, 0, 1); \ |
586 | return; } while (0) | 585 | return; } while (0) |
587 | 586 | ||
588 | orig_flags = *(unsigned short *)®s->pt.eflags; | 587 | orig_flags = *(unsigned short *)®s->pt.flags; |
589 | 588 | ||
590 | csp = (unsigned char __user *) (regs->pt.xcs << 4); | 589 | csp = (unsigned char __user *) (regs->pt.cs << 4); |
591 | ssp = (unsigned char __user *) (regs->pt.xss << 4); | 590 | ssp = (unsigned char __user *) (regs->pt.ss << 4); |
592 | sp = SP(regs); | 591 | sp = SP(regs); |
593 | ip = IP(regs); | 592 | ip = IP(regs); |
594 | 593 | ||
@@ -675,7 +674,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) | |||
675 | SP(regs) += 6; | 674 | SP(regs) += 6; |
676 | } | 675 | } |
677 | IP(regs) = newip; | 676 | IP(regs) = newip; |
678 | regs->pt.xcs = newcs; | 677 | regs->pt.cs = newcs; |
679 | CHECK_IF_IN_TRAP; | 678 | CHECK_IF_IN_TRAP; |
680 | if (data32) { | 679 | if (data32) { |
681 | set_vflags_long(newflags, regs); | 680 | set_vflags_long(newflags, regs); |
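Throughout the vm86 fault and interrupt paths above, pointers such as (regs->pt.cs << 4) come from real-mode address arithmetic: a 16-bit segment and offset map to the linear address (seg << 4) + off. A tiny sketch of that arithmetic, with made-up example values:

#include <stdio.h>
#include <stdint.h>

static uint32_t linear(uint16_t seg, uint16_t off)
{
        return ((uint32_t)seg << 4) + off;
}

int main(void)
{
        /* Each interrupt vector table slot is 4 bytes: int 0x10 lives at 0x40 */
        printf("IVT slot for int 0x10: %#x\n", 0x10 << 2);
        /* The classic reset vector F000:FFF0 maps to linear 0xffff0 */
        printf("F000:FFF0 -> linear %#x\n", linear(0xf000, 0xfff0));
        return 0;
}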
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index f02bad68abaa..12affe1f9bce 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -62,7 +62,10 @@ static struct { | |||
62 | void (*cpuid)(void /* non-c */); | 62 | void (*cpuid)(void /* non-c */); |
63 | void (*_set_ldt)(u32 selector); | 63 | void (*_set_ldt)(u32 selector); |
64 | void (*set_tr)(u32 selector); | 64 | void (*set_tr)(u32 selector); |
65 | void (*set_kernel_stack)(u32 selector, u32 esp0); | 65 | void (*write_idt_entry)(struct desc_struct *, int, u32, u32); |
66 | void (*write_gdt_entry)(struct desc_struct *, int, u32, u32); | ||
67 | void (*write_ldt_entry)(struct desc_struct *, int, u32, u32); | ||
68 | void (*set_kernel_stack)(u32 selector, u32 sp0); | ||
66 | void (*allocate_page)(u32, u32, u32, u32, u32); | 69 | void (*allocate_page)(u32, u32, u32, u32, u32); |
67 | void (*release_page)(u32, u32); | 70 | void (*release_page)(u32, u32); |
68 | void (*set_pte)(pte_t, pte_t *, unsigned); | 71 | void (*set_pte)(pte_t, pte_t *, unsigned); |
@@ -88,13 +91,13 @@ struct vmi_timer_ops vmi_timer_ops; | |||
88 | #define IRQ_PATCH_DISABLE 5 | 91 | #define IRQ_PATCH_DISABLE 5 |
89 | 92 | ||
90 | static inline void patch_offset(void *insnbuf, | 93 | static inline void patch_offset(void *insnbuf, |
91 | unsigned long eip, unsigned long dest) | 94 | unsigned long ip, unsigned long dest) |
92 | { | 95 | { |
93 | *(unsigned long *)(insnbuf+1) = dest-eip-5; | 96 | *(unsigned long *)(insnbuf+1) = dest-ip-5; |
94 | } | 97 | } |
95 | 98 | ||
96 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, | 99 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, |
97 | unsigned long eip) | 100 | unsigned long ip) |
98 | { | 101 | { |
99 | u64 reloc; | 102 | u64 reloc; |
100 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | 103 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; |
@@ -103,13 +106,13 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf, | |||
103 | case VMI_RELOCATION_CALL_REL: | 106 | case VMI_RELOCATION_CALL_REL: |
104 | BUG_ON(len < 5); | 107 | BUG_ON(len < 5); |
105 | *(char *)insnbuf = MNEM_CALL; | 108 | *(char *)insnbuf = MNEM_CALL; |
106 | patch_offset(insnbuf, eip, (unsigned long)rel->eip); | 109 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); |
107 | return 5; | 110 | return 5; |
108 | 111 | ||
109 | case VMI_RELOCATION_JUMP_REL: | 112 | case VMI_RELOCATION_JUMP_REL: |
110 | BUG_ON(len < 5); | 113 | BUG_ON(len < 5); |
111 | *(char *)insnbuf = MNEM_JMP; | 114 | *(char *)insnbuf = MNEM_JMP; |
112 | patch_offset(insnbuf, eip, (unsigned long)rel->eip); | 115 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); |
113 | return 5; | 116 | return 5; |
114 | 117 | ||
115 | case VMI_RELOCATION_NOP: | 118 | case VMI_RELOCATION_NOP: |
@@ -131,25 +134,25 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf, | |||
131 | * sequence. The callee does nop padding for us. | 134 | * sequence. The callee does nop padding for us. |
132 | */ | 135 | */ |
133 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | 136 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, |
134 | unsigned long eip, unsigned len) | 137 | unsigned long ip, unsigned len) |
135 | { | 138 | { |
136 | switch (type) { | 139 | switch (type) { |
137 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): | 140 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): |
138 | return patch_internal(VMI_CALL_DisableInterrupts, len, | 141 | return patch_internal(VMI_CALL_DisableInterrupts, len, |
139 | insns, eip); | 142 | insns, ip); |
140 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): | 143 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): |
141 | return patch_internal(VMI_CALL_EnableInterrupts, len, | 144 | return patch_internal(VMI_CALL_EnableInterrupts, len, |
142 | insns, eip); | 145 | insns, ip); |
143 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): | 146 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): |
144 | return patch_internal(VMI_CALL_SetInterruptMask, len, | 147 | return patch_internal(VMI_CALL_SetInterruptMask, len, |
145 | insns, eip); | 148 | insns, ip); |
146 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): | 149 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): |
147 | return patch_internal(VMI_CALL_GetInterruptMask, len, | 150 | return patch_internal(VMI_CALL_GetInterruptMask, len, |
148 | insns, eip); | 151 | insns, ip); |
149 | case PARAVIRT_PATCH(pv_cpu_ops.iret): | 152 | case PARAVIRT_PATCH(pv_cpu_ops.iret): |
150 | return patch_internal(VMI_CALL_IRET, len, insns, eip); | 153 | return patch_internal(VMI_CALL_IRET, len, insns, ip); |
151 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): | 154 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret): |
152 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); | 155 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); |
153 | default: | 156 | default: |
154 | break; | 157 | break; |
155 | } | 158 | } |
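patch_offset() in the hunk above writes the displacement of a 5-byte x86 CALL/JMP rel32: the operand is target minus the instruction address minus 5, stored in bytes 1..4. A self-contained sketch of the same encoding, with made-up addresses:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define MNEM_CALL 0xe8

static void patch_call(unsigned char *insnbuf, unsigned long ip,
                       unsigned long dest)
{
        /* rel32 operand of CALL: destination relative to the next instruction */
        int32_t rel = (int32_t)(dest - ip - 5);

        insnbuf[0] = MNEM_CALL;
        memcpy(insnbuf + 1, &rel, sizeof(rel));
}

int main(void)
{
        unsigned char buf[5];

        patch_call(buf, 0x1000, 0x2000);        /* hypothetical addresses */
        /* prints: e8 fb 0f 00 00  (0x2000 - 0x1000 - 5 = 0xffb) */
        printf("e8 rel32 = %02x %02x %02x %02x %02x\n",
               buf[0], buf[1], buf[2], buf[3], buf[4]);
        return 0;
}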
@@ -157,36 +160,36 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | |||
157 | } | 160 | } |
158 | 161 | ||
159 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | 162 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ |
160 | static void vmi_cpuid(unsigned int *eax, unsigned int *ebx, | 163 | static void vmi_cpuid(unsigned int *ax, unsigned int *bx, |
161 | unsigned int *ecx, unsigned int *edx) | 164 | unsigned int *cx, unsigned int *dx) |
162 | { | 165 | { |
163 | int override = 0; | 166 | int override = 0; |
164 | if (*eax == 1) | 167 | if (*ax == 1) |
165 | override = 1; | 168 | override = 1; |
166 | asm volatile ("call *%6" | 169 | asm volatile ("call *%6" |
167 | : "=a" (*eax), | 170 | : "=a" (*ax), |
168 | "=b" (*ebx), | 171 | "=b" (*bx), |
169 | "=c" (*ecx), | 172 | "=c" (*cx), |
170 | "=d" (*edx) | 173 | "=d" (*dx) |
171 | : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid)); | 174 | : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid)); |
172 | if (override) { | 175 | if (override) { |
173 | if (disable_pse) | 176 | if (disable_pse) |
174 | *edx &= ~X86_FEATURE_PSE; | 177 | *dx &= ~X86_FEATURE_PSE; |
175 | if (disable_pge) | 178 | if (disable_pge) |
176 | *edx &= ~X86_FEATURE_PGE; | 179 | *dx &= ~X86_FEATURE_PGE; |
177 | if (disable_sep) | 180 | if (disable_sep) |
178 | *edx &= ~X86_FEATURE_SEP; | 181 | *dx &= ~X86_FEATURE_SEP; |
179 | if (disable_tsc) | 182 | if (disable_tsc) |
180 | *edx &= ~X86_FEATURE_TSC; | 183 | *dx &= ~X86_FEATURE_TSC; |
181 | if (disable_mtrr) | 184 | if (disable_mtrr) |
182 | *edx &= ~X86_FEATURE_MTRR; | 185 | *dx &= ~X86_FEATURE_MTRR; |
183 | } | 186 | } |
184 | } | 187 | } |
185 | 188 | ||
186 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | 189 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) |
187 | { | 190 | { |
188 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | 191 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) |
189 | write_gdt_entry(gdt, nr, new->a, new->b); | 192 | write_gdt_entry(gdt, nr, new, 0); |
190 | } | 193 | } |
191 | 194 | ||
192 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | 195 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) |
@@ -200,12 +203,12 @@ static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | |||
200 | static void vmi_set_ldt(const void *addr, unsigned entries) | 203 | static void vmi_set_ldt(const void *addr, unsigned entries) |
201 | { | 204 | { |
202 | unsigned cpu = smp_processor_id(); | 205 | unsigned cpu = smp_processor_id(); |
203 | u32 low, high; | 206 | struct desc_struct desc; |
204 | 207 | ||
205 | pack_descriptor(&low, &high, (unsigned long)addr, | 208 | pack_descriptor(&desc, (unsigned long)addr, |
206 | entries * sizeof(struct desc_struct) - 1, | 209 | entries * sizeof(struct desc_struct) - 1, |
207 | DESCTYPE_LDT, 0); | 210 | DESC_LDT, 0); |
208 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high); | 211 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT); |
209 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | 212 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); |
210 | } | 213 | } |
211 | 214 | ||
@@ -214,17 +217,37 @@ static void vmi_set_tr(void) | |||
214 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | 217 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); |
215 | } | 218 | } |
216 | 219 | ||
217 | static void vmi_load_esp0(struct tss_struct *tss, | 220 | static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) |
221 | { | ||
222 | u32 *idt_entry = (u32 *)g; | ||
223 | vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); | ||
224 | } | ||
225 | |||
226 | static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, | ||
227 | const void *desc, int type) | ||
228 | { | ||
229 | u32 *gdt_entry = (u32 *)desc; | ||
230 | vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); | ||
231 | } | ||
232 | |||
233 | static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | ||
234 | const void *desc) | ||
235 | { | ||
236 | u32 *ldt_entry = (u32 *)desc; | ||
237 | vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | ||
238 | } | ||
239 | |||
240 | static void vmi_load_sp0(struct tss_struct *tss, | ||
218 | struct thread_struct *thread) | 241 | struct thread_struct *thread) |
219 | { | 242 | { |
220 | tss->x86_tss.esp0 = thread->esp0; | 243 | tss->x86_tss.sp0 = thread->sp0; |
221 | 244 | ||
222 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | 245 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
223 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | 246 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
224 | tss->x86_tss.ss1 = thread->sysenter_cs; | 247 | tss->x86_tss.ss1 = thread->sysenter_cs; |
225 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | 248 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
226 | } | 249 | } |
227 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0); | 250 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0); |
228 | } | 251 | } |
229 | 252 | ||
230 | static void vmi_flush_tlb_user(void) | 253 | static void vmi_flush_tlb_user(void) |
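The new vmi_write_*_entry() wrappers above exist because the reworked paravirt hooks hand over a whole descriptor, while the VMI backend still wants the two 32-bit halves; the wrapper simply reinterprets the 8-byte entry as u32[2]. A hedged sketch with a simplified stand-in for desc_struct and a fake backend call:

#include <stdio.h>
#include <stdint.h>

struct desc_struct { uint32_t a, b; };

/* Pretend ROM call taking the legacy (lo, hi) word pair */
static void rom_write_entry(int entry, uint32_t lo, uint32_t hi)
{
        printf("entry %d: lo=%#x hi=%#x\n", entry, lo, hi);
}

/* New-style hook: gets a pointer to the whole descriptor */
static void write_entry(struct desc_struct *dt, int entry, const void *desc)
{
        const uint32_t *words = desc;

        (void)dt;
        rom_write_entry(entry, words[0], words[1]);
}

int main(void)
{
        struct desc_struct gdt[8] = { { 0, 0 } };
        /* Flat 4 GB code segment descriptor, just as sample data */
        struct desc_struct d = { .a = 0x0000ffff, .b = 0x00cf9a00 };

        write_entry(gdt, 2, &d);
        return 0;
}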
@@ -375,7 +398,7 @@ static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) | |||
375 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 398 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
376 | } | 399 | } |
377 | 400 | ||
378 | static void vmi_allocate_pd(u32 pfn) | 401 | static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn) |
379 | { | 402 | { |
380 | /* | 403 | /* |
381 | * This call comes in very early, before mem_map is setup. | 404 | * This call comes in very early, before mem_map is setup. |
@@ -452,7 +475,7 @@ static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep | |||
452 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 475 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
453 | { | 476 | { |
454 | #ifdef CONFIG_X86_PAE | 477 | #ifdef CONFIG_X86_PAE |
455 | const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 }; | 478 | const pte_t pte = { .pte = pmdval.pmd }; |
456 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | 479 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); |
457 | #else | 480 | #else |
458 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 481 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
@@ -485,21 +508,21 @@ static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t | |||
485 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | 508 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) |
486 | { | 509 | { |
487 | /* Um, eww */ | 510 | /* Um, eww */ |
488 | const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 }; | 511 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
489 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | 512 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); |
490 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 513 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
491 | } | 514 | } |
492 | 515 | ||
493 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 516 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
494 | { | 517 | { |
495 | const pte_t pte = { 0 }; | 518 | const pte_t pte = { .pte = 0 }; |
496 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 519 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
497 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 520 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
498 | } | 521 | } |
499 | 522 | ||
500 | static void vmi_pmd_clear(pmd_t *pmd) | 523 | static void vmi_pmd_clear(pmd_t *pmd) |
501 | { | 524 | { |
502 | const pte_t pte = { 0 }; | 525 | const pte_t pte = { .pte = 0 }; |
503 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | 526 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); |
504 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 527 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
505 | } | 528 | } |
@@ -790,10 +813,13 @@ static inline int __init activate_vmi(void) | |||
790 | para_fill(pv_cpu_ops.store_idt, GetIDT); | 813 | para_fill(pv_cpu_ops.store_idt, GetIDT); |
791 | para_fill(pv_cpu_ops.store_tr, GetTR); | 814 | para_fill(pv_cpu_ops.store_tr, GetTR); |
792 | pv_cpu_ops.load_tls = vmi_load_tls; | 815 | pv_cpu_ops.load_tls = vmi_load_tls; |
793 | para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry); | 816 | para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry, |
794 | para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry); | 817 | write_ldt_entry, WriteLDTEntry); |
795 | para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry); | 818 | para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry, |
796 | para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); | 819 | write_gdt_entry, WriteGDTEntry); |
820 | para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry, | ||
821 | write_idt_entry, WriteIDTEntry); | ||
822 | para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack); | ||
797 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | 823 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); |
798 | para_fill(pv_cpu_ops.io_delay, IODelay); | 824 | para_fill(pv_cpu_ops.io_delay, IODelay); |
799 | 825 | ||
@@ -870,7 +896,7 @@ static inline int __init activate_vmi(void) | |||
870 | * the backend. They are performance critical anyway, so requiring | 896 | * the backend. They are performance critical anyway, so requiring |
871 | * a patch is not a big problem. | 897 | * a patch is not a big problem. |
872 | */ | 898 | */ |
873 | pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; | 899 | pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0; |
874 | pv_cpu_ops.iret = (void *)0xbadbab0; | 900 | pv_cpu_ops.iret = (void *)0xbadbab0; |
875 | 901 | ||
876 | #ifdef CONFIG_SMP | 902 | #ifdef CONFIG_SMP |
@@ -963,19 +989,19 @@ static int __init parse_vmi(char *arg) | |||
963 | return -EINVAL; | 989 | return -EINVAL; |
964 | 990 | ||
965 | if (!strcmp(arg, "disable_pge")) { | 991 | if (!strcmp(arg, "disable_pge")) { |
966 | clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | 992 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); |
967 | disable_pge = 1; | 993 | disable_pge = 1; |
968 | } else if (!strcmp(arg, "disable_pse")) { | 994 | } else if (!strcmp(arg, "disable_pse")) { |
969 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | 995 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE); |
970 | disable_pse = 1; | 996 | disable_pse = 1; |
971 | } else if (!strcmp(arg, "disable_sep")) { | 997 | } else if (!strcmp(arg, "disable_sep")) { |
972 | clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); | 998 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); |
973 | disable_sep = 1; | 999 | disable_sep = 1; |
974 | } else if (!strcmp(arg, "disable_tsc")) { | 1000 | } else if (!strcmp(arg, "disable_tsc")) { |
975 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | 1001 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC); |
976 | disable_tsc = 1; | 1002 | disable_tsc = 1; |
977 | } else if (!strcmp(arg, "disable_mtrr")) { | 1003 | } else if (!strcmp(arg, "disable_mtrr")) { |
978 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); | 1004 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR); |
979 | disable_mtrr = 1; | 1005 | disable_mtrr = 1; |
980 | } else if (!strcmp(arg, "disable_timer")) { | 1006 | } else if (!strcmp(arg, "disable_timer")) { |
981 | disable_vmi_timer = 1; | 1007 | disable_vmi_timer = 1; |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index b1b5ab08b26e..a2b030780aa9 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <asm/i8253.h> | 35 | #include <asm/i8253.h> |
36 | 36 | ||
37 | #include <irq_vectors.h> | 37 | #include <irq_vectors.h> |
38 | #include "io_ports.h" | ||
39 | 38 | ||
40 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | 39 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) |
41 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | 40 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) |
@@ -238,7 +237,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
238 | void __init vmi_time_init(void) | 237 | void __init vmi_time_init(void) |
239 | { | 238 | { |
240 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ | 239 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ |
241 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | 240 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ |
242 | 241 | ||
243 | vmi_time_init_clockevent(); | 242 | vmi_time_init_clockevent(); |
244 | setup_irq(0, &vmi_clock_action); | 243 | setup_irq(0, &vmi_clock_action); |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 7d72cce00529..f1148ac8abe3 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -8,12 +8,6 @@ | |||
8 | * put it inside the section definition. | 8 | * put it inside the section definition. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* Don't define absolute symbols until and unless you know that symbol | ||
12 | * value is should remain constant even if kernel image is relocated | ||
13 | * at run time. Absolute symbols are not relocated. If symbol value should | ||
14 | * change if kernel is relocated, make the symbol section relative and | ||
15 | * put it inside the section definition. | ||
16 | */ | ||
17 | #define LOAD_OFFSET __PAGE_OFFSET | 11 | #define LOAD_OFFSET __PAGE_OFFSET |
18 | 12 | ||
19 | #include <asm-generic/vmlinux.lds.h> | 13 | #include <asm-generic/vmlinux.lds.h> |
@@ -44,6 +38,8 @@ SECTIONS | |||
44 | 38 | ||
45 | /* read-only */ | 39 | /* read-only */ |
46 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 40 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
41 | . = ALIGN(4096); /* not really needed, already page aligned */ | ||
42 | *(.text.page_aligned) | ||
47 | TEXT_TEXT | 43 | TEXT_TEXT |
48 | SCHED_TEXT | 44 | SCHED_TEXT |
49 | LOCK_TEXT | 45 | LOCK_TEXT |
@@ -131,10 +127,12 @@ SECTIONS | |||
131 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 127 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { |
132 | __init_begin = .; | 128 | __init_begin = .; |
133 | _sinittext = .; | 129 | _sinittext = .; |
134 | *(.init.text) | 130 | INIT_TEXT |
135 | _einittext = .; | 131 | _einittext = .; |
136 | } | 132 | } |
137 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } | 133 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { |
134 | INIT_DATA | ||
135 | } | ||
138 | . = ALIGN(16); | 136 | . = ALIGN(16); |
139 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | 137 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { |
140 | __setup_start = .; | 138 | __setup_start = .; |
@@ -169,8 +167,12 @@ SECTIONS | |||
169 | } | 167 | } |
170 | /* .exit.text is discard at runtime, not link time, to deal with references | 168 | /* .exit.text is discard at runtime, not link time, to deal with references |
171 | from .altinstructions and .eh_frame */ | 169 | from .altinstructions and .eh_frame */ |
172 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } | 170 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { |
173 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } | 171 | EXIT_TEXT |
172 | } | ||
173 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { | ||
174 | EXIT_DATA | ||
175 | } | ||
174 | #if defined(CONFIG_BLK_DEV_INITRD) | 176 | #if defined(CONFIG_BLK_DEV_INITRD) |
175 | . = ALIGN(4096); | 177 | . = ALIGN(4096); |
176 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { | 178 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index ba8ea97abd21..0992b9946c6f 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -37,16 +37,15 @@ SECTIONS | |||
37 | KPROBES_TEXT | 37 | KPROBES_TEXT |
38 | *(.fixup) | 38 | *(.fixup) |
39 | *(.gnu.warning) | 39 | *(.gnu.warning) |
40 | } :text = 0x9090 | 40 | _etext = .; /* End of text section */ |
41 | /* out-of-line lock text */ | 41 | } :text = 0x9090 |
42 | .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } | ||
43 | |||
44 | _etext = .; /* End of text section */ | ||
45 | 42 | ||
46 | . = ALIGN(16); /* Exception table */ | 43 | . = ALIGN(16); /* Exception table */ |
47 | __start___ex_table = .; | 44 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { |
48 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } | 45 | __start___ex_table = .; |
49 | __stop___ex_table = .; | 46 | *(__ex_table) |
47 | __stop___ex_table = .; | ||
48 | } | ||
50 | 49 | ||
51 | NOTES :text :note | 50 | NOTES :text :note |
52 | 51 | ||
@@ -155,12 +154,15 @@ SECTIONS | |||
155 | __init_begin = .; | 154 | __init_begin = .; |
156 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 155 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { |
157 | _sinittext = .; | 156 | _sinittext = .; |
158 | *(.init.text) | 157 | INIT_TEXT |
159 | _einittext = .; | 158 | _einittext = .; |
160 | } | 159 | } |
161 | __initdata_begin = .; | 160 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { |
162 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } | 161 | __initdata_begin = .; |
163 | __initdata_end = .; | 162 | INIT_DATA |
163 | __initdata_end = .; | ||
164 | } | ||
165 | |||
164 | . = ALIGN(16); | 166 | . = ALIGN(16); |
165 | __setup_start = .; | 167 | __setup_start = .; |
166 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } | 168 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } |
@@ -176,6 +178,14 @@ SECTIONS | |||
176 | } | 178 | } |
177 | __con_initcall_end = .; | 179 | __con_initcall_end = .; |
178 | SECURITY_INIT | 180 | SECURITY_INIT |
181 | |||
182 | . = ALIGN(8); | ||
183 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | ||
184 | __parainstructions = .; | ||
185 | *(.parainstructions) | ||
186 | __parainstructions_end = .; | ||
187 | } | ||
188 | |||
179 | . = ALIGN(8); | 189 | . = ALIGN(8); |
180 | __alt_instructions = .; | 190 | __alt_instructions = .; |
181 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | 191 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { |
@@ -187,8 +197,12 @@ SECTIONS | |||
187 | } | 197 | } |
188 | /* .exit.text is discard at runtime, not link time, to deal with references | 198 | /* .exit.text is discard at runtime, not link time, to deal with references |
189 | from .altinstructions and .eh_frame */ | 199 | from .altinstructions and .eh_frame */ |
190 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } | 200 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { |
191 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } | 201 | EXIT_TEXT |
202 | } | ||
203 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { | ||
204 | EXIT_DATA | ||
205 | } | ||
192 | 206 | ||
193 | /* vdso blob that is mapped into user space */ | 207 | /* vdso blob that is mapped into user space */ |
194 | vdso_start = . ; | 208 | vdso_start = . ; |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 414caf0c5f9a..d971210a6d36 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -25,21 +25,24 @@ static int __init vsmp_init(void) | |||
25 | return 0; | 25 | return 0; |
26 | 26 | ||
27 | /* Check if we are running on a ScaleMP vSMP box */ | 27 | /* Check if we are running on a ScaleMP vSMP box */ |
28 | if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || | 28 | if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != |
29 | (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) | 29 | PCI_VENDOR_ID_SCALEMP) || |
30 | (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != | ||
31 | PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) | ||
30 | return 0; | 32 | return 0; |
31 | 33 | ||
32 | /* set vSMP magic bits to indicate vSMP capable kernel */ | 34 | /* set vSMP magic bits to indicate vSMP capable kernel */ |
33 | address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); | 35 | address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); |
34 | cap = readl(address); | 36 | cap = readl(address); |
35 | ctl = readl(address + 4); | 37 | ctl = readl(address + 4); |
36 | printk("vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl); | 38 | printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", |
39 | cap, ctl); | ||
37 | if (cap & ctl & (1 << 4)) { | 40 | if (cap & ctl & (1 << 4)) { |
38 | /* Turn on vSMP IRQ fastpath handling (see system.h) */ | 41 | /* Turn on vSMP IRQ fastpath handling (see system.h) */ |
39 | ctl &= ~(1 << 4); | 42 | ctl &= ~(1 << 4); |
40 | writel(ctl, address + 4); | 43 | writel(ctl, address + 4); |
41 | ctl = readl(address + 4); | 44 | ctl = readl(address + 4); |
42 | printk("vSMP CTL: control set to:0x%08x\n", ctl); | 45 | printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); |
43 | } | 46 | } |
44 | 47 | ||
45 | iounmap(address); | 48 | iounmap(address); |
diff --git a/arch/x86/kernel/vsyscall-int80_32.S b/arch/x86/kernel/vsyscall-int80_32.S deleted file mode 100644 index 103cab6aa7c0..000000000000 --- a/arch/x86/kernel/vsyscall-int80_32.S +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | /* | ||
2 | * Code for the vsyscall page. This version uses the old int $0x80 method. | ||
3 | * | ||
4 | * NOTE: | ||
5 | * 1) __kernel_vsyscall _must_ be first in this page. | ||
6 | * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S | ||
7 | * for details. | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | .globl __kernel_vsyscall | ||
12 | .type __kernel_vsyscall,@function | ||
13 | __kernel_vsyscall: | ||
14 | .LSTART_vsyscall: | ||
15 | int $0x80 | ||
16 | ret | ||
17 | .LEND_vsyscall: | ||
18 | .size __kernel_vsyscall,.-.LSTART_vsyscall | ||
19 | .previous | ||
20 | |||
21 | .section .eh_frame,"a",@progbits | ||
22 | .LSTARTFRAMEDLSI: | ||
23 | .long .LENDCIEDLSI-.LSTARTCIEDLSI | ||
24 | .LSTARTCIEDLSI: | ||
25 | .long 0 /* CIE ID */ | ||
26 | .byte 1 /* Version number */ | ||
27 | .string "zR" /* NUL-terminated augmentation string */ | ||
28 | .uleb128 1 /* Code alignment factor */ | ||
29 | .sleb128 -4 /* Data alignment factor */ | ||
30 | .byte 8 /* Return address register column */ | ||
31 | .uleb128 1 /* Augmentation value length */ | ||
32 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
33 | .byte 0x0c /* DW_CFA_def_cfa */ | ||
34 | .uleb128 4 | ||
35 | .uleb128 4 | ||
36 | .byte 0x88 /* DW_CFA_offset, column 0x8 */ | ||
37 | .uleb128 1 | ||
38 | .align 4 | ||
39 | .LENDCIEDLSI: | ||
40 | .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ | ||
41 | .LSTARTFDEDLSI: | ||
42 | .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ | ||
43 | .long .LSTART_vsyscall-. /* PC-relative start address */ | ||
44 | .long .LEND_vsyscall-.LSTART_vsyscall | ||
45 | .uleb128 0 | ||
46 | .align 4 | ||
47 | .LENDFDEDLSI: | ||
48 | .previous | ||
49 | |||
50 | /* | ||
51 | * Get the common code for the sigreturn entry points. | ||
52 | */ | ||
53 | #include "vsyscall-sigreturn_32.S" | ||
diff --git a/arch/x86/kernel/vsyscall-note_32.S b/arch/x86/kernel/vsyscall-note_32.S deleted file mode 100644 index fcf376a37f79..000000000000 --- a/arch/x86/kernel/vsyscall-note_32.S +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | /* | ||
2 | * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. | ||
3 | * Here we can supply some information useful to userland. | ||
4 | */ | ||
5 | |||
6 | #include <linux/version.h> | ||
7 | #include <linux/elfnote.h> | ||
8 | |||
9 | /* Ideally this would use UTS_NAME, but using a quoted string here | ||
10 | doesn't work. Remember to change this when changing the | ||
11 | kernel's name. */ | ||
12 | ELFNOTE_START(Linux, 0, "a") | ||
13 | .long LINUX_VERSION_CODE | ||
14 | ELFNOTE_END | ||
15 | |||
16 | #ifdef CONFIG_XEN | ||
17 | /* | ||
18 | * Add a special note telling glibc's dynamic linker a fake hardware | ||
19 | * flavor that it will use to choose the search path for libraries in the | ||
20 | * same way it uses real hardware capabilities like "mmx". | ||
21 | * We supply "nosegneg" as the fake capability, to indicate that we | ||
22 | * do not like negative offsets in instructions using segment overrides, | ||
23 | * since we implement those inefficiently. This makes it possible to | ||
24 | * install libraries optimized to avoid those access patterns in someplace | ||
25 | * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file | ||
26 | * corresponding to the bits here is needed to make ldconfig work right. | ||
27 | * It should contain: | ||
28 | * hwcap 1 nosegneg | ||
29 | * to match the mapping of bit to name that we give here. | ||
30 | * | ||
31 | * At runtime, the fake hardware feature will be considered to be present | ||
32 | * if its bit is set in the mask word. So, we start with the mask 0, and | ||
33 | * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. | ||
34 | */ | ||
35 | |||
36 | #include "../../x86/xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ | ||
37 | |||
38 | .globl VDSO_NOTE_MASK | ||
39 | ELFNOTE_START(GNU, 2, "a") | ||
40 | .long 1 /* ncaps */ | ||
41 | VDSO_NOTE_MASK: | ||
42 | .long 0 /* mask */ | ||
43 | .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ | ||
44 | ELFNOTE_END | ||
45 | #endif | ||
diff --git a/arch/x86/kernel/vsyscall-sigreturn_32.S b/arch/x86/kernel/vsyscall-sigreturn_32.S deleted file mode 100644 index a92262f41659..000000000000 --- a/arch/x86/kernel/vsyscall-sigreturn_32.S +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * Common code for the sigreturn entry points on the vsyscall page. | ||
3 | * So far this code is the same for both int80 and sysenter versions. | ||
4 | * This file is #include'd by vsyscall-*.S to define them after the | ||
5 | * vsyscall entry point. The kernel assumes that the addresses of these | ||
6 | * routines are constant for all vsyscall implementations. | ||
7 | */ | ||
8 | |||
9 | #include <asm/unistd.h> | ||
10 | #include <asm/asm-offsets.h> | ||
11 | |||
12 | |||
13 | /* XXX | ||
14 | Should these be named "_sigtramp" or something? | ||
15 | */ | ||
16 | |||
17 | .text | ||
18 | .org __kernel_vsyscall+32,0x90 | ||
19 | .globl __kernel_sigreturn | ||
20 | .type __kernel_sigreturn,@function | ||
21 | __kernel_sigreturn: | ||
22 | .LSTART_sigreturn: | ||
23 | popl %eax /* XXX does this mean it needs unwind info? */ | ||
24 | movl $__NR_sigreturn, %eax | ||
25 | int $0x80 | ||
26 | .LEND_sigreturn: | ||
27 | .size __kernel_sigreturn,.-.LSTART_sigreturn | ||
28 | |||
29 | .balign 32 | ||
30 | .globl __kernel_rt_sigreturn | ||
31 | .type __kernel_rt_sigreturn,@function | ||
32 | __kernel_rt_sigreturn: | ||
33 | .LSTART_rt_sigreturn: | ||
34 | movl $__NR_rt_sigreturn, %eax | ||
35 | int $0x80 | ||
36 | .LEND_rt_sigreturn: | ||
37 | .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn | ||
38 | .balign 32 | ||
39 | .previous | ||
40 | |||
41 | .section .eh_frame,"a",@progbits | ||
42 | .LSTARTFRAMEDLSI1: | ||
43 | .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 | ||
44 | .LSTARTCIEDLSI1: | ||
45 | .long 0 /* CIE ID */ | ||
46 | .byte 1 /* Version number */ | ||
47 | .string "zRS" /* NUL-terminated augmentation string */ | ||
48 | .uleb128 1 /* Code alignment factor */ | ||
49 | .sleb128 -4 /* Data alignment factor */ | ||
50 | .byte 8 /* Return address register column */ | ||
51 | .uleb128 1 /* Augmentation value length */ | ||
52 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
53 | .byte 0 /* DW_CFA_nop */ | ||
54 | .align 4 | ||
55 | .LENDCIEDLSI1: | ||
56 | .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ | ||
57 | .LSTARTFDEDLSI1: | ||
58 | .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ | ||
59 | /* HACK: The dwarf2 unwind routines will subtract 1 from the | ||
60 | return address to get an address in the middle of the | ||
61 | presumed call instruction. Since we didn't get here via | ||
62 | a call, we need to include the nop before the real start | ||
63 | to make up for it. */ | ||
64 | .long .LSTART_sigreturn-1-. /* PC-relative start address */ | ||
65 | .long .LEND_sigreturn-.LSTART_sigreturn+1 | ||
66 | .uleb128 0 /* Augmentation */ | ||
67 | /* What follows are the instructions for the table generation. | ||
68 | We record the locations of each register saved. This is | ||
69 | complicated by the fact that the "CFA" is always assumed to | ||
70 | be the value of the stack pointer in the caller. This means | ||
71 | that we must define the CFA of this body of code to be the | ||
72 | saved value of the stack pointer in the sigcontext. Which | ||
73 | also means that there is no fixed relation to the other | ||
74 | saved registers, which means that we must use DW_CFA_expression | ||
75 | to compute their addresses. It also means that when we | ||
76 | adjust the stack with the popl, we have to do it all over again. */ | ||
77 | |||
78 | #define do_cfa_expr(offset) \ | ||
79 | .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ | ||
80 | .uleb128 1f-0f; /* length */ \ | ||
81 | 0: .byte 0x74; /* DW_OP_breg4 */ \ | ||
82 | .sleb128 offset; /* offset */ \ | ||
83 | .byte 0x06; /* DW_OP_deref */ \ | ||
84 | 1: | ||
85 | |||
86 | #define do_expr(regno, offset) \ | ||
87 | .byte 0x10; /* DW_CFA_expression */ \ | ||
88 | .uleb128 regno; /* regno */ \ | ||
89 | .uleb128 1f-0f; /* length */ \ | ||
90 | 0: .byte 0x74; /* DW_OP_breg4 */ \ | ||
91 | .sleb128 offset; /* offset */ \ | ||
92 | 1: | ||
93 | |||
94 | do_cfa_expr(SIGCONTEXT_esp+4) | ||
95 | do_expr(0, SIGCONTEXT_eax+4) | ||
96 | do_expr(1, SIGCONTEXT_ecx+4) | ||
97 | do_expr(2, SIGCONTEXT_edx+4) | ||
98 | do_expr(3, SIGCONTEXT_ebx+4) | ||
99 | do_expr(5, SIGCONTEXT_ebp+4) | ||
100 | do_expr(6, SIGCONTEXT_esi+4) | ||
101 | do_expr(7, SIGCONTEXT_edi+4) | ||
102 | do_expr(8, SIGCONTEXT_eip+4) | ||
103 | |||
104 | .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ | ||
105 | |||
106 | do_cfa_expr(SIGCONTEXT_esp) | ||
107 | do_expr(0, SIGCONTEXT_eax) | ||
108 | do_expr(1, SIGCONTEXT_ecx) | ||
109 | do_expr(2, SIGCONTEXT_edx) | ||
110 | do_expr(3, SIGCONTEXT_ebx) | ||
111 | do_expr(5, SIGCONTEXT_ebp) | ||
112 | do_expr(6, SIGCONTEXT_esi) | ||
113 | do_expr(7, SIGCONTEXT_edi) | ||
114 | do_expr(8, SIGCONTEXT_eip) | ||
115 | |||
116 | .align 4 | ||
117 | .LENDFDEDLSI1: | ||
118 | |||
119 | .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ | ||
120 | .LSTARTFDEDLSI2: | ||
121 | .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ | ||
122 | /* HACK: See above wrt unwind library assumptions. */ | ||
123 | .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ | ||
124 | .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 | ||
125 | .uleb128 0 /* Augmentation */ | ||
126 | /* What follows are the instructions for the table generation. | ||
127 | We record the locations of each register saved. This is | ||
128 | slightly less complicated than the above, since we don't | ||
129 | modify the stack pointer in the process. */ | ||
130 | |||
131 | do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp) | ||
132 | do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax) | ||
133 | do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx) | ||
134 | do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx) | ||
135 | do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx) | ||
136 | do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp) | ||
137 | do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi) | ||
138 | do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi) | ||
139 | do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip) | ||
140 | |||
141 | .align 4 | ||
142 | .LENDFDEDLSI2: | ||
143 | .previous | ||
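The do_cfa_expr()/do_expr() macros above hand-encode DWARF expressions of the form DW_OP_breg4 <offset>, optionally followed by DW_OP_deref: take the current %esp (DWARF register 4), add a signed offset into the sigcontext, and, for the CFA, load the saved stack pointer found there. The two groups of expressions, before and after the "popl" noted at DW_CFA_advance_loc 2, differ by 4 bytes because of the extra word the stub pops. The userspace sketch below only illustrates how an unwinder would evaluate such expressions; the SIGCONTEXT_* offsets are placeholders, not the real asm-offsets values.

/*
 * Illustrative sketch (not kernel code): evaluating the expressions
 * emitted by do_cfa_expr()/do_expr().  DW_OP_breg4 means "value of
 * register 4 (%esp) plus a signed offset"; do_cfa_expr() adds a
 * DW_OP_deref because the CFA is the saved stack pointer stored in
 * the sigcontext.  Offsets here are placeholders chosen to stay
 * aligned on the host.
 */
#include <stdio.h>
#include <stdint.h>

#define SIGCONTEXT_ESP  (7 * sizeof(uintptr_t))   /* placeholder offset */
#define SIGCONTEXT_EAX  (11 * sizeof(uintptr_t))  /* placeholder offset */

/* do_expr(regno, off): address of a saved register = %esp + off */
static uintptr_t saved_reg_addr(uintptr_t esp, intptr_t off)
{
        return esp + off;                       /* DW_OP_breg4 <off> */
}

/* do_cfa_expr(off): CFA = *(%esp + off), i.e. the saved stack pointer */
static uintptr_t cfa(uintptr_t esp, intptr_t off)
{
        return *(uintptr_t *)(esp + off);       /* DW_OP_breg4 <off>; DW_OP_deref */
}

int main(void)
{
        /* A fake stack area standing in for the signal frame. */
        uintptr_t frame[32] = { 0 };
        uintptr_t esp = (uintptr_t)frame;

        *(uintptr_t *)(esp + SIGCONTEXT_ESP) = 0xbfff0000;  /* "saved" %esp */
        *(uintptr_t *)(esp + SIGCONTEXT_EAX) = 42;          /* "saved" %eax */

        printf("CFA       = %#lx\n", (unsigned long)cfa(esp, SIGCONTEXT_ESP));
        printf("saved eax = %lu\n",
               (unsigned long)*(uintptr_t *)saved_reg_addr(esp, SIGCONTEXT_EAX));
        return 0;
}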
diff --git a/arch/x86/kernel/vsyscall-sysenter_32.S b/arch/x86/kernel/vsyscall-sysenter_32.S
deleted file mode 100644
index ed879bf42995..000000000000
--- a/arch/x86/kernel/vsyscall-sysenter_32.S
+++ /dev/null
@@ -1,122 +0,0 @@ | |||
1 | /* | ||
2 | * Code for the vsyscall page. This version uses the sysenter instruction. | ||
3 | * | ||
4 | * NOTE: | ||
5 | * 1) __kernel_vsyscall _must_ be first in this page. | ||
6 | * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S | ||
7 | * for details. | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | * The caller puts arg2 in %ecx, which gets pushed. The kernel will use | ||
12 | * %ecx itself for arg2. The pushing is because the sysexit instruction | ||
13 | * (found in entry.S) requires that we clobber %ecx with the desired %esp. | ||
14 | * User code might expect that %ecx is unclobbered though, as it would be | ||
15 | * for returning via the iret instruction, so we must push and pop. | ||
16 | * | ||
17 | * The caller puts arg3 in %edx, which the sysexit instruction requires | ||
18 | * for %eip. Thus, exactly as for arg2, we must push and pop. | ||
19 | * | ||
20 | * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter | ||
21 | * instruction clobbers %esp, the user's %esp won't even survive entry | ||
22 | * into the kernel. We store %esp in %ebp. Code in entry.S must fetch | ||
23 | * arg6 from the stack. | ||
24 | * | ||
25 | * You can not use this vsyscall for the clone() syscall because the | ||
26 | * three dwords on the parent stack do not get copied to the child. | ||
27 | */ | ||
28 | .text | ||
29 | .globl __kernel_vsyscall | ||
30 | .type __kernel_vsyscall,@function | ||
31 | __kernel_vsyscall: | ||
32 | .LSTART_vsyscall: | ||
33 | push %ecx | ||
34 | .Lpush_ecx: | ||
35 | push %edx | ||
36 | .Lpush_edx: | ||
37 | push %ebp | ||
38 | .Lenter_kernel: | ||
39 | movl %esp,%ebp | ||
40 | sysenter | ||
41 | |||
42 | /* 7: align return point with nop's to make disassembly easier */ | ||
43 | .space 7,0x90 | ||
44 | |||
45 | /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ | ||
46 | jmp .Lenter_kernel | ||
47 | /* 16: System call normal return point is here! */ | ||
48 | .globl SYSENTER_RETURN /* Symbol used by sysenter.c */ | ||
49 | SYSENTER_RETURN: | ||
50 | pop %ebp | ||
51 | .Lpop_ebp: | ||
52 | pop %edx | ||
53 | .Lpop_edx: | ||
54 | pop %ecx | ||
55 | .Lpop_ecx: | ||
56 | ret | ||
57 | .LEND_vsyscall: | ||
58 | .size __kernel_vsyscall,.-.LSTART_vsyscall | ||
59 | .previous | ||
60 | |||
61 | .section .eh_frame,"a",@progbits | ||
62 | .LSTARTFRAMEDLSI: | ||
63 | .long .LENDCIEDLSI-.LSTARTCIEDLSI | ||
64 | .LSTARTCIEDLSI: | ||
65 | .long 0 /* CIE ID */ | ||
66 | .byte 1 /* Version number */ | ||
67 | .string "zR" /* NUL-terminated augmentation string */ | ||
68 | .uleb128 1 /* Code alignment factor */ | ||
69 | .sleb128 -4 /* Data alignment factor */ | ||
70 | .byte 8 /* Return address register column */ | ||
71 | .uleb128 1 /* Augmentation value length */ | ||
72 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
73 | .byte 0x0c /* DW_CFA_def_cfa */ | ||
74 | .uleb128 4 | ||
75 | .uleb128 4 | ||
76 | .byte 0x88 /* DW_CFA_offset, column 0x8 */ | ||
77 | .uleb128 1 | ||
78 | .align 4 | ||
79 | .LENDCIEDLSI: | ||
80 | .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ | ||
81 | .LSTARTFDEDLSI: | ||
82 | .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ | ||
83 | .long .LSTART_vsyscall-. /* PC-relative start address */ | ||
84 | .long .LEND_vsyscall-.LSTART_vsyscall | ||
85 | .uleb128 0 | ||
86 | /* What follows are the instructions for the table generation. | ||
87 | We have to record all changes of the stack pointer. */ | ||
88 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
89 | .long .Lpush_ecx-.LSTART_vsyscall | ||
90 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
91 | .byte 0x08 /* RA at offset 8 now */ | ||
92 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
93 | .long .Lpush_edx-.Lpush_ecx | ||
94 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
95 | .byte 0x0c /* RA at offset 12 now */ | ||
96 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
97 | .long .Lenter_kernel-.Lpush_edx | ||
98 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
99 | .byte 0x10 /* RA at offset 16 now */ | ||
100 | .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ | ||
101 | /* Finally the epilogue. */ | ||
102 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
103 | .long .Lpop_ebp-.Lenter_kernel | ||
104 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
105 | .byte 0x0c /* RA at offset 12 now */ | ||
106 | .byte 0xc5 /* DW_CFA_restore %ebp */ | ||
107 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
108 | .long .Lpop_edx-.Lpop_ebp | ||
109 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
110 | .byte 0x08 /* RA at offset 8 now */ | ||
111 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
112 | .long .Lpop_ecx-.Lpop_edx | ||
113 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
114 | .byte 0x04 /* RA at offset 4 now */ | ||
115 | .align 4 | ||
116 | .LENDFDEDLSI: | ||
117 | .previous | ||
118 | |||
119 | /* | ||
120 | * Get the common code for the sigreturn entry points. | ||
121 | */ | ||
122 | #include "vsyscall-sigreturn_32.S" | ||
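The CIE/FDE records in this file and in vsyscall-sigreturn_32.S are built by hand, so the alignment factors, augmentation lengths, register numbers and offsets emitted with .uleb128/.sleb128 all use DWARF's variable-length LEB128 encoding. A minimal, self-contained sketch of the two encoders follows; it is purely illustrative and not part of this patch.

/*
 * Sketch of DWARF LEB128 encoding, the format produced by the
 * .uleb128/.sleb128 directives in the hand-written CIE/FDE above.
 * Userspace illustration only.
 */
#include <stdio.h>
#include <stdint.h>

static int encode_uleb128(uint64_t v, unsigned char *out)
{
        int n = 0;
        do {
                unsigned char byte = v & 0x7f;
                v >>= 7;
                if (v)
                        byte |= 0x80;           /* more bytes follow */
                out[n++] = byte;
        } while (v);
        return n;
}

static int encode_sleb128(int64_t v, unsigned char *out)
{
        int n = 0, more = 1;
        while (more) {
                unsigned char byte = v & 0x7f;
                v >>= 7;                        /* arithmetic shift keeps the sign */
                /* done once the remaining bits all match the sign bit */
                if ((v == 0 && !(byte & 0x40)) || (v == -1 && (byte & 0x40)))
                        more = 0;
                else
                        byte |= 0x80;
                out[n++] = byte;
        }
        return n;
}

int main(void)
{
        unsigned char buf[10];
        int i, n;

        n = encode_uleb128(1, buf);             /* code alignment factor */
        printf("uleb128(1): ");
        for (i = 0; i < n; i++)
                printf("%02x ", buf[i]);
        printf("\n");

        n = encode_sleb128(-4, buf);            /* data alignment factor */
        printf("sleb128(-4): ");
        for (i = 0; i < n; i++)
                printf("%02x ", buf[i]);
        printf("\n");
        return 0;
}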
diff --git a/arch/x86/kernel/vsyscall_32.S b/arch/x86/kernel/vsyscall_32.S
deleted file mode 100644
index a5ab3dc4fd25..000000000000
--- a/arch/x86/kernel/vsyscall_32.S
+++ /dev/null
@@ -1,15 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | |||
3 | __INITDATA | ||
4 | |||
5 | .globl vsyscall_int80_start, vsyscall_int80_end | ||
6 | vsyscall_int80_start: | ||
7 | .incbin "arch/x86/kernel/vsyscall-int80_32.so" | ||
8 | vsyscall_int80_end: | ||
9 | |||
10 | .globl vsyscall_sysenter_start, vsyscall_sysenter_end | ||
11 | vsyscall_sysenter_start: | ||
12 | .incbin "arch/x86/kernel/vsyscall-sysenter_32.so" | ||
13 | vsyscall_sysenter_end: | ||
14 | |||
15 | __FINIT | ||
diff --git a/arch/x86/kernel/vsyscall_32.lds.S b/arch/x86/kernel/vsyscall_32.lds.S
deleted file mode 100644
index 4a8b0ed9b8fb..000000000000
--- a/arch/x86/kernel/vsyscall_32.lds.S
+++ /dev/null
@@ -1,67 +0,0 @@ | |||
1 | /* | ||
2 | * Linker script for vsyscall DSO. The vsyscall page is an ELF shared | ||
3 | * object prelinked to its virtual address, and with only one read-only | ||
4 | * segment (that fits in one page). This script controls its layout. | ||
5 | */ | ||
6 | #include <asm/asm-offsets.h> | ||
7 | |||
8 | SECTIONS | ||
9 | { | ||
10 | . = VDSO_PRELINK_asm + SIZEOF_HEADERS; | ||
11 | |||
12 | .hash : { *(.hash) } :text | ||
13 | .gnu.hash : { *(.gnu.hash) } | ||
14 | .dynsym : { *(.dynsym) } | ||
15 | .dynstr : { *(.dynstr) } | ||
16 | .gnu.version : { *(.gnu.version) } | ||
17 | .gnu.version_d : { *(.gnu.version_d) } | ||
18 | .gnu.version_r : { *(.gnu.version_r) } | ||
19 | |||
20 | /* This linker script is used both with -r and with -shared. | ||
21 | For the layouts to match, we need to skip more than enough | ||
22 | space for the dynamic symbol table et al. If this amount | ||
23 | is insufficient, ld -shared will barf. Just increase it here. */ | ||
24 | . = VDSO_PRELINK_asm + 0x400; | ||
25 | |||
26 | .text : { *(.text) } :text =0x90909090 | ||
27 | .note : { *(.note.*) } :text :note | ||
28 | .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr | ||
29 | .eh_frame : { KEEP (*(.eh_frame)) } :text | ||
30 | .dynamic : { *(.dynamic) } :text :dynamic | ||
31 | .useless : { | ||
32 | *(.got.plt) *(.got) | ||
33 | *(.data .data.* .gnu.linkonce.d.*) | ||
34 | *(.dynbss) | ||
35 | *(.bss .bss.* .gnu.linkonce.b.*) | ||
36 | } :text | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * We must supply the ELF program headers explicitly to get just one | ||
41 | * PT_LOAD segment, and set the flags explicitly to make segments read-only. | ||
42 | */ | ||
43 | PHDRS | ||
44 | { | ||
45 | text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ | ||
46 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | ||
47 | note PT_NOTE FLAGS(4); /* PF_R */ | ||
48 | eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * This controls what symbols we export from the DSO. | ||
53 | */ | ||
54 | VERSION | ||
55 | { | ||
56 | LINUX_2.5 { | ||
57 | global: | ||
58 | __kernel_vsyscall; | ||
59 | __kernel_sigreturn; | ||
60 | __kernel_rt_sigreturn; | ||
61 | |||
62 | local: *; | ||
63 | }; | ||
64 | } | ||
65 | |||
66 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ | ||
67 | ENTRY(__kernel_vsyscall); | ||
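As the closing comment notes, the DSO's ELF entry point (__kernel_vsyscall) is what the kernel advertises to user space in the AT_SYSINFO auxiliary-vector entry, alongside AT_SYSINFO_EHDR for the base of the mapped image. A small userspace sketch, assuming a glibc that provides getauxval(), shows how a process can read both values; AT_SYSINFO is only passed to 32-bit x86 processes, so the fallback define is an assumption for illustration.

/*
 * Userspace sketch: reading the vsyscall/vDSO values from the
 * auxiliary vector.  getauxval() is a glibc interface; AT_SYSINFO may
 * be absent from 64-bit headers, hence the guarded define.
 */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_SYSINFO
#define AT_SYSINFO 32           /* x86 value */
#endif

int main(void)
{
        unsigned long entry = getauxval(AT_SYSINFO);       /* __kernel_vsyscall */
        unsigned long ehdr  = getauxval(AT_SYSINFO_EHDR);  /* base of the DSO image */

        printf("AT_SYSINFO      = %#lx\n", entry);
        printf("AT_SYSINFO_EHDR = %#lx\n", ehdr);
        return 0;
}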
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ad4005c6d4a1..3f8242774580 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -43,7 +43,7 @@ | |||
43 | #include <asm/vgtod.h> | 43 | #include <asm/vgtod.h> |
44 | 44 | ||
45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
46 | #define __syscall_clobber "r11","rcx","memory" | 46 | #define __syscall_clobber "r11","cx","memory" |
47 | #define __pa_vsymbol(x) \ | 47 | #define __pa_vsymbol(x) \ |
48 | ({unsigned long v; \ | 48 | ({unsigned long v; \ |
49 | extern char __vsyscall_0; \ | 49 | extern char __vsyscall_0; \ |
@@ -190,7 +190,7 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
190 | long __vsyscall(2) | 190 | long __vsyscall(2) |
191 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | 191 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) |
192 | { | 192 | { |
193 | unsigned int dummy, p; | 193 | unsigned int p; |
194 | unsigned long j = 0; | 194 | unsigned long j = 0; |
195 | 195 | ||
196 | /* Fast cache - only recompute value once per jiffies and avoid | 196 | /* Fast cache - only recompute value once per jiffies and avoid |
@@ -205,7 +205,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | |||
205 | p = tcache->blob[1]; | 205 | p = tcache->blob[1]; |
206 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | 206 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { |
207 | /* Load per CPU data from RDTSCP */ | 207 | /* Load per CPU data from RDTSCP */ |
208 | rdtscp(dummy, dummy, p); | 208 | native_read_tscp(&p); |
209 | } else { | 209 | } else { |
210 | /* Load per CPU data from GDT */ | 210 | /* Load per CPU data from GDT */ |
211 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | 211 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); |
@@ -297,7 +297,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu) | |||
297 | /* Store cpu number in limit so that it can be loaded quickly | 297 | /* Store cpu number in limit so that it can be loaded quickly |
298 | in user space in vgetcpu. | 298 | in user space in vgetcpu. |
299 | 12 bits for the CPU and 8 bits for the node. */ | 299 | 12 bits for the CPU and 8 bits for the node. */ |
300 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | 300 | d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU); |
301 | *d = 0x0f40000000000ULL; | 301 | *d = 0x0f40000000000ULL; |
302 | *d |= cpu; | 302 | *d |= cpu; |
303 | *d |= (node & 0xf) << 12; | 303 | *d |= (node & 0xf) << 12; |
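The comment in this hunk describes the packing: the per-CPU GDT entry's segment limit, which vgetcpu can read cheaply from user space with lsl, carries the CPU number in its low 12 bits with node bits above it (only the low four node bits appear in the lines shown here). A hedged arithmetic sketch of that packing, and of the decode a vgetcpu-style reader would perform, follows; it is an illustration, not the kernel code itself.

/*
 * Illustrative sketch of the limit encoding described above:
 * 12 bits for the CPU, node bits starting at bit 12.
 */
#include <stdio.h>

int main(void)
{
        unsigned int cpu = 5, node = 3;
        unsigned int limit = 0;

        limit |= cpu & 0xfff;           /* 12 bits for the CPU */
        limit |= (node & 0xf) << 12;    /* low node bits, as in the hunk above */

        /* what a vgetcpu-style reader would recover from the lsl result */
        printf("cpu  = %u\n", limit & 0xfff);
        printf("node = %u\n", limit >> 12);
        return 0;
}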
@@ -319,7 +319,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |||
319 | return NOTIFY_DONE; | 319 | return NOTIFY_DONE; |
320 | } | 320 | } |
321 | 321 | ||
322 | static void __init map_vsyscall(void) | 322 | void __init map_vsyscall(void) |
323 | { | 323 | { |
324 | extern char __vsyscall_0; | 324 | extern char __vsyscall_0; |
325 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | 325 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); |
@@ -335,7 +335,6 @@ static int __init vsyscall_init(void) | |||
335 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 335 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
336 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 336 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
337 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | 337 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); |
338 | map_vsyscall(); | ||
339 | #ifdef CONFIG_SYSCTL | 338 | #ifdef CONFIG_SYSCTL |
340 | register_sysctl_table(kernel_root_table2); | 339 | register_sysctl_table(kernel_root_table2); |
341 | #endif | 340 | #endif |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 77c25b307635..a66e9c1a0537 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -8,6 +8,7 @@ | |||
8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/desc.h> | ||
11 | 12 | ||
12 | EXPORT_SYMBOL(kernel_thread); | 13 | EXPORT_SYMBOL(kernel_thread); |
13 | 14 | ||
@@ -34,13 +35,6 @@ EXPORT_SYMBOL(__copy_from_user_inatomic); | |||
34 | EXPORT_SYMBOL(copy_page); | 35 | EXPORT_SYMBOL(copy_page); |
35 | EXPORT_SYMBOL(clear_page); | 36 | EXPORT_SYMBOL(clear_page); |
36 | 37 | ||
37 | #ifdef CONFIG_SMP | ||
38 | extern void __write_lock_failed(rwlock_t *rw); | ||
39 | extern void __read_lock_failed(rwlock_t *rw); | ||
40 | EXPORT_SYMBOL(__write_lock_failed); | ||
41 | EXPORT_SYMBOL(__read_lock_failed); | ||
42 | #endif | ||
43 | |||
44 | /* Export string functions. We normally rely on gcc builtin for most of these, | 38 | /* Export string functions. We normally rely on gcc builtin for most of these, |
45 | but gcc sometimes decides not to inline them. */ | 39 | but gcc sometimes decides not to inline them. */ |
46 | #undef memcpy | 40 | #undef memcpy |
@@ -60,3 +54,8 @@ EXPORT_SYMBOL(init_level4_pgt); | |||
60 | EXPORT_SYMBOL(load_gs_index); | 54 | EXPORT_SYMBOL(load_gs_index); |
61 | 55 | ||
62 | EXPORT_SYMBOL(_proxy_pda); | 56 | EXPORT_SYMBOL(_proxy_pda); |
57 | |||
58 | #ifdef CONFIG_PARAVIRT | ||
59 | /* Virtualized guests may want to use it */ | ||
60 | EXPORT_SYMBOL_GPL(cpu_gdt_descr); | ||
61 | #endif | ||