Diffstat (limited to 'arch/x86/kernel')
144 files changed, 10970 insertions, 7994 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..24f357e7557a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
 endif
 
 #
@@ -22,13 +23,14 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o := $(nostackp)
 CFLAGS_tsc.o := $(nostackp)
+CFLAGS_paravirt.o := $(nostackp)
 
-obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
+obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time_$(BITS).o ioport.o ldt.o
-obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
+obj-y += setup.o i8259.o irqinit_$(BITS).o
 obj-$(CONFIG_X86_VISWS) += visws_quirks.o
-obj-$(CONFIG_X86_32) += probe_roms_32.o
+obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o
 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
@@ -41,36 +43,38 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
 obj-y += i387.o xsave.o
 obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
 obj-$(CONFIG_X86_32) += tls.o
 obj-$(CONFIG_IA32_EMULATION) += tls.o
 obj-y += step.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += cpu/
 obj-y += acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
+obj-y += reboot.o
 obj-$(CONFIG_MCA) += mca_32.o
 obj-$(CONFIG_X86_MSR) += msr.o
 obj-$(CONFIG_X86_CPUID) += cpuid.o
 obj-$(CONFIG_PCI) += early-quirks.o
 apm-y := apm_32.o
 obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o
-obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
-obj-$(CONFIG_X86_32_SMP) += smpcommon.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o
+obj-$(CONFIG_SMP) += setup_percpu.o
+obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o
 obj-$(CONFIG_X86_IO_APIC) += io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
+obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
 obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
 obj-$(CONFIG_X86_ES7000) += es7000_32.o
-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
+obj-$(CONFIG_X86_SUMMIT) += summit_32.o
 obj-y += vsmp_64.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_MODULES) += module_$(BITS).o
@@ -105,20 +109,24 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
 obj-$(CONFIG_MICROCODE) += microcode.o
 
+obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
+
+obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-        obj-y += bios_uv.o uv_irq.o uv_sysfs.o
+        obj-y += genapic_64.o genapic_flat_64.o
         obj-y += genx2apic_cluster.o
         obj-y += genx2apic_phys.o
+        obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
+        obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
         obj-$(CONFIG_AUDIT) += audit_64.o
 
         obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
         obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
-        obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9e..956c1dee6fbe 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -42,12 +42,8 @@
 #include <asm/mpspec.h>
 #include <asm/smp.h>
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# include <mach_apic.h>
-#endif
-
 static int __initdata acpi_force = 0;
-
+u32 acpi_rsdt_forced;
 #ifdef CONFIG_ACPI
 int acpi_disabled = 0;
 #else
@@ -56,16 +52,7 @@ int acpi_disabled = 1;
 EXPORT_SYMBOL(acpi_disabled);
 
 #ifdef CONFIG_X86_64
-
-#include <asm/proto.h>
-
-#else /* X86 */
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <mach_apic.h>
-#include <mach_mpparse.h>
-#endif /* CONFIG_X86_LOCAL_APIC */
-
+# include <asm/proto.h>
 #endif /* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +108,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
  */
 char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 {
-	unsigned long base, offset, mapped_size;
-	int idx;
 
 	if (!phys || !size)
 		return NULL;
 
-	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
-		return __va(phys);
-
-	offset = phys & (PAGE_SIZE - 1);
-	mapped_size = PAGE_SIZE - offset;
-	clear_fixmap(FIX_ACPI_END);
-	set_fixmap(FIX_ACPI_END, phys);
-	base = fix_to_virt(FIX_ACPI_END);
-
-	/*
-	 * Most cases can be covered by the below.
-	 */
-	idx = FIX_ACPI_END;
-	while (mapped_size < size) {
-		if (--idx < FIX_ACPI_BEGIN)
-			return NULL;	/* cannot handle this */
-		phys += PAGE_SIZE;
-		clear_fixmap(idx);
-		set_fixmap(idx, phys);
-		mapped_size += PAGE_SIZE;
-	}
+	return early_ioremap(phys, size);
+}
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+	if (!map || !size)
+		return;
 
-	return ((unsigned char *)base + offset);
+	early_iounmap(map, size);
 }
 
 #ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +209,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 		       madt->address);
 	}
 
-	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+	default_acpi_madt_oem_check(madt->header.oem_id,
+				    madt->header.oem_table_id);
 
 	return 0;
 }
@@ -538,9 +509,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_madt_local_apic *lapic;
-	cpumask_t tmp_map, new_map;
+	cpumask_var_t tmp_map, new_map;
 	u8 physid;
 	int cpu;
+	int retval = -ENOMEM;
 
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
@@ -569,23 +541,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
-	tmp_map = cpu_present_map;
+	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+		goto free_tmp_map;
+
+	cpumask_copy(tmp_map, cpu_present_mask);
 	acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
-	cpus_andnot(new_map, cpu_present_map, tmp_map);
-	if (cpus_empty(new_map)) {
+	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+	if (cpumask_empty(new_map)) {
 		printk ("Unable to map lapic to logical cpu number\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto free_new_map;
 	}
 
-	cpu = first_cpu(new_map);
+	cpu = cpumask_first(new_map);
 
 	*pcpu = cpu;
-	return 0;
+	retval = 0;
+
+free_new_map:
+	free_cpumask_var(new_map);
+free_tmp_map:
+	free_cpumask_var(tmp_map);
+out:
+	return retval;
 }
 
 /* wrapper to silence section mismatch warning */
@@ -598,7 +584,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
 	per_cpu(x86_cpu_to_apicid, cpu) = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 	num_processors--;
 
 	return (0);
@@ -869,7 +855,7 @@ static struct {
 	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
-static int mp_find_ioapic(int gsi)
+int mp_find_ioapic(int gsi)
 {
 	int i = 0;
 
@@ -884,6 +870,16 @@ static int mp_find_ioapic(int gsi)
 	return -1;
 }
 
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+	if (WARN_ON(ioapic == -1))
+		return -1;
+	if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
+		return -1;
+
+	return gsi - mp_ioapic_routing[ioapic].gsi_base;
+}
+
 static u8 __init uniq_ioapic_id(u8 id)
 {
 #ifdef CONFIG_X86_32
@@ -897,8 +893,8 @@ static u8 __init uniq_ioapic_id(u8 id)
 	DECLARE_BITMAP(used, 256);
 	bitmap_zero(used, 256);
 	for (i = 0; i < nr_ioapics; i++) {
-		struct mp_config_ioapic *ia = &mp_ioapics[i];
-		__set_bit(ia->mp_apicid, used);
+		struct mpc_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->apicid, used);
 	}
 	if (!test_bit(id, used))
 		return id;
@@ -930,47 +926,70 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
 	idx = nr_ioapics;
 
-	mp_ioapics[idx].mp_type = MP_IOAPIC;
-	mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
-	mp_ioapics[idx].mp_apicaddr = address;
+	mp_ioapics[idx].type = MP_IOAPIC;
+	mp_ioapics[idx].flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+	mp_ioapics[idx].apicid = uniq_ioapic_id(id);
 #ifdef CONFIG_X86_32
-	mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+	mp_ioapics[idx].apicver = io_apic_get_version(idx);
 #else
-	mp_ioapics[idx].mp_apicver = 0;
+	mp_ioapics[idx].apicver = 0;
 #endif
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
 
-	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
-	       "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
-	       mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+	       mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
 	       mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
 
 	nr_ioapics++;
 }
 
-static void assign_to_mp_irq(struct mp_config_intsrc *m,
-			     struct mp_config_intsrc *mp_irq)
+int __init acpi_probe_gsi(void)
 {
-	memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+	int idx;
+	int gsi;
+	int max_gsi = 0;
+
+	if (acpi_disabled)
+		return 0;
+
+	if (!acpi_ioapic)
+		return 0;
+
+	max_gsi = 0;
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		gsi = mp_ioapic_routing[idx].gsi_end;
+
+		if (gsi > max_gsi)
+			max_gsi = gsi;
+	}
+
+	return max_gsi + 1;
 }
 
-static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
-		      struct mp_config_intsrc *m)
+static void assign_to_mp_irq(struct mpc_intsrc *m,
			     struct mpc_intsrc *mp_irq)
 {
-	return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+	memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
 }
 
-static void save_mp_irq(struct mp_config_intsrc *m)
+static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
+		      struct mpc_intsrc *m)
+{
+	return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
+}
+
+static void save_mp_irq(struct mpc_intsrc *m)
 {
 	int i;
 
@@ -988,7 +1007,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 	int ioapic;
 	int pin;
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
@@ -996,7 +1015,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	ioapic = mp_find_ioapic(gsi);
 	if (ioapic < 0)
 		return;
-	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+	pin = mp_find_ioapic_pin(ioapic, gsi);
 
 	/*
 	 * TBD: This check is for faulty timer entries, where the override
@@ -1006,13 +1025,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irq.mp_type = MP_INTSRC;
-	mp_irq.mp_irqtype = mp_INT;
-	mp_irq.mp_irqflag = (trigger << 2) | polarity;
-	mp_irq.mp_srcbus = MP_ISA_BUS;
-	mp_irq.mp_srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
-	mp_irq.mp_dstirq = pin;	/* INTIN# */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;	/* IRQ */
+	mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
+	mp_irq.dstirq = pin;	/* INTIN# */
 
 	save_mp_irq(&mp_irq);
 }
@@ -1022,7 +1041,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	int i;
 	int ioapic;
 	unsigned int dstapic;
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 	/*
@@ -1047,7 +1066,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	ioapic = mp_find_ioapic(0);
 	if (ioapic < 0)
 		return;
-	dstapic = mp_ioapics[ioapic].mp_apicid;
+	dstapic = mp_ioapics[ioapic].apicid;
 
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
@@ -1057,16 +1076,14 @@ void __init mp_config_acpi_legacy_irqs(void)
 		int idx;
 
 		for (idx = 0; idx < mp_irq_entries; idx++) {
-			struct mp_config_intsrc *irq = mp_irqs + idx;
+			struct mpc_intsrc *irq = mp_irqs + idx;
 
 			/* Do we already have a mapping for this ISA IRQ? */
-			if (irq->mp_srcbus == MP_ISA_BUS
-			    && irq->mp_srcbusirq == i)
+			if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
 				break;
 
 			/* Do we already have a mapping for this IOAPIC pin */
-			if (irq->mp_dstapic == dstapic &&
-			    irq->mp_dstirq == i)
+			if (irq->dstapic == dstapic && irq->dstirq == i)
 				break;
 		}
 
@@ -1075,13 +1092,13 @@ void __init mp_config_acpi_legacy_irqs(void)
 			continue;	/* IRQ already used */
 		}
 
-		mp_irq.mp_type = MP_INTSRC;
-		mp_irq.mp_irqflag = 0;	/* Conforming */
-		mp_irq.mp_srcbus = MP_ISA_BUS;
-		mp_irq.mp_dstapic = dstapic;
-		mp_irq.mp_irqtype = mp_INT;
-		mp_irq.mp_srcbusirq = i; /* Identity mapped */
-		mp_irq.mp_dstirq = i;
+		mp_irq.type = MP_INTSRC;
+		mp_irq.irqflag = 0;	/* Conforming */
+		mp_irq.srcbus = MP_ISA_BUS;
+		mp_irq.dstapic = dstapic;
+		mp_irq.irqtype = mp_INT;
+		mp_irq.srcbusirq = i; /* Identity mapped */
+		mp_irq.dstirq = i;
 
 		save_mp_irq(&mp_irq);
 	}
@@ -1118,7 +1135,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		return gsi;
 	}
 
-	ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+	ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
 
 #ifdef CONFIG_X86_32
 	if (ioapic_renumber_irq)
@@ -1192,22 +1209,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 			u32 gsi, int triggering, int polarity)
 {
 #ifdef CONFIG_X86_MPPARSE
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 	int ioapic;
 
 	if (!acpi_ioapic)
 		return 0;
 
 	/* print the entry should happen on mptable identically */
-	mp_irq.mp_type = MP_INTSRC;
-	mp_irq.mp_irqtype = mp_INT;
-	mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
 				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.mp_srcbus = number;
-	mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
 	ioapic = mp_find_ioapic(gsi);
-	mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
-	mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
 
 	save_mp_irq(&mp_irq);
 #endif
@@ -1334,7 +1351,7 @@ static void __init acpi_process_madt(void)
 	if (!error) {
 		acpi_lapic = 1;
 
-#ifdef CONFIG_X86_GENERICARCH
+#ifdef CONFIG_X86_BIGSMP
 		generic_bigsmp_probe();
 #endif
 		/*
@@ -1343,13 +1360,11 @@ static void __init acpi_process_madt(void)
 		error = acpi_parse_madt_ioapic_entries();
 		if (!error) {
 			acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-			acpi_irq_balance_set(NULL);
 			acpi_ioapic = 1;
 
 			smp_found_config = 1;
-#ifdef CONFIG_X86_32
-			setup_apic_routing();
-#endif
+			if (apic->setup_apic_routing)
+				apic->setup_apic_routing();
 		}
 	}
 	if (error == -EINVAL) {
@@ -1360,7 +1375,29 @@ static void __init acpi_process_madt(void)
 			       "Invalid BIOS MADT, disabling ACPI\n");
 			disable_acpi();
 		}
+	} else {
+		/*
+		 * ACPI found no MADT, and so ACPI wants UP PIC mode.
+		 * In the event an MPS table was found, forget it.
+		 * Boot with "acpi=off" to use MPS on such a system.
+		 */
+		if (smp_found_config) {
+			printk(KERN_WARNING PREFIX
+			       "No APIC-table, disabling MPS\n");
+			smp_found_config = 0;
+		}
 	}
+
+	/*
+	 * ACPI supports both logical (e.g. Hyper-Threading) and physical
+	 * processors, where MPS only supports physical.
+	 */
+	if (acpi_lapic && acpi_ioapic)
+		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+		       "information\n");
+	else if (acpi_lapic)
+		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+		       "configuration information\n");
 #endif
 	return;
 }
@@ -1784,6 +1821,10 @@ static int __init parse_acpi(char *arg)
 		disable_acpi();
 		acpi_ht = 1;
 	}
+	/* acpi=rsdt use RSDT instead of XSDT */
+	else if (strcmp(arg, "rsdt") == 0) {
+		acpi_rsdt_forced = 1;
+	}
 	/* "acpi=noirq" disables ACPI interrupt routing */
 	else if (strcmp(arg, "noirq") == 0) {
 		acpi_noirq_set();
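
The _acpi_map_lsapic() hunks above replace fixed-size on-stack cpumask_t variables with heap-allocated cpumask_var_t and unwind through labeled cleanup on error. Below is a minimal stand-alone sketch of that allocate/copy/diff/free pattern; the helper name find_new_cpu() and its surroundings are illustrative only, not part of the patch:

#include <linux/cpumask.h>
#include <linux/gfp.h>

static int find_new_cpu(int *pcpu)
{
	cpumask_var_t tmp_map, new_map;
	int retval = -ENOMEM;

	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))	/* may sleep */
		goto out;
	if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
		goto free_tmp_map;

	/* snapshot the present mask before a new CPU gets registered */
	cpumask_copy(tmp_map, cpu_present_mask);

	/* ... registration of the new CPU would happen here ... */

	/* new_map = cpu_present_mask & ~tmp_map */
	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
	if (cpumask_empty(new_map)) {
		retval = -EINVAL;
		goto free_new_map;
	}

	*pcpu = cpumask_first(new_map);
	retval = 0;

free_new_map:
	free_cpumask_var(new_map);
free_tmp_map:
	free_cpumask_var(tmp_map);
out:
	return retval;
}

Keeping the labels in reverse allocation order means each failure path frees exactly what was allocated before it; with CONFIG_CPUMASK_OFFSTACK=n the alloc/free calls compile away and the masks stay on the stack as before.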
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa83..bbbe4bbb6f34 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -56,6 +56,7 @@ static struct cstate_entry *cpu_cstate_entry;	/* per CPU ptr */
 static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define MWAIT_SUBSTATE_MASK	(0xf)
+#define MWAIT_CSTATE_MASK	(0xf)
 #define MWAIT_SUBSTATE_SIZE	(4)
 
 #define CPUID_MWAIT_LEAF (5)
@@ -66,39 +67,20 @@ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define NATIVE_CSTATE_BEYOND_HALT	(2)
 
-int acpi_processor_ffh_cstate_probe(unsigned int cpu,
-		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 {
-	struct cstate_entry *percpu_entry;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	cpumask_t saved_mask;
-	int retval;
+	struct acpi_processor_cx *cx = _cx;
+	long retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
 	unsigned int cstate_type; /* C-state type and not ACPI C-state type */
 	unsigned int num_cstate_subtype;
 
-	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF )
-		return -1;
-
-	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
-		return -1;
-
-	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
-	percpu_entry->states[cx->index].eax = 0;
-	percpu_entry->states[cx->index].ecx = 0;
-
-	/* Make sure we are running on right CPU */
-	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		return -1;
-
 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
 
 	/* Check whether this particular cx_type (in CST) is supported or not */
-	cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1;
+	cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
+			MWAIT_CSTATE_MASK) + 1;
 	edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
 	num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
 
@@ -114,21 +96,45 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		retval = -1;
 		goto out;
 	}
-	percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
-
-	/* Use the hint in CST */
-	percpu_entry->states[cx->index].eax = cx->address;
 
 	if (!mwait_supported[cstate_type]) {
 		mwait_supported[cstate_type] = 1;
-		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
-		       "state\n", cx->type);
+		printk(KERN_DEBUG
+			"Monitor-Mwait will be used to enter C-%d "
+			"state\n", cx->type);
 	}
-	snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
-		 cx->address);
-
+	snprintf(cx->desc,
+		ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+		cx->address);
 out:
-	set_cpus_allowed_ptr(current, &saved_mask);
+	return retval;
+}
+
+int acpi_processor_ffh_cstate_probe(unsigned int cpu,
+		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+{
+	struct cstate_entry *percpu_entry;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	long retval;
+
+	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF)
+		return -1;
+
+	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
+		return -1;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	percpu_entry->states[cx->index].eax = 0;
+	percpu_entry->states[cx->index].ecx = 0;
+
+	/* Make sure we are running on right CPU */
+
+	retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx);
+	if (retval == 0) {
+		/* Use the hint in CST */
+		percpu_entry->states[cx->index].eax = cx->address;
+		percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
+	}
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
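
The cstate.c change above drops the set_cpus_allowed_ptr() migrate-and-restore dance in favor of work_on_cpu(), which queues a callback on the target CPU's workqueue, sleeps until it completes, and hands back its long return value. A minimal sketch of that call shape, assuming only the work_on_cpu() interface visible in the patch (the callback and wrapper names are hypothetical):

#include <linux/smp.h>
#include <linux/workqueue.h>

/* Executes on the CPU handed to work_on_cpu(), not on the caller's CPU. */
static long report_cpu(void *unused)
{
	return (long)smp_processor_id();
}

/* May sleep: schedule report_cpu() on 'cpu' and wait for its result. */
static long report_cpu_on(unsigned int cpu)
{
	return work_on_cpu(cpu, report_cpu, NULL);
}

Compared with temporarily rewriting the calling task's affinity, this leaves the caller's cpus_allowed untouched and cannot race with a concurrent sched_setaffinity() on the same task.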
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 806b4e9051b4..7c243a2c5115 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
 	stack_start.sp = temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
@@ -156,6 +157,8 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
+		if (strncmp(str, "s4_nonvs", 8) == 0)
+			acpi_s4_no_nvs();
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
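
acpi_sleep_setup() above consumes the comma-separated acpi_sleep= boot option with a chain of strncmp() checks, one flag per token. A hedged sketch of that early __setup() pattern follows; the option name demo= and its flag are made up for illustration:

#include <linux/init.h>
#include <linux/string.h>

static int demo_flag __initdata;

/* Receives the text after "demo="; returns 1 to mark the option handled. */
static int __init demo_setup(char *str)
{
	while (str && *str) {
		if (strncmp(str, "on", 2) == 0)
			demo_flag = 1;
		str = strchr(str, ',');	/* advance to the next token */
		if (str)
			str++;
	}
	return 1;
}
__setup("demo=", demo_setup);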
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 331b318304eb..5113c080f0c4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -20,10 +20,15 @@ | |||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/bitops.h> | 22 | #include <linux/bitops.h> |
23 | #include <linux/debugfs.h> | ||
23 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
24 | #include <linux/iommu-helper.h> | 25 | #include <linux/iommu-helper.h> |
26 | #ifdef CONFIG_IOMMU_API | ||
27 | #include <linux/iommu.h> | ||
28 | #endif | ||
25 | #include <asm/proto.h> | 29 | #include <asm/proto.h> |
26 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | ||
27 | #include <asm/amd_iommu_types.h> | 32 | #include <asm/amd_iommu_types.h> |
28 | #include <asm/amd_iommu.h> | 33 | #include <asm/amd_iommu.h> |
29 | 34 | ||
@@ -37,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
37 | static LIST_HEAD(iommu_pd_list); | 42 | static LIST_HEAD(iommu_pd_list); |
38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 43 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
39 | 44 | ||
45 | #ifdef CONFIG_IOMMU_API | ||
46 | static struct iommu_ops amd_iommu_ops; | ||
47 | #endif | ||
48 | |||
40 | /* | 49 | /* |
41 | * general struct to manage commands send to an IOMMU | 50 | * general struct to manage commands send to an IOMMU |
42 | */ | 51 | */ |
@@ -46,6 +55,68 @@ struct iommu_cmd { | |||
46 | 55 | ||
47 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 56 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
48 | struct unity_map_entry *e); | 57 | struct unity_map_entry *e); |
58 | static struct dma_ops_domain *find_protection_domain(u16 devid); | ||
59 | |||
60 | |||
61 | #ifdef CONFIG_AMD_IOMMU_STATS | ||
62 | |||
63 | /* | ||
64 | * Initialization code for statistics collection | ||
65 | */ | ||
66 | |||
67 | DECLARE_STATS_COUNTER(compl_wait); | ||
68 | DECLARE_STATS_COUNTER(cnt_map_single); | ||
69 | DECLARE_STATS_COUNTER(cnt_unmap_single); | ||
70 | DECLARE_STATS_COUNTER(cnt_map_sg); | ||
71 | DECLARE_STATS_COUNTER(cnt_unmap_sg); | ||
72 | DECLARE_STATS_COUNTER(cnt_alloc_coherent); | ||
73 | DECLARE_STATS_COUNTER(cnt_free_coherent); | ||
74 | DECLARE_STATS_COUNTER(cross_page); | ||
75 | DECLARE_STATS_COUNTER(domain_flush_single); | ||
76 | DECLARE_STATS_COUNTER(domain_flush_all); | ||
77 | DECLARE_STATS_COUNTER(alloced_io_mem); | ||
78 | DECLARE_STATS_COUNTER(total_map_requests); | ||
79 | |||
80 | static struct dentry *stats_dir; | ||
81 | static struct dentry *de_isolate; | ||
82 | static struct dentry *de_fflush; | ||
83 | |||
84 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | ||
85 | { | ||
86 | if (stats_dir == NULL) | ||
87 | return; | ||
88 | |||
89 | cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, | ||
90 | &cnt->value); | ||
91 | } | ||
92 | |||
93 | static void amd_iommu_stats_init(void) | ||
94 | { | ||
95 | stats_dir = debugfs_create_dir("amd-iommu", NULL); | ||
96 | if (stats_dir == NULL) | ||
97 | return; | ||
98 | |||
99 | de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, | ||
100 | (u32 *)&amd_iommu_isolate); | ||
101 | |||
102 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | ||
103 | (u32 *)&amd_iommu_unmap_flush); | ||
104 | |||
105 | amd_iommu_stats_add(&compl_wait); | ||
106 | amd_iommu_stats_add(&cnt_map_single); | ||
107 | amd_iommu_stats_add(&cnt_unmap_single); | ||
108 | amd_iommu_stats_add(&cnt_map_sg); | ||
109 | amd_iommu_stats_add(&cnt_unmap_sg); | ||
110 | amd_iommu_stats_add(&cnt_alloc_coherent); | ||
111 | amd_iommu_stats_add(&cnt_free_coherent); | ||
112 | amd_iommu_stats_add(&cross_page); | ||
113 | amd_iommu_stats_add(&domain_flush_single); | ||
114 | amd_iommu_stats_add(&domain_flush_all); | ||
115 | amd_iommu_stats_add(&alloced_io_mem); | ||
116 | amd_iommu_stats_add(&total_map_requests); | ||
117 | } | ||
118 | |||
119 | #endif | ||
49 | 120 | ||
50 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | 121 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ |
51 | static int iommu_has_npcache(struct amd_iommu *iommu) | 122 | static int iommu_has_npcache(struct amd_iommu *iommu) |
@@ -187,12 +258,56 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
187 | 258 | ||
188 | spin_lock_irqsave(&iommu->lock, flags); | 259 | spin_lock_irqsave(&iommu->lock, flags); |
189 | ret = __iommu_queue_command(iommu, cmd); | 260 | ret = __iommu_queue_command(iommu, cmd); |
261 | if (!ret) | ||
262 | iommu->need_sync = true; | ||
190 | spin_unlock_irqrestore(&iommu->lock, flags); | 263 | spin_unlock_irqrestore(&iommu->lock, flags); |
191 | 264 | ||
192 | return ret; | 265 | return ret; |
193 | } | 266 | } |
194 | 267 | ||
195 | /* | 268 | /* |
269 | * This function waits until an IOMMU has completed a completion | ||
270 | * wait command | ||
271 | */ | ||
272 | static void __iommu_wait_for_completion(struct amd_iommu *iommu) | ||
273 | { | ||
274 | int ready = 0; | ||
275 | unsigned status = 0; | ||
276 | unsigned long i = 0; | ||
277 | |||
278 | INC_STATS_COUNTER(compl_wait); | ||
279 | |||
280 | while (!ready && (i < EXIT_LOOP_COUNT)) { | ||
281 | ++i; | ||
282 | /* wait for the bit to become one */ | ||
283 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
284 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
285 | } | ||
286 | |||
287 | /* set bit back to zero */ | ||
288 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
289 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
290 | |||
291 | if (unlikely(i == EXIT_LOOP_COUNT)) | ||
292 | panic("AMD IOMMU: Completion wait loop failed\n"); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * This function queues a completion wait command into the command | ||
297 | * buffer of an IOMMU | ||
298 | */ | ||
299 | static int __iommu_completion_wait(struct amd_iommu *iommu) | ||
300 | { | ||
301 | struct iommu_cmd cmd; | ||
302 | |||
303 | memset(&cmd, 0, sizeof(cmd)); | ||
304 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | ||
305 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
306 | |||
307 | return __iommu_queue_command(iommu, &cmd); | ||
308 | } | ||
309 | |||
310 | /* | ||
196 | * This function is called whenever we need to ensure that the IOMMU has | 311 | * This function is called whenever we need to ensure that the IOMMU has |
197 | * completed execution of all commands we sent. It sends a | 312 | * completed execution of all commands we sent. It sends a |
198 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | 313 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs |
@@ -201,37 +316,23 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
201 | */ | 316 | */ |
202 | static int iommu_completion_wait(struct amd_iommu *iommu) | 317 | static int iommu_completion_wait(struct amd_iommu *iommu) |
203 | { | 318 | { |
204 | int ret = 0, ready = 0; | 319 | int ret = 0; |
205 | unsigned status = 0; | 320 | unsigned long flags; |
206 | struct iommu_cmd cmd; | ||
207 | unsigned long flags, i = 0; | ||
208 | 321 | ||
209 | memset(&cmd, 0, sizeof(cmd)); | 322 | spin_lock_irqsave(&iommu->lock, flags); |
210 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | ||
211 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
212 | 323 | ||
213 | iommu->need_sync = 0; | 324 | if (!iommu->need_sync) |
325 | goto out; | ||
214 | 326 | ||
215 | spin_lock_irqsave(&iommu->lock, flags); | 327 | ret = __iommu_completion_wait(iommu); |
216 | 328 | ||
217 | ret = __iommu_queue_command(iommu, &cmd); | 329 | iommu->need_sync = false; |
218 | 330 | ||
219 | if (ret) | 331 | if (ret) |
220 | goto out; | 332 | goto out; |
221 | 333 | ||
222 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 334 | __iommu_wait_for_completion(iommu); |
223 | ++i; | ||
224 | /* wait for the bit to become one */ | ||
225 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
226 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
227 | } | ||
228 | |||
229 | /* set bit back to zero */ | ||
230 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
231 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
232 | 335 | ||
233 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) | ||
234 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); | ||
235 | out: | 336 | out: |
236 | spin_unlock_irqrestore(&iommu->lock, flags); | 337 | spin_unlock_irqrestore(&iommu->lock, flags); |
237 | 338 | ||
@@ -254,11 +355,24 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
254 | 355 | ||
255 | ret = iommu_queue_command(iommu, &cmd); | 356 | ret = iommu_queue_command(iommu, &cmd); |
256 | 357 | ||
257 | iommu->need_sync = 1; | ||
258 | |||
259 | return ret; | 358 | return ret; |
260 | } | 359 | } |
261 | 360 | ||
361 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
362 | u16 domid, int pde, int s) | ||
363 | { | ||
364 | memset(cmd, 0, sizeof(*cmd)); | ||
365 | address &= PAGE_MASK; | ||
366 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
367 | cmd->data[1] |= domid; | ||
368 | cmd->data[2] = lower_32_bits(address); | ||
369 | cmd->data[3] = upper_32_bits(address); | ||
370 | if (s) /* size bit - we flush more than one 4kb page */ | ||
371 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
372 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
373 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
374 | } | ||
375 | |||
262 | /* | 376 | /* |
263 | * Generic command send function for invalidaing TLB entries | 377 | * Generic command send function for invalidaing TLB entries |
264 | */ | 378 | */ |
@@ -268,21 +382,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
268 | struct iommu_cmd cmd; | 382 | struct iommu_cmd cmd; |
269 | int ret; | 383 | int ret; |
270 | 384 | ||
271 | memset(&cmd, 0, sizeof(cmd)); | 385 | __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); |
272 | address &= PAGE_MASK; | ||
273 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | ||
274 | cmd.data[1] |= domid; | ||
275 | cmd.data[2] = lower_32_bits(address); | ||
276 | cmd.data[3] = upper_32_bits(address); | ||
277 | if (s) /* size bit - we flush more than one 4kb page */ | ||
278 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
279 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
280 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
281 | 386 | ||
282 | ret = iommu_queue_command(iommu, &cmd); | 387 | ret = iommu_queue_command(iommu, &cmd); |
283 | 388 | ||
284 | iommu->need_sync = 1; | ||
285 | |||
286 | return ret; | 389 | return ret; |
287 | } | 390 | } |
288 | 391 | ||
@@ -318,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
318 | { | 421 | { |
319 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 422 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; |
320 | 423 | ||
424 | INC_STATS_COUNTER(domain_flush_single); | ||
425 | |||
321 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 426 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); |
322 | } | 427 | } |
323 | 428 | ||
429 | /* | ||
430 | * This function is used to flush the IO/TLB for a given protection domain | ||
431 | * on every IOMMU in the system | ||
432 | */ | ||
433 | static void iommu_flush_domain(u16 domid) | ||
434 | { | ||
435 | unsigned long flags; | ||
436 | struct amd_iommu *iommu; | ||
437 | struct iommu_cmd cmd; | ||
438 | |||
439 | INC_STATS_COUNTER(domain_flush_all); | ||
440 | |||
441 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
442 | domid, 1, 1); | ||
443 | |||
444 | list_for_each_entry(iommu, &amd_iommu_list, list) { | ||
445 | spin_lock_irqsave(&iommu->lock, flags); | ||
446 | __iommu_queue_command(iommu, &cmd); | ||
447 | __iommu_completion_wait(iommu); | ||
448 | __iommu_wait_for_completion(iommu); | ||
449 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
450 | } | ||
451 | } | ||
452 | |||
324 | /**************************************************************************** | 453 | /**************************************************************************** |
325 | * | 454 | * |
326 | * The functions below are used the create the page table mappings for | 455 | * The functions below are used the create the page table mappings for |
@@ -335,15 +464,15 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
335 | * supporting all features of AMD IOMMU page tables like level skipping | 464 | * supporting all features of AMD IOMMU page tables like level skipping |
336 | * and full 64 bit address spaces. | 465 | * and full 64 bit address spaces. |
337 | */ | 466 | */ |
338 | static int iommu_map(struct protection_domain *dom, | 467 | static int iommu_map_page(struct protection_domain *dom, |
339 | unsigned long bus_addr, | 468 | unsigned long bus_addr, |
340 | unsigned long phys_addr, | 469 | unsigned long phys_addr, |
341 | int prot) | 470 | int prot) |
342 | { | 471 | { |
343 | u64 __pte, *pte, *page; | 472 | u64 __pte, *pte, *page; |
344 | 473 | ||
345 | bus_addr = PAGE_ALIGN(bus_addr); | 474 | bus_addr = PAGE_ALIGN(bus_addr); |
346 | phys_addr = PAGE_ALIGN(bus_addr); | 475 | phys_addr = PAGE_ALIGN(phys_addr); |
347 | 476 | ||
348 | /* only support 512GB address spaces for now */ | 477 | /* only support 512GB address spaces for now */ |
349 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 478 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) |
@@ -385,6 +514,28 @@ static int iommu_map(struct protection_domain *dom, | |||
385 | return 0; | 514 | return 0; |
386 | } | 515 | } |
387 | 516 | ||
517 | static void iommu_unmap_page(struct protection_domain *dom, | ||
518 | unsigned long bus_addr) | ||
519 | { | ||
520 | u64 *pte; | ||
521 | |||
522 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
523 | |||
524 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
525 | return; | ||
526 | |||
527 | pte = IOMMU_PTE_PAGE(*pte); | ||
528 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
529 | |||
530 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
531 | return; | ||
532 | |||
533 | pte = IOMMU_PTE_PAGE(*pte); | ||
534 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
535 | |||
536 | *pte = 0; | ||
537 | } | ||
538 | |||
388 | /* | 539 | /* |
389 | * This function checks if a specific unity mapping entry is needed for | 540 | * This function checks if a specific unity mapping entry is needed for |
390 | * this specific IOMMU. | 541 | * this specific IOMMU. |
@@ -437,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
437 | 588 | ||
438 | for (addr = e->address_start; addr < e->address_end; | 589 | for (addr = e->address_start; addr < e->address_end; |
439 | addr += PAGE_SIZE) { | 590 | addr += PAGE_SIZE) { |
440 | ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); | 591 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); |
441 | if (ret) | 592 | if (ret) |
442 | return ret; | 593 | return ret; |
443 | /* | 594 | /* |
@@ -537,7 +688,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
537 | address >>= PAGE_SHIFT; | 688 | address >>= PAGE_SHIFT; |
538 | iommu_area_free(dom->bitmap, address, pages); | 689 | iommu_area_free(dom->bitmap, address, pages); |
539 | 690 | ||
540 | if (address + pages >= dom->next_bit) | 691 | if (address >= dom->next_bit) |
541 | dom->need_flush = true; | 692 | dom->need_flush = true; |
542 | } | 693 | } |
543 | 694 | ||
@@ -568,6 +719,16 @@ static u16 domain_id_alloc(void) | |||
568 | return id; | 719 | return id; |
569 | } | 720 | } |
570 | 721 | ||
722 | static void domain_id_free(int id) | ||
723 | { | ||
724 | unsigned long flags; | ||
725 | |||
726 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
727 | if (id > 0 && id < MAX_DOMAIN_ID) | ||
728 | __clear_bit(id, amd_iommu_pd_alloc_bitmap); | ||
729 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
730 | } | ||
731 | |||
571 | /* | 732 | /* |
572 | * Used to reserve address ranges in the aperture (e.g. for exclusion | 733 | * Used to reserve address ranges in the aperture (e.g. for exclusion |
573 | * ranges. | 734 | * ranges. |
@@ -584,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
584 | iommu_area_reserve(dom->bitmap, start_page, pages); | 745 | iommu_area_reserve(dom->bitmap, start_page, pages); |
585 | } | 746 | } |
586 | 747 | ||
587 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 748 | static void free_pagetable(struct protection_domain *domain) |
588 | { | 749 | { |
589 | int i, j; | 750 | int i, j; |
590 | u64 *p1, *p2, *p3; | 751 | u64 *p1, *p2, *p3; |
591 | 752 | ||
592 | p1 = dma_dom->domain.pt_root; | 753 | p1 = domain->pt_root; |
593 | 754 | ||
594 | if (!p1) | 755 | if (!p1) |
595 | return; | 756 | return; |
@@ -599,7 +760,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
599 | continue; | 760 | continue; |
600 | 761 | ||
601 | p2 = IOMMU_PTE_PAGE(p1[i]); | 762 | p2 = IOMMU_PTE_PAGE(p1[i]); |
602 | for (j = 0; j < 512; ++i) { | 763 | for (j = 0; j < 512; ++j) { |
603 | if (!IOMMU_PTE_PRESENT(p2[j])) | 764 | if (!IOMMU_PTE_PRESENT(p2[j])) |
604 | continue; | 765 | continue; |
605 | p3 = IOMMU_PTE_PAGE(p2[j]); | 766 | p3 = IOMMU_PTE_PAGE(p2[j]); |
@@ -610,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
610 | } | 771 | } |
611 | 772 | ||
612 | free_page((unsigned long)p1); | 773 | free_page((unsigned long)p1); |
774 | |||
775 | domain->pt_root = NULL; | ||
613 | } | 776 | } |
614 | 777 | ||
615 | /* | 778 | /* |
@@ -621,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
621 | if (!dom) | 784 | if (!dom) |
622 | return; | 785 | return; |
623 | 786 | ||
624 | dma_ops_free_pagetable(dom); | 787 | free_pagetable(&dom->domain); |
625 | 788 | ||
626 | kfree(dom->pte_pages); | 789 | kfree(dom->pte_pages); |
627 | 790 | ||
@@ -660,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
660 | goto free_dma_dom; | 823 | goto free_dma_dom; |
661 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 824 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; |
662 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 825 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
826 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | ||
663 | dma_dom->domain.priv = dma_dom; | 827 | dma_dom->domain.priv = dma_dom; |
664 | if (!dma_dom->domain.pt_root) | 828 | if (!dma_dom->domain.pt_root) |
665 | goto free_dma_dom; | 829 | goto free_dma_dom; |
@@ -722,6 +886,15 @@ free_dma_dom: | |||
722 | } | 886 | } |
723 | 887 | ||
724 | /* | 888 | /* |
889 | * little helper function to check whether a given protection domain is a | ||
890 | * dma_ops domain | ||
891 | */ | ||
892 | static bool dma_ops_domain(struct protection_domain *domain) | ||
893 | { | ||
894 | return domain->flags & PD_DMA_OPS_MASK; | ||
895 | } | ||
896 | |||
897 | /* | ||
725 | * Find out the protection domain structure for a given PCI device. This | 898 | * Find out the protection domain structure for a given PCI device. This |
726 | * will give us the pointer to the page table root for example. | 899 | * will give us the pointer to the page table root for example. |
727 | */ | 900 | */ |
@@ -741,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
741 | * If a device is not yet associated with a domain, this function does | 914 | * If a device is not yet associated with a domain, this function does |
742 | * assigns it visible for the hardware | 915 | * assigns it visible for the hardware |
743 | */ | 916 | */ |
744 | static void set_device_domain(struct amd_iommu *iommu, | 917 | static void attach_device(struct amd_iommu *iommu, |
745 | struct protection_domain *domain, | 918 | struct protection_domain *domain, |
746 | u16 devid) | 919 | u16 devid) |
747 | { | 920 | { |
748 | unsigned long flags; | 921 | unsigned long flags; |
749 | |||
750 | u64 pte_root = virt_to_phys(domain->pt_root); | 922 | u64 pte_root = virt_to_phys(domain->pt_root); |
751 | 923 | ||
924 | domain->dev_cnt += 1; | ||
925 | |||
752 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | 926 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
753 | << DEV_ENTRY_MODE_SHIFT; | 927 | << DEV_ENTRY_MODE_SHIFT; |
754 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | 928 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; |
@@ -762,10 +936,118 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
762 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 936 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
763 | 937 | ||
764 | iommu_queue_inv_dev_entry(iommu, devid); | 938 | iommu_queue_inv_dev_entry(iommu, devid); |
939 | } | ||
940 | |||
941 | /* | ||
942 | * Removes a device from a protection domain (unlocked) | ||
943 | */ | ||
944 | static void __detach_device(struct protection_domain *domain, u16 devid) | ||
945 | { | ||
946 | |||
947 | /* lock domain */ | ||
948 | spin_lock(&domain->lock); | ||
949 | |||
950 | /* remove domain from the lookup table */ | ||
951 | amd_iommu_pd_table[devid] = NULL; | ||
952 | |||
953 | /* remove entry from the device table seen by the hardware */ | ||
954 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
955 | amd_iommu_dev_table[devid].data[1] = 0; | ||
956 | amd_iommu_dev_table[devid].data[2] = 0; | ||
957 | |||
958 | /* decrease reference counter */ | ||
959 | domain->dev_cnt -= 1; | ||
960 | |||
961 | /* ready */ | ||
962 | spin_unlock(&domain->lock); | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Removes a device from a protection domain (with devtable_lock held) | ||
967 | */ | ||
968 | static void detach_device(struct protection_domain *domain, u16 devid) | ||
969 | { | ||
970 | unsigned long flags; | ||
971 | |||
972 | /* lock device table */ | ||
973 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
974 | __detach_device(domain, devid); | ||
975 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
976 | } | ||
977 | |||
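The __detach_device()/detach_device() pair follows the usual kernel naming convention: the double-underscore variant assumes the caller already holds amd_iommu_devtable_lock (cleanup_domain() later in this patch relies on that to strip many devices under one lock acquisition), while the plain variant takes and drops the lock itself. The pattern in isolation, with generic stand-in names:

    static DEFINE_RWLOCK(table_lock);  /* stand-in for amd_iommu_devtable_lock */

    /* caller must hold table_lock for writing */
    static void __remove_entry(u16 id)
    {
        /* ... mutate the shared table ... */
    }

    static void remove_entry(u16 id)
    {
        unsigned long flags;

        write_lock_irqsave(&table_lock, flags);
        __remove_entry(id);
        write_unlock_irqrestore(&table_lock, flags);
    }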
978 | static int device_change_notifier(struct notifier_block *nb, | ||
979 | unsigned long action, void *data) | ||
980 | { | ||
981 | struct device *dev = data; | ||
982 | struct pci_dev *pdev = to_pci_dev(dev); | ||
983 | u16 devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
984 | struct protection_domain *domain; | ||
985 | struct dma_ops_domain *dma_domain; | ||
986 | struct amd_iommu *iommu; | ||
987 | int order = amd_iommu_aperture_order; | ||
988 | unsigned long flags; | ||
989 | |||
990 | if (devid > amd_iommu_last_bdf) | ||
991 | goto out; | ||
992 | |||
993 | devid = amd_iommu_alias_table[devid]; | ||
994 | |||
995 | iommu = amd_iommu_rlookup_table[devid]; | ||
996 | if (iommu == NULL) | ||
997 | goto out; | ||
998 | |||
999 | domain = domain_for_device(devid); | ||
1000 | |||
1001 | if (domain && !dma_ops_domain(domain)) | ||
1002 | WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " | ||
1003 | "to a non-dma-ops domain\n", dev_name(dev)); | ||
1004 | |||
1005 | switch (action) { | ||
1006 | case BUS_NOTIFY_BOUND_DRIVER: | ||
1007 | if (domain) | ||
1008 | goto out; | ||
1009 | dma_domain = find_protection_domain(devid); | ||
1010 | if (!dma_domain) | ||
1011 | dma_domain = iommu->default_dom; | ||
1012 | attach_device(iommu, &dma_domain->domain, devid); | ||
1013 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | ||
1014 | "device %s\n", dma_domain->domain.id, dev_name(dev)); | ||
1015 | break; | ||
1016 | case BUS_NOTIFY_UNBIND_DRIVER: | ||
1017 | if (!domain) | ||
1018 | goto out; | ||
1019 | detach_device(domain, devid); | ||
1020 | break; | ||
1021 | case BUS_NOTIFY_ADD_DEVICE: | ||
1022 | /* allocate a protection domain if a device is added */ | ||
1023 | dma_domain = find_protection_domain(devid); | ||
1024 | if (dma_domain) | ||
1025 | goto out; | ||
1026 | dma_domain = dma_ops_domain_alloc(iommu, order); | ||
1027 | if (!dma_domain) | ||
1028 | goto out; | ||
1029 | dma_domain->target_dev = devid; | ||
1030 | |||
1031 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1032 | list_add_tail(&dma_domain->list, &iommu_pd_list); | ||
1033 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1034 | |||
1035 | break; | ||
1036 | default: | ||
1037 | goto out; | ||
1038 | } | ||
1039 | |||
1040 | iommu_queue_inv_dev_entry(iommu, devid); | ||
1041 | iommu_completion_wait(iommu); | ||
765 | 1042 | ||
766 | iommu->need_sync = 1; | 1043 | out: |
1044 | return 0; | ||
767 | } | 1045 | } |
768 | 1046 | ||
1047 | struct notifier_block device_nb = { | ||
1048 | .notifier_call = device_change_notifier, | ||
1049 | }; | ||
1050 | |||
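device_nb plugs the handler above into the driver core: once bus_register_notifier(&pci_bus_type, &device_nb) runs in amd_iommu_init_dma_ops() below, the callback fires for the BUS_NOTIFY_* events handled above whenever a PCI device appears or a driver binds or unbinds. A minimal, self-contained sketch of the same mechanism:

    #include <linux/device.h>
    #include <linux/notifier.h>
    #include <linux/pci.h>

    static int my_notifier(struct notifier_block *nb,
                           unsigned long action, void *data)
    {
        struct device *dev = data;

        if (action == BUS_NOTIFY_ADD_DEVICE)
            dev_info(dev, "new device on the PCI bus\n");

        return 0;    /* == NOTIFY_DONE, don't veto the event */
    }

    static struct notifier_block my_nb = {
        .notifier_call = my_notifier,
    };

    /* at init time: bus_register_notifier(&pci_bus_type, &my_nb); */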
769 | /***************************************************************************** | 1051 | /***************************************************************************** |
770 | * | 1052 | * |
771 | * The next functions belong to the dma_ops mapping/unmapping code. | 1053 | * The next functions belong to the dma_ops mapping/unmapping code. |
@@ -801,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) | |||
801 | list_for_each_entry(entry, &iommu_pd_list, list) { | 1083 | list_for_each_entry(entry, &iommu_pd_list, list) { |
802 | if (entry->target_dev == devid) { | 1084 | if (entry->target_dev == devid) { |
803 | ret = entry; | 1085 | ret = entry; |
804 | list_del(&ret->list); | ||
805 | break; | 1086 | break; |
806 | } | 1087 | } |
807 | } | 1088 | } |
@@ -852,12 +1133,14 @@ static int get_device_resources(struct device *dev, | |||
852 | if (!dma_dom) | 1133 | if (!dma_dom) |
853 | dma_dom = (*iommu)->default_dom; | 1134 | dma_dom = (*iommu)->default_dom; |
854 | *domain = &dma_dom->domain; | 1135 | *domain = &dma_dom->domain; |
855 | set_device_domain(*iommu, *domain, *bdf); | 1136 | attach_device(*iommu, *domain, *bdf); |
856 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 1137 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
857 | "device ", (*domain)->id); | 1138 | "device %s\n", (*domain)->id, dev_name(dev)); |
858 | print_devid(_bdf, 1); | ||
859 | } | 1139 | } |
860 | 1140 | ||
1141 | if (domain_for_device(_bdf) == NULL) | ||
1142 | attach_device(*iommu, *domain, _bdf); | ||
1143 | |||
861 | return 1; | 1144 | return 1; |
862 | } | 1145 | } |
863 | 1146 | ||
@@ -908,7 +1191,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
908 | if (address >= dom->aperture_size) | 1191 | if (address >= dom->aperture_size) |
909 | return; | 1192 | return; |
910 | 1193 | ||
911 | WARN_ON(address & 0xfffULL || address > dom->aperture_size); | 1194 | WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); |
912 | 1195 | ||
913 | pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; | 1196 | pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; |
914 | pte += IOMMU_PTE_L0_INDEX(address); | 1197 | pte += IOMMU_PTE_L0_INDEX(address); |
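The reworked WARN_ON expresses the same page-alignment test without a magic number: with 4k pages PAGE_MASK is ~0xfffUL, so address & ~PAGE_MASK checks the same low bits as the old address & 0xfffULL, but the new spelling tracks PAGE_SIZE and also tightens the aperture bound from > to >=. The identity, spelled out under the standard 4k assumption:

    #define MY_PAGE_SHIFT    12                       /* 4k pages assumed */
    #define MY_PAGE_SIZE     (1UL << MY_PAGE_SHIFT)   /* 0x1000 */
    #define MY_PAGE_MASK     (~(MY_PAGE_SIZE - 1))    /* ...fffff000 */

    /* non-zero iff addr is not page-aligned; equal to addr & 0xfff here */
    static inline unsigned long page_misalign(unsigned long addr)
    {
        return addr & ~MY_PAGE_MASK;
    }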
@@ -920,8 +1203,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
920 | 1203 | ||
921 | /* | 1204 | /* |
922 | * This function contains common code for mapping of a physically | 1205 | * This function contains common code for mapping of a physically |
923 | * contiguous memory region into DMA address space. It is uses by all | 1206 | * contiguous memory region into DMA address space. It is used by all |
924 | * mapping functions provided by this IOMMU driver. | 1207 | * mapping functions provided with this IOMMU driver. |
925 | * Must be called with the domain lock held. | 1208 | * Must be called with the domain lock held. |
926 | */ | 1209 | */ |
927 | static dma_addr_t __map_single(struct device *dev, | 1210 | static dma_addr_t __map_single(struct device *dev, |
@@ -942,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev, | |||
942 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); | 1225 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); |
943 | paddr &= PAGE_MASK; | 1226 | paddr &= PAGE_MASK; |
944 | 1227 | ||
1228 | INC_STATS_COUNTER(total_map_requests); | ||
1229 | |||
1230 | if (pages > 1) | ||
1231 | INC_STATS_COUNTER(cross_page); | ||
1232 | |||
945 | if (align) | 1233 | if (align) |
946 | align_mask = (1UL << get_order(size)) - 1; | 1234 | align_mask = (1UL << get_order(size)) - 1; |
947 | 1235 | ||
@@ -958,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev, | |||
958 | } | 1246 | } |
959 | address += offset; | 1247 | address += offset; |
960 | 1248 | ||
1249 | ADD_STATS_COUNTER(alloced_io_mem, size); | ||
1250 | |||
961 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 1251 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
962 | iommu_flush_tlb(iommu, dma_dom->domain.id); | 1252 | iommu_flush_tlb(iommu, dma_dom->domain.id); |
963 | dma_dom->need_flush = false; | 1253 | dma_dom->need_flush = false; |
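The INC/ADD/SUB_STATS_COUNTER calls that now dot the mapping paths belong to the amd_iommu_stats_init() instrumentation registered at the end of this patch; their definitions are not part of this hunk. A plausible sketch, assuming plain u64 counters that compile to nothing when the stats option is off:

    /* Assumed implementation; only the macro names appear in this diff. */
    #ifdef CONFIG_AMD_IOMMU_STATS
    struct iommu_counter {
        const char *name;
        u64 value;        /* presumably exported via debugfs */
    };

    #define INC_STATS_COUNTER(cnt)     ((cnt).value += 1)
    #define ADD_STATS_COUNTER(cnt, x)  ((cnt).value += (x))
    #define SUB_STATS_COUNTER(cnt, x)  ((cnt).value -= (x))
    #else
    #define INC_STATS_COUNTER(cnt)     do { } while (0)
    #define ADD_STATS_COUNTER(cnt, x)  do { } while (0)
    #define SUB_STATS_COUNTER(cnt, x)  do { } while (0)
    #endif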
@@ -981,7 +1271,8 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
981 | dma_addr_t i, start; | 1271 | dma_addr_t i, start; |
982 | unsigned int pages; | 1272 | unsigned int pages; |
983 | 1273 | ||
984 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) | 1274 | if ((dma_addr == bad_dma_address) || |
1275 | (dma_addr + size > dma_dom->aperture_size)) | ||
985 | return; | 1276 | return; |
986 | 1277 | ||
987 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); | 1278 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); |
@@ -993,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
993 | start += PAGE_SIZE; | 1284 | start += PAGE_SIZE; |
994 | } | 1285 | } |
995 | 1286 | ||
1287 | SUB_STATS_COUNTER(alloced_io_mem, size); | ||
1288 | |||
996 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1289 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
997 | 1290 | ||
998 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1291 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
@@ -1014,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
1014 | dma_addr_t addr; | 1307 | dma_addr_t addr; |
1015 | u64 dma_mask; | 1308 | u64 dma_mask; |
1016 | 1309 | ||
1310 | INC_STATS_COUNTER(cnt_map_single); | ||
1311 | |||
1017 | if (!check_device(dev)) | 1312 | if (!check_device(dev)) |
1018 | return bad_dma_address; | 1313 | return bad_dma_address; |
1019 | 1314 | ||
@@ -1025,14 +1320,16 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
1025 | /* device not handled by any AMD IOMMU */ | 1320 | /* device not handled by any AMD IOMMU */ |
1026 | return (dma_addr_t)paddr; | 1321 | return (dma_addr_t)paddr; |
1027 | 1322 | ||
1323 | if (!dma_ops_domain(domain)) | ||
1324 | return bad_dma_address; | ||
1325 | |||
1028 | spin_lock_irqsave(&domain->lock, flags); | 1326 | spin_lock_irqsave(&domain->lock, flags); |
1029 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, | 1327 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
1030 | dma_mask); | 1328 | dma_mask); |
1031 | if (addr == bad_dma_address) | 1329 | if (addr == bad_dma_address) |
1032 | goto out; | 1330 | goto out; |
1033 | 1331 | ||
1034 | if (unlikely(iommu->need_sync)) | 1332 | iommu_completion_wait(iommu); |
1035 | iommu_completion_wait(iommu); | ||
1036 | 1333 | ||
1037 | out: | 1334 | out: |
1038 | spin_unlock_irqrestore(&domain->lock, flags); | 1335 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1051,17 +1348,21 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
1051 | struct protection_domain *domain; | 1348 | struct protection_domain *domain; |
1052 | u16 devid; | 1349 | u16 devid; |
1053 | 1350 | ||
1351 | INC_STATS_COUNTER(cnt_unmap_single); | ||
1352 | |||
1054 | if (!check_device(dev) || | 1353 | if (!check_device(dev) || |
1055 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1354 | !get_device_resources(dev, &iommu, &domain, &devid)) |
1056 | /* device not handled by any AMD IOMMU */ | 1355 | /* device not handled by any AMD IOMMU */ |
1057 | return; | 1356 | return; |
1058 | 1357 | ||
1358 | if (!dma_ops_domain(domain)) | ||
1359 | return; | ||
1360 | |||
1059 | spin_lock_irqsave(&domain->lock, flags); | 1361 | spin_lock_irqsave(&domain->lock, flags); |
1060 | 1362 | ||
1061 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1363 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
1062 | 1364 | ||
1063 | if (unlikely(iommu->need_sync)) | 1365 | iommu_completion_wait(iommu); |
1064 | iommu_completion_wait(iommu); | ||
1065 | 1366 | ||
1066 | spin_unlock_irqrestore(&domain->lock, flags); | 1367 | spin_unlock_irqrestore(&domain->lock, flags); |
1067 | } | 1368 | } |
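Two changes recur in every dma_ops entry point from here on: an early bail-out when the device's domain is not a dma_ops domain (the device may meanwhile belong to an IOMMU-API domain, for example one assigned to a guest, and the DMA-mapping paths must not touch that page table), and the removal of the need_sync shortcut in favour of an unconditional completion-wait. The resulting shape of a mapping entry point, condensed and illustrative rather than verbatim:

    static dma_addr_t my_map_single(struct device *dev, phys_addr_t paddr,
                                    size_t size, int dir)
    {
        unsigned long flags;
        struct amd_iommu *iommu;
        struct protection_domain *domain;
        u16 devid;
        dma_addr_t addr;

        if (!check_device(dev) ||
            !get_device_resources(dev, &iommu, &domain, &devid))
            return (dma_addr_t)paddr;    /* not behind an IOMMU */

        if (!dma_ops_domain(domain))     /* owned by the IOMMU API */
            return bad_dma_address;

        spin_lock_irqsave(&domain->lock, flags);
        addr = __map_single(dev, iommu, domain->priv, paddr, size, dir,
                            false, *dev->dma_mask);
        if (addr != bad_dma_address)
            iommu_completion_wait(iommu);    /* no need_sync test anymore */
        spin_unlock_irqrestore(&domain->lock, flags);

        return addr;
    }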
@@ -1101,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1101 | int mapped_elems = 0; | 1402 | int mapped_elems = 0; |
1102 | u64 dma_mask; | 1403 | u64 dma_mask; |
1103 | 1404 | ||
1405 | INC_STATS_COUNTER(cnt_map_sg); | ||
1406 | |||
1104 | if (!check_device(dev)) | 1407 | if (!check_device(dev)) |
1105 | return 0; | 1408 | return 0; |
1106 | 1409 | ||
@@ -1111,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1111 | if (!iommu || !domain) | 1414 | if (!iommu || !domain) |
1112 | return map_sg_no_iommu(dev, sglist, nelems, dir); | 1415 | return map_sg_no_iommu(dev, sglist, nelems, dir); |
1113 | 1416 | ||
1417 | if (!dma_ops_domain(domain)) | ||
1418 | return 0; | ||
1419 | |||
1114 | spin_lock_irqsave(&domain->lock, flags); | 1420 | spin_lock_irqsave(&domain->lock, flags); |
1115 | 1421 | ||
1116 | for_each_sg(sglist, s, nelems, i) { | 1422 | for_each_sg(sglist, s, nelems, i) { |
@@ -1127,8 +1433,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1127 | goto unmap; | 1433 | goto unmap; |
1128 | } | 1434 | } |
1129 | 1435 | ||
1130 | if (unlikely(iommu->need_sync)) | 1436 | iommu_completion_wait(iommu); |
1131 | iommu_completion_wait(iommu); | ||
1132 | 1437 | ||
1133 | out: | 1438 | out: |
1134 | spin_unlock_irqrestore(&domain->lock, flags); | 1439 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1161,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1161 | u16 devid; | 1466 | u16 devid; |
1162 | int i; | 1467 | int i; |
1163 | 1468 | ||
1469 | INC_STATS_COUNTER(cnt_unmap_sg); | ||
1470 | |||
1164 | if (!check_device(dev) || | 1471 | if (!check_device(dev) || |
1165 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1472 | !get_device_resources(dev, &iommu, &domain, &devid)) |
1166 | return; | 1473 | return; |
1167 | 1474 | ||
1475 | if (!dma_ops_domain(domain)) | ||
1476 | return; | ||
1477 | |||
1168 | spin_lock_irqsave(&domain->lock, flags); | 1478 | spin_lock_irqsave(&domain->lock, flags); |
1169 | 1479 | ||
1170 | for_each_sg(sglist, s, nelems, i) { | 1480 | for_each_sg(sglist, s, nelems, i) { |
@@ -1173,8 +1483,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1173 | s->dma_address = s->dma_length = 0; | 1483 | s->dma_address = s->dma_length = 0; |
1174 | } | 1484 | } |
1175 | 1485 | ||
1176 | if (unlikely(iommu->need_sync)) | 1486 | iommu_completion_wait(iommu); |
1177 | iommu_completion_wait(iommu); | ||
1178 | 1487 | ||
1179 | spin_unlock_irqrestore(&domain->lock, flags); | 1488 | spin_unlock_irqrestore(&domain->lock, flags); |
1180 | } | 1489 | } |
@@ -1193,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1193 | phys_addr_t paddr; | 1502 | phys_addr_t paddr; |
1194 | u64 dma_mask = dev->coherent_dma_mask; | 1503 | u64 dma_mask = dev->coherent_dma_mask; |
1195 | 1504 | ||
1505 | INC_STATS_COUNTER(cnt_alloc_coherent); | ||
1506 | |||
1196 | if (!check_device(dev)) | 1507 | if (!check_device(dev)) |
1197 | return NULL; | 1508 | return NULL; |
1198 | 1509 | ||
@@ -1211,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1211 | return virt_addr; | 1522 | return virt_addr; |
1212 | } | 1523 | } |
1213 | 1524 | ||
1525 | if (!dma_ops_domain(domain)) | ||
1526 | goto out_free; | ||
1527 | |||
1214 | if (!dma_mask) | 1528 | if (!dma_mask) |
1215 | dma_mask = *dev->dma_mask; | 1529 | dma_mask = *dev->dma_mask; |
1216 | 1530 | ||
@@ -1219,19 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1219 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1533 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
1220 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 1534 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1221 | 1535 | ||
1222 | if (*dma_addr == bad_dma_address) { | 1536 | if (*dma_addr == bad_dma_address) |
1223 | free_pages((unsigned long)virt_addr, get_order(size)); | 1537 | goto out_free; |
1224 | virt_addr = NULL; | ||
1225 | goto out; | ||
1226 | } | ||
1227 | 1538 | ||
1228 | if (unlikely(iommu->need_sync)) | 1539 | iommu_completion_wait(iommu); |
1229 | iommu_completion_wait(iommu); | ||
1230 | 1540 | ||
1231 | out: | ||
1232 | spin_unlock_irqrestore(&domain->lock, flags); | 1541 | spin_unlock_irqrestore(&domain->lock, flags); |
1233 | 1542 | ||
1234 | return virt_addr; | 1543 | return virt_addr; |
1544 | |||
1545 | out_free: | ||
1546 | |||
1547 | free_pages((unsigned long)virt_addr, get_order(size)); | ||
1548 | |||
1549 | return NULL; | ||
1235 | } | 1550 | } |
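alloc_coherent() is rewritten around a single out_free label, the standard kernel idiom for unwinding one resource from every failure path: both the new dma_ops-domain check and a failed __map_single() now reach the same free_pages()/return NULL tail. The idiom in miniature, with hypothetical helper names:

    /* Helpers are hypothetical, named for illustration only. */
    void *alloc_and_map(size_t size)
    {
        void *buf = alloc_buffer(size);

        if (!buf)
            return NULL;

        if (setup_mapping(buf, size) != 0)
            goto out_free;
        if (publish_buffer(buf) != 0)
            goto out_free;

        return buf;

    out_free:
        free_buffer(buf);    /* single unwind point for all failures */
        return NULL;
    }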
1236 | 1551 | ||
1237 | /* | 1552 | /* |
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
1245 | struct protection_domain *domain; | 1560 | struct protection_domain *domain; |
1246 | u16 devid; | 1561 | u16 devid; |
1247 | 1562 | ||
1563 | INC_STATS_COUNTER(cnt_free_coherent); | ||
1564 | |||
1248 | if (!check_device(dev)) | 1565 | if (!check_device(dev)) |
1249 | return; | 1566 | return; |
1250 | 1567 | ||
@@ -1253,12 +1570,14 @@ static void free_coherent(struct device *dev, size_t size, | |||
1253 | if (!iommu || !domain) | 1570 | if (!iommu || !domain) |
1254 | goto free_mem; | 1571 | goto free_mem; |
1255 | 1572 | ||
1573 | if (!dma_ops_domain(domain)) | ||
1574 | goto free_mem; | ||
1575 | |||
1256 | spin_lock_irqsave(&domain->lock, flags); | 1576 | spin_lock_irqsave(&domain->lock, flags); |
1257 | 1577 | ||
1258 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1578 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
1259 | 1579 | ||
1260 | if (unlikely(iommu->need_sync)) | 1580 | iommu_completion_wait(iommu); |
1261 | iommu_completion_wait(iommu); | ||
1262 | 1581 | ||
1263 | spin_unlock_irqrestore(&domain->lock, flags); | 1582 | spin_unlock_irqrestore(&domain->lock, flags); |
1264 | 1583 | ||
@@ -1297,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) | |||
1297 | * we don't need to preallocate the protection domains anymore. | 1616 | * we don't need to preallocate the protection domains anymore. |
1298 | * For now we have to. | 1617 | * For now we have to. |
1299 | */ | 1618 | */ |
1300 | void prealloc_protection_domains(void) | 1619 | static void prealloc_protection_domains(void) |
1301 | { | 1620 | { |
1302 | struct pci_dev *dev = NULL; | 1621 | struct pci_dev *dev = NULL; |
1303 | struct dma_ops_domain *dma_dom; | 1622 | struct dma_ops_domain *dma_dom; |
@@ -1306,7 +1625,7 @@ void prealloc_protection_domains(void) | |||
1306 | u16 devid; | 1625 | u16 devid; |
1307 | 1626 | ||
1308 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1627 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1309 | devid = (dev->bus->number << 8) | dev->devfn; | 1628 | devid = calc_devid(dev->bus->number, dev->devfn); |
1310 | if (devid > amd_iommu_last_bdf) | 1629 | if (devid > amd_iommu_last_bdf) |
1311 | continue; | 1630 | continue; |
1312 | devid = amd_iommu_alias_table[devid]; | 1631 | devid = amd_iommu_alias_table[devid]; |
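calc_devid() replaces the open-coded (bus << 8) | devfn: a PCI requester ID is just the 8-bit bus number concatenated with the 8-bit devfn (5 bits slot, 3 bits function), so both spellings produce the same 16-bit BDF. Presumed definition plus a worked value:

    /* Presumed inline helper; the packing is standard PCI BDF math. */
    static inline u16 calc_devid(u8 bus, u8 devfn)
    {
        return ((u16)bus << 8) | devfn;
    }

    /* bus 0x01, slot 0x02, function 0x03:
     * devfn = (0x02 << 3) | 0x03 = 0x13, so devid = 0x0113 */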
@@ -1353,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
1353 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1672 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); |
1354 | if (iommu->default_dom == NULL) | 1673 | if (iommu->default_dom == NULL) |
1355 | return -ENOMEM; | 1674 | return -ENOMEM; |
1675 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | ||
1356 | ret = iommu_init_unity_mappings(iommu); | 1676 | ret = iommu_init_unity_mappings(iommu); |
1357 | if (ret) | 1677 | if (ret) |
1358 | goto free_domains; | 1678 | goto free_domains; |
@@ -1376,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void) | |||
1376 | /* Make the driver finally visible to the drivers */ | 1696 | /* Make the driver finally visible to the drivers */ |
1377 | dma_ops = &amd_iommu_dma_ops; | 1697 | dma_ops = &amd_iommu_dma_ops; |
1378 | 1698 | ||
1699 | register_iommu(&amd_iommu_ops); | ||
1700 | |||
1701 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
1702 | |||
1703 | amd_iommu_stats_init(); | ||
1704 | |||
1379 | return 0; | 1705 | return 0; |
1380 | 1706 | ||
1381 | free_domains: | 1707 | free_domains: |
@@ -1387,3 +1713,224 @@ free_domains: | |||
1387 | 1713 | ||
1388 | return ret; | 1714 | return ret; |
1389 | } | 1715 | } |
1716 | |||
1717 | /***************************************************************************** | ||
1718 | * | ||
1719 | * The following functions belong to the exported interface of AMD IOMMU | ||
1720 | * | ||
1721 | * This interface allows access to lower level functions of the IOMMU | ||
1722 | * like protection domain handling and assignment of devices to domains | ||
1723 | * which is not possible with the dma_ops interface. | ||
1724 | * | ||
1725 | *****************************************************************************/ | ||
1726 | |||
1727 | static void cleanup_domain(struct protection_domain *domain) | ||
1728 | { | ||
1729 | unsigned long flags; | ||
1730 | u16 devid; | ||
1731 | |||
1732 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1733 | |||
1734 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) | ||
1735 | if (amd_iommu_pd_table[devid] == domain) | ||
1736 | __detach_device(domain, devid); | ||
1737 | |||
1738 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1739 | } | ||
1740 | |||
1741 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
1742 | { | ||
1743 | struct protection_domain *domain; | ||
1744 | |||
1745 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | ||
1746 | if (!domain) | ||
1747 | return -ENOMEM; | ||
1748 | |||
1749 | spin_lock_init(&domain->lock); | ||
1750 | domain->mode = PAGE_MODE_3_LEVEL; | ||
1751 | domain->id = domain_id_alloc(); | ||
1752 | if (!domain->id) | ||
1753 | goto out_free; | ||
1754 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | ||
1755 | if (!domain->pt_root) | ||
1756 | goto out_free; | ||
1757 | |||
1758 | dom->priv = domain; | ||
1759 | |||
1760 | return 0; | ||
1761 | |||
1762 | out_free: | ||
1763 | kfree(domain); | ||
1764 | |||
1765 | return -ENOMEM; | ||
1766 | } | ||
1767 | |||
1768 | static void amd_iommu_domain_destroy(struct iommu_domain *dom) | ||
1769 | { | ||
1770 | struct protection_domain *domain = dom->priv; | ||
1771 | |||
1772 | if (!domain) | ||
1773 | return; | ||
1774 | |||
1775 | if (domain->dev_cnt > 0) | ||
1776 | cleanup_domain(domain); | ||
1777 | |||
1778 | BUG_ON(domain->dev_cnt != 0); | ||
1779 | |||
1780 | free_pagetable(domain); | ||
1781 | |||
1782 | domain_id_free(domain->id); | ||
1783 | |||
1784 | kfree(domain); | ||
1785 | |||
1786 | dom->priv = NULL; | ||
1787 | } | ||
1788 | |||
1789 | static void amd_iommu_detach_device(struct iommu_domain *dom, | ||
1790 | struct device *dev) | ||
1791 | { | ||
1792 | struct protection_domain *domain = dom->priv; | ||
1793 | struct amd_iommu *iommu; | ||
1794 | struct pci_dev *pdev; | ||
1795 | u16 devid; | ||
1796 | |||
1797 | if (dev->bus != &pci_bus_type) | ||
1798 | return; | ||
1799 | |||
1800 | pdev = to_pci_dev(dev); | ||
1801 | |||
1802 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1803 | |||
1804 | if (devid > 0) | ||
1805 | detach_device(domain, devid); | ||
1806 | |||
1807 | iommu = amd_iommu_rlookup_table[devid]; | ||
1808 | if (!iommu) | ||
1809 | return; | ||
1810 | |||
1811 | iommu_queue_inv_dev_entry(iommu, devid); | ||
1812 | iommu_completion_wait(iommu); | ||
1813 | } | ||
1814 | |||
1815 | static int amd_iommu_attach_device(struct iommu_domain *dom, | ||
1816 | struct device *dev) | ||
1817 | { | ||
1818 | struct protection_domain *domain = dom->priv; | ||
1819 | struct protection_domain *old_domain; | ||
1820 | struct amd_iommu *iommu; | ||
1821 | struct pci_dev *pdev; | ||
1822 | u16 devid; | ||
1823 | |||
1824 | if (dev->bus != &pci_bus_type) | ||
1825 | return -EINVAL; | ||
1826 | |||
1827 | pdev = to_pci_dev(dev); | ||
1828 | |||
1829 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1830 | |||
1831 | if (devid >= amd_iommu_last_bdf || | ||
1832 | devid != amd_iommu_alias_table[devid]) | ||
1833 | return -EINVAL; | ||
1834 | |||
1835 | iommu = amd_iommu_rlookup_table[devid]; | ||
1836 | if (!iommu) | ||
1837 | return -EINVAL; | ||
1838 | |||
1839 | old_domain = domain_for_device(devid); | ||
1840 | if (old_domain) | ||
1841 | return -EBUSY; | ||
1842 | |||
1843 | attach_device(iommu, domain, devid); | ||
1844 | |||
1845 | iommu_completion_wait(iommu); | ||
1846 | |||
1847 | return 0; | ||
1848 | } | ||
1849 | |||
1850 | static int amd_iommu_map_range(struct iommu_domain *dom, | ||
1851 | unsigned long iova, phys_addr_t paddr, | ||
1852 | size_t size, int iommu_prot) | ||
1853 | { | ||
1854 | struct protection_domain *domain = dom->priv; | ||
1855 | unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
1856 | int prot = 0; | ||
1857 | int ret; | ||
1858 | |||
1859 | if (iommu_prot & IOMMU_READ) | ||
1860 | prot |= IOMMU_PROT_IR; | ||
1861 | if (iommu_prot & IOMMU_WRITE) | ||
1862 | prot |= IOMMU_PROT_IW; | ||
1863 | |||
1864 | iova &= PAGE_MASK; | ||
1865 | paddr &= PAGE_MASK; | ||
1866 | |||
1867 | for (i = 0; i < npages; ++i) { | ||
1868 | ret = iommu_map_page(domain, iova, paddr, prot); | ||
1869 | if (ret) | ||
1870 | return ret; | ||
1871 | |||
1872 | iova += PAGE_SIZE; | ||
1873 | paddr += PAGE_SIZE; | ||
1874 | } | ||
1875 | |||
1876 | return 0; | ||
1877 | } | ||
1878 | |||
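amd_iommu_map_range() walks the region one 4k page at a time and installs a PTE per page through iommu_map_page(). Consumers such as device assignment are meant to go through the generic wrappers in include/linux/iommu.h rather than call amd_iommu_ops directly; a hedged usage sketch against that API as it stood in this series (wrapper names are assumptions where the diff does not show them):

    #include <linux/iommu.h>
    #include <linux/pci.h>

    static int assign_example(struct pci_dev *pdev, phys_addr_t paddr)
    {
        struct iommu_domain *dom;
        int r;

        dom = iommu_domain_alloc();
        if (!dom)
            return -ENOMEM;

        r = iommu_attach_device(dom, &pdev->dev);
        if (r)
            goto out_free;

        /* 64k at IOVA 0x100000: 16 iommu_map_page() calls inside */
        r = iommu_map_range(dom, 0x100000, paddr, 64 * 1024,
                            IOMMU_READ | IOMMU_WRITE);
        if (r)
            goto out_detach;

        return 0;

    out_detach:
        iommu_detach_device(dom, &pdev->dev);
    out_free:
        iommu_domain_free(dom);
        return r;
    }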
1879 | static void amd_iommu_unmap_range(struct iommu_domain *dom, | ||
1880 | unsigned long iova, size_t size) | ||
1881 | { | ||
1882 | |||
1883 | struct protection_domain *domain = dom->priv; | ||
1884 | unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); | ||
1885 | |||
1886 | iova &= PAGE_MASK; | ||
1887 | |||
1888 | for (i = 0; i < npages; ++i) { | ||
1889 | iommu_unmap_page(domain, iova); | ||
1890 | iova += PAGE_SIZE; | ||
1891 | } | ||
1892 | |||
1893 | iommu_flush_domain(domain->id); | ||
1894 | } | ||
1895 | |||
1896 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | ||
1897 | unsigned long iova) | ||
1898 | { | ||
1899 | struct protection_domain *domain = dom->priv; | ||
1900 | unsigned long offset = iova & ~PAGE_MASK; | ||
1901 | phys_addr_t paddr; | ||
1902 | u64 *pte; | ||
1903 | |||
1904 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | ||
1905 | |||
1906 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1907 | return 0; | ||
1908 | |||
1909 | pte = IOMMU_PTE_PAGE(*pte); | ||
1910 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
1911 | |||
1912 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1913 | return 0; | ||
1914 | |||
1915 | pte = IOMMU_PTE_PAGE(*pte); | ||
1916 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
1917 | |||
1918 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1919 | return 0; | ||
1920 | |||
1921 | paddr = *pte & IOMMU_PAGE_MASK; | ||
1922 | paddr |= offset; | ||
1923 | |||
1924 | return paddr; | ||
1925 | } | ||
1926 | |||
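amd_iommu_iova_to_phys() performs in software the same walk the hardware does: with PAGE_MODE_3_LEVEL each level consumes 9 bits of the IOVA and the last 12 bits are the in-page offset. The IOMMU_PTE_Lx_INDEX macros are not shown in this diff; presumably they reduce to the standard 9-9-9-12 split sketched here (an assumption consistent with the three-level walk above):

    /* Assumed index layout for a 3-level, 4k-page table:
     * bits 38..30 -> L2, 29..21 -> L1, 20..12 -> L0, 11..0 -> offset */
    #define L2_INDEX(iova)    (((iova) >> 30) & 0x1ffULL)
    #define L1_INDEX(iova)    (((iova) >> 21) & 0x1ffULL)
    #define L0_INDEX(iova)    (((iova) >> 12) & 0x1ffULL)
    #define PG_OFFSET(iova)   ((iova) & 0xfffULL)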
1927 | static struct iommu_ops amd_iommu_ops = { | ||
1928 | .domain_init = amd_iommu_domain_init, | ||
1929 | .domain_destroy = amd_iommu_domain_destroy, | ||
1930 | .attach_dev = amd_iommu_attach_device, | ||
1931 | .detach_dev = amd_iommu_detach_device, | ||
1932 | .map = amd_iommu_map_range, | ||
1933 | .unmap = amd_iommu_unmap_range, | ||
1934 | .iova_to_phys = amd_iommu_iova_to_phys, | ||
1935 | }; | ||
1936 | |||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0cdcda35a05f..42c33cebf00f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * definitions for the ACPI scanning code | 34 | * definitions for the ACPI scanning code |
@@ -121,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have | |||
121 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 122 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
122 | we find in ACPI */ | 123 | we find in ACPI */ |
123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 124 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ |
124 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | 125 | bool amd_iommu_isolate = true; /* if true, device isolation is |
126 | enabled */ | ||
125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 127 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
126 | 128 | ||
127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 129 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
@@ -242,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
242 | } | 244 | } |
243 | 245 | ||
244 | /* Function to enable the hardware */ | 246 | /* Function to enable the hardware */ |
245 | void __init iommu_enable(struct amd_iommu *iommu) | 247 | static void __init iommu_enable(struct amd_iommu *iommu) |
246 | { | 248 | { |
247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " | 249 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", |
248 | "at %02x:%02x.%x cap 0x%hx\n", | 250 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
249 | iommu->dev->bus->number, | ||
250 | PCI_SLOT(iommu->dev->devfn), | ||
251 | PCI_FUNC(iommu->dev->devfn), | ||
252 | iommu->cap_ptr); | ||
253 | 251 | ||
254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 252 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
255 | } | 253 | } |
256 | 254 | ||
257 | /* Function to enable IOMMU event logging and event interrupts */ | 255 | /* Function to enable IOMMU event logging and event interrupts */ |
258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | 256 | static void __init iommu_enable_event_logging(struct amd_iommu *iommu) |
259 | { | 257 | { |
260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | 258 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); |
261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | 259 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); |
@@ -427,6 +425,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
427 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 425 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
428 | &entry, sizeof(entry)); | 426 | &entry, sizeof(entry)); |
429 | 427 | ||
428 | /* set head and tail to zero manually */ | ||
429 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
430 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
431 | |||
430 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | 432 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); |
431 | 433 | ||
432 | return cmd_buf; | 434 | return cmd_buf; |
@@ -1074,7 +1076,8 @@ int __init amd_iommu_init(void) | |||
1074 | goto free; | 1076 | goto free; |
1075 | 1077 | ||
1076 | /* IOMMU rlookup table - find the IOMMU for a specific device */ | 1078 | /* IOMMU rlookup table - find the IOMMU for a specific device */ |
1077 | amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, | 1079 | amd_iommu_rlookup_table = (void *)__get_free_pages( |
1080 | GFP_KERNEL | __GFP_ZERO, | ||
1078 | get_order(rlookup_table_size)); | 1081 | get_order(rlookup_table_size)); |
1079 | if (amd_iommu_rlookup_table == NULL) | 1082 | if (amd_iommu_rlookup_table == NULL) |
1080 | goto free; | 1083 | goto free; |
@@ -1212,8 +1215,10 @@ static int __init parse_amd_iommu_options(char *str) | |||
1212 | { | 1215 | { |
1213 | for (; *str; ++str) { | 1216 | for (; *str; ++str) { |
1214 | if (strncmp(str, "isolate", 7) == 0) | 1217 | if (strncmp(str, "isolate", 7) == 0) |
1215 | amd_iommu_isolate = 1; | 1218 | amd_iommu_isolate = true; |
1216 | if (strncmp(str, "fullflush", 11) == 0) | 1219 | if (strncmp(str, "share", 5) == 0) |
1220 | amd_iommu_isolate = false; | ||
1221 | if (strncmp(str, "fullflush", 9) == 0) | ||
1217 | amd_iommu_unmap_flush = true; | 1222 | amd_iommu_unmap_flush = true; |
1218 | } | 1223 | } |
1219 | 1224 | ||
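Net effect on the amd_iommu= boot parameter: device isolation becomes the default, the new "share" keyword restores the old shared-domain behaviour, and the "fullflush" comparison length is corrected from 11 to 9 so the nine-character keyword also matches when more text follows it. Resulting usage, summarized from the code above:

    amd_iommu=isolate      one protection domain per device (now the default)
    amd_iommu=share        one protection domain shared by all devices
    amd_iommu=fullflush    flush the IO/TLB on every unmap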
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9a32b37ee2ee..676debfc1702 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Firmware replacement code. | 2 | * Firmware replacement code. |
3 | * | 3 | * |
4 | * Work around broken BIOSes that don't set an aperture or only set the | 4 | * Work around broken BIOSes that don't set an aperture, only set the |
5 | * aperture in the AGP bridge. | 5 | * aperture in the AGP bridge, or set too small an aperture. |
6 | * | ||
6 | * If all fails map the aperture over some low memory. This is cheaper than | 7 | * If all fails map the aperture over some low memory. This is cheaper than |
7 | * doing bounce buffering. The memory is lost. This is done at early boot | 8 | * doing bounce buffering. The memory is lost. This is done at early boot |
8 | * because only the bootmem allocator can allocate 32+MB. | 9 | * because only the bootmem allocator can allocate 32+MB. |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f960bbc0..cf2ca19e62da 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Local APIC handling, local APIC timers | 2 | * Local APIC handling, local APIC timers |
3 | * | 3 | * |
4 | * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> | 4 | * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
5 | * | 5 | * |
6 | * Fixes | 6 | * Fixes |
7 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; | 7 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; |
@@ -14,49 +14,71 @@ | |||
14 | * Mikael Pettersson : PM converted to driver model. | 14 | * Mikael Pettersson : PM converted to driver model. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/init.h> | ||
18 | |||
19 | #include <linux/mm.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <linux/bootmem.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/mc146818rtc.h> | ||
24 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
25 | #include <linux/sysdev.h> | 18 | #include <linux/mc146818rtc.h> |
26 | #include <linux/ioport.h> | ||
27 | #include <linux/cpu.h> | ||
28 | #include <linux/clockchips.h> | ||
29 | #include <linux/acpi_pmtmr.h> | 19 | #include <linux/acpi_pmtmr.h> |
20 | #include <linux/clockchips.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/bootmem.h> | ||
23 | #include <linux/ftrace.h> | ||
24 | #include <linux/ioport.h> | ||
30 | #include <linux/module.h> | 25 | #include <linux/module.h> |
31 | #include <linux/dmi.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/delay.h> | ||
28 | #include <linux/timex.h> | ||
32 | #include <linux/dmar.h> | 29 | #include <linux/dmar.h> |
30 | #include <linux/init.h> | ||
31 | #include <linux/cpu.h> | ||
32 | #include <linux/dmi.h> | ||
33 | #include <linux/nmi.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/mm.h> | ||
33 | 36 | ||
34 | #include <asm/atomic.h> | ||
35 | #include <asm/smp.h> | ||
36 | #include <asm/mtrr.h> | ||
37 | #include <asm/mpspec.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/arch_hooks.h> | 37 | #include <asm/arch_hooks.h> |
40 | #include <asm/hpet.h> | ||
41 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
39 | #include <asm/genapic.h> | ||
40 | #include <asm/atomic.h> | ||
41 | #include <asm/mpspec.h> | ||
42 | #include <asm/i8253.h> | 42 | #include <asm/i8253.h> |
43 | #include <asm/nmi.h> | 43 | #include <asm/i8259.h> |
44 | #include <asm/idle.h> | ||
45 | #include <asm/proto.h> | 44 | #include <asm/proto.h> |
46 | #include <asm/timex.h> | ||
47 | #include <asm/apic.h> | 45 | #include <asm/apic.h> |
48 | #include <asm/i8259.h> | 46 | #include <asm/desc.h> |
47 | #include <asm/hpet.h> | ||
48 | #include <asm/idle.h> | ||
49 | #include <asm/mtrr.h> | ||
50 | #include <asm/smp.h> | ||
51 | |||
52 | unsigned int num_processors; | ||
53 | |||
54 | unsigned disabled_cpus __cpuinitdata; | ||
55 | |||
56 | /* Processor that is doing the boot up */ | ||
57 | unsigned int boot_cpu_physical_apicid = -1U; | ||
49 | 58 | ||
50 | #include <mach_apic.h> | 59 | /* |
51 | #include <mach_apicdef.h> | 60 | * The highest APIC ID seen during enumeration. |
52 | #include <mach_ipi.h> | 61 | * |
62 | * This determines the messaging protocol we can use: if all APIC IDs | ||
63 | * are in the 0 ... 7 range, then we can use logical addressing which | ||
64 | * has some performance advantages (better broadcasting). | ||
65 | * | ||
66 | * If there's an APIC ID above 7, we use physical addressing. | ||
67 | */ | ||
68 | unsigned int max_physical_apicid; | ||
53 | 69 | ||
54 | /* | 70 | /* |
55 | * Sanity check | 71 | * Bitmask of physically existing CPUs: |
56 | */ | 72 | */ |
57 | #if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) | 73 | physid_mask_t phys_cpu_present_map; |
58 | # error SPURIOUS_APIC_VECTOR definition error | 74 | |
59 | #endif | 75 | /* |
76 | * Map cpu index to physical APIC ID | ||
77 | */ | ||
78 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | ||
79 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | ||
80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | ||
81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | ||
60 | 82 | ||
61 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
62 | /* | 84 | /* |
@@ -97,8 +119,8 @@ __setup("apicpmtimer", setup_apicpmtimer); | |||
97 | #ifdef HAVE_X2APIC | 119 | #ifdef HAVE_X2APIC |
98 | int x2apic; | 120 | int x2apic; |
99 | /* x2apic enabled before OS handover */ | 121 | /* x2apic enabled before OS handover */ |
100 | int x2apic_preenabled; | 122 | static int x2apic_preenabled; |
101 | int disable_x2apic; | 123 | static int disable_x2apic; |
102 | static __init int setup_nox2apic(char *str) | 124 | static __init int setup_nox2apic(char *str) |
103 | { | 125 | { |
104 | disable_x2apic = 1; | 126 | disable_x2apic = 1; |
@@ -118,8 +140,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
118 | 140 | ||
119 | int first_system_vector = 0xfe; | 141 | int first_system_vector = 0xfe; |
120 | 142 | ||
121 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | ||
122 | |||
123 | /* | 143 | /* |
124 | * Debug level, exported for io_apic.c | 144 | * Debug level, exported for io_apic.c |
125 | */ | 145 | */ |
@@ -141,7 +161,7 @@ static int lapic_next_event(unsigned long delta, | |||
141 | struct clock_event_device *evt); | 161 | struct clock_event_device *evt); |
142 | static void lapic_timer_setup(enum clock_event_mode mode, | 162 | static void lapic_timer_setup(enum clock_event_mode mode, |
143 | struct clock_event_device *evt); | 163 | struct clock_event_device *evt); |
144 | static void lapic_timer_broadcast(cpumask_t mask); | 164 | static void lapic_timer_broadcast(const struct cpumask *mask); |
145 | static void apic_pm_activate(void); | 165 | static void apic_pm_activate(void); |
146 | 166 | ||
147 | /* | 167 | /* |
@@ -227,7 +247,7 @@ void xapic_icr_write(u32 low, u32 id) | |||
227 | apic_write(APIC_ICR, low); | 247 | apic_write(APIC_ICR, low); |
228 | } | 248 | } |
229 | 249 | ||
230 | u64 xapic_icr_read(void) | 250 | static u64 xapic_icr_read(void) |
231 | { | 251 | { |
232 | u32 icr1, icr2; | 252 | u32 icr1, icr2; |
233 | 253 | ||
@@ -267,7 +287,7 @@ void x2apic_icr_write(u32 low, u32 id) | |||
267 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); | 287 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); |
268 | } | 288 | } |
269 | 289 | ||
270 | u64 x2apic_icr_read(void) | 290 | static u64 x2apic_icr_read(void) |
271 | { | 291 | { |
272 | unsigned long val; | 292 | unsigned long val; |
273 | 293 | ||
@@ -441,6 +461,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
441 | v = apic_read(APIC_LVTT); | 461 | v = apic_read(APIC_LVTT); |
442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 462 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
443 | apic_write(APIC_LVTT, v); | 463 | apic_write(APIC_LVTT, v); |
464 | apic_write(APIC_TMICT, 0xffffffff); | ||
444 | break; | 465 | break; |
445 | case CLOCK_EVT_MODE_RESUME: | 466 | case CLOCK_EVT_MODE_RESUME: |
446 | /* Nothing to do here */ | 467 | /* Nothing to do here */ |
@@ -453,10 +474,10 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
453 | /* | 474 | /* |
454 | * Local APIC timer broadcast function | 475 | * Local APIC timer broadcast function |
455 | */ | 476 | */ |
456 | static void lapic_timer_broadcast(cpumask_t mask) | 477 | static void lapic_timer_broadcast(const struct cpumask *mask) |
457 | { | 478 | { |
458 | #ifdef CONFIG_SMP | 479 | #ifdef CONFIG_SMP |
459 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | 480 | apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
460 | #endif | 481 | #endif |
461 | } | 482 | } |
462 | 483 | ||
@@ -469,7 +490,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
469 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 490 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
470 | 491 | ||
471 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); | 492 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
472 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | 493 | levt->cpumask = cpumask_of(smp_processor_id()); |
473 | 494 | ||
474 | clockevents_register_device(levt); | 495 | clockevents_register_device(levt); |
475 | } | 496 | } |
@@ -534,7 +555,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
534 | } | 555 | } |
535 | } | 556 | } |
536 | 557 | ||
537 | static int __init calibrate_by_pmtimer(long deltapm, long *delta) | 558 | static int __init |
559 | calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) | ||
538 | { | 560 | { |
539 | const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; | 561 | const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; |
540 | const long pm_thresh = pm_100ms / 100; | 562 | const long pm_thresh = pm_100ms / 100; |
@@ -545,7 +567,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
545 | return -1; | 567 | return -1; |
546 | #endif | 568 | #endif |
547 | 569 | ||
548 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | 570 | apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); |
549 | 571 | ||
550 | /* Check whether the PM timer is available */ | 572 | /* Check whether the PM timer is available */ |
551 | if (!deltapm) | 573 | if (!deltapm) |
@@ -555,19 +577,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
555 | 577 | ||
556 | if (deltapm > (pm_100ms - pm_thresh) && | 578 | if (deltapm > (pm_100ms - pm_thresh) && |
557 | deltapm < (pm_100ms + pm_thresh)) { | 579 | deltapm < (pm_100ms + pm_thresh)) { |
558 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | 580 | apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); |
559 | } else { | 581 | return 0; |
560 | res = (((u64)deltapm) * mult) >> 22; | 582 | } |
561 | do_div(res, 1000000); | 583 | |
562 | printk(KERN_WARNING "APIC calibration not consistent " | 584 | res = (((u64)deltapm) * mult) >> 22; |
563 | "with PM Timer: %ldms instead of 100ms\n", | 585 | do_div(res, 1000000); |
564 | (long)res); | 586 | "with PM-Timer: %ldms instead of 100ms\n", (long)res); |
565 | /* Correct the lapic counter value */ | 587 | "with PM-Timer: %ldms instead of 100ms\n",(long)res); |
566 | res = (((u64)(*delta)) * pm_100ms); | 588 | |
589 | /* Correct the lapic counter value */ | ||
590 | res = (((u64)(*delta)) * pm_100ms); | ||
591 | do_div(res, deltapm); | ||
592 | pr_info("APIC delta adjusted to PM-Timer: " | ||
593 | "%lu (%ld)\n", (unsigned long)res, *delta); | ||
594 | *delta = (long)res; | ||
595 | |||
596 | /* Correct the tsc counter value */ | ||
597 | if (cpu_has_tsc) { | ||
598 | res = (((u64)(*deltatsc)) * pm_100ms); | ||
567 | do_div(res, deltapm); | 599 | do_div(res, deltapm); |
568 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | 600 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " |
569 | "%lu (%ld)\n", (unsigned long)res, *delta); | 601 | "PM-Timer: %lu (%ld) \n", |
570 | *delta = (long)res; | 602 | (unsigned long)res, *deltatsc); |
603 | *deltatsc = (long)res; | ||
571 | } | 604 | } |
572 | 605 | ||
573 | return 0; | 606 | return 0; |
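The restructure turns calibrate_by_pmtimer() into straight-line code and, new in this patch, rescales the TSC delta alongside the LAPIC delta: when the PM timer shows the measurement window was not exactly 100 ms, both counters are corrected by the ratio pm_100ms/deltapm. The correction step in isolation (PMTMR_TICKS_PER_SEC is 3579545, so pm_100ms is about 357954 ticks; do_div() is the kernel's u64-by-u32 division helper):

    #include <asm/div64.h>

    /* Rescale a delta measured over 'deltapm' PM-timer ticks to what it
     * would have been over exactly 100 ms; sketch of the code above. */
    static u64 rescale_to_pm_100ms(u64 delta, u32 deltapm)
    {
        const u32 pm_100ms = 3579545 / 10;    /* PMTMR_TICKS_PER_SEC / 10 */
        u64 res = delta * pm_100ms;

        do_div(res, deltapm);    /* res /= deltapm, remainder dropped */
        return res;
    }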
@@ -578,7 +611,7 @@ static int __init calibrate_APIC_clock(void) | |||
578 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 611 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
579 | void (*real_handler)(struct clock_event_device *dev); | 612 | void (*real_handler)(struct clock_event_device *dev); |
580 | unsigned long deltaj; | 613 | unsigned long deltaj; |
581 | long delta; | 614 | long delta, deltatsc; |
582 | int pm_referenced = 0; | 615 | int pm_referenced = 0; |
583 | 616 | ||
584 | local_irq_disable(); | 617 | local_irq_disable(); |
@@ -608,9 +641,11 @@ static int __init calibrate_APIC_clock(void) | |||
608 | delta = lapic_cal_t1 - lapic_cal_t2; | 641 | delta = lapic_cal_t1 - lapic_cal_t2; |
609 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); | 642 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); |
610 | 643 | ||
644 | deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
645 | |||
611 | /* we trust the PM based calibration if possible */ | 646 | /* we trust the PM based calibration if possible */ |
612 | pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, | 647 | pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, |
613 | &delta); | 648 | &delta, &deltatsc); |
614 | 649 | ||
615 | /* Calculate the scaled math multiplication factor */ | 650 | /* Calculate the scaled math multiplication factor */ |
616 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, | 651 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, |
@@ -628,11 +663,10 @@ static int __init calibrate_APIC_clock(void) | |||
628 | calibration_result); | 663 | calibration_result); |
629 | 664 | ||
630 | if (cpu_has_tsc) { | 665 | if (cpu_has_tsc) { |
631 | delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
632 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | 666 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " |
633 | "%ld.%04ld MHz.\n", | 667 | "%ld.%04ld MHz.\n", |
634 | (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), | 668 | (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), |
635 | (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); | 669 | (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); |
636 | } | 670 | } |
637 | 671 | ||
638 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | 672 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " |
@@ -645,8 +679,7 @@ static int __init calibrate_APIC_clock(void) | |||
645 | */ | 679 | */ |
646 | if (calibration_result < (1000000 / HZ)) { | 680 | if (calibration_result < (1000000 / HZ)) { |
647 | local_irq_enable(); | 681 | local_irq_enable(); |
648 | printk(KERN_WARNING | 682 | pr_warning("APIC frequency too slow, disabling apic timer\n"); |
649 | "APIC frequency too slow, disabling apic timer\n"); | ||
650 | return -1; | 683 | return -1; |
651 | } | 684 | } |
652 | 685 | ||
@@ -672,13 +705,9 @@ static int __init calibrate_APIC_clock(void) | |||
672 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) | 705 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) |
673 | cpu_relax(); | 706 | cpu_relax(); |
674 | 707 | ||
675 | local_irq_disable(); | ||
676 | |||
677 | /* Stop the lapic timer */ | 708 | /* Stop the lapic timer */ |
678 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | 709 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); |
679 | 710 | ||
680 | local_irq_enable(); | ||
681 | |||
682 | /* Jiffies delta */ | 711 | /* Jiffies delta */ |
683 | deltaj = lapic_cal_j2 - lapic_cal_j1; | 712 | deltaj = lapic_cal_j2 - lapic_cal_j1; |
684 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | 713 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); |
@@ -692,8 +721,7 @@ static int __init calibrate_APIC_clock(void) | |||
692 | local_irq_enable(); | 721 | local_irq_enable(); |
693 | 722 | ||
694 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { | 723 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
695 | printk(KERN_WARNING | 724 | pr_warning("APIC timer disabled due to verification failure\n"); |
696 | "APIC timer disabled due to verification failure.\n"); | ||
697 | return -1; | 725 | return -1; |
698 | } | 726 | } |
699 | 727 | ||
@@ -714,7 +742,7 @@ void __init setup_boot_APIC_clock(void) | |||
714 | * broadcast mechanism is used. On UP systems simply ignore it. | 742 | * broadcast mechanism is used. On UP systems simply ignore it. |
715 | */ | 743 | */ |
716 | if (disable_apic_timer) { | 744 | if (disable_apic_timer) { |
717 | printk(KERN_INFO "Disabling APIC timer\n"); | 745 | pr_info("Disabling APIC timer\n"); |
718 | /* No broadcast on UP ! */ | 746 | /* No broadcast on UP ! */ |
719 | if (num_possible_cpus() > 1) { | 747 | if (num_possible_cpus() > 1) { |
720 | lapic_clockevent.mult = 1; | 748 | lapic_clockevent.mult = 1; |
@@ -741,7 +769,7 @@ void __init setup_boot_APIC_clock(void) | |||
741 | if (nmi_watchdog != NMI_IO_APIC) | 769 | if (nmi_watchdog != NMI_IO_APIC) |
742 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | 770 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
743 | else | 771 | else |
744 | printk(KERN_WARNING "APIC timer registered as dummy," | 772 | pr_warning("APIC timer registered as dummy," |
745 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 773 | " due to nmi_watchdog=%d!\n", nmi_watchdog); |
746 | 774 | ||
747 | /* Setup the lapic or request the broadcast */ | 775 | /* Setup the lapic or request the broadcast */ |
@@ -773,8 +801,7 @@ static void local_apic_timer_interrupt(void) | |||
773 | * spurious. | 801 | * spurious. |
774 | */ | 802 | */ |
775 | if (!evt->event_handler) { | 803 | if (!evt->event_handler) { |
776 | printk(KERN_WARNING | 804 | pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); |
777 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
778 | /* Switch it off */ | 805 | /* Switch it off */ |
779 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | 806 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); |
780 | return; | 807 | return; |
@@ -783,11 +810,7 @@ static void local_apic_timer_interrupt(void) | |||
783 | /* | 810 | /* |
784 | * the NMI deadlock-detector uses this. | 811 | * the NMI deadlock-detector uses this. |
785 | */ | 812 | */ |
786 | #ifdef CONFIG_X86_64 | 813 | inc_irq_stat(apic_timer_irqs); |
787 | add_pda(apic_timer_irqs, 1); | ||
788 | #else | ||
789 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
790 | #endif | ||
791 | 814 | ||
792 | evt->event_handler(evt); | 815 | evt->event_handler(evt); |
793 | } | 816 | } |
@@ -800,7 +823,7 @@ static void local_apic_timer_interrupt(void) | |||
800 | * [ if a single-CPU system runs an SMP kernel then we call the local | 823 | * [ if a single-CPU system runs an SMP kernel then we call the local |
801 | * interrupt as well. Thus we cannot inline the local irq ... ] | 824 | * interrupt as well. Thus we cannot inline the local irq ... ] |
802 | */ | 825 | */ |
803 | void smp_apic_timer_interrupt(struct pt_regs *regs) | 826 | void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) |
804 | { | 827 | { |
805 | struct pt_regs *old_regs = set_irq_regs(regs); | 828 | struct pt_regs *old_regs = set_irq_regs(regs); |
806 | 829 | ||
@@ -814,9 +837,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
814 | * Besides, if we don't, timer interrupts ignore the global | 837 | * Besides, if we don't, timer interrupts ignore the global |
815 | * interrupt lock, which is the WrongThing (tm) to do. | 838 | * interrupt lock, which is the WrongThing (tm) to do. |
816 | */ | 839 | */ |
817 | #ifdef CONFIG_X86_64 | ||
818 | exit_idle(); | 840 | exit_idle(); |
819 | #endif | ||
820 | irq_enter(); | 841 | irq_enter(); |
821 | local_apic_timer_interrupt(); | 842 | local_apic_timer_interrupt(); |
822 | irq_exit(); | 843 | irq_exit(); |
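inc_irq_stat() folds the removed 32/64-bit #ifdef into one helper so callers like this stay arch-neutral; the per-arch definitions live in the hardirq headers and presumably mirror the two branches deleted above:

    /* Presumed definitions, mirroring the removed code. */
    #ifdef CONFIG_X86_64
    #define inc_irq_stat(member)    add_pda(member, 1)
    #else
    #define inc_irq_stat(member)    (__get_cpu_var(irq_stat).member++)
    #endif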
@@ -907,6 +928,10 @@ void disable_local_APIC(void) | |||
907 | { | 928 | { |
908 | unsigned int value; | 929 | unsigned int value; |
909 | 930 | ||
931 | /* APIC hasn't been mapped yet */ | ||
932 | if (!apic_phys) | ||
933 | return; | ||
934 | |||
910 | clear_local_APIC(); | 935 | clear_local_APIC(); |
911 | 936 | ||
912 | /* | 937 | /* |
@@ -999,11 +1024,11 @@ int __init verify_local_APIC(void) | |||
999 | */ | 1024 | */ |
1000 | reg0 = apic_read(APIC_ID); | 1025 | reg0 = apic_read(APIC_ID); |
1001 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | 1026 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); |
1002 | apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); | 1027 | apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); |
1003 | reg1 = apic_read(APIC_ID); | 1028 | reg1 = apic_read(APIC_ID); |
1004 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | 1029 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); |
1005 | apic_write(APIC_ID, reg0); | 1030 | apic_write(APIC_ID, reg0); |
1006 | if (reg1 != (reg0 ^ APIC_ID_MASK)) | 1031 | if (reg1 != (reg0 ^ apic->apic_id_mask)) |
1007 | return 0; | 1032 | return 0; |
1008 | 1033 | ||
1009 | /* | 1034 | /* |
@@ -1093,18 +1118,18 @@ static void __cpuinit lapic_setup_esr(void) | |||
1093 | unsigned int oldvalue, value, maxlvt; | 1118 | unsigned int oldvalue, value, maxlvt; |
1094 | 1119 | ||
1095 | if (!lapic_is_integrated()) { | 1120 | if (!lapic_is_integrated()) { |
1096 | printk(KERN_INFO "No ESR for 82489DX.\n"); | 1121 | pr_info("No ESR for 82489DX.\n"); |
1097 | return; | 1122 | return; |
1098 | } | 1123 | } |
1099 | 1124 | ||
1100 | if (esr_disable) { | 1125 | if (apic->disable_esr) { |
1101 | /* | 1126 | /* |
1102 | * Something untraceable is creating bad interrupts on | 1127 | * Something untraceable is creating bad interrupts on |
1103 | * secondary quads ... for the moment, just leave the | 1128 | * secondary quads ... for the moment, just leave the |
1104 | * ESR disabled - we can't do anything useful with the | 1129 | * ESR disabled - we can't do anything useful with the |
1105 | * errors anyway - mbligh | 1130 | * errors anyway - mbligh |
1106 | */ | 1131 | */ |
1107 | printk(KERN_INFO "Leaving ESR disabled.\n"); | 1132 | pr_info("Leaving ESR disabled.\n"); |
1108 | return; | 1133 | return; |
1109 | } | 1134 | } |
1110 | 1135 | ||
@@ -1138,9 +1163,14 @@ void __cpuinit setup_local_APIC(void) | |||
1138 | unsigned int value; | 1163 | unsigned int value; |
1139 | int i, j; | 1164 | int i, j; |
1140 | 1165 | ||
1166 | if (disable_apic) { | ||
1167 | arch_disable_smp_support(); | ||
1168 | return; | ||
1169 | } | ||
1170 | |||
1141 | #ifdef CONFIG_X86_32 | 1171 | #ifdef CONFIG_X86_32 |
1142 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | 1172 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ |
1143 | if (lapic_is_integrated() && esr_disable) { | 1173 | if (lapic_is_integrated() && apic->disable_esr) { |
1144 | apic_write(APIC_ESR, 0); | 1174 | apic_write(APIC_ESR, 0); |
1145 | apic_write(APIC_ESR, 0); | 1175 | apic_write(APIC_ESR, 0); |
1146 | apic_write(APIC_ESR, 0); | 1176 | apic_write(APIC_ESR, 0); |
@@ -1154,7 +1184,7 @@ void __cpuinit setup_local_APIC(void) | |||
1154 | * Double-check whether this APIC is really registered. | 1184 | * Double-check whether this APIC is really registered. |
1155 | * This is meaningless in clustered apic mode, so we skip it. | 1185 | * This is meaningless in clustered apic mode, so we skip it. |
1156 | */ | 1186 | */ |
1157 | if (!apic_id_registered()) | 1187 | if (!apic->apic_id_registered()) |
1158 | BUG(); | 1188 | BUG(); |
1159 | 1189 | ||
1160 | /* | 1190 | /* |
@@ -1162,7 +1192,7 @@ void __cpuinit setup_local_APIC(void) | |||
1162 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | 1192 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel |
1163 | * document number 292116). So here it goes... | 1193 | * document number 292116). So here it goes... |
1164 | */ | 1194 | */ |
1165 | init_apic_ldr(); | 1195 | apic->init_apic_ldr(); |
1166 | 1196 | ||
1167 | /* | 1197 | /* |
1168 | * Set Task Priority to 'accept all'. We never change this | 1198 | * Set Task Priority to 'accept all'. We never change this |
@@ -1298,7 +1328,7 @@ void check_x2apic(void) | |||
1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1328 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1299 | 1329 | ||
1300 | if (msr & X2APIC_ENABLE) { | 1330 | if (msr & X2APIC_ENABLE) { |
1301 | printk("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1331 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
1302 | x2apic_preenabled = x2apic = 1; | 1332 | x2apic_preenabled = x2apic = 1; |
1303 | apic_ops = &x2apic_ops; | 1333 | apic_ops = &x2apic_ops; |
1304 | } | 1334 | } |
@@ -1310,12 +1340,12 @@ void enable_x2apic(void) | |||
1310 | 1340 | ||
1311 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1341 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1312 | if (!(msr & X2APIC_ENABLE)) { | 1342 | if (!(msr & X2APIC_ENABLE)) { |
1313 | printk("Enabling x2apic\n"); | 1343 | pr_info("Enabling x2apic\n"); |
1314 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1344 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1315 | } | 1345 | } |
1316 | } | 1346 | } |
1317 | 1347 | ||
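check_x2apic() and enable_x2apic() above operate on the EXTD bit of the IA32_APIC_BASE MSR. A condensed restatement of the enable path (bit positions per the Intel SDM; rdmsr/wrmsr are the kernel's low/high-word MSR accessors):

	#define MSR_IA32_APICBASE	0x0000001b
	#define X2APIC_ENABLE		(1UL << 10)	/* IA32_APIC_BASE.EXTD */

	static void x2apic_enable_sketch(void)
	{
		unsigned int lo, hi;

		rdmsr(MSR_IA32_APICBASE, lo, hi);
		if (!(lo & X2APIC_ENABLE))
			wrmsr(MSR_IA32_APICBASE, lo | X2APIC_ENABLE, hi);
	}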
1318 | void enable_IR_x2apic(void) | 1348 | void __init enable_IR_x2apic(void) |
1319 | { | 1349 | { |
1320 | #ifdef CONFIG_INTR_REMAP | 1350 | #ifdef CONFIG_INTR_REMAP |
1321 | int ret; | 1351 | int ret; |
@@ -1325,9 +1355,8 @@ void enable_IR_x2apic(void) | |||
1325 | return; | 1355 | return; |
1326 | 1356 | ||
1327 | if (!x2apic_preenabled && disable_x2apic) { | 1357 | if (!x2apic_preenabled && disable_x2apic) { |
1328 | printk(KERN_INFO | 1358 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1329 | "Skipped enabling x2apic and Interrupt-remapping " | 1359 | "because of nox2apic\n"); |
1330 | "because of nox2apic\n"); | ||
1331 | return; | 1360 | return; |
1332 | } | 1361 | } |
1333 | 1362 | ||
@@ -1335,22 +1364,19 @@ void enable_IR_x2apic(void) | |||
1335 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | 1364 | panic("Bios already enabled x2apic, can't enforce nox2apic"); |
1336 | 1365 | ||
1337 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1366 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1338 | printk(KERN_INFO | 1367 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1339 | "Skipped enabling x2apic and Interrupt-remapping " | 1368 | "because of skipping io-apic setup\n"); |
1340 | "because of skipping io-apic setup\n"); | ||
1341 | return; | 1369 | return; |
1342 | } | 1370 | } |
1343 | 1371 | ||
1344 | ret = dmar_table_init(); | 1372 | ret = dmar_table_init(); |
1345 | if (ret) { | 1373 | if (ret) { |
1346 | printk(KERN_INFO | 1374 | pr_info("dmar_table_init() failed with %d:\n", ret); |
1347 | "dmar_table_init() failed with %d:\n", ret); | ||
1348 | 1375 | ||
1349 | if (x2apic_preenabled) | 1376 | if (x2apic_preenabled) |
1350 | panic("x2apic enabled by bios. But IR enabling failed"); | 1377 | panic("x2apic enabled by bios. But IR enabling failed"); |
1351 | else | 1378 | else |
1352 | printk(KERN_INFO | 1379 | pr_info("Not enabling x2apic, Intr-remapping\n"); |
1353 | "Not enabling x2apic,Intr-remapping\n"); | ||
1354 | return; | 1380 | return; |
1355 | } | 1381 | } |
1356 | 1382 | ||
@@ -1359,7 +1385,7 @@ void enable_IR_x2apic(void) | |||
1359 | 1385 | ||
1360 | ret = save_mask_IO_APIC_setup(); | 1386 | ret = save_mask_IO_APIC_setup(); |
1361 | if (ret) { | 1387 | if (ret) { |
1362 | printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); | 1388 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1363 | goto end; | 1389 | goto end; |
1364 | } | 1390 | } |
1365 | 1391 | ||
@@ -1394,14 +1420,11 @@ end: | |||
1394 | 1420 | ||
1395 | if (!ret) { | 1421 | if (!ret) { |
1396 | if (!x2apic_preenabled) | 1422 | if (!x2apic_preenabled) |
1397 | printk(KERN_INFO | 1423 | pr_info("Enabled x2apic and interrupt-remapping\n"); |
1398 | "Enabled x2apic and interrupt-remapping\n"); | ||
1399 | else | 1424 | else |
1400 | printk(KERN_INFO | 1425 | pr_info("Enabled Interrupt-remapping\n"); |
1401 | "Enabled Interrupt-remapping\n"); | ||
1402 | } else | 1426 | } else |
1403 | printk(KERN_ERR | 1427 | pr_err("Failed to enable Interrupt-remapping and x2apic\n"); |
1404 | "Failed to enable Interrupt-remapping and x2apic\n"); | ||
1405 | #else | 1428 | #else |
1406 | if (!cpu_has_x2apic) | 1429 | if (!cpu_has_x2apic) |
1407 | return; | 1430 | return; |
@@ -1410,8 +1433,8 @@ end: | |||
1410 | panic("x2apic enabled prior OS handover," | 1433 | panic("x2apic enabled prior OS handover," |
1411 | " enable CONFIG_INTR_REMAP"); | 1434 | " enable CONFIG_INTR_REMAP"); |
1412 | 1435 | ||
1413 | printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " | 1436 | pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " |
1414 | " and x2apic\n"); | 1437 | "and x2apic\n"); |
1415 | #endif | 1438 | #endif |
1416 | 1439 | ||
1417 | return; | 1440 | return; |
@@ -1428,7 +1451,7 @@ end: | |||
1428 | static int __init detect_init_APIC(void) | 1451 | static int __init detect_init_APIC(void) |
1429 | { | 1452 | { |
1430 | if (!cpu_has_apic) { | 1453 | if (!cpu_has_apic) { |
1431 | printk(KERN_INFO "No local APIC present\n"); | 1454 | pr_info("No local APIC present\n"); |
1432 | return -1; | 1455 | return -1; |
1433 | } | 1456 | } |
1434 | 1457 | ||
@@ -1451,7 +1474,7 @@ static int __init detect_init_APIC(void) | |||
1451 | switch (boot_cpu_data.x86_vendor) { | 1474 | switch (boot_cpu_data.x86_vendor) { |
1452 | case X86_VENDOR_AMD: | 1475 | case X86_VENDOR_AMD: |
1453 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || | 1476 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || |
1454 | (boot_cpu_data.x86 == 15)) | 1477 | (boot_cpu_data.x86 >= 15)) |
1455 | break; | 1478 | break; |
1456 | goto no_apic; | 1479 | goto no_apic; |
1457 | case X86_VENDOR_INTEL: | 1480 | case X86_VENDOR_INTEL: |
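The `== 15` → `>= 15` change above is a real fix: boot_cpu_data.x86 holds the summed display family, where CPUID's extended family field is added to a base family of 0xf. K8 is therefore 15, but Family 10h is 16, Family 11h is 17, and so on, so the old equality test sent those newer AMD parts to no_apic. A standalone illustration of the decoding from CPUID leaf 1 EAX:

	#include <stdio.h>

	/* Display family: base + extended field when base == 0xf */
	static unsigned int display_family(unsigned int eax)
	{
		unsigned int base = (eax >> 8) & 0xf;
		unsigned int ext  = (eax >> 20) & 0xff;

		return base == 0xf ? base + ext : base;
	}

	int main(void)
	{
		printf("%u\n", display_family(0x00100f22));	/* Family 10h sample: 16 */
		return 0;
	}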
@@ -1469,8 +1492,8 @@ static int __init detect_init_APIC(void) | |||
1469 | * "lapic" specified. | 1492 | * "lapic" specified. |
1470 | */ | 1493 | */ |
1471 | if (!force_enable_local_apic) { | 1494 | if (!force_enable_local_apic) { |
1472 | printk(KERN_INFO "Local APIC disabled by BIOS -- " | 1495 | pr_info("Local APIC disabled by BIOS -- " |
1473 | "you can enable it with \"lapic\"\n"); | 1496 | "you can enable it with \"lapic\"\n"); |
1474 | return -1; | 1497 | return -1; |
1475 | } | 1498 | } |
1476 | /* | 1499 | /* |
@@ -1480,8 +1503,7 @@ static int __init detect_init_APIC(void) | |||
1480 | */ | 1503 | */ |
1481 | rdmsr(MSR_IA32_APICBASE, l, h); | 1504 | rdmsr(MSR_IA32_APICBASE, l, h); |
1482 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1505 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
1483 | printk(KERN_INFO | 1506 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); |
1484 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
1485 | l &= ~MSR_IA32_APICBASE_BASE; | 1507 | l &= ~MSR_IA32_APICBASE_BASE; |
1486 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1508 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
1487 | wrmsr(MSR_IA32_APICBASE, l, h); | 1509 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -1494,7 +1516,7 @@ static int __init detect_init_APIC(void) | |||
1494 | */ | 1516 | */ |
1495 | features = cpuid_edx(1); | 1517 | features = cpuid_edx(1); |
1496 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1518 | if (!(features & (1 << X86_FEATURE_APIC))) { |
1497 | printk(KERN_WARNING "Could not enable APIC!\n"); | 1519 | pr_warning("Could not enable APIC!\n"); |
1498 | return -1; | 1520 | return -1; |
1499 | } | 1521 | } |
1500 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1522 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
@@ -1505,14 +1527,14 @@ static int __init detect_init_APIC(void) | |||
1505 | if (l & MSR_IA32_APICBASE_ENABLE) | 1527 | if (l & MSR_IA32_APICBASE_ENABLE) |
1506 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | 1528 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; |
1507 | 1529 | ||
1508 | printk(KERN_INFO "Found and enabled local APIC!\n"); | 1530 | pr_info("Found and enabled local APIC!\n"); |
1509 | 1531 | ||
1510 | apic_pm_activate(); | 1532 | apic_pm_activate(); |
1511 | 1533 | ||
1512 | return 0; | 1534 | return 0; |
1513 | 1535 | ||
1514 | no_apic: | 1536 | no_apic: |
1515 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); | 1537 | pr_info("No local APIC present or hardware disabled\n"); |
1516 | return -1; | 1538 | return -1; |
1517 | } | 1539 | } |
1518 | #endif | 1540 | #endif |
@@ -1586,14 +1608,14 @@ int apic_version[MAX_APICS]; | |||
1586 | 1608 | ||
1587 | int __init APIC_init_uniprocessor(void) | 1609 | int __init APIC_init_uniprocessor(void) |
1588 | { | 1610 | { |
1589 | #ifdef CONFIG_X86_64 | ||
1590 | if (disable_apic) { | 1611 | if (disable_apic) { |
1591 | printk(KERN_INFO "Apic disabled\n"); | 1612 | pr_info("Apic disabled\n"); |
1592 | return -1; | 1613 | return -1; |
1593 | } | 1614 | } |
1615 | #ifdef CONFIG_X86_64 | ||
1594 | if (!cpu_has_apic) { | 1616 | if (!cpu_has_apic) { |
1595 | disable_apic = 1; | 1617 | disable_apic = 1; |
1596 | printk(KERN_INFO "Apic disabled by BIOS\n"); | 1618 | pr_info("Apic disabled by BIOS\n"); |
1597 | return -1; | 1619 | return -1; |
1598 | } | 1620 | } |
1599 | #else | 1621 | #else |
@@ -1605,8 +1627,8 @@ int __init APIC_init_uniprocessor(void) | |||
1605 | */ | 1627 | */ |
1606 | if (!cpu_has_apic && | 1628 | if (!cpu_has_apic && |
1607 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1629 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1608 | printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", | 1630 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
1609 | boot_cpu_physical_apicid); | 1631 | boot_cpu_physical_apicid); |
1610 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1632 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1611 | return -1; | 1633 | return -1; |
1612 | } | 1634 | } |
@@ -1616,7 +1638,7 @@ int __init APIC_init_uniprocessor(void) | |||
1616 | enable_IR_x2apic(); | 1638 | enable_IR_x2apic(); |
1617 | #endif | 1639 | #endif |
1618 | #ifdef CONFIG_X86_64 | 1640 | #ifdef CONFIG_X86_64 |
1619 | setup_apic_routing(); | 1641 | default_setup_apic_routing(); |
1620 | #endif | 1642 | #endif |
1621 | 1643 | ||
1622 | verify_local_APIC(); | 1644 | verify_local_APIC(); |
@@ -1682,9 +1704,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1682 | { | 1704 | { |
1683 | u32 v; | 1705 | u32 v; |
1684 | 1706 | ||
1685 | #ifdef CONFIG_X86_64 | ||
1686 | exit_idle(); | 1707 | exit_idle(); |
1687 | #endif | ||
1688 | irq_enter(); | 1708 | irq_enter(); |
1689 | /* | 1709 | /* |
1690 | * Check if this really is a spurious interrupt and ACK it | 1710 | * Check if this really is a spurious interrupt and ACK it |
@@ -1695,14 +1715,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1695 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | 1715 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) |
1696 | ack_APIC_irq(); | 1716 | ack_APIC_irq(); |
1697 | 1717 | ||
1698 | #ifdef CONFIG_X86_64 | 1718 | inc_irq_stat(irq_spurious_count); |
1699 | add_pda(irq_spurious_count, 1); | 1719 | |
1700 | #else | ||
1701 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1720 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
1702 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " | 1721 | pr_info("spurious APIC interrupt on CPU#%d, " |
1703 | "should never happen.\n", smp_processor_id()); | 1722 | "should never happen.\n", smp_processor_id()); |
1704 | __get_cpu_var(irq_stat).irq_spurious_count++; | ||
1705 | #endif | ||
1706 | irq_exit(); | 1723 | irq_exit(); |
1707 | } | 1724 | } |
1708 | 1725 | ||
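inc_irq_stat() collapses the two #ifdef branches into one call (and, as a side effect, the "should never happen" message is now printed on 64-bit too). At the point this helper was introduced the per-arch definitions were roughly the following, with the 64-bit side still PDA-based until the per-cpu conversion elsewhere in this series:

	#ifdef CONFIG_X86_64
	# define inc_irq_stat(member)	add_pda(member, 1)
	#else
	# define inc_irq_stat(member)	(__get_cpu_var(irq_stat).member++)
	#endif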
@@ -1713,9 +1730,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1713 | { | 1730 | { |
1714 | u32 v, v1; | 1731 | u32 v, v1; |
1715 | 1732 | ||
1716 | #ifdef CONFIG_X86_64 | ||
1717 | exit_idle(); | 1733 | exit_idle(); |
1718 | #endif | ||
1719 | irq_enter(); | 1734 | irq_enter(); |
1720 | /* First tickle the hardware, only then report what went on. -- REW */ | 1735 | /* First tickle the hardware, only then report what went on. -- REW */ |
1721 | v = apic_read(APIC_ESR); | 1736 | v = apic_read(APIC_ESR); |
@@ -1724,17 +1739,18 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1724 | ack_APIC_irq(); | 1739 | ack_APIC_irq(); |
1725 | atomic_inc(&irq_err_count); | 1740 | atomic_inc(&irq_err_count); |
1726 | 1741 | ||
1727 | /* Here is what the APIC error bits mean: | 1742 | /* |
1728 | 0: Send CS error | 1743 | * Here is what the APIC error bits mean: |
1729 | 1: Receive CS error | 1744 | * 0: Send CS error |
1730 | 2: Send accept error | 1745 | * 1: Receive CS error |
1731 | 3: Receive accept error | 1746 | * 2: Send accept error |
1732 | 4: Reserved | 1747 | * 3: Receive accept error |
1733 | 5: Send illegal vector | 1748 | * 4: Reserved |
1734 | 6: Received illegal vector | 1749 | * 5: Send illegal vector |
1735 | 7: Illegal register address | 1750 | * 6: Received illegal vector |
1736 | */ | 1751 | * 7: Illegal register address |
1737 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | 1752 | */ |
1753 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | ||
1738 | smp_processor_id(), v , v1); | 1754 | smp_processor_id(), v , v1); |
1739 | irq_exit(); | 1755 | irq_exit(); |
1740 | } | 1756 | } |
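Two notes on this hunk. First, unlike pr_info(), pr_debug() compiles to a no-op unless DEBUG (or dynamic debug) is enabled, so this conversion also silences the APIC-error message in ordinary builds. Second, the bit list in the reflowed comment lends itself to a table-driven decoder; a hypothetical helper, not part of the patch:

	static const char * const apic_esr_bit[] = {
		"Send CS error",		"Receive CS error",
		"Send accept error",		"Receive accept error",
		"Reserved",			"Send illegal vector",
		"Received illegal vector",	"Illegal register address",
	};

	static void decode_esr(unsigned int v)
	{
		int i;

		for (i = 0; i < 8; i++)
			if (v & (1 << i))
				pr_debug("  ESR bit %d: %s\n", i, apic_esr_bit[i]);
	}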
@@ -1760,7 +1776,8 @@ void __init connect_bsp_APIC(void) | |||
1760 | outb(0x01, 0x23); | 1776 | outb(0x01, 0x23); |
1761 | } | 1777 | } |
1762 | #endif | 1778 | #endif |
1763 | enable_apic_mode(); | 1779 | if (apic->enable_apic_mode) |
1780 | apic->enable_apic_mode(); | ||
1764 | } | 1781 | } |
1765 | 1782 | ||
1766 | /** | 1783 | /** |
@@ -1832,28 +1849,37 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1832 | void __cpuinit generic_processor_info(int apicid, int version) | 1849 | void __cpuinit generic_processor_info(int apicid, int version) |
1833 | { | 1850 | { |
1834 | int cpu; | 1851 | int cpu; |
1835 | cpumask_t tmp_map; | ||
1836 | 1852 | ||
1837 | /* | 1853 | /* |
1838 | * Validate version | 1854 | * Validate version |
1839 | */ | 1855 | */ |
1840 | if (version == 0x0) { | 1856 | if (version == 0x0) { |
1841 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " | 1857 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " |
1842 | "fixing up to 0x10. (tell your hw vendor)\n", | 1858 | "fixing up to 0x10. (tell your hw vendor)\n", |
1843 | version); | 1859 | version); |
1844 | version = 0x10; | 1860 | version = 0x10; |
1845 | } | 1861 | } |
1846 | apic_version[apicid] = version; | 1862 | apic_version[apicid] = version; |
1847 | 1863 | ||
1848 | if (num_processors >= NR_CPUS) { | 1864 | if (num_processors >= nr_cpu_ids) { |
1849 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1865 | int max = nr_cpu_ids; |
1850 | " Processor ignored.\n", NR_CPUS); | 1866 | int thiscpu = max + disabled_cpus; |
1867 | |||
1868 | pr_warning( | ||
1869 | "ACPI: NR_CPUS/possible_cpus limit of %i reached." | ||
1870 | " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); | ||
1871 | |||
1872 | disabled_cpus++; | ||
1851 | return; | 1873 | return; |
1852 | } | 1874 | } |
1853 | 1875 | ||
1854 | num_processors++; | 1876 | num_processors++; |
1855 | cpus_complement(tmp_map, cpu_present_map); | 1877 | cpu = cpumask_next_zero(-1, cpu_present_mask); |
1856 | cpu = first_cpu(tmp_map); | 1878 | |
1879 | if (version != apic_version[boot_cpu_physical_apicid]) | ||
1880 | WARN_ONCE(1, | ||
1881 | "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", | ||
1882 | apic_version[boot_cpu_physical_apicid], cpu, version); | ||
1857 | 1883 | ||
1858 | physid_set(apicid, phys_cpu_present_map); | 1884 | physid_set(apicid, phys_cpu_present_map); |
1859 | if (apicid == boot_cpu_physical_apicid) { | 1885 | if (apicid == boot_cpu_physical_apicid) { |
@@ -1889,29 +1915,39 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1889 | } | 1915 | } |
1890 | #endif | 1916 | #endif |
1891 | 1917 | ||
1892 | #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) | 1918 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) |
1893 | /* are we being called early in kernel startup? */ | 1919 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
1894 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | 1920 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
1895 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
1896 | u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | ||
1897 | |||
1898 | cpu_to_apicid[cpu] = apicid; | ||
1899 | bios_cpu_apicid[cpu] = apicid; | ||
1900 | } else { | ||
1901 | per_cpu(x86_cpu_to_apicid, cpu) = apicid; | ||
1902 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | ||
1903 | } | ||
1904 | #endif | 1921 | #endif |
1905 | 1922 | ||
1906 | cpu_set(cpu, cpu_possible_map); | 1923 | set_cpu_possible(cpu, true); |
1907 | cpu_set(cpu, cpu_present_map); | 1924 | set_cpu_present(cpu, true); |
1908 | } | 1925 | } |
1909 | 1926 | ||
1910 | #ifdef CONFIG_X86_64 | ||
1911 | int hard_smp_processor_id(void) | 1927 | int hard_smp_processor_id(void) |
1912 | { | 1928 | { |
1913 | return read_apic_id(); | 1929 | return read_apic_id(); |
1914 | } | 1930 | } |
1931 | |||
1932 | void default_init_apic_ldr(void) | ||
1933 | { | ||
1934 | unsigned long val; | ||
1935 | |||
1936 | apic_write(APIC_DFR, APIC_DFR_VALUE); | ||
1937 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
1938 | val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); | ||
1939 | apic_write(APIC_LDR, val); | ||
1940 | } | ||
1941 | |||
1942 | #ifdef CONFIG_X86_32 | ||
1943 | int default_apicid_to_node(int logical_apicid) | ||
1944 | { | ||
1945 | #ifdef CONFIG_SMP | ||
1946 | return apicid_2_node[hard_smp_processor_id()]; | ||
1947 | #else | ||
1948 | return 0; | ||
1949 | #endif | ||
1950 | } | ||
1915 | #endif | 1951 | #endif |
1916 | 1952 | ||
1917 | /* | 1953 | /* |
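The cpumask rework in generic_processor_info() avoids materializing a whole cpumask_t on the stack just to locate a free CPU slot, and flips the possible/present bits through accessors rather than writing the global maps directly. Old versus new idiom (signatures per <linux/cpumask.h>):

	/* old: complement the present map, then scan the copy */
	cpumask_t tmp_map;
	cpus_complement(tmp_map, cpu_present_map);
	cpu = first_cpu(tmp_map);

	/* new: find the first zero bit directly, then use the accessors */
	cpu = cpumask_next_zero(-1, cpu_present_mask);
	set_cpu_possible(cpu, true);
	set_cpu_present(cpu, true);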
@@ -2106,18 +2142,16 @@ __cpuinit int apic_is_clustered_box(void) | |||
2106 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | 2142 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); |
2107 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | 2143 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
2108 | 2144 | ||
2109 | for (i = 0; i < NR_CPUS; i++) { | 2145 | for (i = 0; i < nr_cpu_ids; i++) { |
2110 | /* are we being called early in kernel startup? */ | 2146 | /* are we being called early in kernel startup? */ |
2111 | if (bios_cpu_apicid) { | 2147 | if (bios_cpu_apicid) { |
2112 | id = bios_cpu_apicid[i]; | 2148 | id = bios_cpu_apicid[i]; |
2113 | } | 2149 | } else if (i < nr_cpu_ids) { |
2114 | else if (i < nr_cpu_ids) { | ||
2115 | if (cpu_present(i)) | 2150 | if (cpu_present(i)) |
2116 | id = per_cpu(x86_bios_cpu_apicid, i); | 2151 | id = per_cpu(x86_bios_cpu_apicid, i); |
2117 | else | 2152 | else |
2118 | continue; | 2153 | continue; |
2119 | } | 2154 | } else |
2120 | else | ||
2121 | break; | 2155 | break; |
2122 | 2156 | ||
2123 | if (id != BAD_APICID) | 2157 | if (id != BAD_APICID) |
@@ -2209,7 +2243,7 @@ static int __init apic_set_verbosity(char *arg) | |||
2209 | else if (strcmp("verbose", arg) == 0) | 2243 | else if (strcmp("verbose", arg) == 0) |
2210 | apic_verbosity = APIC_VERBOSE; | 2244 | apic_verbosity = APIC_VERBOSE; |
2211 | else { | 2245 | else { |
2212 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | 2246 | pr_warning("APIC Verbosity level %s not recognised" |
2213 | " use apic=verbose or apic=debug\n", arg); | 2247 | " use apic=verbose or apic=debug\n", arg); |
2214 | return -EINVAL; | 2248 | return -EINVAL; |
2215 | } | 2249 | } |
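The verbosity levels parsed above are set from the kernel command line, e.g.:

	apic=verbose
	apic=debug

with apic=debug being the noisier of the two.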
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5145a6e72bbb..37ba5f85b718 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -160,9 +160,9 @@ | |||
160 | * Work around byte swap bug in one of the Vaio's BIOS's | 160 | * Work around byte swap bug in one of the Vaio's BIOS's |
161 | * (Marc Boucher <marc@mbsi.ca>). | 161 | * (Marc Boucher <marc@mbsi.ca>). |
162 | * Exposed the disable flag to dmi so that we can handle known | 162 | * Exposed the disable flag to dmi so that we can handle known |
163 | * broken APM (Alan Cox <alan@redhat.com>). | 163 | * broken APM (Alan Cox <alan@lxorguk.ukuu.org.uk>). |
164 | * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin | 164 | * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin |
165 | * calling it - instead idle. (Alan Cox <alan@redhat.com>) | 165 | * calling it - instead idle. (Alan Cox <alan@lxorguk.ukuu.org.uk>) |
166 | * If an APM idle fails log it and idle sensibly | 166 | * If an APM idle fails log it and idle sensibly |
167 | * 1.15: Don't queue events to clients who open the device O_WRONLY. | 167 | * 1.15: Don't queue events to clients who open the device O_WRONLY. |
168 | * Don't expect replies from clients who open the device O_RDONLY. | 168 | * Don't expect replies from clients who open the device O_RDONLY. |
@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); | |||
301 | */ | 301 | */ |
302 | #define APM_ZERO_SEGS | 302 | #define APM_ZERO_SEGS |
303 | 303 | ||
304 | #include "apm.h" | 304 | #include <asm/apm.h> |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. | 307 | * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. |
@@ -391,11 +391,7 @@ static int power_off; | |||
391 | #else | 391 | #else |
392 | static int power_off = 1; | 392 | static int power_off = 1; |
393 | #endif | 393 | #endif |
394 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | ||
395 | static int realmode_power_off = 1; | ||
396 | #else | ||
397 | static int realmode_power_off; | 394 | static int realmode_power_off; |
398 | #endif | ||
399 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
400 | static int allow_ints = 1; | 396 | static int allow_ints = 1; |
401 | #else | 397 | #else |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6649d09ad88f..fbf2f33e3080 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
12 | #include <linux/kbuild.h> | 12 | #include <linux/kbuild.h> |
13 | #include <asm/ucontext.h> | 13 | #include <asm/ucontext.h> |
14 | #include "sigframe.h" | 14 | #include <asm/sigframe.h> |
15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
@@ -75,6 +75,7 @@ void foo(void) | |||
75 | OFFSET(PT_DS, pt_regs, ds); | 75 | OFFSET(PT_DS, pt_regs, ds); |
76 | OFFSET(PT_ES, pt_regs, es); | 76 | OFFSET(PT_ES, pt_regs, es); |
77 | OFFSET(PT_FS, pt_regs, fs); | 77 | OFFSET(PT_FS, pt_regs, fs); |
78 | OFFSET(PT_GS, pt_regs, gs); | ||
78 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); | 79 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); |
79 | OFFSET(PT_EIP, pt_regs, ip); | 80 | OFFSET(PT_EIP, pt_regs, ip); |
80 | OFFSET(PT_CS, pt_regs, cs); | 81 | OFFSET(PT_CS, pt_regs, cs); |
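PT_GS joins the existing OFFSET() entries so that 32-bit assembly (entry_32.S in this same series) can address the new gs slot in pt_regs by name. Per <linux/kbuild.h>, the macros emit marker lines into generated assembly that the build then scrapes into asm-offsets.h:

	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

	#define OFFSET(sym, str, mem) \
		DEFINE(sym, offsetof(struct str, mem))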
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 7fcf63d22f8b..8793ab33e2c1 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
13 | #include <linux/kbuild.h> | 13 | #include <linux/kbuild.h> |
14 | #include <asm/pda.h> | ||
15 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
16 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
17 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
@@ -20,6 +19,8 @@ | |||
20 | 19 | ||
21 | #include <xen/interface/xen.h> | 20 | #include <xen/interface/xen.h> |
22 | 21 | ||
22 | #include <asm/sigframe.h> | ||
23 | |||
23 | #define __NO_STUBS 1 | 24 | #define __NO_STUBS 1 |
24 | #undef __SYSCALL | 25 | #undef __SYSCALL |
25 | #undef _ASM_X86_UNISTD_64_H | 26 | #undef _ASM_X86_UNISTD_64_H |
@@ -46,16 +47,6 @@ int main(void) | |||
46 | #endif | 47 | #endif |
47 | BLANK(); | 48 | BLANK(); |
48 | #undef ENTRY | 49 | #undef ENTRY |
49 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) | ||
50 | ENTRY(kernelstack); | ||
51 | ENTRY(oldrsp); | ||
52 | ENTRY(pcurrent); | ||
53 | ENTRY(irqcount); | ||
54 | ENTRY(cpunumber); | ||
55 | ENTRY(irqstackptr); | ||
56 | ENTRY(data_offset); | ||
57 | BLANK(); | ||
58 | #undef ENTRY | ||
59 | #ifdef CONFIG_PARAVIRT | 50 | #ifdef CONFIG_PARAVIRT |
60 | BLANK(); | 51 | BLANK(); |
61 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | 52 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); |
@@ -87,7 +78,7 @@ int main(void) | |||
87 | BLANK(); | 78 | BLANK(); |
88 | #undef ENTRY | 79 | #undef ENTRY |
89 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 80 | DEFINE(IA32_RT_SIGFRAME_sigcontext, |
90 | offsetof (struct rt_sigframe32, uc.uc_mcontext)); | 81 | offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); |
91 | BLANK(); | 82 | BLANK(); |
92 | #endif | 83 | #endif |
93 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 84 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c new file mode 100644 index 000000000000..47a62f46afdb --- /dev/null +++ b/arch/x86/kernel/bigsmp_32.c | |||
@@ -0,0 +1,266 @@ | |||
1 | /* | ||
2 | * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. | ||
3 | * Drives the local APIC in "clustered mode". | ||
4 | */ | ||
5 | #define APIC_DEFINITION 1 | ||
6 | #include <linux/threads.h> | ||
7 | #include <linux/cpumask.h> | ||
8 | #include <asm/mpspec.h> | ||
9 | #include <asm/genapic.h> | ||
10 | #include <asm/fixmap.h> | ||
11 | #include <asm/apicdef.h> | ||
12 | #include <asm/ipi.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/dmi.h> | ||
16 | #include <linux/smp.h> | ||
17 | |||
18 | |||
19 | static inline unsigned bigsmp_get_apic_id(unsigned long x) | ||
20 | { | ||
21 | return (x >> 24) & 0xFF; | ||
22 | } | ||
23 | |||
24 | #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) | ||
25 | |||
26 | static inline int bigsmp_apic_id_registered(void) | ||
27 | { | ||
28 | return 1; | ||
29 | } | ||
30 | |||
31 | static inline const cpumask_t *bigsmp_target_cpus(void) | ||
32 | { | ||
33 | #ifdef CONFIG_SMP | ||
34 | return &cpu_online_map; | ||
35 | #else | ||
36 | return &cpumask_of_cpu(0); | ||
37 | #endif | ||
38 | } | ||
39 | |||
40 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | ||
41 | |||
42 | static inline unsigned long | ||
43 | bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
44 | { | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | static inline unsigned long bigsmp_check_apicid_present(int bit) | ||
49 | { | ||
50 | return 1; | ||
51 | } | ||
52 | |||
53 | static inline unsigned long calculate_ldr(int cpu) | ||
54 | { | ||
55 | unsigned long val, id; | ||
56 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
57 | id = xapic_phys_to_log_apicid(cpu); | ||
58 | val |= SET_APIC_LOGICAL_ID(id); | ||
59 | return val; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Set up the logical destination ID. | ||
64 | * | ||
65 | * Intel recommends to set DFR, LDR and TPR before enabling | ||
66 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
67 | * document number 292116). So here it goes... | ||
68 | */ | ||
69 | static inline void bigsmp_init_apic_ldr(void) | ||
70 | { | ||
71 | unsigned long val; | ||
72 | int cpu = smp_processor_id(); | ||
73 | |||
74 | apic_write(APIC_DFR, APIC_DFR_VALUE); | ||
75 | val = calculate_ldr(cpu); | ||
76 | apic_write(APIC_LDR, val); | ||
77 | } | ||
78 | |||
79 | static inline void bigsmp_setup_apic_routing(void) | ||
80 | { | ||
81 | printk(KERN_INFO "Enabling APIC mode: %s. Using %d I/O APICs\n", | ||
82 | "Physflat", nr_ioapics); | ||
83 | } | ||
84 | |||
85 | static inline int bigsmp_apicid_to_node(int logical_apicid) | ||
86 | { | ||
87 | return apicid_2_node[hard_smp_processor_id()]; | ||
88 | } | ||
89 | |||
90 | static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) | ||
91 | { | ||
92 | if (mps_cpu < nr_cpu_ids) | ||
93 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
94 | |||
95 | return BAD_APICID; | ||
96 | } | ||
97 | |||
98 | static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) | ||
99 | { | ||
100 | return physid_mask_of_physid(phys_apicid); | ||
101 | } | ||
102 | |||
103 | extern u8 cpu_2_logical_apicid[]; | ||
104 | /* Mapping from cpu number to logical apicid */ | ||
105 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | ||
106 | { | ||
107 | if (cpu >= nr_cpu_ids) | ||
108 | return BAD_APICID; | ||
109 | return cpu_physical_id(cpu); | ||
110 | } | ||
111 | |||
112 | static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | ||
113 | { | ||
114 | /* For clustered we don't have a good way to do this yet - hack */ | ||
115 | return physids_promote(0xFFL); | ||
116 | } | ||
117 | |||
118 | static inline void bigsmp_setup_portio_remap(void) | ||
119 | { | ||
120 | } | ||
121 | |||
122 | static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
123 | { | ||
124 | return 1; | ||
125 | } | ||
126 | |||
127 | /* As we are using single CPU as destination, pick only one CPU here */ | ||
128 | static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) | ||
129 | { | ||
130 | return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); | ||
131 | } | ||
132 | |||
133 | static inline unsigned int | ||
134 | bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
135 | const struct cpumask *andmask) | ||
136 | { | ||
137 | int cpu; | ||
138 | |||
139 | /* | ||
140 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
141 | * May as well be the first. | ||
142 | */ | ||
143 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
144 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
145 | break; | ||
146 | } | ||
147 | if (cpu < nr_cpu_ids) | ||
148 | return bigsmp_cpu_to_logical_apicid(cpu); | ||
149 | |||
150 | return BAD_APICID; | ||
151 | } | ||
152 | |||
153 | static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | ||
154 | { | ||
155 | return cpuid_apic >> index_msb; | ||
156 | } | ||
157 | |||
158 | static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) | ||
159 | { | ||
160 | default_send_IPI_mask_sequence_phys(mask, vector); | ||
161 | } | ||
162 | |||
163 | static inline void bigsmp_send_IPI_allbutself(int vector) | ||
164 | { | ||
165 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); | ||
166 | } | ||
167 | |||
168 | static inline void bigsmp_send_IPI_all(int vector) | ||
169 | { | ||
170 | bigsmp_send_IPI_mask(cpu_online_mask, vector); | ||
171 | } | ||
172 | |||
173 | static int dmi_bigsmp; /* can be set by dmi scanners */ | ||
174 | |||
175 | static int hp_ht_bigsmp(const struct dmi_system_id *d) | ||
176 | { | ||
177 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); | ||
178 | dmi_bigsmp = 1; | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | |||
183 | static const struct dmi_system_id bigsmp_dmi_table[] = { | ||
184 | { hp_ht_bigsmp, "HP ProLiant DL760 G2", | ||
185 | { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), | ||
186 | DMI_MATCH(DMI_BIOS_VERSION, "P44-"),} | ||
187 | }, | ||
188 | |||
189 | { hp_ht_bigsmp, "HP ProLiant DL740", | ||
190 | { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), | ||
191 | DMI_MATCH(DMI_BIOS_VERSION, "P47-"),} | ||
192 | }, | ||
193 | { } | ||
194 | }; | ||
195 | |||
196 | static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) | ||
197 | { | ||
198 | cpus_clear(*retmask); | ||
199 | cpu_set(cpu, *retmask); | ||
200 | } | ||
201 | |||
202 | static int probe_bigsmp(void) | ||
203 | { | ||
204 | if (def_to_bigsmp) | ||
205 | dmi_bigsmp = 1; | ||
206 | else | ||
207 | dmi_check_system(bigsmp_dmi_table); | ||
208 | return dmi_bigsmp; | ||
209 | } | ||
210 | |||
211 | struct genapic apic_bigsmp = { | ||
212 | |||
213 | .name = "bigsmp", | ||
214 | .probe = probe_bigsmp, | ||
215 | .acpi_madt_oem_check = NULL, | ||
216 | .apic_id_registered = bigsmp_apic_id_registered, | ||
217 | |||
218 | .irq_delivery_mode = dest_Fixed, | ||
219 | /* phys delivery to target CPU: */ | ||
220 | .irq_dest_mode = 0, | ||
221 | |||
222 | .target_cpus = bigsmp_target_cpus, | ||
223 | .disable_esr = 1, | ||
224 | .dest_logical = 0, | ||
225 | .check_apicid_used = bigsmp_check_apicid_used, | ||
226 | .check_apicid_present = bigsmp_check_apicid_present, | ||
227 | |||
228 | .vector_allocation_domain = bigsmp_vector_allocation_domain, | ||
229 | .init_apic_ldr = bigsmp_init_apic_ldr, | ||
230 | |||
231 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, | ||
232 | .setup_apic_routing = bigsmp_setup_apic_routing, | ||
233 | .multi_timer_check = NULL, | ||
234 | .apicid_to_node = bigsmp_apicid_to_node, | ||
235 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | ||
236 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | ||
237 | .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, | ||
238 | .setup_portio_remap = NULL, | ||
239 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, | ||
240 | .enable_apic_mode = NULL, | ||
241 | .phys_pkg_id = bigsmp_phys_pkg_id, | ||
242 | .mps_oem_check = NULL, | ||
243 | |||
244 | .get_apic_id = bigsmp_get_apic_id, | ||
245 | .set_apic_id = NULL, | ||
246 | .apic_id_mask = 0xFF << 24, | ||
247 | |||
248 | .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, | ||
249 | .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, | ||
250 | |||
251 | .send_IPI_mask = bigsmp_send_IPI_mask, | ||
252 | .send_IPI_mask_allbutself = NULL, | ||
253 | .send_IPI_allbutself = bigsmp_send_IPI_allbutself, | ||
254 | .send_IPI_all = bigsmp_send_IPI_all, | ||
255 | .send_IPI_self = default_send_IPI_self, | ||
256 | |||
257 | .wakeup_cpu = NULL, | ||
258 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
259 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
260 | |||
261 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
262 | |||
263 | .smp_callin_clear_local_apic = NULL, | ||
264 | .store_NMI_vector = NULL, | ||
265 | .inquire_remote_apic = default_inquire_remote_apic, | ||
266 | }; | ||
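apic_bigsmp is one of several such driver objects; the actual selection lives in the new probe_32.c added elsewhere in this series (see the Makefile hunk at the top of this diff). The table name and loop below are assumptions for illustration — only the .probe() contract comes from the code above:

	extern struct genapic *apic;
	extern struct genapic apic_bigsmp;	/* and apic_default, etc. */

	static struct genapic *apic_probe[] = {	/* hypothetical table */
		&apic_bigsmp,
		/* &apic_default, ... */
		NULL,
	};

	void __init probe_apic_sketch(void)	/* hypothetical name */
	{
		int i;

		for (i = 0; apic_probe[i]; i++) {
			if (apic_probe[i]->probe()) {
				apic = apic_probe[i];
				printk(KERN_INFO "Using APIC driver %s\n", apic->name);
				break;
			}
		}
	}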
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index f0dfe6f17e7e..f63882728d91 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <asm/uv/bios.h> | 25 | #include <asm/uv/bios.h> |
26 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
27 | 27 | ||
28 | struct uv_systab uv_systab; | 28 | static struct uv_systab uv_systab; |
29 | 29 | ||
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | 30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) |
31 | { | 31 | { |
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | |||
69 | 69 | ||
70 | long sn_partition_id; | 70 | long sn_partition_id; |
71 | EXPORT_SYMBOL_GPL(sn_partition_id); | 71 | EXPORT_SYMBOL_GPL(sn_partition_id); |
72 | long uv_coherency_id; | 72 | long sn_coherency_id; |
73 | EXPORT_SYMBOL_GPL(uv_coherency_id); | 73 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
74 | long uv_region_size; | 74 | long sn_region_size; |
75 | EXPORT_SYMBOL_GPL(uv_region_size); | 75 | EXPORT_SYMBOL_GPL(sn_region_size); |
76 | int uv_type; | 76 | int uv_type; |
77 | 77 | ||
78 | 78 | ||
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
100 | return ret; | 100 | return ret; |
101 | } | 101 | } |
102 | 102 | ||
103 | int | ||
104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | ||
105 | unsigned long *intr_mmr_offset) | ||
106 | { | ||
107 | union uv_watchlist_u size_blade; | ||
108 | u64 watchlist; | ||
109 | s64 ret; | ||
110 | |||
111 | size_blade.size = mq_size; | ||
112 | size_blade.blade = blade; | ||
113 | |||
114 | /* | ||
115 | * bios returns watchlist number or negative error number. | ||
116 | */ | ||
117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
118 | size_blade.val, (u64)intr_mmr_offset, | ||
119 | (u64)&watchlist, 0); | ||
120 | if (ret < BIOS_STATUS_SUCCESS) | ||
121 | return ret; | ||
122 | |||
123 | return watchlist; | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
126 | |||
127 | int | ||
128 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
129 | { | ||
130 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
131 | blade, watchlist_num, 0, 0, 0); | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
134 | |||
135 | s64 | ||
136 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
137 | { | ||
138 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
139 | perms, 0, 0); | ||
140 | } | ||
141 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
142 | |||
143 | s64 | ||
144 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
145 | { | ||
146 | s64 ret; | ||
147 | |||
148 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
149 | (u64)addr, buf, (u64)len, 0); | ||
150 | return ret; | ||
151 | } | ||
152 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
103 | 153 | ||
104 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | 154 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) |
105 | { | 155 | { |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c new file mode 100644 index 000000000000..2ac0ab71412a --- /dev/null +++ b/arch/x86/kernel/check.c | |||
@@ -0,0 +1,161 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/kthread.h> | ||
4 | #include <linux/workqueue.h> | ||
5 | #include <asm/e820.h> | ||
6 | #include <asm/proto.h> | ||
7 | |||
8 | /* | ||
9 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
10 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
11 | * remaining free memory in that area and fill it with a distinct | ||
12 | * pattern. | ||
13 | */ | ||
14 | #define MAX_SCAN_AREAS 8 | ||
15 | |||
16 | static int __read_mostly memory_corruption_check = -1; | ||
17 | |||
18 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
20 | |||
21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
22 | static int num_scan_areas; | ||
23 | |||
24 | |||
25 | static __init int set_corruption_check(char *arg) | ||
26 | { | ||
27 | char *end; | ||
28 | |||
29 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
30 | |||
31 | return (*end == 0) ? 0 : -EINVAL; | ||
32 | } | ||
33 | early_param("memory_corruption_check", set_corruption_check); | ||
34 | |||
35 | static __init int set_corruption_check_period(char *arg) | ||
36 | { | ||
37 | char *end; | ||
38 | |||
39 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
40 | |||
41 | return (*end == 0) ? 0 : -EINVAL; | ||
42 | } | ||
43 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
44 | |||
45 | static __init int set_corruption_check_size(char *arg) | ||
46 | { | ||
47 | char *end; | ||
48 | unsigned size; | ||
49 | |||
50 | size = memparse(arg, &end); | ||
51 | |||
52 | if (*end == '\0') | ||
53 | corruption_check_size = size; | ||
54 | |||
55 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
56 | } | ||
57 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
58 | |||
59 | |||
60 | void __init setup_bios_corruption_check(void) | ||
61 | { | ||
62 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
63 | |||
64 | if (memory_corruption_check == -1) { | ||
65 | memory_corruption_check = | ||
66 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
67 | 1 | ||
68 | #else | ||
69 | 0 | ||
70 | #endif | ||
71 | ; | ||
72 | } | ||
73 | |||
74 | if (corruption_check_size == 0) | ||
75 | memory_corruption_check = 0; | ||
76 | |||
77 | if (!memory_corruption_check) | ||
78 | return; | ||
79 | |||
80 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
81 | |||
82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | ||
83 | u64 size; | ||
84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
85 | |||
86 | if (addr == 0) | ||
87 | break; | ||
88 | |||
89 | if ((addr + size) > corruption_check_size) | ||
90 | size = corruption_check_size - addr; | ||
91 | |||
92 | if (size == 0) | ||
93 | break; | ||
94 | |||
95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
96 | scan_areas[num_scan_areas].addr = addr; | ||
97 | scan_areas[num_scan_areas].size = size; | ||
98 | num_scan_areas++; | ||
99 | |||
100 | /* Assume we've already mapped this early memory */ | ||
101 | memset(__va(addr), 0, size); | ||
102 | |||
103 | addr += size; | ||
104 | } | ||
105 | |||
106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
107 | num_scan_areas); | ||
108 | update_e820(); | ||
109 | } | ||
110 | |||
111 | |||
112 | void check_for_bios_corruption(void) | ||
113 | { | ||
114 | int i; | ||
115 | int corruption = 0; | ||
116 | |||
117 | if (!memory_corruption_check) | ||
118 | return; | ||
119 | |||
120 | for (i = 0; i < num_scan_areas; i++) { | ||
121 | unsigned long *addr = __va(scan_areas[i].addr); | ||
122 | unsigned long size = scan_areas[i].size; | ||
123 | |||
124 | for (; size; addr++, size -= sizeof(unsigned long)) { | ||
125 | if (!*addr) | ||
126 | continue; | ||
127 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
128 | addr, __pa(addr), *addr); | ||
129 | corruption = 1; | ||
130 | *addr = 0; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
135 | } | ||
136 | |||
137 | static void check_corruption(struct work_struct *dummy); | ||
138 | static DECLARE_DELAYED_WORK(bios_check_work, check_corruption); | ||
139 | |||
140 | static void check_corruption(struct work_struct *dummy) | ||
141 | { | ||
142 | check_for_bios_corruption(); | ||
143 | schedule_delayed_work(&bios_check_work, | ||
144 | round_jiffies_relative(corruption_check_period*HZ)); | ||
145 | } | ||
146 | |||
147 | static int start_periodic_check_for_corruption(void) | ||
148 | { | ||
149 | if (!memory_corruption_check || corruption_check_period == 0) | ||
150 | return 0; | ||
151 | |||
152 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
153 | corruption_check_period); | ||
154 | |||
155 | /* First time we run the checks right away */ | ||
156 | schedule_delayed_work(&bios_check_work, 0); | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | module_init(start_periodic_check_for_corruption); | ||
161 | |||
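The three early_param() hooks make the checker tunable from the kernel command line; the size goes through memparse(), so K/M suffixes work, and a period of 0 keeps the one-time boot reservation but disables the periodic rescan. For example:

	memory_corruption_check=1 memory_corruption_check_size=64K memory_corruption_check_period=300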
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82ec6075c057..82db7f45e2de 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -2,8 +2,14 @@ | |||
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | ||
6 | ifdef CONFIG_FUNCTION_TRACER | ||
7 | CFLAGS_REMOVE_common.o = -pg | ||
8 | endif | ||
9 | |||
5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 10 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
6 | obj-y += proc.o capflags.o powerflags.o common.o | 11 | obj-y += proc.o capflags.o powerflags.o common.o |
12 | obj-y += vmware.o hypervisor.o | ||
7 | 13 | ||
8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 14 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
9 | obj-$(CONFIG_X86_64) += bugs_64.o | 15 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index ef8f831af823..e48640cfac0c 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <asm/pat.h> | 7 | #include <asm/pat.h> |
8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
9 | 9 | ||
10 | #include <mach_apic.h> | 10 | #include <asm/genapic.h> |
11 | 11 | ||
12 | struct cpuid_bit { | 12 | struct cpuid_bit { |
13 | u16 feature; | 13 | u16 feature; |
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
69 | */ | 69 | */ |
70 | void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | 70 | void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) |
71 | { | 71 | { |
72 | #ifdef CONFIG_X86_SMP | 72 | #ifdef CONFIG_SMP |
73 | unsigned int eax, ebx, ecx, edx, sub_index; | 73 | unsigned int eax, ebx, ecx, edx, sub_index; |
74 | unsigned int ht_mask_width, core_plus_mask_width; | 74 | unsigned int ht_mask_width, core_plus_mask_width; |
75 | unsigned int core_select_mask, core_level_siblings; | 75 | unsigned int core_select_mask, core_level_siblings; |
@@ -116,14 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
116 | 116 | ||
117 | core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; | 117 | core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; |
118 | 118 | ||
119 | #ifdef CONFIG_X86_32 | 119 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) |
120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | ||
121 | & core_select_mask; | 120 | & core_select_mask; |
122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | 121 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); |
123 | #else | 122 | /* |
124 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | 123 | * Reinit the apicid, now that we have extended initial_apicid. |
125 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | 124 | */ |
126 | #endif | 125 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
126 | |||
127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
128 | 128 | ||
129 | 129 | ||
@@ -135,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
135 | return; | 135 | return; |
136 | #endif | 136 | #endif |
137 | } | 137 | } |
138 | |||
139 | #ifdef CONFIG_X86_PAT | ||
140 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | ||
141 | { | ||
142 | if (!cpu_has_pat) | ||
143 | pat_disable("PAT not supported by CPU."); | ||
144 | |||
145 | switch (c->x86_vendor) { | ||
146 | case X86_VENDOR_INTEL: | ||
147 | /* | ||
148 | * There is a known erratum on Pentium III and Core Solo | ||
149 | * and Core Duo CPUs. | ||
150 | * " Page with PAT set to WC while associated MTRR is UC | ||
151 | * may consolidate to UC " | ||
152 | * Because of this erratum, it is better to stick with | ||
153 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
154 | * | ||
155 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
156 | */ | ||
157 | if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) | ||
158 | return; | ||
159 | |||
160 | pat_disable("PAT WC disabled due to known CPU erratum."); | ||
161 | return; | ||
162 | |||
163 | case X86_VENDOR_AMD: | ||
164 | case X86_VENDOR_CENTAUR: | ||
165 | case X86_VENDOR_TRANSMETA: | ||
166 | return; | ||
167 | } | ||
168 | |||
169 | pat_disable("PAT disabled. Not yet verified on this CPU type."); | ||
170 | } | ||
171 | #endif | ||
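detect_extended_topology() can drop its 32/64-bit split because packing and unpacking of the APIC ID now goes through apic->phys_pkg_id(). For most drivers that is a plain shift, as in the bigsmp implementation earlier in this diff — which also makes the new "reinit the apicid" line an identity operation (a shift by 0) under such drivers:

	/* e.g. bigsmp_32.c above: the package ID is the APIC ID above index_msb */
	static int phys_pkg_id(int cpuid_apic, int index_msb)
	{
		return cpuid_apic >> index_msb;
	}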
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f1e31db2ad5..ff4d7b9e32e4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -12,7 +12,7 @@ | |||
12 | # include <asm/cacheflush.h> | 12 | # include <asm/cacheflush.h> |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | #include <mach_apic.h> | 15 | #include <asm/genapic.h> |
16 | 16 | ||
17 | #include "cpu.h" | 17 | #include "cpu.h" |
18 | 18 | ||
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
283 | { | 283 | { |
284 | early_init_amd_mc(c); | 284 | early_init_amd_mc(c); |
285 | 285 | ||
286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 286 | /* |
287 | if (c->x86_power & (1<<8)) | 287 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate |
288 | * with P/T states and does not stop in deep C-states | ||
289 | */ | ||
290 | if (c->x86_power & (1 << 8)) { | ||
288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 291 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
292 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
293 | } | ||
289 | 294 | ||
290 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | 296 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b9c9ea0217a9..e8f4a386bd9d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -21,14 +21,16 @@ | |||
21 | #include <asm/asm.h> | 21 | #include <asm/asm.h> |
22 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
23 | #include <asm/smp.h> | 23 | #include <asm/smp.h> |
24 | #include <asm/cpu.h> | ||
25 | #include <asm/cpumask.h> | ||
24 | #ifdef CONFIG_X86_LOCAL_APIC | 26 | #ifdef CONFIG_X86_LOCAL_APIC |
25 | #include <asm/mpspec.h> | 27 | #include <asm/mpspec.h> |
26 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
27 | #include <mach_apic.h> | ||
28 | #include <asm/genapic.h> | 29 | #include <asm/genapic.h> |
30 | #include <asm/genapic.h> | ||
31 | #include <asm/uv/uv.h> | ||
29 | #endif | 32 | #endif |
30 | 33 | ||
31 | #include <asm/pda.h> | ||
32 | #include <asm/pgtable.h> | 34 | #include <asm/pgtable.h> |
33 | #include <asm/processor.h> | 35 | #include <asm/processor.h> |
34 | #include <asm/desc.h> | 36 | #include <asm/desc.h> |
@@ -36,28 +38,59 @@ | |||
36 | #include <asm/proto.h> | 38 | #include <asm/proto.h> |
37 | #include <asm/sections.h> | 39 | #include <asm/sections.h> |
38 | #include <asm/setup.h> | 40 | #include <asm/setup.h> |
41 | #include <asm/hypervisor.h> | ||
42 | #include <asm/stackprotector.h> | ||
39 | 43 | ||
40 | #include "cpu.h" | 44 | #include "cpu.h" |
41 | 45 | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | |||
48 | /* all of these masks are initialized in setup_cpu_local_masks() */ | ||
49 | cpumask_var_t cpu_callin_mask; | ||
50 | cpumask_var_t cpu_callout_mask; | ||
51 | cpumask_var_t cpu_initialized_mask; | ||
52 | |||
53 | /* representing cpus for which sibling maps can be computed */ | ||
54 | cpumask_var_t cpu_sibling_setup_mask; | ||
55 | |||
56 | /* correctly size the local cpu masks */ | ||
57 | void __init setup_cpu_local_masks(void) | ||
58 | { | ||
59 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); | ||
60 | alloc_bootmem_cpumask_var(&cpu_callin_mask); | ||
61 | alloc_bootmem_cpumask_var(&cpu_callout_mask); | ||
62 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | ||
63 | } | ||
64 | |||
65 | #else /* CONFIG_X86_32 */ | ||
66 | |||
67 | cpumask_t cpu_callin_map; | ||
68 | cpumask_t cpu_callout_map; | ||
69 | cpumask_t cpu_initialized; | ||
70 | cpumask_t cpu_sibling_setup_map; | ||
71 | |||
72 | #endif /* CONFIG_X86_32 */ | ||
73 | |||
74 | |||
42 | static struct cpu_dev *this_cpu __cpuinitdata; | 75 | static struct cpu_dev *this_cpu __cpuinitdata; |
43 | 76 | ||
77 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | ||
44 | #ifdef CONFIG_X86_64 | 78 | #ifdef CONFIG_X86_64 |
45 | /* We need valid kernel segments for data and code in long mode too | 79 | /* |
46 | * IRET will check the segment types kkeil 2000/10/28 | 80 | * We need valid kernel segments for data and code in long mode too |
47 | * Also sysret mandates a special GDT layout | 81 | * IRET will check the segment types kkeil 2000/10/28 |
48 | */ | 82 | * Also sysret mandates a special GDT layout |
49 | /* The TLS descriptors are currently at a different place compared to i386. | 83 | * |
50 | Hopefully nobody expects them at a fixed place (Wine?) */ | 84 | * The TLS descriptors are currently at a different place compared to i386. |
51 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | 85 | * Hopefully nobody expects them at a fixed place (Wine?) |
86 | */ | ||
52 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | 87 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, |
53 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | 88 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, |
54 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | 89 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, |
55 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | 90 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, |
56 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | 91 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, |
57 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | 92 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, |
58 | } }; | ||
59 | #else | 93 | #else |
60 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | ||
61 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 94 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, |
62 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 95 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
63 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | 96 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, |
@@ -89,9 +122,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
89 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 122 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, |
90 | 123 | ||
91 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 124 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
92 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | 125 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, |
93 | } }; | 126 | GDT_STACK_CANARY_INIT |
94 | #endif | 127 | #endif |
128 | } }; | ||
95 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 129 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
96 | 130 | ||
97 | #ifdef CONFIG_X86_32 | 131 | #ifdef CONFIG_X86_32 |
@@ -192,6 +226,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
192 | #endif | 226 | #endif |
193 | 227 | ||
194 | /* | 228 | /* |
229 | * Some CPU features depend on higher CPUID levels, which may not always | ||
230 | * be available due to CPUID level capping or broken virtualization | ||
231 | * software. Add those features to this table to auto-disable them. | ||
232 | */ | ||
233 | struct cpuid_dependent_feature { | ||
234 | u32 feature; | ||
235 | u32 level; | ||
236 | }; | ||
237 | static const struct cpuid_dependent_feature __cpuinitconst | ||
238 | cpuid_dependent_features[] = { | ||
239 | { X86_FEATURE_MWAIT, 0x00000005 }, | ||
240 | { X86_FEATURE_DCA, 0x00000009 }, | ||
241 | { X86_FEATURE_XSAVE, 0x0000000d }, | ||
242 | { 0, 0 } | ||
243 | }; | ||
244 | |||
245 | static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) | ||
246 | { | ||
247 | const struct cpuid_dependent_feature *df; | ||
248 | for (df = cpuid_dependent_features; df->feature; df++) { | ||
249 | /* | ||
250 | * Note: cpuid_level is set to -1 if unavailable, but | ||
251 | * extended_cpuid_level is set to 0 if unavailable | ||
252 | * and the legitimate extended levels are all negative | ||
253 | * when signed; hence the weird messing around with | ||
254 | * signs here... | ||
255 | */ | ||
256 | if (cpu_has(c, df->feature) && | ||
257 | ((s32)df->level < 0 ? | ||
258 | (u32)df->level > (u32)c->extended_cpuid_level : | ||
259 | (s32)df->level > (s32)c->cpuid_level)) { | ||
260 | clear_cpu_cap(c, df->feature); | ||
261 | if (warn) | ||
262 | printk(KERN_WARNING | ||
263 | "CPU: CPU feature %s disabled " | ||
264 | "due to lack of CPUID level 0x%x\n", | ||
265 | x86_cap_flags[df->feature], | ||
266 | df->level); | ||
267 | } | ||
268 | } | ||
269 | } | ||
270 | |||
271 | /* | ||
195 | * Naming convention should be: <Name> [(<Codename>)] | 272 | * Naming convention should be: <Name> [(<Codename>)] |
196 | * This table only is used unless init_<vendor>() below doesn't set it; | 273 | * This table only is used unless init_<vendor>() below doesn't set it; |
197 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | 274 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used |
@@ -221,18 +298,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | |||
221 | 298 | ||
222 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | 299 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; |
223 | 300 | ||
301 | void load_percpu_segment(int cpu) | ||
302 | { | ||
303 | #ifdef CONFIG_X86_32 | ||
304 | loadsegment(fs, __KERNEL_PERCPU); | ||
305 | #else | ||
306 | loadsegment(gs, 0); | ||
307 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | ||
308 | #endif | ||
309 | load_stack_canary_segment(); | ||
310 | } | ||
311 | |||
224 | /* Current gdt points %fs at the "master" per-cpu area: after this, | 312 | /* Current gdt points %fs at the "master" per-cpu area: after this, |
225 | * it's on the real one. */ | 313 | * it's on the real one. */ |
226 | void switch_to_new_gdt(void) | 314 | void switch_to_new_gdt(int cpu) |
227 | { | 315 | { |
228 | struct desc_ptr gdt_descr; | 316 | struct desc_ptr gdt_descr; |
229 | 317 | ||
230 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | 318 | gdt_descr.address = (long)get_cpu_gdt_table(cpu); |
231 | gdt_descr.size = GDT_SIZE - 1; | 319 | gdt_descr.size = GDT_SIZE - 1; |
232 | load_gdt(&gdt_descr); | 320 | load_gdt(&gdt_descr); |
233 | #ifdef CONFIG_X86_32 | 321 | /* Reload the per-cpu base */ |
234 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | 322 | |
235 | #endif | 323 | load_percpu_segment(cpu); |
236 | } | 324 | } |
237 | 325 | ||
238 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | 326 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
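Once load_percpu_segment() has pointed %fs (32-bit) or MSR_GS_BASE (64-bit) at this CPU's per-cpu area, accessors such as percpu_read()/percpu_write() used elsewhere in this patch reduce to a single segment-relative instruction. A rough sketch of the 64-bit flavor, for orientation only; the real macros live in asm/percpu.h and handle more operand sizes:

    /* Illustrative fragment: with MSR_GS_BASE set to this CPU's per-cpu
     * base, reading a per-cpu variable is one %gs-relative load. */
    #define percpu_read_sketch(var) ({                              \
            typeof(var) __val;                                      \
            asm("mov %%gs:%1, %0" : "=r" (__val) : "m" (var));      \
            __val;                                                  \
    })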
@@ -354,7 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
354 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 442 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
355 | } else if (smp_num_siblings > 1) { | 443 | } else if (smp_num_siblings > 1) { |
356 | 444 | ||
357 | if (smp_num_siblings > NR_CPUS) { | 445 | if (smp_num_siblings > nr_cpu_ids) { |
358 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", | 446 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", |
359 | smp_num_siblings); | 447 | smp_num_siblings); |
360 | smp_num_siblings = 1; | 448 | smp_num_siblings = 1; |
@@ -362,11 +450,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
362 | } | 450 | } |
363 | 451 | ||
364 | index_msb = get_count_order(smp_num_siblings); | 452 | index_msb = get_count_order(smp_num_siblings); |
365 | #ifdef CONFIG_X86_64 | 453 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); |
366 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
367 | #else | ||
368 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); | ||
369 | #endif | ||
370 | 454 | ||
371 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 455 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
372 | 456 | ||
@@ -374,13 +458,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
374 | 458 | ||
375 | core_bits = get_count_order(c->x86_max_cores); | 459 | core_bits = get_count_order(c->x86_max_cores); |
376 | 460 | ||
377 | #ifdef CONFIG_X86_64 | 461 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & |
378 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
379 | ((1 << core_bits) - 1); | 462 | ((1 << core_bits) - 1); |
380 | #else | ||
381 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & | ||
382 | ((1 << core_bits) - 1); | ||
383 | #endif | ||
384 | } | 463 | } |
385 | 464 | ||
386 | out: | 465 | out: |
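The index_msb/core_bits arithmetic in detect_ht() slices the initial APIC ID into package, core and thread fields. A standalone worked example for a hypothetical package with 2 cores of 2 threads (4 siblings on the first pass, 2 per core after dividing by x86_max_cores):

    #include <stdio.h>

    /* Hypothetical APIC ID 0x6 = 0b110: thread 0, core 1, package 1. */
    int main(void)
    {
            unsigned apicid    = 0x6;
            unsigned index_msb = 2; /* get_count_order(4 siblings) */
            unsigned pkg       = apicid >> index_msb;       /* phys_proc_id = 1 */

            unsigned core_bits = 1; /* get_count_order(2 cores) */
            index_msb = 1;          /* recomputed: get_count_order(2 threads/core) */
            unsigned core = (apicid >> index_msb)
                            & ((1u << core_bits) - 1);      /* cpu_core_id = 1 */

            printf("phys_proc_id=%u cpu_core_id=%u\n", pkg, core);
            return 0;
    }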
@@ -549,11 +628,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
549 | if (this_cpu->c_early_init) | 628 | if (this_cpu->c_early_init) |
550 | this_cpu->c_early_init(c); | 629 | this_cpu->c_early_init(c); |
551 | 630 | ||
552 | validate_pat_support(c); | ||
553 | |||
554 | #ifdef CONFIG_SMP | 631 | #ifdef CONFIG_SMP |
555 | c->cpu_index = boot_cpu_id; | 632 | c->cpu_index = boot_cpu_id; |
556 | #endif | 633 | #endif |
634 | filter_cpuid_features(c, false); | ||
557 | } | 635 | } |
558 | 636 | ||
559 | void __init early_cpu_init(void) | 637 | void __init early_cpu_init(void) |
@@ -616,7 +694,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
616 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; | 694 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; |
617 | #ifdef CONFIG_X86_32 | 695 | #ifdef CONFIG_X86_32 |
618 | # ifdef CONFIG_X86_HT | 696 | # ifdef CONFIG_X86_HT |
619 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | 697 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
620 | # else | 698 | # else |
621 | c->apicid = c->initial_apicid; | 699 | c->apicid = c->initial_apicid; |
622 | # endif | 700 | # endif |
@@ -663,7 +741,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
663 | this_cpu->c_identify(c); | 741 | this_cpu->c_identify(c); |
664 | 742 | ||
665 | #ifdef CONFIG_X86_64 | 743 | #ifdef CONFIG_X86_64 |
666 | c->apicid = phys_pkg_id(0); | 744 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
667 | #endif | 745 | #endif |
668 | 746 | ||
669 | /* | 747 | /* |
@@ -687,6 +765,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
687 | * we do "generic changes." | 765 | * we do "generic changes." |
688 | */ | 766 | */ |
689 | 767 | ||
768 | /* Filter out anything that depends on CPUID levels we don't have */ | ||
769 | filter_cpuid_features(c, true); | ||
770 | |||
690 | /* If the model name is still unset, do table lookup. */ | 771 | /* If the model name is still unset, do table lookup. */ |
691 | if (!c->x86_model_id[0]) { | 772 | if (!c->x86_model_id[0]) { |
692 | char *p; | 773 | char *p; |
@@ -703,6 +784,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
703 | detect_ht(c); | 784 | detect_ht(c); |
704 | #endif | 785 | #endif |
705 | 786 | ||
787 | init_hypervisor(c); | ||
706 | /* | 788 | /* |
707 | * On SMP, boot_cpu_data holds the common feature set between | 789 | * On SMP, boot_cpu_data holds the common feature set between |
708 | * all CPUs; so make sure that we indicate which features are | 790 | * all CPUs; so make sure that we indicate which features are |
@@ -854,57 +936,23 @@ static __init int setup_disablecpuid(char *arg) | |||
854 | } | 936 | } |
855 | __setup("clearcpuid=", setup_disablecpuid); | 937 | __setup("clearcpuid=", setup_disablecpuid); |
856 | 938 | ||
857 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | ||
858 | |||
859 | #ifdef CONFIG_X86_64 | 939 | #ifdef CONFIG_X86_64 |
860 | struct x8664_pda **_cpu_pda __read_mostly; | ||
861 | EXPORT_SYMBOL(_cpu_pda); | ||
862 | |||
863 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 940 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
864 | 941 | ||
865 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | 942 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
943 | irq_stack_union) __aligned(PAGE_SIZE); | ||
944 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | ||
945 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | ||
866 | 946 | ||
867 | void __cpuinit pda_init(int cpu) | 947 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
868 | { | 948 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
869 | struct x8664_pda *pda = cpu_pda(cpu); | 949 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
870 | 950 | ||
871 | /* Setup up data that may be needed in __get_free_pages early */ | 951 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
872 | loadsegment(fs, 0); | ||
873 | loadsegment(gs, 0); | ||
874 | /* Memory clobbers used to order PDA accessed */ | ||
875 | mb(); | ||
876 | wrmsrl(MSR_GS_BASE, pda); | ||
877 | mb(); | ||
878 | |||
879 | pda->cpunumber = cpu; | ||
880 | pda->irqcount = -1; | ||
881 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
882 | PDA_STACKOFFSET + THREAD_SIZE; | ||
883 | pda->active_mm = &init_mm; | ||
884 | pda->mmu_state = 0; | ||
885 | |||
886 | if (cpu == 0) { | ||
887 | /* others are initialized in smpboot.c */ | ||
888 | pda->pcurrent = &init_task; | ||
889 | pda->irqstackptr = boot_cpu_stack; | ||
890 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
891 | } else { | ||
892 | if (!pda->irqstackptr) { | ||
893 | pda->irqstackptr = (char *) | ||
894 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
895 | if (!pda->irqstackptr) | ||
896 | panic("cannot allocate irqstack for cpu %d", | ||
897 | cpu); | ||
898 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
899 | } | ||
900 | 952 | ||
901 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | 953 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
902 | pda->nodenumber = cpu_to_node(cpu); | 954 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) |
903 | } | 955 | __aligned(PAGE_SIZE); |
904 | } | ||
905 | |||
906 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | ||
907 | DEBUG_STKSZ] __page_aligned_bss; | ||
908 | 956 | ||
909 | extern asmlinkage void ignore_sysret(void); | 957 | extern asmlinkage void ignore_sysret(void); |
910 | 958 | ||
@@ -937,16 +985,21 @@ unsigned long kernel_eflags; | |||
937 | */ | 985 | */ |
938 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 986 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
939 | 987 | ||
940 | #else | 988 | #else /* x86_64 */ |
989 | |||
990 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
991 | DEFINE_PER_CPU(unsigned long, stack_canary); | ||
992 | #endif | ||
941 | 993 | ||
942 | /* Make sure %fs is initialized properly in idle threads */ | 994 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
943 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 995 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
944 | { | 996 | { |
945 | memset(regs, 0, sizeof(struct pt_regs)); | 997 | memset(regs, 0, sizeof(struct pt_regs)); |
946 | regs->fs = __KERNEL_PERCPU; | 998 | regs->fs = __KERNEL_PERCPU; |
999 | regs->gs = __KERNEL_STACK_CANARY; | ||
947 | return regs; | 1000 | return regs; |
948 | } | 1001 | } |
949 | #endif | 1002 | #endif /* x86_64 */ |
950 | 1003 | ||
951 | /* | 1004 | /* |
952 | * cpu_init() initializes state that is per-CPU. Some data is already | 1005 | * cpu_init() initializes state that is per-CPU. Some data is already |
@@ -962,19 +1015,18 @@ void __cpuinit cpu_init(void) | |||
962 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1015 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
963 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | 1016 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); |
964 | unsigned long v; | 1017 | unsigned long v; |
965 | char *estacks = NULL; | ||
966 | struct task_struct *me; | 1018 | struct task_struct *me; |
967 | int i; | 1019 | int i; |
968 | 1020 | ||
969 | /* CPU 0 is initialised in head64.c */ | 1021 | #ifdef CONFIG_NUMA |
970 | if (cpu != 0) | 1022 | if (cpu != 0 && percpu_read(node_number) == 0 && |
971 | pda_init(cpu); | 1023 | cpu_to_node(cpu) != NUMA_NO_NODE) |
972 | else | 1024 | percpu_write(node_number, cpu_to_node(cpu)); |
973 | estacks = boot_exception_stacks; | 1025 | #endif |
974 | 1026 | ||
975 | me = current; | 1027 | me = current; |
976 | 1028 | ||
977 | if (cpu_test_and_set(cpu, cpu_initialized)) | 1029 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) |
978 | panic("CPU#%d already initialized!\n", cpu); | 1030 | panic("CPU#%d already initialized!\n", cpu); |
979 | 1031 | ||
980 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | 1032 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); |
@@ -986,7 +1038,9 @@ void __cpuinit cpu_init(void) | |||
986 | * and set up the GDT descriptor: | 1038 | * and set up the GDT descriptor: |
987 | */ | 1039 | */ |
988 | 1040 | ||
989 | switch_to_new_gdt(); | 1041 | switch_to_new_gdt(cpu); |
1042 | loadsegment(fs, 0); | ||
1043 | |||
990 | load_idt((const struct desc_ptr *)&idt_descr); | 1044 | load_idt((const struct desc_ptr *)&idt_descr); |
991 | 1045 | ||
992 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | 1046 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
@@ -1004,18 +1058,13 @@ void __cpuinit cpu_init(void) | |||
1004 | * set up and load the per-CPU TSS | 1058 | * set up and load the per-CPU TSS |
1005 | */ | 1059 | */ |
1006 | if (!orig_ist->ist[0]) { | 1060 | if (!orig_ist->ist[0]) { |
1007 | static const unsigned int order[N_EXCEPTION_STACKS] = { | 1061 | static const unsigned int sizes[N_EXCEPTION_STACKS] = { |
1008 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | 1062 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
1009 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | 1063 | [DEBUG_STACK - 1] = DEBUG_STKSZ |
1010 | }; | 1064 | }; |
1065 | char *estacks = per_cpu(exception_stacks, cpu); | ||
1011 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 1066 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
1012 | if (cpu) { | 1067 | estacks += sizes[v]; |
1013 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
1014 | if (!estacks) | ||
1015 | panic("Cannot allocate exception " | ||
1016 | "stack %ld %d\n", v, cpu); | ||
1017 | } | ||
1018 | estacks += PAGE_SIZE << order[v]; | ||
1019 | orig_ist->ist[v] = t->x86_tss.ist[v] = | 1068 | orig_ist->ist[v] = t->x86_tss.ist[v] = |
1020 | (unsigned long)estacks; | 1069 | (unsigned long)estacks; |
1021 | } | 1070 | } |
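The rewritten loop carves the IST stacks out of the static per-cpu exception_stacks array instead of calling __get_free_pages() for each one. Note that estacks is advanced before it is stored, so every ist[v] holds the top of its stack, which then grows downward. With this era's typical sizes (an assumption: EXCEPTION_STKSZ = 4096, DEBUG_STKSZ = 8192, N_EXCEPTION_STACKS = 5, DEBUG_STACK = 4) the layout works out as:

    /* Comment-only sketch of the carving, per CPU:
     *   v=0: estacks = base +  4096  -> ist[0]
     *   v=1: estacks = base +  8192  -> ist[1]
     *   v=2: estacks = base + 12288  -> ist[2]
     *   v=3: estacks = base + 20480  -> ist[3]  (the DEBUG_STKSZ slot)
     *   v=4: estacks = base + 24576  -> ist[4]
     * Total: (5 - 1) * 4096 + 8192 = 24576 bytes, matching the array
     * bound in the DEFINE_PER_CPU_PAGE_ALIGNED declaration above.
     */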
@@ -1049,22 +1098,19 @@ void __cpuinit cpu_init(void) | |||
1049 | */ | 1098 | */ |
1050 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | 1099 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) |
1051 | arch_kgdb_ops.correct_hw_break(); | 1100 | arch_kgdb_ops.correct_hw_break(); |
1052 | else { | 1101 | else |
1053 | #endif | 1102 | #endif |
1054 | /* | 1103 | { |
1055 | * Clear all 6 debug registers: | 1104 | /* |
1056 | */ | 1105 | * Clear all 6 debug registers: |
1057 | 1106 | */ | |
1058 | set_debugreg(0UL, 0); | 1107 | set_debugreg(0UL, 0); |
1059 | set_debugreg(0UL, 1); | 1108 | set_debugreg(0UL, 1); |
1060 | set_debugreg(0UL, 2); | 1109 | set_debugreg(0UL, 2); |
1061 | set_debugreg(0UL, 3); | 1110 | set_debugreg(0UL, 3); |
1062 | set_debugreg(0UL, 6); | 1111 | set_debugreg(0UL, 6); |
1063 | set_debugreg(0UL, 7); | 1112 | set_debugreg(0UL, 7); |
1064 | #ifdef CONFIG_KGDB | ||
1065 | /* If the kgdb is connected no debug regs should be altered. */ | ||
1066 | } | 1113 | } |
1067 | #endif | ||
1068 | 1114 | ||
1069 | fpu_init(); | 1115 | fpu_init(); |
1070 | 1116 | ||
@@ -1083,7 +1129,7 @@ void __cpuinit cpu_init(void) | |||
1083 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1129 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
1084 | struct thread_struct *thread = &curr->thread; | 1130 | struct thread_struct *thread = &curr->thread; |
1085 | 1131 | ||
1086 | if (cpu_test_and_set(cpu, cpu_initialized)) { | 1132 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { |
1087 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | 1133 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); |
1088 | for (;;) local_irq_enable(); | 1134 | for (;;) local_irq_enable(); |
1089 | } | 1135 | } |
@@ -1094,7 +1140,7 @@ void __cpuinit cpu_init(void) | |||
1094 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1140 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1095 | 1141 | ||
1096 | load_idt(&idt_descr); | 1142 | load_idt(&idt_descr); |
1097 | switch_to_new_gdt(); | 1143 | switch_to_new_gdt(cpu); |
1098 | 1144 | ||
1099 | /* | 1145 | /* |
1100 | * Set up and load the per-CPU TSS and LDT | 1146 | * Set up and load the per-CPU TSS and LDT |
@@ -1115,9 +1161,6 @@ void __cpuinit cpu_init(void) | |||
1115 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 1161 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
1116 | #endif | 1162 | #endif |
1117 | 1163 | ||
1118 | /* Clear %gs. */ | ||
1119 | asm volatile ("mov %0, %%gs" : : "r" (0)); | ||
1120 | |||
1121 | /* Clear all 6 debug registers: */ | 1164 | /* Clear all 6 debug registers: */ |
1122 | set_debugreg(0, 0); | 1165 | set_debugreg(0, 0); |
1123 | set_debugreg(0, 1); | 1166 | set_debugreg(0, 1); |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index efae3b22a0ff..65792c2cc462 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -245,17 +245,6 @@ config X86_E_POWERSAVER | |||
245 | 245 | ||
246 | comment "shared options" | 246 | comment "shared options" |
247 | 247 | ||
248 | config X86_ACPI_CPUFREQ_PROC_INTF | ||
249 | bool "/proc/acpi/processor/../performance interface (deprecated)" | ||
250 | depends on PROC_FS | ||
251 | depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI | ||
252 | help | ||
253 | This enables the deprecated /proc/acpi/processor/../performance | ||
254 | interface. While it is helpful for debugging, the generic, | ||
255 | cross-architecture cpufreq interfaces should be used. | ||
256 | |||
257 | If in doubt, say N. | ||
258 | |||
259 | config X86_SPEEDSTEP_LIB | 248 | config X86_SPEEDSTEP_LIB |
260 | tristate | 249 | tristate |
261 | default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) | 250 | default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..4b1c319d30c3 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/ftrace.h> | ||
36 | 37 | ||
37 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
38 | #include <acpi/processor.h> | 39 | #include <acpi/processor.h> |
@@ -144,13 +145,14 @@ typedef union { | |||
144 | 145 | ||
145 | struct drv_cmd { | 146 | struct drv_cmd { |
146 | unsigned int type; | 147 | unsigned int type; |
147 | cpumask_t mask; | 148 | const struct cpumask *mask; |
148 | drv_addr_union addr; | 149 | drv_addr_union addr; |
149 | u32 val; | 150 | u32 val; |
150 | }; | 151 | }; |
151 | 152 | ||
152 | static void do_drv_read(struct drv_cmd *cmd) | 153 | static long do_drv_read(void *_cmd) |
153 | { | 154 | { |
155 | struct drv_cmd *cmd = _cmd; | ||
154 | u32 h; | 156 | u32 h; |
155 | 157 | ||
156 | switch (cmd->type) { | 158 | switch (cmd->type) { |
@@ -165,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) | |||
165 | default: | 167 | default: |
166 | break; | 168 | break; |
167 | } | 169 | } |
170 | return 0; | ||
168 | } | 171 | } |
169 | 172 | ||
170 | static void do_drv_write(struct drv_cmd *cmd) | 173 | static long do_drv_write(void *_cmd) |
171 | { | 174 | { |
175 | struct drv_cmd *cmd = _cmd; | ||
172 | u32 lo, hi; | 176 | u32 lo, hi; |
173 | 177 | ||
174 | switch (cmd->type) { | 178 | switch (cmd->type) { |
@@ -185,48 +189,41 @@ static void do_drv_write(struct drv_cmd *cmd) | |||
185 | default: | 189 | default: |
186 | break; | 190 | break; |
187 | } | 191 | } |
192 | return 0; | ||
188 | } | 193 | } |
189 | 194 | ||
190 | static void drv_read(struct drv_cmd *cmd) | 195 | static void drv_read(struct drv_cmd *cmd) |
191 | { | 196 | { |
192 | cpumask_t saved_mask = current->cpus_allowed; | ||
193 | cmd->val = 0; | 197 | cmd->val = 0; |
194 | 198 | ||
195 | set_cpus_allowed_ptr(current, &cmd->mask); | 199 | work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); |
196 | do_drv_read(cmd); | ||
197 | set_cpus_allowed_ptr(current, &saved_mask); | ||
198 | } | 200 | } |
199 | 201 | ||
200 | static void drv_write(struct drv_cmd *cmd) | 202 | static void drv_write(struct drv_cmd *cmd) |
201 | { | 203 | { |
202 | cpumask_t saved_mask = current->cpus_allowed; | ||
203 | unsigned int i; | 204 | unsigned int i; |
204 | 205 | ||
205 | for_each_cpu_mask_nr(i, cmd->mask) { | 206 | for_each_cpu(i, cmd->mask) { |
206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); | 207 | work_on_cpu(i, do_drv_write, cmd); |
207 | do_drv_write(cmd); | ||
208 | } | 208 | } |
209 | |||
210 | set_cpus_allowed_ptr(current, &saved_mask); | ||
211 | return; | ||
212 | } | 209 | } |
213 | 210 | ||
214 | static u32 get_cur_val(const cpumask_t *mask) | 211 | static u32 get_cur_val(const struct cpumask *mask) |
215 | { | 212 | { |
216 | struct acpi_processor_performance *perf; | 213 | struct acpi_processor_performance *perf; |
217 | struct drv_cmd cmd; | 214 | struct drv_cmd cmd; |
218 | 215 | ||
219 | if (unlikely(cpus_empty(*mask))) | 216 | if (unlikely(cpumask_empty(mask))) |
220 | return 0; | 217 | return 0; |
221 | 218 | ||
222 | switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) { | 219 | switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) { |
223 | case SYSTEM_INTEL_MSR_CAPABLE: | 220 | case SYSTEM_INTEL_MSR_CAPABLE: |
224 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 221 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
225 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; | 222 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; |
226 | break; | 223 | break; |
227 | case SYSTEM_IO_CAPABLE: | 224 | case SYSTEM_IO_CAPABLE: |
228 | cmd.type = SYSTEM_IO_CAPABLE; | 225 | cmd.type = SYSTEM_IO_CAPABLE; |
229 | perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data; | 226 | perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data; |
230 | cmd.addr.io.port = perf->control_register.address; | 227 | cmd.addr.io.port = perf->control_register.address; |
231 | cmd.addr.io.bit_width = perf->control_register.bit_width; | 228 | cmd.addr.io.bit_width = perf->control_register.bit_width; |
232 | break; | 229 | break; |
@@ -234,8 +231,7 @@ static u32 get_cur_val(const cpumask_t *mask) | |||
234 | return 0; | 231 | return 0; |
235 | } | 232 | } |
236 | 233 | ||
237 | cmd.mask = *mask; | 234 | cmd.mask = mask; |
238 | |||
239 | drv_read(&cmd); | 235 | drv_read(&cmd); |
240 | 236 | ||
241 | dprintk("get_cur_val = %u\n", cmd.val); | 237 | dprintk("get_cur_val = %u\n", cmd.val); |
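The drv_read()/drv_write() rewrite above shows the pattern this patch applies throughout: rather than rebinding the calling task with set_cpus_allowed_ptr() and migrating it to the target CPU, the MSR or port access is packaged as a long (*fn)(void *) callback and handed to work_on_cpu(), which runs it on that CPU's workqueue and waits for the result. Reduced to its shape (do_read_cb and arg are placeholders):

    /* Sketch: run a callback on one specific CPU and wait for it. */
    static long do_read_cb(void *arg)
    {
            /* executes on the target CPU; rdmsr()/inb() would go here */
            return 0;
    }

    /* caller, on any CPU: */
    long err = work_on_cpu(cpu, do_read_cb, arg);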
@@ -243,6 +239,30 @@ static u32 get_cur_val(const cpumask_t *mask) | |||
243 | return cmd.val; | 239 | return cmd.val; |
244 | } | 240 | } |
245 | 241 | ||
242 | struct perf_cur { | ||
243 | union { | ||
244 | struct { | ||
245 | u32 lo; | ||
246 | u32 hi; | ||
247 | } split; | ||
248 | u64 whole; | ||
249 | } aperf_cur, mperf_cur; | ||
250 | }; | ||
251 | |||
252 | |||
253 | static long read_measured_perf_ctrs(void *_cur) | ||
254 | { | ||
255 | struct perf_cur *cur = _cur; | ||
256 | |||
257 | rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi); | ||
258 | rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi); | ||
259 | |||
260 | wrmsr(MSR_IA32_APERF, 0, 0); | ||
261 | wrmsr(MSR_IA32_MPERF, 0, 0); | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | |||
246 | /* | 266 | /* |
247 | * Return the measured active (C0) frequency on this CPU since last call | 267 | * Return the measured active (C0) frequency on this CPU since last call |
248 | * to this function. | 268 | * to this function. |
@@ -259,31 +279,12 @@ static u32 get_cur_val(const cpumask_t *mask) | |||
259 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | 279 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
260 | unsigned int cpu) | 280 | unsigned int cpu) |
261 | { | 281 | { |
262 | union { | 282 | struct perf_cur cur; |
263 | struct { | ||
264 | u32 lo; | ||
265 | u32 hi; | ||
266 | } split; | ||
267 | u64 whole; | ||
268 | } aperf_cur, mperf_cur; | ||
269 | |||
270 | cpumask_t saved_mask; | ||
271 | unsigned int perf_percent; | 283 | unsigned int perf_percent; |
272 | unsigned int retval; | 284 | unsigned int retval; |
273 | 285 | ||
274 | saved_mask = current->cpus_allowed; | 286 | if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur)) |
275 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
276 | if (get_cpu() != cpu) { | ||
277 | /* We were not able to run on requested processor */ | ||
278 | put_cpu(); | ||
279 | return 0; | 287 | return 0; |
280 | } | ||
281 | |||
282 | rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); | ||
283 | rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); | ||
284 | |||
285 | wrmsr(MSR_IA32_APERF, 0,0); | ||
286 | wrmsr(MSR_IA32_MPERF, 0,0); | ||
287 | 288 | ||
288 | #ifdef __i386__ | 289 | #ifdef __i386__ |
289 | /* | 290 | /* |
@@ -291,37 +292,39 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, | |||
291 | * Get an approximate value. Return failure in case we cannot get | 292 | * Get an approximate value. Return failure in case we cannot get |
292 | * an approximate value. | 293 | * an approximate value. |
293 | */ | 294 | */ |
294 | if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { | 295 | if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) { |
295 | int shift_count; | 296 | int shift_count; |
296 | u32 h; | 297 | u32 h; |
297 | 298 | ||
298 | h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); | 299 | h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi); |
299 | shift_count = fls(h); | 300 | shift_count = fls(h); |
300 | 301 | ||
301 | aperf_cur.whole >>= shift_count; | 302 | cur.aperf_cur.whole >>= shift_count; |
302 | mperf_cur.whole >>= shift_count; | 303 | cur.mperf_cur.whole >>= shift_count; |
303 | } | 304 | } |
304 | 305 | ||
305 | if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { | 306 | if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) { |
306 | int shift_count = 7; | 307 | int shift_count = 7; |
307 | aperf_cur.split.lo >>= shift_count; | 308 | cur.aperf_cur.split.lo >>= shift_count; |
308 | mperf_cur.split.lo >>= shift_count; | 309 | cur.mperf_cur.split.lo >>= shift_count; |
309 | } | 310 | } |
310 | 311 | ||
311 | if (aperf_cur.split.lo && mperf_cur.split.lo) | 312 | if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo) |
312 | perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; | 313 | perf_percent = (cur.aperf_cur.split.lo * 100) / |
314 | cur.mperf_cur.split.lo; | ||
313 | else | 315 | else |
314 | perf_percent = 0; | 316 | perf_percent = 0; |
315 | 317 | ||
316 | #else | 318 | #else |
317 | if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { | 319 | if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) { |
318 | int shift_count = 7; | 320 | int shift_count = 7; |
319 | aperf_cur.whole >>= shift_count; | 321 | cur.aperf_cur.whole >>= shift_count; |
320 | mperf_cur.whole >>= shift_count; | 322 | cur.mperf_cur.whole >>= shift_count; |
321 | } | 323 | } |
322 | 324 | ||
323 | if (aperf_cur.whole && mperf_cur.whole) | 325 | if (cur.aperf_cur.whole && cur.mperf_cur.whole) |
324 | perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; | 326 | perf_percent = (cur.aperf_cur.whole * 100) / |
327 | cur.mperf_cur.whole; | ||
325 | else | 328 | else |
326 | perf_percent = 0; | 329 | perf_percent = 0; |
327 | 330 | ||
@@ -329,10 +332,6 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, | |||
329 | 332 | ||
330 | retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; | 333 | retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; |
331 | 334 | ||
332 | put_cpu(); | ||
333 | set_cpus_allowed_ptr(current, &saved_mask); | ||
334 | |||
335 | dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); | ||
336 | return retval; | 335 | return retval; |
337 | } | 336 | } |
338 | 337 | ||
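A worked example of the APERF/MPERF ratio: both MSRs tick only in C0, APERF at the actual clock and MPERF at the fixed maximum (P0) clock, and the driver zeroes them after each read, so the quotient of the deltas gives the average delivered frequency as a fraction of P0. With hypothetical deltas:

    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical counter deltas since the last read: */
            unsigned int aperf = 1800000, mperf = 2000000;
            unsigned int max_freq = 2000000;  /* kHz: a 2 GHz P0 */

            unsigned int perf_percent = (aperf * 100) / mperf;  /* 90 */
            unsigned int cur = max_freq * perf_percent / 100;   /* 1800000 kHz */

            printf("%u%% -> %u kHz\n", perf_percent, cur);
            return 0;
    }

The shift_count dance in the driver exists precisely because aperf * 100 can overflow on larger deltas; shifting both counters right by the same amount preserves the ratio.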
@@ -350,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | |||
350 | } | 349 | } |
351 | 350 | ||
352 | cached_freq = data->freq_table[data->acpi_data->state].frequency; | 351 | cached_freq = data->freq_table[data->acpi_data->state].frequency; |
353 | freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); | 352 | freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); |
354 | if (freq != cached_freq) { | 353 | if (freq != cached_freq) { |
355 | /* | 354 | /* |
356 | * The dreaded BIOS frequency change behind our back. | 355 | * The dreaded BIOS frequency change behind our back. |
@@ -364,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | |||
364 | return freq; | 363 | return freq; |
365 | } | 364 | } |
366 | 365 | ||
367 | static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, | 366 | static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, |
368 | struct acpi_cpufreq_data *data) | 367 | struct acpi_cpufreq_data *data) |
369 | { | 368 | { |
370 | unsigned int cur_freq; | 369 | unsigned int cur_freq; |
@@ -385,12 +384,12 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
385 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); | 384 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); |
386 | struct acpi_processor_performance *perf; | 385 | struct acpi_processor_performance *perf; |
387 | struct cpufreq_freqs freqs; | 386 | struct cpufreq_freqs freqs; |
388 | cpumask_t online_policy_cpus; | ||
389 | struct drv_cmd cmd; | 387 | struct drv_cmd cmd; |
390 | unsigned int next_state = 0; /* Index into freq_table */ | 388 | unsigned int next_state = 0; /* Index into freq_table */ |
391 | unsigned int next_perf_state = 0; /* Index into perf table */ | 389 | unsigned int next_perf_state = 0; /* Index into perf table */ |
392 | unsigned int i; | 390 | unsigned int i; |
393 | int result = 0; | 391 | int result = 0; |
392 | struct power_trace it; | ||
394 | 393 | ||
395 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | 394 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); |
396 | 395 | ||
@@ -404,15 +403,10 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
404 | data->freq_table, | 403 | data->freq_table, |
405 | target_freq, | 404 | target_freq, |
406 | relation, &next_state); | 405 | relation, &next_state); |
407 | if (unlikely(result)) | 406 | if (unlikely(result)) { |
408 | return -ENODEV; | 407 | result = -ENODEV; |
409 | 408 | goto out; | |
410 | #ifdef CONFIG_HOTPLUG_CPU | 409 | } |
411 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | ||
412 | cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); | ||
413 | #else | ||
414 | online_policy_cpus = policy->cpus; | ||
415 | #endif | ||
416 | 410 | ||
417 | next_perf_state = data->freq_table[next_state].index; | 411 | next_perf_state = data->freq_table[next_state].index; |
418 | if (perf->state == next_perf_state) { | 412 | if (perf->state == next_perf_state) { |
@@ -423,10 +417,12 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
423 | } else { | 417 | } else { |
424 | dprintk("Already at target state (P%d)\n", | 418 | dprintk("Already at target state (P%d)\n", |
425 | next_perf_state); | 419 | next_perf_state); |
426 | return 0; | 420 | goto out; |
427 | } | 421 | } |
428 | } | 422 | } |
429 | 423 | ||
424 | trace_power_mark(&it, POWER_PSTATE, next_perf_state); | ||
425 | |||
430 | switch (data->cpu_feature) { | 426 | switch (data->cpu_feature) { |
431 | case SYSTEM_INTEL_MSR_CAPABLE: | 427 | case SYSTEM_INTEL_MSR_CAPABLE: |
432 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 428 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -440,19 +436,19 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
440 | cmd.val = (u32) perf->states[next_perf_state].control; | 436 | cmd.val = (u32) perf->states[next_perf_state].control; |
441 | break; | 437 | break; |
442 | default: | 438 | default: |
443 | return -ENODEV; | 439 | result = -ENODEV; |
440 | goto out; | ||
444 | } | 441 | } |
445 | 442 | ||
446 | cpus_clear(cmd.mask); | 443 | /* cpufreq holds the hotplug lock, so we are safe from here on */ |
447 | |||
448 | if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) | 444 | if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) |
449 | cmd.mask = online_policy_cpus; | 445 | cmd.mask = policy->cpus; |
450 | else | 446 | else |
451 | cpu_set(policy->cpu, cmd.mask); | 447 | cmd.mask = cpumask_of(policy->cpu); |
452 | 448 | ||
453 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 449 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
454 | freqs.new = data->freq_table[next_state].frequency; | 450 | freqs.new = data->freq_table[next_state].frequency; |
455 | for_each_cpu_mask_nr(i, cmd.mask) { | 451 | for_each_cpu(i, cmd.mask) { |
456 | freqs.cpu = i; | 452 | freqs.cpu = i; |
457 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 453 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
458 | } | 454 | } |
@@ -460,19 +456,21 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
460 | drv_write(&cmd); | 456 | drv_write(&cmd); |
461 | 457 | ||
462 | if (acpi_pstate_strict) { | 458 | if (acpi_pstate_strict) { |
463 | if (!check_freqs(&cmd.mask, freqs.new, data)) { | 459 | if (!check_freqs(cmd.mask, freqs.new, data)) { |
464 | dprintk("acpi_cpufreq_target failed (%d)\n", | 460 | dprintk("acpi_cpufreq_target failed (%d)\n", |
465 | policy->cpu); | 461 | policy->cpu); |
466 | return -EAGAIN; | 462 | result = -EAGAIN; |
463 | goto out; | ||
467 | } | 464 | } |
468 | } | 465 | } |
469 | 466 | ||
470 | for_each_cpu_mask_nr(i, cmd.mask) { | 467 | for_each_cpu(i, cmd.mask) { |
471 | freqs.cpu = i; | 468 | freqs.cpu = i; |
472 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 469 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
473 | } | 470 | } |
474 | perf->state = next_perf_state; | 471 | perf->state = next_perf_state; |
475 | 472 | ||
473 | out: | ||
476 | return result; | 474 | return result; |
477 | } | 475 | } |
478 | 476 | ||
@@ -513,6 +511,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | |||
513 | } | 511 | } |
514 | } | 512 | } |
515 | 513 | ||
514 | static void free_acpi_perf_data(void) | ||
515 | { | ||
516 | unsigned int i; | ||
517 | |||
518 | /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ | ||
519 | for_each_possible_cpu(i) | ||
520 | free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) | ||
521 | ->shared_cpu_map); | ||
522 | free_percpu(acpi_perf_data); | ||
523 | } | ||
524 | |||
516 | /* | 525 | /* |
517 | * acpi_cpufreq_early_init - initialize ACPI P-States library | 526 | * acpi_cpufreq_early_init - initialize ACPI P-States library |
518 | * | 527 | * |
@@ -523,6 +532,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | |||
523 | */ | 532 | */ |
524 | static int __init acpi_cpufreq_early_init(void) | 533 | static int __init acpi_cpufreq_early_init(void) |
525 | { | 534 | { |
535 | unsigned int i; | ||
526 | dprintk("acpi_cpufreq_early_init\n"); | 536 | dprintk("acpi_cpufreq_early_init\n"); |
527 | 537 | ||
528 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | 538 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); |
@@ -530,6 +540,16 @@ static int __init acpi_cpufreq_early_init(void) | |||
530 | dprintk("Memory allocation error for acpi_perf_data.\n"); | 540 | dprintk("Memory allocation error for acpi_perf_data.\n"); |
531 | return -ENOMEM; | 541 | return -ENOMEM; |
532 | } | 542 | } |
543 | for_each_possible_cpu(i) { | ||
544 | if (!alloc_cpumask_var_node( | ||
545 | &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, | ||
546 | GFP_KERNEL, cpu_to_node(i))) { | ||
547 | |||
548 | /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ | ||
549 | free_acpi_perf_data(); | ||
550 | return -ENOMEM; | ||
551 | } | ||
552 | } | ||
533 | 553 | ||
534 | /* Do initialization in ACPI core */ | 554 | /* Do initialization in ACPI core */ |
535 | acpi_processor_preregister_performance(acpi_perf_data); | 555 | acpi_processor_preregister_performance(acpi_perf_data); |
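The allocation loop above exists because, with CONFIG_CPUMASK_OFFSTACK, the shared_cpu_map member inside each per-cpu acpi_processor_performance is a pointer that must be allocated separately. The idiom this patch applies across several drivers, in sketch form:

    cpumask_var_t mask;

    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
    /* ... use `mask` like any struct cpumask pointer ... */
    free_cpumask_var(mask); /* on a zeroed, never-allocated var this
                             * reduces to kfree(NULL), a no-op */

That no-op property is what lets free_acpi_perf_data() free every possible CPU's mask unconditionally on the error path.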
@@ -600,15 +620,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
600 | */ | 620 | */ |
601 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || | 621 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || |
602 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | 622 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { |
603 | policy->cpus = perf->shared_cpu_map; | 623 | cpumask_copy(policy->cpus, perf->shared_cpu_map); |
604 | } | 624 | } |
605 | policy->related_cpus = perf->shared_cpu_map; | 625 | cpumask_copy(policy->related_cpus, perf->shared_cpu_map); |
606 | 626 | ||
607 | #ifdef CONFIG_SMP | 627 | #ifdef CONFIG_SMP |
608 | dmi_check_system(sw_any_bug_dmi_table); | 628 | dmi_check_system(sw_any_bug_dmi_table); |
609 | if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { | 629 | if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) { |
610 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; | 630 | policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; |
611 | policy->cpus = per_cpu(cpu_core_map, cpu); | 631 | cpumask_copy(policy->cpus, cpu_core_mask(cpu)); |
612 | } | 632 | } |
613 | #endif | 633 | #endif |
614 | 634 | ||
@@ -791,7 +811,7 @@ static int __init acpi_cpufreq_init(void) | |||
791 | 811 | ||
792 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); | 812 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); |
793 | if (ret) | 813 | if (ret) |
794 | free_percpu(acpi_perf_data); | 814 | free_acpi_perf_data(); |
795 | 815 | ||
796 | return ret; | 816 | return ret; |
797 | } | 817 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index b0461856acfb..a4cff5d6e380 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -982,7 +982,7 @@ static int __init longhaul_init(void) | |||
982 | case 10: | 982 | case 10: |
983 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); | 983 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); |
984 | default: | 984 | default: |
985 | ;; | 985 | ; |
986 | } | 986 | } |
987 | 987 | ||
988 | return -ENODEV; | 988 | return -ENODEV; |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b8e05ee4f736..b585e04cbc9e 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | /* notifiers */ | 124 | /* notifiers */ |
125 | for_each_cpu_mask_nr(i, policy->cpus) { | 125 | for_each_cpu(i, policy->cpus) { |
126 | freqs.cpu = i; | 126 | freqs.cpu = i; |
127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
128 | } | 128 | } |
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software | 130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software |
131 | * Developer's Manual, Volume 3 | 131 | * Developer's Manual, Volume 3 |
132 | */ | 132 | */ |
133 | for_each_cpu_mask_nr(i, policy->cpus) | 133 | for_each_cpu(i, policy->cpus) |
134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | 134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); |
135 | 135 | ||
136 | /* notifiers */ | 136 | /* notifiers */ |
137 | for_each_cpu_mask_nr(i, policy->cpus) { | 137 | for_each_cpu(i, policy->cpus) { |
138 | freqs.cpu = i; | 138 | freqs.cpu = i; |
139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
140 | } | 140 | } |
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
160 | switch (c->x86_model) { | 160 | switch (c->x86_model) { |
161 | case 0x0E: /* Core */ | 161 | case 0x0E: /* Core */ |
162 | case 0x0F: /* Core Duo */ | 162 | case 0x0F: /* Core Duo */ |
163 | case 0x16: /* Celeron Core */ | ||
163 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | 164 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; |
164 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); | 165 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); |
165 | case 0x0D: /* Pentium M (Dothan) */ | 166 | case 0x0D: /* Pentium M (Dothan) */ |
@@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
171 | } | 172 | } |
172 | 173 | ||
173 | if (c->x86 != 0xF) { | 174 | if (c->x86 != 0xF) { |
174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); | 175 | if (!cpu_has(c, X86_FEATURE_EST)) |
176 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " | ||
177 | "Please send an e-mail to <cpufreq@vger.kernel.org>\n"); | ||
175 | return 0; | 178 | return 0; |
176 | } | 179 | } |
177 | 180 | ||
@@ -200,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | |||
200 | unsigned int i; | 203 | unsigned int i; |
201 | 204 | ||
202 | #ifdef CONFIG_SMP | 205 | #ifdef CONFIG_SMP |
203 | policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); | 206 | cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); |
204 | #endif | 207 | #endif |
205 | 208 | ||
206 | /* Errata workaround */ | 209 | /* Errata workaround */ |
@@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = { | |||
274 | .name = "p4-clockmod", | 277 | .name = "p4-clockmod", |
275 | .owner = THIS_MODULE, | 278 | .owner = THIS_MODULE, |
276 | .attr = p4clockmod_attr, | 279 | .attr = p4clockmod_attr, |
280 | .hide_interface = 1, | ||
277 | }; | 281 | }; |
278 | 282 | ||
279 | 283 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7c7d56b43136..1b446d79a8fd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void) | |||
310 | goto err0; | 310 | goto err0; |
311 | } | 311 | } |
312 | 312 | ||
313 | if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, | ||
314 | GFP_KERNEL)) { | ||
315 | retval = -ENOMEM; | ||
316 | goto err05; | ||
317 | } | ||
318 | |||
313 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | 319 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { |
314 | retval = -EIO; | 320 | retval = -EIO; |
315 | goto err1; | 321 | goto err1; |
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void) | |||
412 | err2: | 418 | err2: |
413 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 419 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
414 | err1: | 420 | err1: |
421 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
422 | err05: | ||
415 | kfree(acpi_processor_perf); | 423 | kfree(acpi_processor_perf); |
416 | err0: | 424 | err0: |
417 | printk(KERN_WARNING PFX "ACPI perflib cannot be used on this platform\n"); | 425 | printk(KERN_WARNING PFX "ACPI perflib cannot be used on this platform\n"); |
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { | |||
652 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 660 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
653 | if (acpi_processor_perf) { | 661 | if (acpi_processor_perf) { |
654 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 662 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
663 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
655 | kfree(acpi_processor_perf); | 664 | kfree(acpi_processor_perf); |
656 | } | 665 | } |
657 | #endif | 666 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58b87cd..fb039cd345d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | |||
115 | u32 i = 0; | 115 | u32 i = 0; |
116 | 116 | ||
117 | if (cpu_family == CPU_HW_PSTATE) { | 117 | if (cpu_family == CPU_HW_PSTATE) { |
118 | rdmsr(MSR_PSTATE_STATUS, lo, hi); | 118 | if (data->currpstate == HW_PSTATE_INVALID) { |
119 | i = lo & HW_PSTATE_MASK; | 119 | /* read (initial) hw pstate if not yet set */ |
120 | data->currpstate = i; | 120 | rdmsr(MSR_PSTATE_STATUS, lo, hi); |
121 | i = lo & HW_PSTATE_MASK; | ||
122 | |||
123 | /* | ||
124 | * A workaround for family 11h erratum 311 might cause | ||
125 | * an "out-of-range" Pstate if the core is in Pstate-0. | ||
126 | */ | ||
127 | if (i >= data->numps) | ||
128 | data->currpstate = HW_PSTATE_0; | ||
129 | else | ||
130 | data->currpstate = i; | ||
131 | } | ||
121 | return 0; | 132 | return 0; |
122 | } | 133 | } |
123 | do { | 134 | do { |
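The sentinel works because the hardware field is narrow: assuming HW_PSTATE_MASK covers only the low pstate bits (0x7, so pstates 0..7), `lo & HW_PSTATE_MASK` can never yield 0xff, and HW_PSTATE_INVALID stays safely out of band. In sketch form:

    /* Sketch, assuming HW_PSTATE_MASK == 0x7:
     *   data->currpstate = HW_PSTATE_INVALID;   // 0xff at init
     *   ...
     *   i = lo & HW_PSTATE_MASK;                // always 0..7
     *   if (i >= data->numps)                   // erratum 311 escape
     *           data->currpstate = HW_PSTATE_0;
     *   else
     *           data->currpstate = i;           // never 0xff again
     */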
@@ -755,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned | |||
755 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 766 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
756 | { | 767 | { |
757 | struct cpufreq_frequency_table *powernow_table; | 768 | struct cpufreq_frequency_table *powernow_table; |
758 | int ret_val; | 769 | int ret_val = -ENODEV; |
759 | 770 | ||
760 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 771 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
761 | dprintk("register performance failed: bad ACPI data\n"); | 772 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -804,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
804 | /* notify BIOS that we exist */ | 815 | /* notify BIOS that we exist */ |
805 | acpi_processor_notify_smm(THIS_MODULE); | 816 | acpi_processor_notify_smm(THIS_MODULE); |
806 | 817 | ||
818 | if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { | ||
819 | printk(KERN_ERR PFX | ||
820 | "unable to alloc powernow_k8_data cpumask\n"); | ||
821 | ret_val = -ENOMEM; | ||
822 | goto err_out_mem; | ||
823 | } | ||
824 | |||
807 | return 0; | 825 | return 0; |
808 | 826 | ||
809 | err_out_mem: | 827 | err_out_mem: |
@@ -815,7 +833,7 @@ err_out: | |||
815 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ | 833 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ |
816 | data->acpi_data.state_count = 0; | 834 | data->acpi_data.state_count = 0; |
817 | 835 | ||
818 | return -ENODEV; | 836 | return ret_val; |
819 | } | 837 | } |
820 | 838 | ||
821 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) | 839 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) |
@@ -918,12 +936,28 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | |||
918 | { | 936 | { |
919 | if (data->acpi_data.state_count) | 937 | if (data->acpi_data.state_count) |
920 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | 938 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); |
939 | free_cpumask_var(data->acpi_data.shared_cpu_map); | ||
940 | } | ||
941 | |||
942 | static int get_transition_latency(struct powernow_k8_data *data) | ||
943 | { | ||
944 | int max_latency = 0; | ||
945 | int i; | ||
946 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
947 | int cur_latency = data->acpi_data.states[i].transition_latency | ||
948 | + data->acpi_data.states[i].bus_master_latency; | ||
949 | if (cur_latency > max_latency) | ||
950 | max_latency = cur_latency; | ||
951 | } | ||
952 | /* value in usecs, needs to be in nanoseconds */ | ||
953 | return 1000 * max_latency; | ||
921 | } | 954 | } |
922 | 955 | ||
923 | #else | 956 | #else |
924 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } | 957 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } |
925 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } | 958 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } |
926 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } | 959 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } |
960 | static int get_transition_latency(struct powernow_k8_data *data) { return 0; } | ||
927 | #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ | 961 | #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ |
928 | 962 | ||
929 | /* Take a frequency, and issue the fid/vid transition command */ | 963 | /* Take a frequency, and issue the fid/vid transition command */ |
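get_transition_latency() above returns the worst case over all ACPI P-states, converted from the microseconds ACPI reports to the nanoseconds cpufreq expects. With hypothetical _PSS data:

    /* state 0: transition 100 us + bus master 50 us = 150 us
     * state 1: transition  80 us + bus master 90 us = 170 us  <- max
     * get_transition_latency() -> 170 * 1000 = 170000 ns,
     * later stored in pol->cpuinfo.transition_latency.
     */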
@@ -1121,8 +1155,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1121 | } | 1155 | } |
1122 | 1156 | ||
1123 | data->cpu = pol->cpu; | 1157 | data->cpu = pol->cpu; |
1158 | data->currpstate = HW_PSTATE_INVALID; | ||
1124 | 1159 | ||
1125 | if (powernow_k8_cpu_init_acpi(data)) { | 1160 | rc = powernow_k8_cpu_init_acpi(data); |
1161 | if (rc) { | ||
1126 | /* | 1162 | /* |
1127 | * Use the PSB BIOS structure. This is only available on | 1163 | * Use the PSB BIOS structure. This is only available on |
1128 | * an UP version, and is deprecated by AMD. | 1164 | * an UP version, and is deprecated by AMD. |
@@ -1140,22 +1176,25 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1140 | "ACPI maintainers and complain to your BIOS " | 1176 | "ACPI maintainers and complain to your BIOS " |
1141 | "vendor.\n"); | 1177 | "vendor.\n"); |
1142 | #endif | 1178 | #endif |
1143 | kfree(data); | 1179 | goto err_out; |
1144 | return -ENODEV; | ||
1145 | } | 1180 | } |
1146 | if (pol->cpu != 0) { | 1181 | if (pol->cpu != 0) { |
1147 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " | 1182 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " |
1148 | "CPU other than CPU0. Complain to your BIOS " | 1183 | "CPU other than CPU0. Complain to your BIOS " |
1149 | "vendor.\n"); | 1184 | "vendor.\n"); |
1150 | kfree(data); | 1185 | goto err_out; |
1151 | return -ENODEV; | ||
1152 | } | 1186 | } |
1153 | rc = find_psb_table(data); | 1187 | rc = find_psb_table(data); |
1154 | if (rc) { | 1188 | if (rc) { |
1155 | kfree(data); | 1189 | goto err_out; |
1156 | return -ENODEV; | ||
1157 | } | 1190 | } |
1158 | } | 1191 | /* Take a crude guess here. |
1192 | * That guess was in microseconds, so multiply by 1000 */ | ||
1193 | pol->cpuinfo.transition_latency = ( | ||
1194 | ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + | ||
1195 | ((1 << data->irt) * 30)) * 1000; | ||
1196 | } else /* ACPI _PSS objects available */ | ||
1197 | pol->cpuinfo.transition_latency = get_transition_latency(data); | ||
1159 | 1198 | ||
1160 | /* only run on specific CPU from here on */ | 1199 | /* only run on specific CPU from here on */ |
1161 | oldmask = current->cpus_allowed; | 1200 | oldmask = current->cpus_allowed; |
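The relocated crude guess is arithmetically identical to the removed version further down — 3 * (1 << irt) * 10 is just (1 << irt) * 30 — so only its placement changed: it now applies solely on the PSB path, while the ACPI path uses the measured maximum from get_transition_latency(). Plugging in hypothetical values, and assuming VST_UNITS_20US == 20:

    /* Hypothetical PSB values: rvo = 0, vstable = 5, irt = 4.
     *   ((0 + 8) * 5 * 20) + ((1 << 4) * 30) = 800 + 480 = 1280 us
     * Times 1000: pol->cpuinfo.transition_latency = 1280000 ns.
     */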
@@ -1181,15 +1220,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1181 | set_cpus_allowed_ptr(current, &oldmask); | 1220 | set_cpus_allowed_ptr(current, &oldmask); |
1182 | 1221 | ||
1183 | if (cpu_family == CPU_HW_PSTATE) | 1222 | if (cpu_family == CPU_HW_PSTATE) |
1184 | pol->cpus = cpumask_of_cpu(pol->cpu); | 1223 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); |
1185 | else | 1224 | else |
1186 | pol->cpus = per_cpu(cpu_core_map, pol->cpu); | 1225 | cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); |
1187 | data->available_cores = &(pol->cpus); | 1226 | data->available_cores = pol->cpus; |
1188 | |||
1189 | /* Take a crude guess here. | ||
1190 | * That guess was in microseconds, so multiply with 1000 */ | ||
1191 | pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) | ||
1192 | + (3 * (1 << data->irt) * 10)) * 1000; | ||
1193 | 1227 | ||
1194 | if (cpu_family == CPU_HW_PSTATE) | 1228 | if (cpu_family == CPU_HW_PSTATE) |
1195 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); | 1229 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d96..8ecc75b6c7c3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -5,6 +5,19 @@ | |||
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | */ | 6 | */ |
7 | 7 | ||
8 | |||
9 | enum pstate { | ||
10 | HW_PSTATE_INVALID = 0xff, | ||
11 | HW_PSTATE_0 = 0, | ||
12 | HW_PSTATE_1 = 1, | ||
13 | HW_PSTATE_2 = 2, | ||
14 | HW_PSTATE_3 = 3, | ||
15 | HW_PSTATE_4 = 4, | ||
16 | HW_PSTATE_5 = 5, | ||
17 | HW_PSTATE_6 = 6, | ||
18 | HW_PSTATE_7 = 7, | ||
19 | }; | ||
20 | |||
8 | struct powernow_k8_data { | 21 | struct powernow_k8_data { |
9 | unsigned int cpu; | 22 | unsigned int cpu; |
10 | 23 | ||
@@ -23,7 +36,9 @@ struct powernow_k8_data { | |||
23 | u32 exttype; /* extended interface = 1 */ | 36 | u32 exttype; /* extended interface = 1 */ |
24 | 37 | ||
25 | /* keep track of the current fid / vid or pstate */ | 38 | /* keep track of the current fid / vid or pstate */ |
26 | u32 currvid, currfid, currpstate; | 39 | u32 currvid; |
40 | u32 currfid; | ||
41 | enum pstate currpstate; | ||
27 | 42 | ||
28 | /* the powernow_table includes all frequency and vid/fid pairings: | 43 | /* the powernow_table includes all frequency and vid/fid pairings: |
29 | * fid are the lower 8 bits of the index, vid are the upper 8 bits. | 44 | * fid are the lower 8 bits of the index, vid are the upper 8 bits. |
@@ -38,7 +53,7 @@ struct powernow_k8_data { | |||
38 | /* we need to keep track of associated cores, but let cpufreq | 53 | /* we need to keep track of associated cores, but let cpufreq |
39 | * handle hotplug events - so just point at cpufreq pol->cpus | 54 | * handle hotplug events - so just point at cpufreq pol->cpus |
40 | * structure */ | 55 | * structure */ |
41 | cpumask_t *available_cores; | 56 | struct cpumask *available_cores; |
42 | }; | 57 | }; |
43 | 58 | ||
44 | 59 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 3b5f06423e77..f08998278a3a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -458,13 +458,6 @@ static int centrino_verify (struct cpufreq_policy *policy) | |||
458 | * | 458 | * |
459 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
460 | */ | 460 | */ |
461 | struct allmasks { | ||
462 | cpumask_t online_policy_cpus; | ||
463 | cpumask_t saved_mask; | ||
464 | cpumask_t set_mask; | ||
465 | cpumask_t covered_cpus; | ||
466 | }; | ||
467 | |||
468 | static int centrino_target (struct cpufreq_policy *policy, | 461 | static int centrino_target (struct cpufreq_policy *policy, |
469 | unsigned int target_freq, | 462 | unsigned int target_freq, |
470 | unsigned int relation) | 463 | unsigned int relation) |
@@ -474,14 +467,15 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
474 | struct cpufreq_freqs freqs; | 467 | struct cpufreq_freqs freqs; |
475 | int retval = 0; | 468 | int retval = 0; |
476 | unsigned int j, k, first_cpu, tmp; | 469 | unsigned int j, k, first_cpu, tmp; |
477 | CPUMASK_ALLOC(allmasks); | 470 | cpumask_var_t saved_mask, covered_cpus; |
478 | CPUMASK_PTR(online_policy_cpus, allmasks); | ||
479 | CPUMASK_PTR(saved_mask, allmasks); | ||
480 | CPUMASK_PTR(set_mask, allmasks); | ||
481 | CPUMASK_PTR(covered_cpus, allmasks); | ||
482 | 471 | ||
483 | if (unlikely(allmasks == NULL)) | 472 | if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) |
473 | return -ENOMEM; | ||
474 | if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { | ||
475 | free_cpumask_var(saved_mask); | ||
484 | return -ENOMEM; | 476 | return -ENOMEM; |
477 | } | ||
478 | cpumask_copy(saved_mask, ¤t->cpus_allowed); | ||
485 | 479 | ||
486 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | 480 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { |
487 | retval = -ENODEV; | 481 | retval = -ENODEV; |
@@ -497,30 +491,26 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
497 | goto out; | 491 | goto out; |
498 | } | 492 | } |
499 | 493 | ||
500 | #ifdef CONFIG_HOTPLUG_CPU | ||
501 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | ||
502 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); | ||
503 | #else | ||
504 | *online_policy_cpus = policy->cpus; | ||
505 | #endif | ||
506 | |||
507 | *saved_mask = current->cpus_allowed; | ||
508 | first_cpu = 1; | 494 | first_cpu = 1; |
509 | cpus_clear(*covered_cpus); | 495 | for_each_cpu(j, policy->cpus) { |
510 | for_each_cpu_mask_nr(j, *online_policy_cpus) { | 496 | const struct cpumask *mask; |
497 | |||
498 | /* cpufreq holds the hotplug lock, so we are safe here */ | ||
499 | if (!cpu_online(j)) | ||
500 | continue; | ||
501 | |||
511 | /* | 502 | /* |
512 | * Support for SMP systems. | 503 | * Support for SMP systems. |
513 | * Make sure we are running on CPU that wants to change freq | 504 | * Make sure we are running on CPU that wants to change freq |
514 | */ | 505 | */ |
515 | cpus_clear(*set_mask); | ||
516 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 506 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
517 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); | 507 | mask = policy->cpus; |
518 | else | 508 | else |
519 | cpu_set(j, *set_mask); | 509 | mask = cpumask_of(j); |
520 | 510 | ||
521 | set_cpus_allowed_ptr(current, set_mask); | 511 | set_cpus_allowed_ptr(current, mask); |
522 | preempt_disable(); | 512 | preempt_disable(); |
523 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { | 513 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { |
524 | dprintk("couldn't limit to CPUs in this domain\n"); | 514 | dprintk("couldn't limit to CPUs in this domain\n"); |
525 | retval = -EAGAIN; | 515 | retval = -EAGAIN; |
526 | if (first_cpu) { | 516 | if (first_cpu) { |
@@ -548,7 +538,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
548 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 538 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
549 | target_freq, freqs.old, freqs.new, msr); | 539 | target_freq, freqs.old, freqs.new, msr); |
550 | 540 | ||
551 | for_each_cpu_mask_nr(k, *online_policy_cpus) { | 541 | for_each_cpu(k, policy->cpus) { |
542 | if (!cpu_online(k)) | ||
543 | continue; | ||
552 | freqs.cpu = k; | 544 | freqs.cpu = k; |
553 | cpufreq_notify_transition(&freqs, | 545 | cpufreq_notify_transition(&freqs, |
554 | CPUFREQ_PRECHANGE); | 546 | CPUFREQ_PRECHANGE); |
@@ -571,7 +563,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
571 | preempt_enable(); | 563 | preempt_enable(); |
572 | } | 564 | } |
573 | 565 | ||
574 | for_each_cpu_mask_nr(k, *online_policy_cpus) { | 566 | for_each_cpu(k, policy->cpus) { |
567 | if (!cpu_online(k)) | ||
568 | continue; | ||
575 | freqs.cpu = k; | 569 | freqs.cpu = k; |
576 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
577 | } | 571 | } |
@@ -584,18 +578,17 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
584 | * Best effort undo.. | 578 | * Best effort undo.. |
585 | */ | 579 | */ |
586 | 580 | ||
587 | if (!cpus_empty(*covered_cpus)) | 581 | for_each_cpu_mask_nr(j, *covered_cpus) { |
588 | for_each_cpu_mask_nr(j, *covered_cpus) { | 582 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); |
589 | set_cpus_allowed_ptr(current, | 583 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
590 | &cpumask_of_cpu(j)); | 584 | } |
591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
592 | } | ||
593 | 585 | ||
594 | tmp = freqs.new; | 586 | tmp = freqs.new; |
595 | freqs.new = freqs.old; | 587 | freqs.new = freqs.old; |
596 | freqs.old = tmp; | 588 | freqs.old = tmp; |
597 | for_each_cpu_mask_nr(j, *online_policy_cpus) { | 589 | for_each_cpu(j, policy->cpus) { |
598 | freqs.cpu = j; | 590 | if (!cpu_online(j)) |
591 | continue; | ||
599 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 592 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
600 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 593 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
601 | } | 594 | } |
@@ -608,7 +601,8 @@ migrate_end: | |||
608 | preempt_enable(); | 601 | preempt_enable(); |
609 | set_cpus_allowed_ptr(current, saved_mask); | 602 | set_cpus_allowed_ptr(current, saved_mask); |
610 | out: | 603 | out: |
611 | CPUMASK_FREE(allmasks); | 604 | free_cpumask_var(saved_mask); |
605 | free_cpumask_var(covered_cpus); | ||
612 | return retval; | 606 | return retval; |
613 | } | 607 | } |
614 | 608 | ||
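
centrino_target() drops the old CPUMASK_ALLOC/CPUMASK_PTR helpers for cpumask_var_t, which is a plain on-stack bitmap when CONFIG_CPUMASK_OFFSTACK is unset and a heap allocation when it is set. The allocate/unwind/free shape used in the hunk, reduced to a sketch (function name illustrative):

    #include <linux/cpumask.h>
    #include <linux/gfp.h>
    #include <linux/sched.h>

    static int centrino_style_alloc(void)
    {
            cpumask_var_t saved_mask, covered_cpus;

            if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
                    return -ENOMEM;
            if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL)) {
                    /* unwind the first allocation on partial failure */
                    free_cpumask_var(saved_mask);
                    return -ENOMEM;
            }

            cpumask_copy(saved_mask, &current->cpus_allowed);
            /* ... migrate, program MSRs, track progress in covered_cpus ... */

            free_cpumask_var(covered_cpus);
            free_cpumask_var(saved_mask);
            return 0;
    }

The precomputed online_policy_cpus mask disappears for the same reason: iterating policy->cpus and skipping !cpu_online(j) needs no temporary mask at all.
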
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 04d0376b64b0..dedc1e98f168 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -229,7 +229,7 @@ static unsigned int speedstep_detect_chipset (void) | |||
229 | return 0; | 229 | return 0; |
230 | } | 230 | } |
231 | 231 | ||
232 | static unsigned int _speedstep_get(const cpumask_t *cpus) | 232 | static unsigned int _speedstep_get(const struct cpumask *cpus) |
233 | { | 233 | { |
234 | unsigned int speed; | 234 | unsigned int speed; |
235 | cpumask_t cpus_allowed; | 235 | cpumask_t cpus_allowed; |
@@ -244,7 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) | |||
244 | 244 | ||
245 | static unsigned int speedstep_get(unsigned int cpu) | 245 | static unsigned int speedstep_get(unsigned int cpu) |
246 | { | 246 | { |
247 | return _speedstep_get(&cpumask_of_cpu(cpu)); | 247 | return _speedstep_get(cpumask_of(cpu)); |
248 | } | 248 | } |
249 | 249 | ||
250 | /** | 250 | /** |
@@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
267 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) | 267 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) |
268 | return -EINVAL; | 268 | return -EINVAL; |
269 | 269 | ||
270 | freqs.old = _speedstep_get(&policy->cpus); | 270 | freqs.old = _speedstep_get(policy->cpus); |
271 | freqs.new = speedstep_freqs[newstate].frequency; | 271 | freqs.new = speedstep_freqs[newstate].frequency; |
272 | freqs.cpu = policy->cpu; | 272 | freqs.cpu = policy->cpu; |
273 | 273 | ||
@@ -279,20 +279,20 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
279 | 279 | ||
280 | cpus_allowed = current->cpus_allowed; | 280 | cpus_allowed = current->cpus_allowed; |
281 | 281 | ||
282 | for_each_cpu_mask_nr(i, policy->cpus) { | 282 | for_each_cpu(i, policy->cpus) { |
283 | freqs.cpu = i; | 283 | freqs.cpu = i; |
284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
285 | } | 285 | } |
286 | 286 | ||
287 | /* switch to physical CPU where state is to be changed */ | 287 | /* switch to physical CPU where state is to be changed */ |
288 | set_cpus_allowed_ptr(current, &policy->cpus); | 288 | set_cpus_allowed_ptr(current, policy->cpus); |
289 | 289 | ||
290 | speedstep_set_state(newstate); | 290 | speedstep_set_state(newstate); |
291 | 291 | ||
292 | /* allow to be run on all CPUs */ | 292 | /* allow to be run on all CPUs */ |
293 | set_cpus_allowed_ptr(current, &cpus_allowed); | 293 | set_cpus_allowed_ptr(current, &cpus_allowed); |
294 | 294 | ||
295 | for_each_cpu_mask_nr(i, policy->cpus) { | 295 | for_each_cpu(i, policy->cpus) { |
296 | freqs.cpu = i; | 296 | freqs.cpu = i; |
297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
298 | } | 298 | } |
@@ -322,11 +322,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
322 | 322 | ||
323 | /* only run on CPU to be set, or on its sibling */ | 323 | /* only run on CPU to be set, or on its sibling */ |
324 | #ifdef CONFIG_SMP | 324 | #ifdef CONFIG_SMP |
325 | policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); | 325 | cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); |
326 | #endif | 326 | #endif |
327 | 327 | ||
328 | cpus_allowed = current->cpus_allowed; | 328 | cpus_allowed = current->cpus_allowed; |
329 | set_cpus_allowed_ptr(current, &policy->cpus); | 329 | set_cpus_allowed_ptr(current, policy->cpus); |
330 | 330 | ||
331 | /* detect low and high frequency and transition latency */ | 331 | /* detect low and high frequency and transition latency */ |
332 | result = speedstep_get_freqs(speedstep_processor, | 332 | result = speedstep_get_freqs(speedstep_processor, |
@@ -339,7 +339,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
339 | return result; | 339 | return result; |
340 | 340 | ||
341 | /* get current speed setting */ | 341 | /* get current speed setting */ |
342 | speed = _speedstep_get(&policy->cpus); | 342 | speed = _speedstep_get(policy->cpus); |
343 | if (!speed) | 343 | if (!speed) |
344 | return -EIO; | 344 | return -EIO; |
345 | 345 | ||
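
speedstep-ich keeps the classic migrate-and-restore affinity idiom, only moving to the pointer-taking APIs (cpumask_of(), for_each_cpu(), cpumask_copy()). The idiom itself, stripped of driver detail (helper name is illustrative):

    #include <linux/cpumask.h>
    #include <linux/sched.h>

    static void run_on_mask(const struct cpumask *target)
    {
            cpumask_t saved = current->cpus_allowed; /* by-value copy, old style */

            set_cpus_allowed_ptr(current, target);   /* hop onto a target CPU */
            /* ... MSR reads/writes that must run on a CPU in *target ... */
            set_cpus_allowed_ptr(current, &saved);   /* migrate back */
    }

Note the driver still saves the old affinity by value (cpumask_t cpus_allowed on the stack); presumably that conversion is left for a later pass.
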
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 98d4fdb7dc04..cdac7d62369b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void) | |||
139 | case 3: | 139 | case 3: |
140 | fsb = 166667; | 140 | fsb = 166667; |
141 | break; | 141 | break; |
142 | case 2: | ||
143 | fsb = 200000; | ||
144 | break; | ||
145 | case 0: | ||
146 | fsb = 266667; | ||
147 | break; | ||
148 | case 4: | ||
149 | fsb = 333333; | ||
150 | break; | ||
142 | default: | 151 | default: |
143 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value\n"); | 152 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value\n"); |
144 | } | 153 | } |
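
The speedstep-lib addition fills in three more FSB selectors for Core-era parts. The selector lives in the low bits of MSR_FSB_FREQ; folding the new arms in with the pre-existing ones gives roughly this decode (sketch; the 5/1/3 values are assumed from the switch arms not visible in this hunk):

    /* MSR_FSB_FREQ bits [2:0] -> FSB clock in kHz */
    static unsigned int fsb_khz(u32 msr_lo)
    {
            switch (msr_lo & 0x07) {
            case 5: return 100000;
            case 1: return 133333;
            case 3: return 166667;
            case 2: return 200000;   /* added by this patch */
            case 0: return 266667;   /* added by this patch */
            case 4: return 333333;   /* added by this patch */
            default: return 0;       /* undefined selector */
            }
    }
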
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c new file mode 100644 index 000000000000..fb5b86af0b01 --- /dev/null +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * Common hypervisor code | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <asm/processor.h> | ||
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | ||
27 | |||
28 | static inline void __cpuinit | ||
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | ||
30 | { | ||
31 | if (vmware_platform()) { | ||
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | ||
33 | } else { | ||
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | unsigned long get_hypervisor_tsc_freq(void) | ||
39 | { | ||
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
41 | return vmware_get_tsc_khz(); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static inline void __cpuinit | ||
46 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | ||
47 | { | ||
48 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | ||
49 | vmware_set_feature_bits(c); | ||
50 | return; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | ||
55 | { | ||
56 | detect_hypervisor_vendor(c); | ||
57 | hypervisor_set_feature_bits(c); | ||
58 | } | ||
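
The new hypervisor.c funnels all guest detection through one entry point, init_hypervisor(), so callers in the CPU-identification path never test vendors directly. Adding a second vendor would extend the probe chain; a hypothetical sketch (X86_HYPER_VENDOR_FOO and foo_platform() are made-up names -- only X86_HYPER_VENDOR_VMWARE and _NONE exist in this patch):

    #include <asm/processor.h>
    #include <asm/vmware.h>

    static inline void __cpuinit
    detect_hypervisor_vendor(struct cpuinfo_x86 *c)
    {
            if (vmware_platform())
                    c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
            else if (foo_platform())        /* hypothetical second probe */
                    c->x86_hyper_vendor = X86_HYPER_VENDOR_FOO;
            else
                    c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
    }
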
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cce0b6118d55..1f137a87d4bd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/ds.h> | 14 | #include <asm/ds.h> |
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | 16 | ||
@@ -25,11 +24,24 @@ | |||
25 | #ifdef CONFIG_X86_LOCAL_APIC | 24 | #ifdef CONFIG_X86_LOCAL_APIC |
26 | #include <asm/mpspec.h> | 25 | #include <asm/mpspec.h> |
27 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
28 | #include <mach_apic.h> | 27 | #include <asm/genapic.h> |
29 | #endif | 28 | #endif |
30 | 29 | ||
31 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
32 | { | 31 | { |
32 | /* Unmask CPUID levels if masked: */ | ||
33 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | ||
34 | u64 misc_enable; | ||
35 | |||
36 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
37 | |||
38 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { | ||
39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | ||
40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
41 | c->cpuid_level = cpuid_eax(0); | ||
42 | } | ||
43 | } | ||
44 | |||
33 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 45 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
34 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 46 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
35 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 47 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
@@ -41,6 +53,28 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | 53 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
42 | c->x86_cache_alignment = 128; | 54 | c->x86_cache_alignment = 128; |
43 | #endif | 55 | #endif |
56 | |||
57 | /* | ||
58 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | ||
59 | * with P/T states and does not stop in deep C-states | ||
60 | */ | ||
61 | if (c->x86_power & (1 << 8)) { | ||
62 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
63 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * There is a known erratum on Pentium III and Core Solo | ||
68 | * and Core Duo CPUs. | ||
69 | * " Page with PAT set to WC while associated MTRR is UC | ||
70 | * may consolidate to UC " | ||
71 | * Because of this erratum, it is better to stick with | ||
72 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
73 | * | ||
74 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
75 | */ | ||
76 | if (c->x86 == 6 && c->x86_model < 15) | ||
77 | clear_cpu_cap(c, X86_FEATURE_PAT); | ||
44 | } | 78 | } |
45 | 79 | ||
46 | #ifdef CONFIG_X86_32 | 80 | #ifdef CONFIG_X86_32 |
@@ -242,6 +276,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
242 | 276 | ||
243 | intel_workarounds(c); | 277 | intel_workarounds(c); |
244 | 278 | ||
279 | /* | ||
280 | * Detect the extended topology information if available. This | ||
281 | * will reinitialise the initial_apicid which will be used | ||
282 | * in init_intel_cacheinfo() | ||
283 | */ | ||
284 | detect_extended_topology(c); | ||
285 | |||
245 | l2 = init_intel_cacheinfo(c); | 286 | l2 = init_intel_cacheinfo(c); |
246 | if (c->cpuid_level > 9) { | 287 | if (c->cpuid_level > 9) { |
247 | unsigned eax = cpuid_eax(10); | 288 | unsigned eax = cpuid_eax(10); |
@@ -262,6 +303,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
262 | ds_init_intel(c); | 303 | ds_init_intel(c); |
263 | } | 304 | } |
264 | 305 | ||
306 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | ||
307 | set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); | ||
308 | |||
265 | #ifdef CONFIG_X86_64 | 309 | #ifdef CONFIG_X86_64 |
266 | if (c->x86 == 15) | 310 | if (c->x86 == 15) |
267 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 311 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
@@ -307,13 +351,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
307 | set_cpu_cap(c, X86_FEATURE_P4); | 351 | set_cpu_cap(c, X86_FEATURE_P4); |
308 | if (c->x86 == 6) | 352 | if (c->x86 == 6) |
309 | set_cpu_cap(c, X86_FEATURE_P3); | 353 | set_cpu_cap(c, X86_FEATURE_P3); |
310 | |||
311 | if (cpu_has_bts) | ||
312 | ptrace_bts_init_intel(c); | ||
313 | |||
314 | #endif | 354 | #endif |
315 | 355 | ||
316 | detect_extended_topology(c); | ||
317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 356 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
318 | /* | 357 | /* |
319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 358 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
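
Of the intel.c additions, the TSC one is the least self-explanatory: c->x86_power caches EDX of CPUID leaf 0x80000007, and bit 8 there is the invariant-TSC flag. Reading the bit directly would look like this (sketch; assumes the CPU reports the extended leaf, helper name illustrative):

    #include <asm/processor.h>

    static int __cpuinit tsc_is_invariant(void)
    {
            /* CPUID 0x80000007 EDX bit 8: TSC runs at a constant rate
             * across P-/T-state changes and keeps counting in deep
             * C-states -- the condition early_init_intel() keys on. */
            return !!(cpuid_edx(0x80000007) & (1 << 8));
    }

The MISC_ENABLE hunk is self-contained by comparison: clear the BIOS "limit CPUID maxval" bit, then re-read leaf 0 so cpuid_level reflects the full range.
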
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..7293508d8f5c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -36,8 +36,11 @@ static struct _cache_table cache_table[] __cpuinitdata = | |||
36 | { | 36 | { |
37 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ | 37 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ |
38 | { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ | 38 | { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ |
39 | { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ | ||
39 | { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ | 40 | { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ |
40 | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ | 41 | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ |
42 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ | ||
43 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ | ||
41 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 44 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
42 | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 45 | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
43 | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 46 | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
@@ -85,6 +88,18 @@ static struct _cache_table cache_table[] __cpuinitdata = | |||
85 | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ | 88 | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ |
86 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ | 89 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ |
87 | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ | 90 | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ |
91 | { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ | ||
92 | { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ | ||
93 | { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ | ||
94 | { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ | ||
95 | { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ | ||
96 | { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | ||
97 | { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ | ||
98 | { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | ||
99 | { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ | ||
100 | { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ | ||
101 | { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | ||
102 | { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | ||
88 | { 0x00, 0, 0} | 103 | { 0x00, 0, 0} |
89 | }; | 104 | }; |
90 | 105 | ||
@@ -132,7 +147,16 @@ struct _cpuid4_info { | |||
132 | union _cpuid4_leaf_ecx ecx; | 147 | union _cpuid4_leaf_ecx ecx; |
133 | unsigned long size; | 148 | unsigned long size; |
134 | unsigned long can_disable; | 149 | unsigned long can_disable; |
135 | cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ | 150 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
151 | }; | ||
152 | |||
153 | /* subset of above _cpuid4_info w/o shared_cpu_map */ | ||
154 | struct _cpuid4_info_regs { | ||
155 | union _cpuid4_leaf_eax eax; | ||
156 | union _cpuid4_leaf_ebx ebx; | ||
157 | union _cpuid4_leaf_ecx ecx; | ||
158 | unsigned long size; | ||
159 | unsigned long can_disable; | ||
136 | }; | 160 | }; |
137 | 161 | ||
138 | #ifdef CONFIG_PCI | 162 | #ifdef CONFIG_PCI |
@@ -263,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
263 | } | 287 | } |
264 | 288 | ||
265 | static void __cpuinit | 289 | static void __cpuinit |
266 | amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) | 290 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
267 | { | 291 | { |
268 | if (index < 3) | 292 | if (index < 3) |
269 | return; | 293 | return; |
@@ -271,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) | |||
271 | } | 295 | } |
272 | 296 | ||
273 | static int | 297 | static int |
274 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | 298 | __cpuinit cpuid4_cache_lookup_regs(int index, |
299 | struct _cpuid4_info_regs *this_leaf) | ||
275 | { | 300 | { |
276 | union _cpuid4_leaf_eax eax; | 301 | union _cpuid4_leaf_eax eax; |
277 | union _cpuid4_leaf_ebx ebx; | 302 | union _cpuid4_leaf_ebx ebx; |
@@ -299,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | |||
299 | return 0; | 324 | return 0; |
300 | } | 325 | } |
301 | 326 | ||
327 | static int | ||
328 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | ||
329 | { | ||
330 | struct _cpuid4_info_regs *leaf_regs = | ||
331 | (struct _cpuid4_info_regs *)this_leaf; | ||
332 | |||
333 | return cpuid4_cache_lookup_regs(index, leaf_regs); | ||
334 | } | ||
335 | |||
302 | static int __cpuinit find_num_cache_leaves(void) | 336 | static int __cpuinit find_num_cache_leaves(void) |
303 | { | 337 | { |
304 | unsigned int eax, ebx, ecx, edx; | 338 | unsigned int eax, ebx, ecx, edx; |
@@ -338,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
338 | * parameters cpuid leaf to find the cache details | 372 | * parameters cpuid leaf to find the cache details |
339 | */ | 373 | */ |
340 | for (i = 0; i < num_cache_leaves; i++) { | 374 | for (i = 0; i < num_cache_leaves; i++) { |
341 | struct _cpuid4_info this_leaf; | 375 | struct _cpuid4_info_regs this_leaf; |
342 | |||
343 | int retval; | 376 | int retval; |
344 | 377 | ||
345 | retval = cpuid4_cache_lookup(i, &this_leaf); | 378 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); |
346 | if (retval >= 0) { | 379 | if (retval >= 0) { |
347 | switch(this_leaf.eax.split.level) { | 380 | switch(this_leaf.eax.split.level) { |
348 | case 1: | 381 | case 1: |
@@ -491,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
491 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 524 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; |
492 | 525 | ||
493 | if (num_threads_sharing == 1) | 526 | if (num_threads_sharing == 1) |
494 | cpu_set(cpu, this_leaf->shared_cpu_map); | 527 | cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); |
495 | else { | 528 | else { |
496 | index_msb = get_count_order(num_threads_sharing); | 529 | index_msb = get_count_order(num_threads_sharing); |
497 | 530 | ||
498 | for_each_online_cpu(i) { | 531 | for_each_online_cpu(i) { |
499 | if (cpu_data(i).apicid >> index_msb == | 532 | if (cpu_data(i).apicid >> index_msb == |
500 | c->apicid >> index_msb) { | 533 | c->apicid >> index_msb) { |
501 | cpu_set(i, this_leaf->shared_cpu_map); | 534 | cpumask_set_cpu(i, |
535 | to_cpumask(this_leaf->shared_cpu_map)); | ||
502 | if (i != cpu && per_cpu(cpuid4_info, i)) { | 536 | if (i != cpu && per_cpu(cpuid4_info, i)) { |
503 | sibling_leaf = CPUID4_INFO_IDX(i, index); | 537 | sibling_leaf = |
504 | cpu_set(cpu, sibling_leaf->shared_cpu_map); | 538 | CPUID4_INFO_IDX(i, index); |
539 | cpumask_set_cpu(cpu, to_cpumask( | ||
540 | sibling_leaf->shared_cpu_map)); | ||
505 | } | 541 | } |
506 | } | 542 | } |
507 | } | 543 | } |
@@ -513,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
513 | int sibling; | 549 | int sibling; |
514 | 550 | ||
515 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 551 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
516 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { | 552 | for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { |
517 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 553 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
518 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 554 | cpumask_clear_cpu(cpu, |
555 | to_cpumask(sibling_leaf->shared_cpu_map)); | ||
519 | } | 556 | } |
520 | } | 557 | } |
521 | #else | 558 | #else |
@@ -534,31 +571,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
534 | per_cpu(cpuid4_info, cpu) = NULL; | 571 | per_cpu(cpuid4_info, cpu) = NULL; |
535 | } | 572 | } |
536 | 573 | ||
537 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | 574 | static void __cpuinit get_cpu_leaves(void *_retval) |
538 | { | 575 | { |
539 | struct _cpuid4_info *this_leaf; | 576 | int j, *retval = _retval, cpu = smp_processor_id(); |
540 | unsigned long j; | ||
541 | int retval; | ||
542 | cpumask_t oldmask; | ||
543 | |||
544 | if (num_cache_leaves == 0) | ||
545 | return -ENOENT; | ||
546 | |||
547 | per_cpu(cpuid4_info, cpu) = kzalloc( | ||
548 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
549 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | oldmask = current->cpus_allowed; | ||
553 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
554 | if (retval) | ||
555 | goto out; | ||
556 | 577 | ||
557 | /* Do cpuid and store the results */ | 578 | /* Do cpuid and store the results */ |
558 | for (j = 0; j < num_cache_leaves; j++) { | 579 | for (j = 0; j < num_cache_leaves; j++) { |
580 | struct _cpuid4_info *this_leaf; | ||
559 | this_leaf = CPUID4_INFO_IDX(cpu, j); | 581 | this_leaf = CPUID4_INFO_IDX(cpu, j); |
560 | retval = cpuid4_cache_lookup(j, this_leaf); | 582 | *retval = cpuid4_cache_lookup(j, this_leaf); |
561 | if (unlikely(retval < 0)) { | 583 | if (unlikely(*retval < 0)) { |
562 | int i; | 584 | int i; |
563 | 585 | ||
564 | for (i = 0; i < j; i++) | 586 | for (i = 0; i < j; i++) |
@@ -567,9 +589,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
567 | } | 589 | } |
568 | cache_shared_cpu_map_setup(cpu, j); | 590 | cache_shared_cpu_map_setup(cpu, j); |
569 | } | 591 | } |
570 | set_cpus_allowed_ptr(current, &oldmask); | 592 | } |
593 | |||
594 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | ||
595 | { | ||
596 | int retval; | ||
597 | |||
598 | if (num_cache_leaves == 0) | ||
599 | return -ENOENT; | ||
571 | 600 | ||
572 | out: | 601 | per_cpu(cpuid4_info, cpu) = kzalloc( |
602 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
603 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
604 | return -ENOMEM; | ||
605 | |||
606 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); | ||
573 | if (retval) { | 607 | if (retval) { |
574 | kfree(per_cpu(cpuid4_info, cpu)); | 608 | kfree(per_cpu(cpuid4_info, cpu)); |
575 | per_cpu(cpuid4_info, cpu) = NULL; | 609 | per_cpu(cpuid4_info, cpu) = NULL; |
@@ -623,11 +657,12 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
623 | int n = 0; | 657 | int n = 0; |
624 | 658 | ||
625 | if (len > 1) { | 659 | if (len > 1) { |
626 | cpumask_t *mask = &this_leaf->shared_cpu_map; | 660 | const struct cpumask *mask; |
627 | 661 | ||
662 | mask = to_cpumask(this_leaf->shared_cpu_map); | ||
628 | n = type? | 663 | n = type? |
629 | cpulist_scnprintf(buf, len-2, *mask): | 664 | cpulist_scnprintf(buf, len-2, mask) : |
630 | cpumask_scnprintf(buf, len-2, *mask); | 665 | cpumask_scnprintf(buf, len-2, mask); |
631 | buf[n++] = '\n'; | 666 | buf[n++] = '\n'; |
632 | buf[n] = '\0'; | 667 | buf[n] = '\0'; |
633 | } | 668 | } |
@@ -644,20 +679,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) | |||
644 | return show_shared_cpu_map_func(leaf, 1, buf); | 679 | return show_shared_cpu_map_func(leaf, 1, buf); |
645 | } | 680 | } |
646 | 681 | ||
647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | 682 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) |
648 | switch(this_leaf->eax.split.type) { | 683 | { |
649 | case CACHE_TYPE_DATA: | 684 | switch (this_leaf->eax.split.type) { |
685 | case CACHE_TYPE_DATA: | ||
650 | return sprintf(buf, "Data\n"); | 686 | return sprintf(buf, "Data\n"); |
651 | break; | 687 | case CACHE_TYPE_INST: |
652 | case CACHE_TYPE_INST: | ||
653 | return sprintf(buf, "Instruction\n"); | 688 | return sprintf(buf, "Instruction\n"); |
654 | break; | 689 | case CACHE_TYPE_UNIFIED: |
655 | case CACHE_TYPE_UNIFIED: | ||
656 | return sprintf(buf, "Unified\n"); | 690 | return sprintf(buf, "Unified\n"); |
657 | break; | 691 | default: |
658 | default: | ||
659 | return sprintf(buf, "Unknown\n"); | 692 | return sprintf(buf, "Unknown\n"); |
660 | break; | ||
661 | } | 693 | } |
662 | } | 694 | } |
663 | 695 | ||
@@ -690,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node) | |||
690 | 722 | ||
691 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) | 723 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) |
692 | { | 724 | { |
693 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | 725 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); |
726 | int node = cpu_to_node(cpumask_first(mask)); | ||
694 | struct pci_dev *dev = NULL; | 727 | struct pci_dev *dev = NULL; |
695 | ssize_t ret = 0; | 728 | ssize_t ret = 0; |
696 | int i; | 729 | int i; |
@@ -724,7 +757,8 @@ static ssize_t | |||
724 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, | 757 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, |
725 | size_t count) | 758 | size_t count) |
726 | { | 759 | { |
727 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | 760 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); |
761 | int node = cpu_to_node(cpumask_first(mask)); | ||
728 | struct pci_dev *dev = NULL; | 762 | struct pci_dev *dev = NULL; |
729 | unsigned int ret, index, val; | 763 | unsigned int ret, index, val; |
730 | 764 | ||
@@ -869,7 +903,7 @@ err_out: | |||
869 | return -ENOMEM; | 903 | return -ENOMEM; |
870 | } | 904 | } |
871 | 905 | ||
872 | static cpumask_t cache_dev_map = CPU_MASK_NONE; | 906 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); |
873 | 907 | ||
874 | /* Add/Remove cache interface for CPU device */ | 908 | /* Add/Remove cache interface for CPU device */ |
875 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | 909 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) |
@@ -909,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
909 | } | 943 | } |
910 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | 944 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); |
911 | } | 945 | } |
912 | cpu_set(cpu, cache_dev_map); | 946 | cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); |
913 | 947 | ||
914 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 948 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); |
915 | return 0; | 949 | return 0; |
@@ -922,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
922 | 956 | ||
923 | if (per_cpu(cpuid4_info, cpu) == NULL) | 957 | if (per_cpu(cpuid4_info, cpu) == NULL) |
924 | return; | 958 | return; |
925 | if (!cpu_isset(cpu, cache_dev_map)) | 959 | if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) |
926 | return; | 960 | return; |
927 | cpu_clear(cpu, cache_dev_map); | 961 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); |
928 | 962 | ||
929 | for (i = 0; i < num_cache_leaves; i++) | 963 | for (i = 0; i < num_cache_leaves; i++) |
930 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | 964 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); |
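
Two techniques recur through the intel_cacheinfo.c conversion. Fixed masks become DECLARE_BITMAP(..., NR_CPUS) fields accessed through to_cpumask(), and detect_cache_attributes() stops rewriting its own affinity: the CPUID probing moves into get_cpu_leaves(), which smp_call_function_single() executes on the target CPU. The cross-call shape in isolation (names hypothetical):

    #include <linux/errno.h>
    #include <linux/smp.h>

    static void probe_fn(void *_retval)
    {
            int *retval = _retval;

            /* runs on the target CPU in IPI context: no sleeping,
             * keep it short -- cpuid/rdmsr-style work only */
            *retval = 0;
    }

    static int probe_on(unsigned int cpu)
    {
            int retval = -ENODEV;

            /* wait=1: block until probe_fn has finished on 'cpu' */
            smp_call_function_single(cpu, probe_fn, &retval, 1);
            return retval;
    }
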
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 0ebf3fc6a610..dfaebce3633e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * mce.c - x86 Machine Check Exception Reporting | 2 | * mce.c - x86 Machine Check Exception Reporting |
3 | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com> | 3 | * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com> |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b031a4ac856..1c838032fd37 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | |||
510 | */ | 510 | */ |
511 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 511 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) |
512 | { | 512 | { |
513 | static cpumask_t mce_cpus = CPU_MASK_NONE; | ||
514 | |||
515 | mce_cpu_quirks(c); | 513 | mce_cpu_quirks(c); |
516 | 514 | ||
517 | if (mce_dont_init || | 515 | if (mce_dont_init || |
518 | cpu_test_and_set(smp_processor_id(), mce_cpus) || | ||
519 | !mce_available(c)) | 516 | !mce_available(c)) |
520 | return; | 517 | return; |
521 | 518 | ||
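
The mce_64.c hunk deletes a static cpumask_t whose only job was to make mcheck_init() idempotent. The removed guard looked as below; it cost NR_CPUS bits of BSS and was apparently redundant, since mcheck_init() is reached once per CPU from the identify path anyway:

    #include <linux/cpumask.h>
    #include <linux/smp.h>
    #include <asm/processor.h>

    static cpumask_t mce_cpus = CPU_MASK_NONE;      /* NR_CPUS bits of BSS */

    void __cpuinit mcheck_init_old(struct cpuinfo_x86 *c)
    {
            /* cpu_test_and_set() returns the previous bit, so the rest
             * of the function could only ever run once per CPU */
            if (cpu_test_and_set(smp_processor_id(), mce_cpus))
                    return;
            /* ... */
    }
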
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e9..4772e91e8246 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { | |||
67 | struct threshold_bank { | 67 | struct threshold_bank { |
68 | struct kobject *kobj; | 68 | struct kobject *kobj; |
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); |
73 | 73 | ||
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | |||
83 | * CPU Initialization | 83 | * CPU Initialization |
84 | */ | 84 | */ |
85 | 85 | ||
86 | struct thresh_restart { | ||
87 | struct threshold_block *b; | ||
88 | int reset; | ||
89 | u16 old_limit; | ||
90 | }; | ||
91 | |||
86 | /* must be called with correct cpu affinity */ | 92 | /* must be called with correct cpu affinity */ |
87 | static void threshold_restart_bank(struct threshold_block *b, | 93 | static long threshold_restart_bank(void *_tr) |
88 | int reset, u16 old_limit) | ||
89 | { | 94 | { |
95 | struct thresh_restart *tr = _tr; | ||
90 | u32 mci_misc_hi, mci_misc_lo; | 96 | u32 mci_misc_hi, mci_misc_lo; |
91 | 97 | ||
92 | rdmsr(b->address, mci_misc_lo, mci_misc_hi); | 98 | rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
93 | 99 | ||
94 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 100 | if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) |
95 | reset = 1; /* limit cannot be lower than err count */ | 101 | tr->reset = 1; /* limit cannot be lower than err count */ |
96 | 102 | ||
97 | if (reset) { /* reset err count and overflow bit */ | 103 | if (tr->reset) { /* reset err count and overflow bit */ |
98 | mci_misc_hi = | 104 | mci_misc_hi = |
99 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | 105 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | |
100 | (THRESHOLD_MAX - b->threshold_limit); | 106 | (THRESHOLD_MAX - tr->b->threshold_limit); |
101 | } else if (old_limit) { /* change limit w/o reset */ | 107 | } else if (tr->old_limit) { /* change limit w/o reset */ |
102 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 108 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
103 | (old_limit - b->threshold_limit); | 109 | (tr->old_limit - tr->b->threshold_limit); |
104 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 110 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
105 | (new_count & THRESHOLD_MAX); | 111 | (new_count & THRESHOLD_MAX); |
106 | } | 112 | } |
107 | 113 | ||
108 | b->interrupt_enable ? | 114 | tr->b->interrupt_enable ? |
109 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 115 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : |
110 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 116 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); |
111 | 117 | ||
112 | mci_misc_hi |= MASK_COUNT_EN_HI; | 118 | mci_misc_hi |= MASK_COUNT_EN_HI; |
113 | wrmsr(b->address, mci_misc_lo, mci_misc_hi); | 119 | wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
120 | return 0; | ||
114 | } | 121 | } |
115 | 122 | ||
116 | /* cpu init entry point, called from mce.c with preempt off */ | 123 | /* cpu init entry point, called from mce.c with preempt off */ |
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
120 | unsigned int cpu = smp_processor_id(); | 127 | unsigned int cpu = smp_processor_id(); |
121 | u8 lvt_off; | 128 | u8 lvt_off; |
122 | u32 low = 0, high = 0, address = 0; | 129 | u32 low = 0, high = 0, address = 0; |
130 | struct thresh_restart tr; | ||
123 | 131 | ||
124 | for (bank = 0; bank < NR_BANKS; ++bank) { | 132 | for (bank = 0; bank < NR_BANKS; ++bank) { |
125 | for (block = 0; block < NR_BLOCKS; ++block) { | 133 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
162 | wrmsr(address, low, high); | 170 | wrmsr(address, low, high); |
163 | 171 | ||
164 | threshold_defaults.address = address; | 172 | threshold_defaults.address = address; |
165 | threshold_restart_bank(&threshold_defaults, 0, 0); | 173 | tr.b = &threshold_defaults; |
174 | tr.reset = 0; | ||
175 | tr.old_limit = 0; | ||
176 | threshold_restart_bank(&tr); | ||
166 | } | 177 | } |
167 | } | 178 | } |
168 | } | 179 | } |
@@ -237,7 +248,7 @@ asmlinkage void mce_threshold_interrupt(void) | |||
237 | } | 248 | } |
238 | } | 249 | } |
239 | out: | 250 | out: |
240 | add_pda(irq_threshold_count, 1); | 251 | inc_irq_stat(irq_threshold_count); |
241 | irq_exit(); | 252 | irq_exit(); |
242 | } | 253 | } |
243 | 254 | ||
@@ -251,20 +262,6 @@ struct threshold_attr { | |||
251 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 262 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); |
252 | }; | 263 | }; |
253 | 264 | ||
254 | static void affinity_set(unsigned int cpu, cpumask_t *oldmask, | ||
255 | cpumask_t *newmask) | ||
256 | { | ||
257 | *oldmask = current->cpus_allowed; | ||
258 | cpus_clear(*newmask); | ||
259 | cpu_set(cpu, *newmask); | ||
260 | set_cpus_allowed_ptr(current, newmask); | ||
261 | } | ||
262 | |||
263 | static void affinity_restore(const cpumask_t *oldmask) | ||
264 | { | ||
265 | set_cpus_allowed_ptr(current, oldmask); | ||
266 | } | ||
267 | |||
268 | #define SHOW_FIELDS(name) \ | 265 | #define SHOW_FIELDS(name) \ |
269 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 266 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ |
270 | { \ | 267 | { \ |
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, | |||
277 | const char *buf, size_t count) | 274 | const char *buf, size_t count) |
278 | { | 275 | { |
279 | char *end; | 276 | char *end; |
280 | cpumask_t oldmask, newmask; | 277 | struct thresh_restart tr; |
281 | unsigned long new = simple_strtoul(buf, &end, 0); | 278 | unsigned long new = simple_strtoul(buf, &end, 0); |
282 | if (end == buf) | 279 | if (end == buf) |
283 | return -EINVAL; | 280 | return -EINVAL; |
284 | b->interrupt_enable = !!new; | 281 | b->interrupt_enable = !!new; |
285 | 282 | ||
286 | affinity_set(b->cpu, &oldmask, &newmask); | 283 | tr.b = b; |
287 | threshold_restart_bank(b, 0, 0); | 284 | tr.reset = 0; |
288 | affinity_restore(&oldmask); | 285 | tr.old_limit = 0; |
286 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); | ||
289 | 287 | ||
290 | return end - buf; | 288 | return end - buf; |
291 | } | 289 | } |
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
294 | const char *buf, size_t count) | 292 | const char *buf, size_t count) |
295 | { | 293 | { |
296 | char *end; | 294 | char *end; |
297 | cpumask_t oldmask, newmask; | 295 | struct thresh_restart tr; |
298 | u16 old; | ||
299 | unsigned long new = simple_strtoul(buf, &end, 0); | 296 | unsigned long new = simple_strtoul(buf, &end, 0); |
300 | if (end == buf) | 297 | if (end == buf) |
301 | return -EINVAL; | 298 | return -EINVAL; |
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
303 | new = THRESHOLD_MAX; | 300 | new = THRESHOLD_MAX; |
304 | if (new < 1) | 301 | if (new < 1) |
305 | new = 1; | 302 | new = 1; |
306 | old = b->threshold_limit; | 303 | tr.old_limit = b->threshold_limit; |
307 | b->threshold_limit = new; | 304 | b->threshold_limit = new; |
305 | tr.b = b; | ||
306 | tr.reset = 0; | ||
308 | 307 | ||
309 | affinity_set(b->cpu, &oldmask, &newmask); | 308 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
310 | threshold_restart_bank(b, 0, old); | ||
311 | affinity_restore(&oldmask); | ||
312 | 309 | ||
313 | return end - buf; | 310 | return end - buf; |
314 | } | 311 | } |
315 | 312 | ||
316 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 313 | static long local_error_count(void *_b) |
317 | { | 314 | { |
318 | u32 high, low; | 315 | struct threshold_block *b = _b; |
319 | cpumask_t oldmask, newmask; | 316 | u32 low, high; |
320 | affinity_set(b->cpu, &oldmask, &newmask); | 317 | |
321 | rdmsr(b->address, low, high); | 318 | rdmsr(b->address, low, high); |
322 | affinity_restore(&oldmask); | 319 | return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); |
323 | return sprintf(buf, "%x\n", | 320 | } |
324 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | 321 | |
322 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | ||
323 | { | ||
324 | return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); | ||
325 | } | 325 | } |
326 | 326 | ||
327 | static ssize_t store_error_count(struct threshold_block *b, | 327 | static ssize_t store_error_count(struct threshold_block *b, |
328 | const char *buf, size_t count) | 328 | const char *buf, size_t count) |
329 | { | 329 | { |
330 | cpumask_t oldmask, newmask; | 330 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; |
331 | affinity_set(b->cpu, &oldmask, &newmask); | 331 | |
332 | threshold_restart_bank(b, 1, 0); | 332 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
333 | affinity_restore(&oldmask); | ||
334 | return 1; | 333 | return 1; |
335 | } | 334 | } |
336 | 335 | ||
@@ -463,19 +462,26 @@ out_free: | |||
463 | return err; | 462 | return err; |
464 | } | 463 | } |
465 | 464 | ||
465 | static __cpuinit long local_allocate_threshold_blocks(void *_bank) | ||
466 | { | ||
467 | unsigned int *bank = _bank; | ||
468 | |||
469 | return allocate_threshold_blocks(smp_processor_id(), *bank, 0, | ||
470 | MSR_IA32_MC0_MISC + *bank * 4); | ||
471 | } | ||
472 | |||
466 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | 473 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ |
467 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 474 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
468 | { | 475 | { |
469 | int i, err = 0; | 476 | int i, err = 0; |
470 | struct threshold_bank *b = NULL; | 477 | struct threshold_bank *b = NULL; |
471 | cpumask_t oldmask, newmask; | ||
472 | char name[32]; | 478 | char name[32]; |
473 | 479 | ||
474 | sprintf(name, "threshold_bank%i", bank); | 480 | sprintf(name, "threshold_bank%i", bank); |
475 | 481 | ||
476 | #ifdef CONFIG_SMP | 482 | #ifdef CONFIG_SMP |
477 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 483 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
478 | i = first_cpu(per_cpu(cpu_core_map, cpu)); | 484 | i = cpumask_first(&per_cpu(cpu_core_map, cpu)); |
479 | 485 | ||
480 | /* first core not up yet */ | 486 | /* first core not up yet */ |
481 | if (cpu_data(i).cpu_core_id) | 487 | if (cpu_data(i).cpu_core_id) |
@@ -495,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
495 | if (err) | 501 | if (err) |
496 | goto out; | 502 | goto out; |
497 | 503 | ||
498 | b->cpus = per_cpu(cpu_core_map, cpu); | 504 | cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); |
499 | per_cpu(threshold_banks, cpu)[bank] = b; | 505 | per_cpu(threshold_banks, cpu)[bank] = b; |
500 | goto out; | 506 | goto out; |
501 | } | 507 | } |
@@ -506,28 +512,29 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
506 | err = -ENOMEM; | 512 | err = -ENOMEM; |
507 | goto out; | 513 | goto out; |
508 | } | 514 | } |
515 | if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | ||
516 | kfree(b); | ||
517 | err = -ENOMEM; | ||
518 | goto out; | ||
519 | } | ||
509 | 520 | ||
510 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); | 521 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); |
511 | if (!b->kobj) | 522 | if (!b->kobj) |
512 | goto out_free; | 523 | goto out_free; |
513 | 524 | ||
514 | #ifndef CONFIG_SMP | 525 | #ifndef CONFIG_SMP |
515 | b->cpus = CPU_MASK_ALL; | 526 | cpumask_setall(b->cpus); |
516 | #else | 527 | #else |
517 | b->cpus = per_cpu(cpu_core_map, cpu); | 528 | cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); |
518 | #endif | 529 | #endif |
519 | 530 | ||
520 | per_cpu(threshold_banks, cpu)[bank] = b; | 531 | per_cpu(threshold_banks, cpu)[bank] = b; |
521 | 532 | ||
522 | affinity_set(cpu, &oldmask, &newmask); | 533 | err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); |
523 | err = allocate_threshold_blocks(cpu, bank, 0, | ||
524 | MSR_IA32_MC0_MISC + bank * 4); | ||
525 | affinity_restore(&oldmask); | ||
526 | |||
527 | if (err) | 534 | if (err) |
528 | goto out_free; | 535 | goto out_free; |
529 | 536 | ||
530 | for_each_cpu_mask_nr(i, b->cpus) { | 537 | for_each_cpu(i, b->cpus) { |
531 | if (i == cpu) | 538 | if (i == cpu) |
532 | continue; | 539 | continue; |
533 | 540 | ||
@@ -543,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
543 | 550 | ||
544 | out_free: | 551 | out_free: |
545 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 552 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
553 | free_cpumask_var(b->cpus); | ||
546 | kfree(b); | 554 | kfree(b); |
547 | out: | 555 | out: |
548 | return err; | 556 | return err; |
@@ -617,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
617 | #endif | 625 | #endif |
618 | 626 | ||
619 | /* remove all sibling symlinks before unregistering */ | 627 | /* remove all sibling symlinks before unregistering */ |
620 | for_each_cpu_mask_nr(i, b->cpus) { | 628 | for_each_cpu(i, b->cpus) { |
621 | if (i == cpu) | 629 | if (i == cpu) |
622 | continue; | 630 | continue; |
623 | 631 | ||
@@ -630,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
630 | free_out: | 638 | free_out: |
631 | kobject_del(b->kobj); | 639 | kobject_del(b->kobj); |
632 | kobject_put(b->kobj); | 640 | kobject_put(b->kobj); |
641 | free_cpumask_var(b->cpus); | ||
633 | kfree(b); | 642 | kfree(b); |
634 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 643 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
635 | } | 644 | } |
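
The mce_amd conversion is the most instructive of the series. Every temporary affinity_set()/affinity_restore() pair around an MSR access becomes a function plus an argument block handed to work_on_cpu(), which runs the function from a workqueue bound to the target CPU and passes back its long return value. The pattern in isolation (names and the MSR payload are illustrative):

    #include <linux/workqueue.h>
    #include <asm/msr.h>

    struct msr_write {                      /* argument block, cf. thresh_restart */
            u32 msr;
            u64 val;
    };

    static long do_msr_write(void *_mw)     /* runs on the chosen CPU */
    {
            struct msr_write *mw = _mw;

            wrmsrl(mw->msr, mw->val);
            return 0;
    }

    static long msr_write_on(unsigned int cpu, u32 msr, u64 val)
    {
            struct msr_write mw = { .msr = msr, .val = val };

            /* work_on_cpu() returns do_msr_write()'s return value */
            return work_on_cpu(cpu, do_msr_write, &mw);
    }

Unlike the smp_call_function_single() variant in intel_cacheinfo.c, the callback here runs in process context and may sleep, which suits the sysfs store handlers converted above.
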
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6dd..5e8c79e748a6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/interrupt.h> | 7 | #include <linux/interrupt.h> |
8 | #include <linux/percpu.h> | 8 | #include <linux/percpu.h> |
9 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
10 | #include <asm/apic.h> | ||
10 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
11 | #include <asm/mce.h> | 12 | #include <asm/mce.h> |
12 | #include <asm/hw_irq.h> | 13 | #include <asm/hw_irq.h> |
@@ -26,7 +27,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
26 | if (therm_throt_process(msr_val & 1)) | 27 | if (therm_throt_process(msr_val & 1)) |
27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | 28 | mce_log_therm_throt_event(smp_processor_id(), msr_val); |
28 | 29 | ||
29 | add_pda(irq_thermal_count, 1); | 30 | inc_irq_stat(irq_thermal_count); |
30 | irq_exit(); | 31 | irq_exit(); |
31 | } | 32 | } |
32 | 33 | ||
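
inc_irq_stat() hides the 32/64-bit split in interrupt accounting: the 64-bit build kept these counters in the per-CPU PDA, the 32-bit build in per-CPU irq_stat. The macro presumably expands along these lines (a sketch, not the verbatim headers):

    #ifdef CONFIG_X86_64
    /* counters live in the per-CPU PDA */
    # define inc_irq_stat(member)   add_pda(member, 1)
    #else
    /* counters live in per-CPU irq_stat */
    # define inc_irq_stat(member)   (__get_cpu_var(irq_stat).member++)
    #endif
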
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index bfa5817afdda..c9f77ea69edc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * P5 specific Machine Check Exception Reporting | 2 | * P5 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 62efc9c2b3af..2ac52d7b434b 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * P6 specific Machine Check Exception Reporting | 2 | * P6 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index f2be3e190c6b..2a043d89811d 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * IDT Winchip specific Machine Check Exception Reporting | 2 | * IDT Winchip specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01eeb..0c0a455fe95c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -14,14 +14,6 @@ | |||
14 | #include <asm/pat.h> | 14 | #include <asm/pat.h> |
15 | #include "mtrr.h" | 15 | #include "mtrr.h" |
16 | 16 | ||
17 | struct mtrr_state { | ||
18 | struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; | ||
19 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; | ||
20 | unsigned char enabled; | ||
21 | unsigned char have_fixed; | ||
22 | mtrr_type def_type; | ||
23 | }; | ||
24 | |||
25 | struct fixed_range_block { | 17 | struct fixed_range_block { |
26 | int base_msr; /* start address of an MTRR block */ | 18 | int base_msr; /* start address of an MTRR block */ |
27 | int ranges; /* number of MTRRs in this block */ | 19 | int ranges; /* number of MTRRs in this block */ |
@@ -35,15 +27,19 @@ static struct fixed_range_block fixed_range_blocks[] = { | |||
35 | }; | 27 | }; |
36 | 28 | ||
37 | static unsigned long smp_changes_mask; | 29 | static unsigned long smp_changes_mask; |
38 | static struct mtrr_state mtrr_state = {}; | ||
39 | static int mtrr_state_set; | 30 | static int mtrr_state_set; |
40 | u64 mtrr_tom2; | 31 | u64 mtrr_tom2; |
41 | 32 | ||
42 | #undef MODULE_PARAM_PREFIX | 33 | struct mtrr_state_type mtrr_state = {}; |
43 | #define MODULE_PARAM_PREFIX "mtrr." | 34 | EXPORT_SYMBOL_GPL(mtrr_state); |
44 | 35 | ||
45 | static int mtrr_show; | 36 | static int __initdata mtrr_show; |
46 | module_param_named(show, mtrr_show, bool, 0); | 37 | static int __init mtrr_debug(char *opt) |
38 | { | ||
39 | mtrr_show = 1; | ||
40 | return 0; | ||
41 | } | ||
42 | early_param("mtrr.show", mtrr_debug); | ||
47 | 43 | ||
48 | /* | 44 | /* |
49 | * Returns the effective MTRR type for the region | 45 | * Returns the effective MTRR type for the region |
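
mtrr/generic.c swaps module_param_named() for early_param(): MTRR setup runs well before normal parameter parsing, so "mtrr.show" has to be picked up in parse_early_param() during setup_arch() for the flag to be seen in time -- which also lets the flag itself become __initdata. The registration idiom, generically (knob name illustrative):

    #include <linux/init.h>

    static int foo_debug __initdata;

    static int __init foo_debug_setup(char *opt)
    {
            /* opt points at the text after '=', or is NULL/empty */
            foo_debug = 1;
            return 0;                         /* 0 == option consumed */
    }
    early_param("foo.debug", foo_debug_setup);
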
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c78c04821ea1..236a401b8259 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | u32 num_var_ranges = 0; | 50 | u32 num_var_ranges = 0; |
51 | 51 | ||
52 | unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | 52 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
53 | static DEFINE_MUTEX(mtrr_mutex); | 53 | static DEFINE_MUTEX(mtrr_mutex); |
54 | 54 | ||
55 | u64 size_or_mask, size_and_mask; | 55 | u64 size_or_mask, size_and_mask; |
@@ -574,7 +574,7 @@ struct mtrr_value { | |||
574 | unsigned long lsize; | 574 | unsigned long lsize; |
575 | }; | 575 | }; |
576 | 576 | ||
577 | static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; | 577 | static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; |
578 | 578 | ||
579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) |
580 | { | 580 | { |
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
803 | } | 803 | } |
804 | 804 | ||
805 | static struct res_range __initdata range[RANGE_NUM]; | 805 | static struct res_range __initdata range[RANGE_NUM]; |
806 | static int __initdata nr_range; | ||
806 | 807 | ||
807 | #ifdef CONFIG_MTRR_SANITIZER | 808 | #ifdef CONFIG_MTRR_SANITIZER |
808 | 809 | ||
@@ -823,16 +824,14 @@ static int enable_mtrr_cleanup __initdata = | |||
823 | 824 | ||
824 | static int __init disable_mtrr_cleanup_setup(char *str) | 825 | static int __init disable_mtrr_cleanup_setup(char *str) |
825 | { | 826 | { |
826 | if (enable_mtrr_cleanup != -1) | 827 | enable_mtrr_cleanup = 0; |
827 | enable_mtrr_cleanup = 0; | ||
828 | return 0; | 828 | return 0; |
829 | } | 829 | } |
830 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); | 830 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); |
831 | 831 | ||
832 | static int __init enable_mtrr_cleanup_setup(char *str) | 832 | static int __init enable_mtrr_cleanup_setup(char *str) |
833 | { | 833 | { |
834 | if (enable_mtrr_cleanup != -1) | 834 | enable_mtrr_cleanup = 1; |
835 | enable_mtrr_cleanup = 1; | ||
836 | return 0; | 835 | return 0; |
837 | } | 836 | } |
838 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); | 837 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); |
@@ -1206,39 +1205,43 @@ struct mtrr_cleanup_result { | |||
1206 | #define PSHIFT (PAGE_SHIFT - 10) | 1205 | #define PSHIFT (PAGE_SHIFT - 10) |
1207 | 1206 | ||
1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1207 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
1209 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | 1208 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; |
1211 | 1209 | ||
1212 | static int __init mtrr_cleanup(unsigned address_bits) | 1210 | static void __init print_out_mtrr_range_state(void) |
1213 | { | 1211 | { |
1214 | unsigned long extra_remove_base, extra_remove_size; | ||
1215 | unsigned long base, size, def, dummy; | ||
1216 | mtrr_type type; | ||
1217 | int nr_range, nr_range_new; | ||
1218 | u64 chunk_size, gran_size; | ||
1219 | unsigned long range_sums, range_sums_new; | ||
1220 | int index_good; | ||
1221 | int num_reg_good; | ||
1222 | int i; | 1212 | int i; |
1213 | char start_factor = 'K', size_factor = 'K'; | ||
1214 | unsigned long start_base, size_base; | ||
1215 | mtrr_type type; | ||
1223 | 1216 | ||
1224 | /* extra one for all 0 */ | 1217 | for (i = 0; i < num_var_ranges; i++) { |
1225 | int num[MTRR_NUM_TYPES + 1]; | ||
1226 | 1218 | ||
1227 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 1219 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); |
1228 | return 0; | 1220 | if (!size_base) |
1229 | rdmsr(MTRRdefType_MSR, def, dummy); | 1221 | continue; |
1230 | def &= 0xff; | ||
1231 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1232 | return 0; | ||
1233 | 1222 | ||
1234 | /* get it and store it aside */ | 1223 | size_base = to_size_factor(size_base, &size_factor), |
1235 | memset(range_state, 0, sizeof(range_state)); | 1224 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); |
1236 | for (i = 0; i < num_var_ranges; i++) { | 1225 | start_base = to_size_factor(start_base, &start_factor), |
1237 | mtrr_if->get(i, &base, &size, &type); | 1226 | type = range_state[i].type; |
1238 | range_state[i].base_pfn = base; | 1227 | |
1239 | range_state[i].size_pfn = size; | 1228 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", |
1240 | range_state[i].type = type; | 1229 | i, start_base, start_factor, |
1230 | size_base, size_factor, | ||
1231 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
1232 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1233 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1234 | ); | ||
1241 | } | 1235 | } |
1236 | } | ||
1237 | |||
1238 | static int __init mtrr_need_cleanup(void) | ||
1239 | { | ||
1240 | int i; | ||
1241 | mtrr_type type; | ||
1242 | unsigned long size; | ||
1243 | /* extra one for all 0 */ | ||
1244 | int num[MTRR_NUM_TYPES + 1]; | ||
1242 | 1245 | ||
1243 | /* check entries number */ | 1246 | /* check entries number */ |
1244 | memset(num, 0, sizeof(num)); | 1247 | memset(num, 0, sizeof(num)); |
@@ -1263,29 +1266,133 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1263 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1266 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1264 | return 0; | 1267 | return 0; |
1265 | 1268 | ||
1266 | /* print original var MTRRs at first, for debugging: */ | 1269 | return 1; |
1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | 1270 | } |
1268 | for (i = 0; i < num_var_ranges; i++) { | ||
1269 | char start_factor = 'K', size_factor = 'K'; | ||
1270 | unsigned long start_base, size_base; | ||
1271 | 1271 | ||
1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | 1272 | static unsigned long __initdata range_sums; |
1273 | if (!size_base) | 1273 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
1274 | continue; | 1274 | unsigned long extra_remove_base, |
1275 | unsigned long extra_remove_size, | ||
1276 | int i) | ||
1277 | { | ||
1278 | int num_reg; | ||
1279 | static struct res_range range_new[RANGE_NUM]; | ||
1280 | static int nr_range_new; | ||
1281 | unsigned long range_sums_new; | ||
1282 | |||
1283 | /* convert ranges to var ranges state */ | ||
1284 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
1285 | chunk_size, gran_size); | ||
1286 | |||
1287 | /* we got new setting in range_state, check it */ | ||
1288 | memset(range_new, 0, sizeof(range_new)); | ||
1289 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1290 | extra_remove_base, extra_remove_size); | ||
1291 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1292 | |||
1293 | result[i].chunk_sizek = chunk_size >> 10; | ||
1294 | result[i].gran_sizek = gran_size >> 10; | ||
1295 | result[i].num_reg = num_reg; | ||
1296 | if (range_sums < range_sums_new) { | ||
1297 | result[i].lose_cover_sizek = | ||
1298 | (range_sums_new - range_sums) << PSHIFT; | ||
1299 | result[i].bad = 1; | ||
1300 | } else | ||
1301 | result[i].lose_cover_sizek = | ||
1302 | (range_sums - range_sums_new) << PSHIFT; | ||
1275 | 1303 | ||
1276 | size_base = to_size_factor(size_base, &size_factor), | 1304 | /* double check it */ |
1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | 1305 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
1278 | start_base = to_size_factor(start_base, &start_factor), | 1306 | if (nr_range_new != nr_range || |
1279 | type = range_state[i].type; | 1307 | memcmp(range, range_new, sizeof(range))) |
1308 | result[i].bad = 1; | ||
1309 | } | ||
1280 | 1310 | ||
1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | 1311 | if (!result[i].bad && (range_sums - range_sums_new < |
1282 | i, start_base, start_factor, | 1312 | min_loss_pfn[num_reg])) { |
1283 | size_base, size_factor, | 1313 | min_loss_pfn[num_reg] = |
1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | 1314 | range_sums - range_sums_new; |
1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1287 | ); | ||
1288 | } | 1315 | } |
1316 | } | ||
1317 | |||
1318 | static void __init mtrr_print_out_one_result(int i) | ||
1319 | { | ||
1320 | char gran_factor, chunk_factor, lose_factor; | ||
1321 | unsigned long gran_base, chunk_base, lose_base; | ||
1322 | |||
1323 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1324 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1325 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1326 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1327 | result[i].bad ? "*BAD*" : " ", | ||
1328 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1329 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1330 | result[i].num_reg, result[i].bad ? "-" : "", | ||
1331 | lose_base, lose_factor); | ||
1332 | } | ||
1333 | |||
1334 | static int __init mtrr_search_optimal_index(void) | ||
1335 | { | ||
1336 | int i; | ||
1337 | int num_reg_good; | ||
1338 | int index_good; | ||
1339 | |||
1340 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
1341 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1342 | num_reg_good = -1; | ||
1343 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1344 | if (!min_loss_pfn[i]) | ||
1345 | num_reg_good = i; | ||
1346 | } | ||
1347 | |||
1348 | index_good = -1; | ||
1349 | if (num_reg_good != -1) { | ||
1350 | for (i = 0; i < NUM_RESULT; i++) { | ||
1351 | if (!result[i].bad && | ||
1352 | result[i].num_reg == num_reg_good && | ||
1353 | !result[i].lose_cover_sizek) { | ||
1354 | index_good = i; | ||
1355 | break; | ||
1356 | } | ||
1357 | } | ||
1358 | } | ||
1359 | |||
1360 | return index_good; | ||
1361 | } | ||
1362 | |||
1363 | |||
1364 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1365 | { | ||
1366 | unsigned long extra_remove_base, extra_remove_size; | ||
1367 | unsigned long base, size, def, dummy; | ||
1368 | mtrr_type type; | ||
1369 | u64 chunk_size, gran_size; | ||
1370 | int index_good; | ||
1371 | int i; | ||
1372 | |||
1373 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
1374 | return 0; | ||
1375 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
1376 | def &= 0xff; | ||
1377 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1378 | return 0; | ||
1379 | |||
1380 | /* get it and store it aside */ | ||
1381 | memset(range_state, 0, sizeof(range_state)); | ||
1382 | for (i = 0; i < num_var_ranges; i++) { | ||
1383 | mtrr_if->get(i, &base, &size, &type); | ||
1384 | range_state[i].base_pfn = base; | ||
1385 | range_state[i].size_pfn = size; | ||
1386 | range_state[i].type = type; | ||
1387 | } | ||
1388 | |||
1389 | /* check if we need to handle it and can handle it */ | ||
1390 | if (!mtrr_need_cleanup()) | ||
1391 | return 0; | ||
1392 | |||
1393 | /* print original var MTRRs at first, for debugging: */ | ||
1394 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
1395 | print_out_mtrr_range_state(); | ||
1289 | 1396 | ||
1290 | memset(range, 0, sizeof(range)); | 1397 | memset(range, 0, sizeof(range)); |
1291 | extra_remove_size = 0; | 1398 | extra_remove_size = 0; |
@@ -1309,176 +1416,64 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1309 | range_sums >> (20 - PAGE_SHIFT)); | 1416 | range_sums >> (20 - PAGE_SHIFT)); |
1310 | 1417 | ||
1311 | if (mtrr_chunk_size && mtrr_gran_size) { | 1418 | if (mtrr_chunk_size && mtrr_gran_size) { |
1312 | int num_reg; | 1419 | i = 0; |
1313 | char gran_factor, chunk_factor, lose_factor; | 1420 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
1314 | unsigned long gran_base, chunk_base, lose_base; | 1421 | extra_remove_base, extra_remove_size, i); |
1315 | |||
1316 | debug_print++; | ||
1317 | /* convert ranges to var ranges state */ | ||
1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
1319 | mtrr_gran_size); | ||
1320 | 1422 | ||
1321 | /* we got new setting in range_state, check it */ | 1423 | mtrr_print_out_one_result(i); |
1322 | memset(range_new, 0, sizeof(range_new)); | ||
1323 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1324 | extra_remove_base, | ||
1325 | extra_remove_size); | ||
1326 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1327 | 1424 | ||
1328 | i = 0; | ||
1329 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
1330 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
1331 | result[i].num_reg = num_reg; | ||
1332 | if (range_sums < range_sums_new) { | ||
1333 | result[i].lose_cover_sizek = | ||
1334 | (range_sums_new - range_sums) << PSHIFT; | ||
1335 | result[i].bad = 1; | ||
1336 | } else | ||
1337 | result[i].lose_cover_sizek = | ||
1338 | (range_sums - range_sums_new) << PSHIFT; | ||
1339 | |||
1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1344 | result[i].bad?"*BAD*":" ", | ||
1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1347 | result[i].num_reg, result[i].bad?"-":"", | ||
1348 | lose_base, lose_factor); | ||
1349 | if (!result[i].bad) { | 1425 | if (!result[i].bad) { |
1350 | set_var_mtrr_all(address_bits); | 1426 | set_var_mtrr_all(address_bits); |
1351 | return 1; | 1427 | return 1; |
1352 | } | 1428 | } |
1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1429 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
1354 | "will find optimal one\n"); | 1430 | "will find optimal one\n"); |
1355 | debug_print--; | ||
1356 | memset(result, 0, sizeof(result[0])); | ||
1357 | } | 1431 | } |
1358 | 1432 | ||
1359 | i = 0; | 1433 | i = 0; |
1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1434 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
1361 | memset(result, 0, sizeof(result)); | 1435 | memset(result, 0, sizeof(result)); |
1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { | 1436 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
1363 | char gran_factor; | ||
1364 | unsigned long gran_base; | ||
1365 | |||
1366 | if (debug_print) | ||
1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
1368 | 1437 | ||
1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | 1438 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); |
1370 | chunk_size <<= 1) { | 1439 | chunk_size <<= 1) { |
1371 | int num_reg; | ||
1372 | 1440 | ||
1373 | if (debug_print) { | ||
1374 | char chunk_factor; | ||
1375 | unsigned long chunk_base; | ||
1376 | |||
1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
1378 | printk(KERN_INFO "\n"); | ||
1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1381 | } | ||
1382 | if (i >= NUM_RESULT) | 1441 | if (i >= NUM_RESULT) |
1383 | continue; | 1442 | continue; |
1384 | 1443 | ||
1385 | /* convert ranges to var ranges state */ | 1444 | mtrr_calc_range_state(chunk_size, gran_size, |
1386 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 1445 | extra_remove_base, extra_remove_size, i); |
1387 | chunk_size, gran_size); | 1446 | if (debug_print) { |
1388 | 1447 | mtrr_print_out_one_result(i); | |
1389 | /* we got new setting in range_state, check it */ | 1448 | printk(KERN_INFO "\n"); |
1390 | memset(range_new, 0, sizeof(range_new)); | ||
1391 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1392 | extra_remove_base, extra_remove_size); | ||
1393 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1394 | |||
1395 | result[i].chunk_sizek = chunk_size >> 10; | ||
1396 | result[i].gran_sizek = gran_size >> 10; | ||
1397 | result[i].num_reg = num_reg; | ||
1398 | if (range_sums < range_sums_new) { | ||
1399 | result[i].lose_cover_sizek = | ||
1400 | (range_sums_new - range_sums) << PSHIFT; | ||
1401 | result[i].bad = 1; | ||
1402 | } else | ||
1403 | result[i].lose_cover_sizek = | ||
1404 | (range_sums - range_sums_new) << PSHIFT; | ||
1405 | |||
1406 | /* double check it */ | ||
1407 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
1408 | if (nr_range_new != nr_range || | ||
1409 | memcmp(range, range_new, sizeof(range))) | ||
1410 | result[i].bad = 1; | ||
1411 | } | 1449 | } |
1412 | 1450 | ||
1413 | if (!result[i].bad && (range_sums - range_sums_new < | ||
1414 | min_loss_pfn[num_reg])) { | ||
1415 | min_loss_pfn[num_reg] = | ||
1416 | range_sums - range_sums_new; | ||
1417 | } | ||
1418 | i++; | 1451 | i++; |
1419 | } | 1452 | } |
1420 | } | 1453 | } |
1421 | 1454 | ||
1422 | /* print out all */ | ||
1423 | for (i = 0; i < NUM_RESULT; i++) { | ||
1424 | char gran_factor, chunk_factor, lose_factor; | ||
1425 | unsigned long gran_base, chunk_base, lose_base; | ||
1426 | |||
1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1431 | result[i].bad?"*BAD*":" ", | ||
1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1434 | result[i].num_reg, result[i].bad?"-":"", | ||
1435 | lose_base, lose_factor); | ||
1436 | } | ||
1437 | |||
1438 | /* try to find the optimal index */ | 1455 | /* try to find the optimal index */ |
1439 | if (nr_mtrr_spare_reg >= num_var_ranges) | 1456 | index_good = mtrr_search_optimal_index(); |
1440 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1441 | num_reg_good = -1; | ||
1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1443 | if (!min_loss_pfn[i]) | ||
1444 | num_reg_good = i; | ||
1445 | } | ||
1446 | |||
1447 | index_good = -1; | ||
1448 | if (num_reg_good != -1) { | ||
1449 | for (i = 0; i < NUM_RESULT; i++) { | ||
1450 | if (!result[i].bad && | ||
1451 | result[i].num_reg == num_reg_good && | ||
1452 | !result[i].lose_cover_sizek) { | ||
1453 | index_good = i; | ||
1454 | break; | ||
1455 | } | ||
1456 | } | ||
1457 | } | ||
1458 | 1457 | ||
1459 | if (index_good != -1) { | 1458 | if (index_good != -1) { |
1460 | char gran_factor, chunk_factor, lose_factor; | ||
1461 | unsigned long gran_base, chunk_base, lose_base; | ||
1462 | |||
1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1459 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
1464 | i = index_good; | 1460 | i = index_good; |
1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 1461 | mtrr_print_out_one_result(i); |
1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 1462 | |
1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1469 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", | ||
1471 | result[i].num_reg, lose_base, lose_factor); | ||
1472 | /* convert ranges to var ranges state */ | 1463 | /* convert ranges to var ranges state */ |
1473 | chunk_size = result[i].chunk_sizek; | 1464 | chunk_size = result[i].chunk_sizek; |
1474 | chunk_size <<= 10; | 1465 | chunk_size <<= 10; |
1475 | gran_size = result[i].gran_sizek; | 1466 | gran_size = result[i].gran_sizek; |
1476 | gran_size <<= 10; | 1467 | gran_size <<= 10; |
1477 | debug_print++; | ||
1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1468 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
1479 | debug_print--; | ||
1480 | set_var_mtrr_all(address_bits); | 1469 | set_var_mtrr_all(address_bits); |
1470 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
1471 | print_out_mtrr_range_state(); | ||
1481 | return 1; | 1472 | return 1; |
1473 | } else { | ||
1474 | /* print out all */ | ||
1475 | for (i = 0; i < NUM_RESULT; i++) | ||
1476 | mtrr_print_out_one_result(i); | ||
1482 | } | 1477 | } |
1483 | 1478 | ||
1484 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | 1479 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); |
@@ -1562,7 +1557,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1562 | { | 1557 | { |
1563 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1558 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
1564 | mtrr_type type; | 1559 | mtrr_type type; |
1565 | int nr_range; | ||
1566 | u64 total_trim_size; | 1560 | u64 total_trim_size; |
1567 | 1561 | ||
1568 | /* extra one for all 0 */ | 1562 | /* extra one for all 0 */ |
@@ -1600,8 +1594,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1600 | 1594 | ||
1601 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1595 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
1602 | if (!highest_pfn) { | 1596 | if (!highest_pfn) { |
1603 | WARN(!kvm_para_available(), KERN_WARNING | 1597 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); |
1604 | "WARNING: strange, CPU MTRRs all blank?\n"); | ||
1605 | return 0; | 1598 | return 0; |
1606 | } | 1599 | } |
1607 | 1600 | ||
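The refactor above splits mtrr_cleanup() into helpers but keeps the same brute-force search: every power-of-two gran_size from 64K upward and every chunk_size from gran_size upward is scored, and the layout that loses no RAM coverage with the fewest variable MTRRs wins. A minimal user-space sketch of that search shape, where layout_cost() is a hypothetical stand-in for the real x86_setup_var_mtrrs()/sum_ranges() arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    struct layout {
            int num_reg;            /* variable MTRRs the layout needs */
            uint64_t lost_bytes;    /* RAM coverage lost by the layout */
    };

    /* hypothetical stand-in for the kernel's range arithmetic */
    static struct layout layout_cost(uint64_t chunk, uint64_t gran)
    {
            struct layout l = { .num_reg = 8, .lost_bytes = chunk % gran };
            return l;
    }

    int main(void)
    {
            uint64_t gran, chunk, best_gran = 0, best_chunk = 0;
            int best_reg = 256;     /* above any real MTRR count */

            for (gran = 1ULL << 16; gran < (1ULL << 32); gran <<= 1)
                    for (chunk = gran; chunk < (1ULL << 32); chunk <<= 1) {
                            struct layout l = layout_cost(chunk, gran);

                            if (l.lost_bytes || l.num_reg >= best_reg)
                                    continue;
                            best_reg = l.num_reg;
                            best_gran = gran;
                            best_chunk = chunk;
                    }
            printf("gran %lluK, chunk %lluK, %d regs\n",
                   (unsigned long long)(best_gran >> 10),
                   (unsigned long long)(best_chunk >> 10), best_reg);
            return 0;
    }

mtrr_search_optimal_index() adds one constraint on top of this: it scans min_loss_pfn downward and picks the smallest register count with zero loss while still leaving nr_mtrr_spare_reg registers spare.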
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b23..ffd60409cc6d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -8,11 +8,6 @@ | |||
8 | #define MTRRcap_MSR 0x0fe | 8 | #define MTRRcap_MSR 0x0fe |
9 | #define MTRRdefType_MSR 0x2ff | 9 | #define MTRRdefType_MSR 0x2ff |
10 | 10 | ||
11 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
12 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
13 | |||
14 | #define NUM_FIXED_RANGES 88 | ||
15 | #define MAX_VAR_RANGES 256 | ||
16 | #define MTRRfix64K_00000_MSR 0x250 | 11 | #define MTRRfix64K_00000_MSR 0x250 |
17 | #define MTRRfix16K_80000_MSR 0x258 | 12 | #define MTRRfix16K_80000_MSR 0x258 |
18 | #define MTRRfix16K_A0000_MSR 0x259 | 13 | #define MTRRfix16K_A0000_MSR 0x259 |
@@ -29,11 +24,7 @@ | |||
29 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 | 24 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 |
30 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 | 25 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 |
31 | 26 | ||
32 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | 27 | extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
33 | an 8 bit field: */ | ||
34 | typedef u8 mtrr_type; | ||
35 | |||
36 | extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | ||
37 | 28 | ||
38 | struct mtrr_ops { | 29 | struct mtrr_ops { |
39 | u32 vendor; | 30 | u32 vendor; |
@@ -70,13 +61,6 @@ struct set_mtrr_context { | |||
70 | u32 ccr3; | 61 | u32 ccr3; |
71 | }; | 62 | }; |
72 | 63 | ||
73 | struct mtrr_var_range { | ||
74 | u32 base_lo; | ||
75 | u32 base_hi; | ||
76 | u32 mask_lo; | ||
77 | u32 mask_hi; | ||
78 | }; | ||
79 | |||
80 | void set_mtrr_done(struct set_mtrr_context *ctxt); | 64 | void set_mtrr_done(struct set_mtrr_context *ctxt); |
81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | 65 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); |
82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | 66 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); |
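The macros deleted from this header encode how the variable-range MTRR MSRs are laid out: the base and mask registers for range N sit at consecutive MSR numbers starting at 0x200. The hunk only removes them here; going by the MAX_VAR_RANGES to MTRR_MAX_VAR_RANGES rename, the definitions presumably now come from a shared asm header. The arithmetic in isolation:

    #include <stdio.h>

    #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
    #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)

    int main(void)
    {
            int reg;

            for (reg = 0; reg < 3; reg++)
                    printf("range %d: base MSR %#x, mask MSR %#x\n",
                           reg, MTRRphysBase_MSR(reg), MTRRphysMask_MSR(reg));
            return 0;   /* 0x200/0x201, 0x202/0x203, 0x204/0x205 */
    }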
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c new file mode 100644 index 000000000000..284c399e3234 --- /dev/null +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -0,0 +1,112 @@ | |||
1 | /* | ||
2 | * VMware Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/dmi.h> | ||
25 | #include <asm/div64.h> | ||
26 | #include <asm/vmware.h> | ||
27 | |||
28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | ||
29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | ||
30 | #define VMWARE_HYPERVISOR_PORT 0x5658 | ||
31 | |||
32 | #define VMWARE_PORT_CMD_GETVERSION 10 | ||
33 | #define VMWARE_PORT_CMD_GETHZ 45 | ||
34 | |||
35 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ | ||
36 | __asm__("inl (%%dx)" : \ | ||
37 | "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ | ||
38 | "0"(VMWARE_HYPERVISOR_MAGIC), \ | ||
39 | "1"(VMWARE_PORT_CMD_##cmd), \ | ||
40 | "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ | ||
41 | "memory"); | ||
42 | |||
43 | static inline int __vmware_platform(void) | ||
44 | { | ||
45 | uint32_t eax, ebx, ecx, edx; | ||
46 | VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); | ||
47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | ||
48 | } | ||
49 | |||
50 | static unsigned long __vmware_get_tsc_khz(void) | ||
51 | { | ||
52 | uint64_t tsc_hz; | ||
53 | uint32_t eax, ebx, ecx, edx; | ||
54 | |||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
56 | |||
57 | if (ebx == UINT_MAX) | ||
58 | return 0; | ||
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
60 | do_div(tsc_hz, 1000); | ||
61 | BUG_ON(tsc_hz >> 32); | ||
62 | return tsc_hz; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * While checking the dmi string information, just checking the product | ||
67 | * serial key should be enough, as this will always have a VMware | ||
68 | * specific string when running under VMware hypervisor. | ||
69 | */ | ||
70 | int vmware_platform(void) | ||
71 | { | ||
72 | if (cpu_has_hypervisor) { | ||
73 | unsigned int eax, ebx, ecx, edx; | ||
74 | char hyper_vendor_id[13]; | ||
75 | |||
76 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | ||
77 | memcpy(hyper_vendor_id + 0, &ebx, 4); | ||
78 | memcpy(hyper_vendor_id + 4, &ecx, 4); | ||
79 | memcpy(hyper_vendor_id + 8, &edx, 4); | ||
80 | hyper_vendor_id[12] = '\0'; | ||
81 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
82 | return 1; | ||
83 | } else if (dmi_available && dmi_name_in_serial("VMware") && | ||
84 | __vmware_platform()) | ||
85 | return 1; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | unsigned long vmware_get_tsc_khz(void) | ||
91 | { | ||
92 | BUG_ON(!vmware_platform()); | ||
93 | return __vmware_get_tsc_khz(); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | ||
98 | * Still, due to timing difference when running on virtual cpus, the TSC can | ||
99 | * be marked as unstable in some cases. For example, the TSC sync check at | ||
100 | * bootup can fail due to a marginal offset between vcpus' TSCs (though the | ||
101 | * TSCs do not drift from each other). Also, the ACPI PM timer clocksource | ||
102 | * is not suitable as a watchdog when running on a hypervisor because the | ||
103 | * kernel may miss a wrap of the counter if the vcpu is descheduled for a | ||
104 | * long time. To skip these checks at runtime we set these capability bits, | ||
105 | * so that the kernel could just trust the hypervisor with providing a | ||
106 | * reliable virtual TSC that is suitable for timekeeping. | ||
107 | */ | ||
108 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | ||
109 | { | ||
110 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
111 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | ||
112 | } | ||
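The CPUID half of vmware_platform() is easy to reproduce from user space: leaf 0x40000000 is reserved for hypervisor vendor information and returns the signature in EBX, ECX, EDX. A sketch for GCC/Clang on x86 — only the CPUID path, since the backdoor-port half needs I/O privilege; on bare metal the leaf is meaningless unless the hypervisor bit (CPUID.1:ECX[31]) is set, which is what the cpu_has_hypervisor check above guards against:

    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>          /* GCC/Clang __cpuid() macro */

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;
            char sig[13];

            __cpuid(0x40000000, eax, ebx, ecx, edx);
            memcpy(sig + 0, &ebx, 4);
            memcpy(sig + 4, &ecx, 4);
            memcpy(sig + 8, &edx, 4);
            sig[12] = '\0';

            printf("hypervisor signature: \"%s\"%s\n", sig,
                   strcmp(sig, "VMwareVMware") ? "" : " (VMware)");
            return 0;
    }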
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649b..2ac1f0c2beb3 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -39,10 +39,10 @@ | |||
39 | #include <linux/device.h> | 39 | #include <linux/device.h> |
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/uaccess.h> | ||
42 | 43 | ||
43 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
44 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
45 | #include <asm/uaccess.h> | ||
46 | #include <asm/system.h> | 46 | #include <asm/system.h> |
47 | 47 | ||
48 | static struct class *cpuid_class; | 48 | static struct class *cpuid_class; |
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) | |||
82 | } | 82 | } |
83 | 83 | ||
84 | static ssize_t cpuid_read(struct file *file, char __user *buf, | 84 | static ssize_t cpuid_read(struct file *file, char __user *buf, |
85 | size_t count, loff_t * ppos) | 85 | size_t count, loff_t *ppos) |
86 | { | 86 | { |
87 | char __user *tmp = buf; | 87 | char __user *tmp = buf; |
88 | struct cpuid_regs cmd; | 88 | struct cpuid_regs cmd; |
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
117 | unsigned int cpu; | 117 | unsigned int cpu; |
118 | struct cpuinfo_x86 *c; | 118 | struct cpuinfo_x86 *c; |
119 | int ret = 0; | 119 | int ret = 0; |
120 | 120 | ||
121 | lock_kernel(); | 121 | lock_kernel(); |
122 | 122 | ||
123 | cpu = iminor(file->f_path.dentry->d_inode); | 123 | cpu = iminor(file->f_path.dentry->d_inode); |
124 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | 124 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { |
125 | ret = -ENXIO; /* No such CPU */ | 125 | ret = -ENXIO; /* No such CPU */ |
126 | goto out; | 126 | goto out; |
127 | } | 127 | } |
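For context on what this driver exposes: reads on /dev/cpu/N/cpuid execute CPUID on CPU N with the file offset supplying the input leaf, returning one 16-byte eax/ebx/ecx/edx record per read. A user-space sketch (needs root and the cpuid module loaded; the offset-as-leaf convention is how this driver has traditionally behaved, so treat it as an assumption rather than a spec):

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            uint32_t r[4];      /* eax, ebx, ecx, edx */
            int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* offset 0 = leaf 0: max basic leaf + vendor string */
            if (pread(fd, r, sizeof(r), 0) != sizeof(r)) {
                    perror("pread");
                    return 1;
            }
            /* vendor string is assembled from ebx, edx, ecx in that order */
            printf("max leaf %u, vendor %.4s%.4s%.4s\n",
                   r[0], (char *)&r[1], (char *)&r[3], (char *)&r[2]);
            close(fd);
            return 0;
    }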
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index d84a852e4cd7..ad7f2a696f4a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -24,10 +24,11 @@ | |||
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/hpet.h> | 25 | #include <asm/hpet.h> |
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <asm/smp.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | ||
29 | 30 | ||
30 | #include <mach_ipi.h> | 31 | #include <asm/genapic.h> |
31 | 32 | ||
32 | 33 | ||
33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 34 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) | |||
49 | #endif | 50 | #endif |
50 | crash_save_cpu(regs, cpu); | 51 | crash_save_cpu(regs, cpu); |
51 | 52 | ||
53 | /* Disable VMX or SVM if needed. | ||
54 | * | ||
55 | * We need to disable virtualization on all CPUs. | ||
56 | * Having VMX or SVM enabled on any CPU may break rebooting | ||
57 | * after the kdump kernel has finished its task. | ||
58 | */ | ||
59 | cpu_emergency_vmxoff(); | ||
60 | cpu_emergency_svm_disable(); | ||
61 | |||
52 | disable_local_APIC(); | 62 | disable_local_APIC(); |
53 | } | 63 | } |
54 | 64 | ||
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
80 | local_irq_disable(); | 90 | local_irq_disable(); |
81 | 91 | ||
82 | kdump_nmi_shootdown_cpus(); | 92 | kdump_nmi_shootdown_cpus(); |
93 | |||
94 | /* Booting kdump kernel with VMX or SVM enabled won't work, | ||
95 | * because (among other limitations) we can't disable paging | ||
96 | * with the virt flags. | ||
97 | */ | ||
98 | cpu_emergency_vmxoff(); | ||
99 | cpu_emergency_svm_disable(); | ||
100 | |||
83 | lapic_shutdown(); | 101 | lapic_shutdown(); |
84 | #if defined(CONFIG_X86_IO_APIC) | 102 | #if defined(CONFIG_X86_IO_APIC) |
85 | disable_IO_APIC(); | 103 | disable_IO_APIC(); |
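cpu_emergency_vmxoff() and cpu_emergency_svm_disable() come from the new <asm/virtext.h> included above. The VMX side boils down to: if CR4.VMXE is set the CPU may be in VMX root operation, so execute VMXOFF and clear the bit before the kdump kernel boots. A rough sketch of that idea only — not the actual virtext.h code, which also has to cope with being called from NMI context on a CPU that never executed VMXON:

    /* sketch only; assumes VMX root operation whenever CR4.VMXE is set */
    #define X86_CR4_VMXE (1UL << 13)

    static unsigned long sketch_read_cr4(void)
    {
            unsigned long cr4;

            asm volatile("mov %%cr4, %0" : "=r" (cr4));
            return cr4;
    }

    static void sketch_vmxoff(void)
    {
            unsigned long cr4 = sketch_read_cr4();

            if (cr4 & X86_CR4_VMXE) {
                    asm volatile("vmxoff" : : : "cc");
                    asm volatile("mov %0, %%cr4"
                                 : : "r" (cr4 & ~X86_CR4_VMXE));
            }
    }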
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2b69994fd3a8..169a120587be 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -6,23 +6,20 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2009 Intel Corporation. |
20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
21 | */ | 20 | */ |
22 | 21 | ||
23 | 22 | ||
24 | #ifdef CONFIG_X86_DS | ||
25 | |||
26 | #include <asm/ds.h> | 23 | #include <asm/ds.h> |
27 | 24 | ||
28 | #include <linux/errno.h> | 25 | #include <linux/errno.h> |
@@ -30,22 +27,69 @@ | |||
30 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
31 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
32 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | ||
33 | 31 | ||
34 | 32 | ||
35 | /* | 33 | /* |
36 | * The configuration for a particular DS hardware implementation. | 34 | * The configuration for a particular DS hardware implementation. |
37 | */ | 35 | */ |
38 | struct ds_configuration { | 36 | struct ds_configuration { |
39 | /* the size of the DS structure in bytes */ | 37 | /* the name of the configuration */ |
40 | unsigned char sizeof_ds; | 38 | const char *name; |
41 | /* the size of one pointer-typed field in the DS structure in bytes; | 39 | /* the size of one pointer-typed field in the DS structure and |
42 | this covers the first 8 fields related to buffer management. */ | 40 | in the BTS and PEBS buffers in bytes; |
41 | this covers the first 8 DS fields related to buffer management. */ | ||
43 | unsigned char sizeof_field; | 42 | unsigned char sizeof_field; |
44 | /* the size of a BTS/PEBS record in bytes */ | 43 | /* the size of a BTS/PEBS record in bytes */ |
45 | unsigned char sizeof_rec[2]; | 44 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | ||
46 | * by enum ds_feature */ | ||
47 | unsigned long ctl[dsf_ctl_max]; | ||
48 | }; | ||
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | ||
50 | |||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | ||
52 | |||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | ||
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | ||
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | |||
57 | #define BTS_CONTROL \ | ||
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | ||
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | |||
61 | |||
62 | /* | ||
63 | * A BTS or PEBS tracer. | ||
64 | * | ||
65 | * This holds the configuration of the tracer and serves as a handle | ||
66 | * to identify tracers. | ||
67 | */ | ||
68 | struct ds_tracer { | ||
69 | /* the DS context (partially) owned by this tracer */ | ||
70 | struct ds_context *context; | ||
71 | /* the buffer provided on ds_request() and its size in bytes */ | ||
72 | void *buffer; | ||
73 | size_t size; | ||
74 | }; | ||
75 | |||
76 | struct bts_tracer { | ||
77 | /* the common DS part */ | ||
78 | struct ds_tracer ds; | ||
79 | /* the trace including the DS configuration */ | ||
80 | struct bts_trace trace; | ||
81 | /* buffer overflow notification function */ | ||
82 | bts_ovfl_callback_t ovfl; | ||
46 | }; | 83 | }; |
47 | static struct ds_configuration ds_cfg; | ||
48 | 84 | ||
85 | struct pebs_tracer { | ||
86 | /* the common DS part */ | ||
87 | struct ds_tracer ds; | ||
88 | /* the trace including the DS configuration */ | ||
89 | struct pebs_trace trace; | ||
90 | /* buffer overflow notification function */ | ||
91 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | ||
49 | 93 | ||
50 | /* | 94 | /* |
51 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | 95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 |
@@ -111,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
111 | 155 | ||
112 | 156 | ||
113 | /* | 157 | /* |
114 | * Locking is done only for allocating BTS or PEBS resources and for | 158 | * Locking is done only for allocating BTS or PEBS resources. |
115 | * guarding context and buffer memory allocation. | ||
116 | * | ||
117 | * Most functions require the current task to own the ds context part | ||
118 | * they are going to access. All the locking is done when validating | ||
119 | * access to the context. | ||
120 | */ | 159 | */ |
121 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); | 160 | static DEFINE_SPINLOCK(ds_lock); |
122 | |||
123 | /* | ||
124 | * Validate that the current task is allowed to access the BTS/PEBS | ||
125 | * buffer of the parameter task. | ||
126 | * | ||
127 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
128 | */ | ||
129 | static inline int ds_validate_access(struct ds_context *context, | ||
130 | enum ds_qualifier qual) | ||
131 | { | ||
132 | if (!context) | ||
133 | return -EPERM; | ||
134 | |||
135 | if (context->owner[qual] == current) | ||
136 | return 0; | ||
137 | |||
138 | return -EPERM; | ||
139 | } | ||
140 | 161 | ||
141 | 162 | ||
142 | /* | 163 | /* |
@@ -152,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context, | |||
152 | * >0 number of per-thread tracers | 173 | * >0 number of per-thread tracers |
153 | * <0 number of per-cpu tracers | 174 | * <0 number of per-cpu tracers |
154 | * | 175 | * |
155 | * The below functions to get and put tracers and to check the | ||
156 | * allocation type require the ds_lock to be held by the caller. | ||
157 | * | ||
158 | * Tracers essentially give the number of ds contexts for a certain | 176 | * Tracers essentially give the number of ds contexts for a certain |
159 | * type of allocation. | 177 | * type of allocation. |
160 | */ | 178 | */ |
161 | static long tracers; | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
162 | 180 | ||
163 | static inline void get_tracer(struct task_struct *task) | 181 | static inline void get_tracer(struct task_struct *task) |
164 | { | 182 | { |
165 | tracers += (task ? 1 : -1); | 183 | if (task) |
184 | atomic_inc(&tracers); | ||
185 | else | ||
186 | atomic_dec(&tracers); | ||
166 | } | 187 | } |
167 | 188 | ||
168 | static inline void put_tracer(struct task_struct *task) | 189 | static inline void put_tracer(struct task_struct *task) |
169 | { | 190 | { |
170 | tracers -= (task ? 1 : -1); | 191 | if (task) |
192 | atomic_dec(&tracers); | ||
193 | else | ||
194 | atomic_inc(&tracers); | ||
171 | } | 195 | } |
172 | 196 | ||
173 | static inline int check_tracer(struct task_struct *task) | 197 | static inline int check_tracer(struct task_struct *task) |
174 | { | 198 | { |
175 | return (task ? (tracers >= 0) : (tracers <= 0)); | 199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
176 | } | 202 | } |
177 | 203 | ||
178 | 204 | ||
@@ -185,100 +211,83 @@ static inline int check_tracer(struct task_struct *task) | |||
185 | * | 211 | * |
186 | * Contexts are use-counted. They are allocated on first access and | 212 | * Contexts are use-counted. They are allocated on first access and |
187 | * deallocated when the last user puts the context. | 213 | * deallocated when the last user puts the context. |
188 | * | ||
189 | * We distinguish between an allocating and a non-allocating get of a | ||
190 | * context: | ||
191 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
192 | * requires the caller to hold the global ds_lock. | ||
193 | * - the non-allocating get is used for all other cases. A | ||
194 | * non-existing context indicates an error. It acquires and releases | ||
195 | * the ds_lock itself for obtaining the context. | ||
196 | * | ||
197 | * A context and its DS configuration are allocated and deallocated | ||
198 | * together. A context always has a DS configuration of the | ||
199 | * appropriate size. | ||
200 | */ | ||
201 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
202 | |||
203 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
204 | |||
205 | /* | ||
206 | * Returns the pointer to the parameter task's context or to the | ||
207 | * system-wide context, if task is NULL. | ||
208 | * | ||
209 | * Increases the use count of the returned context, if not NULL. | ||
210 | */ | 214 | */ |
211 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 215 | struct ds_context { |
212 | { | 216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ |
213 | struct ds_context *context; | 217 | unsigned char ds[MAX_SIZEOF_DS]; |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | ||
219 | struct bts_tracer *bts_master; | ||
220 | struct pebs_tracer *pebs_master; | ||
221 | /* use count */ | ||
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
214 | 230 | ||
215 | spin_lock(&ds_lock); | 231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); |
216 | 232 | ||
217 | context = (task ? task->thread.ds_ctx : this_system_context); | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
218 | if (context) | ||
219 | context->count++; | ||
220 | 234 | ||
221 | spin_unlock(&ds_lock); | ||
222 | 235 | ||
223 | return context; | 236 | static inline struct ds_context *ds_get_context(struct task_struct *task) |
224 | } | ||
225 | |||
226 | /* | ||
227 | * Same as ds_get_context, but allocates the context and it's DS | ||
228 | * structure, if necessary; returns NULL; if out of memory. | ||
229 | * | ||
230 | * pre: requires ds_lock to be held | ||
231 | */ | ||
232 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
233 | { | 237 | { |
234 | struct ds_context **p_context = | 238 | struct ds_context **p_context = |
235 | (task ? &task->thread.ds_ctx : &this_system_context); | 239 | (task ? &task->thread.ds_ctx : &system_context); |
236 | struct ds_context *context = *p_context; | 240 | struct ds_context *context = NULL; |
237 | 241 | struct ds_context *new_context = NULL; | |
238 | if (!context) { | 242 | unsigned long irq; |
239 | context = kzalloc(sizeof(*context), GFP_KERNEL); | 243 | |
240 | 244 | /* Chances are small that we already have a context. */ | |
241 | if (!context) | 245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
242 | return NULL; | 246 | if (!new_context) |
247 | return NULL; | ||
243 | 248 | ||
244 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 249 | spin_lock_irqsave(&ds_lock, irq); |
245 | if (!context->ds) { | ||
246 | kfree(context); | ||
247 | return NULL; | ||
248 | } | ||
249 | 250 | ||
250 | *p_context = context; | 251 | context = *p_context; |
252 | if (!context) { | ||
253 | context = new_context; | ||
251 | 254 | ||
252 | context->this = p_context; | 255 | context->this = p_context; |
253 | context->task = task; | 256 | context->task = task; |
257 | context->count = 0; | ||
254 | 258 | ||
255 | if (task) | 259 | if (task) |
256 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | 260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); |
257 | 261 | ||
258 | if (!task || (task == current)) | 262 | if (!task || (task == current)) |
259 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | 263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); |
260 | 264 | ||
261 | get_tracer(task); | 265 | *p_context = context; |
262 | } | 266 | } |
263 | 267 | ||
264 | context->count++; | 268 | context->count++; |
265 | 269 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | ||
271 | |||
272 | if (context != new_context) | ||
273 | kfree(new_context); | ||
274 | |||
266 | return context; | 275 | return context; |
267 | } | 276 | } |
268 | 277 | ||
269 | /* | ||
270 | * Decreases the use count of the parameter context, if not NULL. | ||
271 | * Deallocates the context, if the use count reaches zero. | ||
272 | */ | ||
273 | static inline void ds_put_context(struct ds_context *context) | 278 | static inline void ds_put_context(struct ds_context *context) |
274 | { | 279 | { |
280 | unsigned long irq; | ||
281 | |||
275 | if (!context) | 282 | if (!context) |
276 | return; | 283 | return; |
277 | 284 | ||
278 | spin_lock(&ds_lock); | 285 | spin_lock_irqsave(&ds_lock, irq); |
279 | 286 | ||
280 | if (--context->count) | 287 | if (--context->count) { |
281 | goto out; | 288 | spin_unlock_irqrestore(&ds_lock, irq); |
289 | return; | ||
290 | } | ||
282 | 291 | ||
283 | *(context->this) = NULL; | 292 | *(context->this) = NULL; |
284 | 293 | ||
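ds_get_context() above now follows the classic optimistic-allocation pattern: kzalloc() may sleep, so it runs before the spinlock is taken, and the freshly allocated context is either installed under the lock or freed after it when another caller got there first. The bare pattern, with illustrative names (slot and obj_lock are not kernel symbols):

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct obj {
            unsigned long count;
    };

    static struct obj *slot;            /* illustrative, not a kernel symbol */
    static DEFINE_SPINLOCK(obj_lock);

    static struct obj *get_obj(void)
    {
            struct obj *fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
            struct obj *o;
            unsigned long flags;

            if (!fresh)
                    return NULL;

            spin_lock_irqsave(&obj_lock, flags);
            o = slot;
            if (!o)
                    slot = o = fresh;   /* won the race: install ours */
            o->count++;
            spin_unlock_irqrestore(&obj_lock, flags);

            if (o != fresh)             /* lost the race: drop the spare */
                    kfree(fresh);
            return o;
    }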
@@ -288,132 +297,263 @@ static inline void ds_put_context(struct ds_context *context) | |||
288 | if (!context->task || (context->task == current)) | 297 | if (!context->task || (context->task == current)) |
289 | wrmsrl(MSR_IA32_DS_AREA, 0); | 298 | wrmsrl(MSR_IA32_DS_AREA, 0); |
290 | 299 | ||
291 | put_tracer(context->task); | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
292 | 301 | ||
293 | /* free any leftover buffers from tracers that did not | ||
294 | * deallocate them properly. */ | ||
295 | kfree(context->buffer[ds_bts]); | ||
296 | kfree(context->buffer[ds_pebs]); | ||
297 | kfree(context->ds); | ||
298 | kfree(context); | 302 | kfree(context); |
299 | out: | ||
300 | spin_unlock(&ds_lock); | ||
301 | } | 303 | } |
302 | 304 | ||
303 | 305 | ||
304 | /* | 306 | /* |
305 | * Handle a buffer overflow | 307 | * Call the tracer's callback on a buffer overflow. |
306 | * | 308 | * |
307 | * task: the task whose buffers are overflowing; | ||
308 | * NULL for a buffer overflow on the current cpu | ||
309 | * context: the ds context | 309 | * context: the ds context |
310 | * qual: the buffer type | 310 | * qual: the buffer type |
311 | */ | 311 | */ |
312 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | 312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
313 | enum ds_qualifier qual) | ||
314 | { | 313 | { |
315 | if (!context) | 314 | switch (qual) { |
316 | return; | 315 | case ds_bts: |
317 | 316 | if (context->bts_master && | |
318 | if (context->callback[qual]) | 317 | context->bts_master->ovfl) |
319 | (*context->callback[qual])(task); | 318 | context->bts_master->ovfl(context->bts_master); |
320 | 319 | break; | |
321 | /* todo: do some more overflow handling */ | 320 | case ds_pebs: |
321 | if (context->pebs_master && | ||
322 | context->pebs_master->ovfl) | ||
323 | context->pebs_master->ovfl(context->pebs_master); | ||
324 | break; | ||
325 | } | ||
322 | } | 326 | } |
323 | 327 | ||
324 | 328 | ||
325 | /* | 329 | /* |
326 | * Allocate a non-pageable buffer of the parameter size. | 330 | * Write raw data into the BTS or PEBS buffer. |
327 | * Checks the memory and the locked memory rlimit. | ||
328 | * | 331 | * |
329 | * Returns the buffer, if successful; | 332 | * The remainder of any partially written record is zeroed out. |
330 | * NULL, if out of memory or rlimit exceeded. | ||
331 | * | 333 | * |
332 | * size: the requested buffer size in bytes | 334 | * context: the DS context |
333 | * pages (out): if not NULL, contains the number of pages reserved | 335 | * qual: the buffer type |
336 | * record: the data to write | ||
337 | * size: the size of the data | ||
334 | */ | 338 | */ |
335 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | 339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | ||
336 | { | 341 | { |
337 | unsigned long rlim, vm, pgsz; | 342 | int bytes_written = 0; |
338 | void *buffer; | ||
339 | 343 | ||
340 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 344 | if (!record) |
345 | return -EINVAL; | ||
341 | 346 | ||
342 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | 347 | while (size) { |
343 | vm = current->mm->total_vm + pgsz; | 348 | unsigned long base, index, end, write_end, int_th; |
344 | if (rlim < vm) | 349 | unsigned long write_size, adj_write_size; |
345 | return NULL; | ||
346 | 350 | ||
347 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 351 | /* |
348 | vm = current->mm->locked_vm + pgsz; | 352 | * write as much as possible without producing an |
349 | if (rlim < vm) | 353 | * overflow interrupt. |
350 | return NULL; | 354 | * |
355 | * interrupt_threshold must either be | ||
356 | * - bigger than absolute_maximum or | ||
357 | * - point to a record between buffer_base and absolute_maximum | ||
358 | * | ||
359 | * index points to a valid record. | ||
360 | */ | ||
361 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
362 | index = ds_get(context->ds, qual, ds_index); | ||
363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
351 | 365 | ||
352 | buffer = kzalloc(size, GFP_KERNEL); | 366 | write_end = min(end, int_th); |
353 | if (!buffer) | 367 | |
354 | return NULL; | 368 | /* if we are already beyond the interrupt threshold, |
369 | * we fill the entire buffer */ | ||
370 | if (write_end <= index) | ||
371 | write_end = end; | ||
355 | 372 | ||
356 | current->mm->total_vm += pgsz; | 373 | if (write_end <= index) |
357 | current->mm->locked_vm += pgsz; | 374 | break; |
358 | 375 | ||
359 | if (pages) | 376 | write_size = min((unsigned long) size, write_end - index); |
360 | *pages = pgsz; | 377 | memcpy((void *)index, record, write_size); |
361 | 378 | ||
362 | return buffer; | 379 | record = (const char *)record + write_size; |
380 | size -= write_size; | ||
381 | bytes_written += write_size; | ||
382 | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
385 | |||
386 | /* zero out trailing bytes */ | ||
387 | memset((char *)index + write_size, 0, | ||
388 | adj_write_size - write_size); | ||
389 | index += adj_write_size; | ||
390 | |||
391 | if (index >= end) | ||
392 | index = base; | ||
393 | ds_set(context->ds, qual, ds_index, index); | ||
394 | |||
395 | if (index >= int_th) | ||
396 | ds_overflow(context, qual); | ||
397 | } | ||
398 | |||
399 | return bytes_written; | ||
363 | } | 400 | } |
364 | 401 | ||
365 | static int ds_request(struct task_struct *task, void *base, size_t size, | 402 | |
366 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | 403 | /* |
404 | * Branch Trace Store (BTS) uses the following format. Different | ||
405 | * architectures vary in the size of those fields. | ||
406 | * - source linear address | ||
407 | * - destination linear address | ||
408 | * - flags | ||
409 | * | ||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
411 | * architectures use 32bit pointers in 32bit mode. | ||
412 | * | ||
413 | * We compute the base address for the first 8 fields based on: | ||
414 | * - the field size stored in the DS configuration | ||
415 | * - the relative field position | ||
416 | * | ||
417 | * In order to store additional information in the BTS buffer, we use | ||
418 | * a special source address to indicate that the record requires | ||
419 | * special interpretation. | ||
420 | * | ||
421 | * Netburst indicated via a bit in the flags field whether the branch | ||
422 | * was predicted; this is ignored. | ||
423 | * | ||
424 | * We use two levels of abstraction: | ||
425 | * - the raw data level defined here | ||
426 | * - an arch-independent level defined in ds.h | ||
427 | */ | ||
428 | |||
429 | enum bts_field { | ||
430 | bts_from, | ||
431 | bts_to, | ||
432 | bts_flags, | ||
433 | |||
434 | bts_qual = bts_from, | ||
435 | bts_jiffies = bts_to, | ||
436 | bts_pid = bts_flags, | ||
437 | |||
438 | bts_qual_mask = (bts_qual_max - 1), | ||
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
440 | }; | ||
441 | |||
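The escape encoding defined just above deserves a note: bts_escape occupies the very top of the linear address space, which never holds code, so a record whose from field matches it can safely be reinterpreted as qualifier plus payload. A toy decoder, assuming the four qualifiers handled by bts_write() further down (so bts_qual_max == 4 and the mask is 3; the real enum lives in ds.h, outside this hunk):

    #include <stdio.h>

    #define QUAL_MASK 3UL                   /* assumed bts_qual_max - 1 */
    #define ESCAPE    (~0UL & ~QUAL_MASK)

    int main(void)
    {
            unsigned long from = ESCAPE | 2UL;  /* timestamp-style record */

            if ((from & ~QUAL_MASK) == ESCAPE)
                    printf("escape record, qualifier %lu\n",
                           from & QUAL_MASK);
            else
                    printf("branch from %#lx\n", from);
            return 0;
    }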
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
367 | { | 443 | { |
368 | struct ds_context *context; | 444 | base += (ds_cfg.sizeof_field * field); |
369 | unsigned long buffer, adj; | 445 | return *(unsigned long *)base; |
370 | const unsigned long alignment = (1 << 3); | 446 | } |
371 | int error = 0; | ||
372 | 447 | ||
373 | if (!ds_cfg.sizeof_ds) | 448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
374 | return -EOPNOTSUPP; | 449 | { |
450 | base += (ds_cfg.sizeof_field * field); | ||
451 | (*(unsigned long *)base) = val; | ||
452 | } | ||
375 | 453 | ||
376 | /* we require some space to do alignment adjustments below */ | 454 | |
377 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | 455 | /* |
456 | * The raw BTS data is architecture dependent. | ||
457 | * | ||
458 | * For higher-level users, we give an arch-independent view. | ||
459 | * - ds.h defines struct bts_struct | ||
460 | * - bts_read translates one raw bts record into a bts_struct | ||
461 | * - bts_write translates one bts_struct into the raw format and | ||
462 | * writes it into the top of the parameter tracer's buffer. | ||
463 | * | ||
464 | * return: bytes read/written on success; -Eerrno, otherwise | ||
465 | */ | ||
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | ||
467 | struct bts_struct *out) | ||
468 | { | ||
469 | if (!tracer) | ||
378 | return -EINVAL; | 470 | return -EINVAL; |
379 | 471 | ||
380 | /* buffer overflow notification is not yet implemented */ | 472 | if (at < tracer->trace.ds.begin) |
381 | if (ovfl) | 473 | return -EINVAL; |
382 | return -EOPNOTSUPP; | ||
383 | 474 | ||
475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
476 | return -EINVAL; | ||
384 | 477 | ||
385 | spin_lock(&ds_lock); | 478 | memset(out, 0, sizeof(*out)); |
479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | ||
480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | ||
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | ||
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | ||
483 | } else { | ||
484 | out->qualifier = bts_branch; | ||
485 | out->variant.lbr.from = bts_get(at, bts_from); | ||
486 | out->variant.lbr.to = bts_get(at, bts_to); | ||
487 | |||
488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
489 | out->qualifier = bts_invalid; | ||
490 | } | ||
386 | 491 | ||
387 | if (!check_tracer(task)) | 492 | return ds_cfg.sizeof_rec[ds_bts]; |
388 | return -EPERM; | 493 | } |
389 | 494 | ||
390 | error = -ENOMEM; | 495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) |
391 | context = ds_alloc_context(task); | 496 | { |
392 | if (!context) | 497 | unsigned char raw[MAX_SIZEOF_BTS]; |
393 | goto out_unlock; | ||
394 | 498 | ||
395 | error = -EALREADY; | 499 | if (!tracer) |
396 | if (context->owner[qual] == current) | 500 | return -EINVAL; |
397 | goto out_unlock; | ||
398 | error = -EPERM; | ||
399 | if (context->owner[qual] != NULL) | ||
400 | goto out_unlock; | ||
401 | context->owner[qual] = current; | ||
402 | 501 | ||
403 | spin_unlock(&ds_lock); | 502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) |
503 | return -EOVERFLOW; | ||
404 | 504 | ||
505 | switch (in->qualifier) { | ||
506 | case bts_invalid: | ||
507 | bts_set(raw, bts_from, 0); | ||
508 | bts_set(raw, bts_to, 0); | ||
509 | bts_set(raw, bts_flags, 0); | ||
510 | break; | ||
511 | case bts_branch: | ||
512 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
513 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
514 | bts_set(raw, bts_flags, 0); | ||
515 | break; | ||
516 | case bts_task_arrives: | ||
517 | case bts_task_departs: | ||
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | ||
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | ||
521 | break; | ||
522 | default: | ||
523 | return -EINVAL; | ||
524 | } | ||
405 | 525 | ||
406 | error = -ENOMEM; | 526 | return ds_write(tracer->ds.context, ds_bts, raw, |
407 | if (!base) { | 527 | ds_cfg.sizeof_rec[ds_bts]); |
408 | base = ds_allocate_buffer(size, &context->pages[qual]); | 528 | } |
409 | if (!base) | ||
410 | goto out_release; | ||
411 | 529 | ||
412 | context->buffer[qual] = base; | ||
413 | } | ||
414 | error = 0; | ||
415 | 530 | ||
416 | context->callback[qual] = ovfl; | 531 | static void ds_write_config(struct ds_context *context, |
532 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
533 | { | ||
534 | unsigned char *ds = context->ds; | ||
535 | |||
536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
540 | } | ||
541 | |||
542 | static void ds_read_config(struct ds_context *context, | ||
543 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
544 | { | ||
545 | unsigned char *ds = context->ds; | ||
546 | |||
547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
551 | } | ||
552 | |||
553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
554 | void *base, size_t size, size_t ith, | ||
555 | unsigned int flags) { | ||
556 | unsigned long buffer, adj; | ||
417 | 557 | ||
418 | /* adjust the buffer address and size to meet alignment | 558 | /* adjust the buffer address and size to meet alignment |
419 | * constraints: | 559 | * constraints: |
@@ -425,395 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size, | |||
425 | */ | 565 | */ |
426 | buffer = (unsigned long)base; | 566 | buffer = (unsigned long)base; |
427 | 567 | ||
428 | adj = ALIGN(buffer, alignment) - buffer; | 568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; |
429 | buffer += adj; | 569 | buffer += adj; |
430 | size -= adj; | 570 | size -= adj; |
431 | 571 | ||
432 | size /= ds_cfg.sizeof_rec[qual]; | 572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
433 | size *= ds_cfg.sizeof_rec[qual]; | 573 | trace->size = ds_cfg.sizeof_rec[qual]; |
434 | |||
435 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
436 | ds_set(context->ds, qual, ds_index, buffer); | ||
437 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
438 | 574 | ||
439 | if (ovfl) { | 575 | size = (trace->n * trace->size); |
440 | /* todo: select a suitable interrupt threshold */ | ||
441 | } else | ||
442 | ds_set(context->ds, qual, | ||
443 | ds_interrupt_threshold, buffer + size + 1); | ||
444 | 576 | ||
445 | /* we keep the context until ds_release */ | 577 | trace->begin = (void *)buffer; |
446 | return error; | 578 | trace->top = trace->begin; |
447 | 579 | trace->end = (void *)(buffer + size); | |
448 | out_release: | 580 | /* The value for 'no threshold' is -1, which will set the |
449 | context->owner[qual] = NULL; | 581 | * threshold outside of the buffer, just like we want it. |
450 | ds_put_context(context); | 582 | */ |
451 | return error; | 583 | trace->ith = (void *)(buffer + size - ith); |
452 | |||
453 | out_unlock: | ||
454 | spin_unlock(&ds_lock); | ||
455 | ds_put_context(context); | ||
456 | return error; | ||
457 | } | ||
458 | 584 | ||
459 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | 585 | trace->flags = flags; |
460 | ds_ovfl_callback_t ovfl) | ||
461 | { | ||
462 | return ds_request(task, base, size, ovfl, ds_bts); | ||
463 | } | 586 | } |
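A minimal userspace sketch of the buffer arithmetic in ds_init_ds_trace() above, assuming the alignment is a power of two; SK_ALIGNMENT and the record size are illustrative stand-ins for DS_ALIGNMENT and ds_cfg.sizeof_rec[]:

    #include <stdio.h>

    #define SK_ALIGNMENT 32UL                 /* stand-in for DS_ALIGNMENT */
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned long base = 0x1005;      /* unaligned caller buffer */
            unsigned long size = 1024;        /* bytes handed in */
            unsigned long rec  = 24;          /* assumed sizeof_rec[ds_bts] */
            unsigned long th   = 4;           /* threshold, in records */

            unsigned long adj = ALIGN_UP(base, SK_ALIGNMENT) - base;
            base += adj;                      /* trace->begin */
            size -= adj;

            unsigned long n   = size / rec;   /* trace->n: whole records only */
            unsigned long end = base + n * rec;

            /* th == (size_t)-1 would land ith one byte past end, i.e.
             * "no threshold", as the comment in the hunk notes */
            unsigned long ith = end - th * rec;

            printf("begin=%#lx end=%#lx n=%lu ith=%#lx\n", base, end, n, ith);
            return 0;
    }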
464 | 587 | ||
465 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
466 | ds_ovfl_callback_t ovfl) | ||
467 | { | ||
468 | return ds_request(task, base, size, ovfl, ds_pebs); | ||
469 | } | ||
470 | 588 | ||
471 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | 589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
590 | enum ds_qualifier qual, struct task_struct *task, | ||
591 | void *base, size_t size, size_t th, unsigned int flags) | ||
472 | { | 592 | { |
473 | struct ds_context *context; | 593 | struct ds_context *context; |
474 | int error; | 594 | int error; |
475 | 595 | ||
476 | context = ds_get_context(task); | 596 | error = -EINVAL; |
477 | error = ds_validate_access(context, qual); | 597 | if (!base) |
478 | if (error < 0) | ||
479 | goto out; | 598 | goto out; |
480 | 599 | ||
481 | kfree(context->buffer[qual]); | 600 | /* we require some space to do alignment adjustments below */ |
482 | context->buffer[qual] = NULL; | 601 | error = -EINVAL; |
483 | 602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
484 | current->mm->total_vm -= context->pages[qual]; | 603 | goto out; |
485 | current->mm->locked_vm -= context->pages[qual]; | ||
486 | context->pages[qual] = 0; | ||
487 | context->owner[qual] = NULL; | ||
488 | |||
489 | /* | ||
490 | * we put the context twice: | ||
491 | * once for the ds_get_context | ||
492 | * once for the corresponding ds_request | ||
493 | */ | ||
494 | ds_put_context(context); | ||
495 | out: | ||
496 | ds_put_context(context); | ||
497 | return error; | ||
498 | } | ||
499 | 604 | ||
500 | int ds_release_bts(struct task_struct *task) | 605 | if (th != (size_t)-1) { |
501 | { | 606 | th *= ds_cfg.sizeof_rec[qual]; |
502 | return ds_release(task, ds_bts); | ||
503 | } | ||
504 | 607 | ||
505 | int ds_release_pebs(struct task_struct *task) | 608 | error = -EINVAL; |
506 | { | 609 | if (size <= th) |
507 | return ds_release(task, ds_pebs); | 610 | goto out; |
508 | } | 611 | } |
509 | 612 | ||
510 | static int ds_get_index(struct task_struct *task, size_t *pos, | 613 | tracer->buffer = base; |
511 | enum ds_qualifier qual) | 614 | tracer->size = size; |
512 | { | ||
513 | struct ds_context *context; | ||
514 | unsigned long base, index; | ||
515 | int error; | ||
516 | 615 | ||
616 | error = -ENOMEM; | ||
517 | context = ds_get_context(task); | 617 | context = ds_get_context(task); |
518 | error = ds_validate_access(context, qual); | 618 | if (!context) |
519 | if (error < 0) | ||
520 | goto out; | 619 | goto out; |
620 | tracer->context = context; | ||
521 | 621 | ||
522 | base = ds_get(context->ds, qual, ds_buffer_base); | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
523 | index = ds_get(context->ds, qual, ds_index); | ||
524 | 623 | ||
525 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | 624 | error = 0; |
526 | if (pos) | ||
527 | *pos = error; | ||
528 | out: | 625 | out: |
529 | ds_put_context(context); | ||
530 | return error; | 626 | return error; |
531 | } | 627 | } |
532 | 628 | ||
533 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | 629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
630 | void *base, size_t size, | ||
631 | bts_ovfl_callback_t ovfl, size_t th, | ||
632 | unsigned int flags) | ||
534 | { | 633 | { |
535 | return ds_get_index(task, pos, ds_bts); | 634 | struct bts_tracer *tracer; |
536 | } | 635 | unsigned long irq; |
537 | |||
538 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
539 | { | ||
540 | return ds_get_index(task, pos, ds_pebs); | ||
541 | } | ||
542 | |||
543 | static int ds_get_end(struct task_struct *task, size_t *pos, | ||
544 | enum ds_qualifier qual) | ||
545 | { | ||
546 | struct ds_context *context; | ||
547 | unsigned long base, end; | ||
548 | int error; | 636 | int error; |
549 | 637 | ||
550 | context = ds_get_context(task); | 638 | error = -EOPNOTSUPP; |
551 | error = ds_validate_access(context, qual); | 639 | if (!ds_cfg.ctl[dsf_bts]) |
552 | if (error < 0) | ||
553 | goto out; | 640 | goto out; |
554 | 641 | ||
555 | base = ds_get(context->ds, qual, ds_buffer_base); | 642 | /* buffer overflow notification is not yet implemented */ |
556 | end = ds_get(context->ds, qual, ds_absolute_maximum); | 643 | error = -EOPNOTSUPP; |
644 | if (ovfl) | ||
645 | goto out; | ||
557 | 646 | ||
558 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | 647 | error = -ENOMEM; |
559 | if (pos) | 648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
560 | *pos = error; | 649 | if (!tracer) |
561 | out: | 650 | goto out; |
562 | ds_put_context(context); | 651 | tracer->ovfl = ovfl; |
563 | return error; | ||
564 | } | ||
565 | 652 | ||
566 | int ds_get_bts_end(struct task_struct *task, size_t *pos) | 653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
567 | { | 654 | ds_bts, task, base, size, th, flags); |
568 | return ds_get_end(task, pos, ds_bts); | 655 | if (error < 0) |
569 | } | 656 | goto out_tracer; |
570 | 657 | ||
571 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) | ||
572 | { | ||
573 | return ds_get_end(task, pos, ds_pebs); | ||
574 | } | ||
575 | 658 | ||
576 | static int ds_access(struct task_struct *task, size_t index, | 659 | spin_lock_irqsave(&ds_lock, irq); |
577 | const void **record, enum ds_qualifier qual) | ||
578 | { | ||
579 | struct ds_context *context; | ||
580 | unsigned long base, idx; | ||
581 | int error; | ||
582 | 660 | ||
583 | if (!record) | 661 | error = -EPERM; |
584 | return -EINVAL; | 662 | if (!check_tracer(task)) |
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
585 | 665 | ||
586 | context = ds_get_context(task); | 666 | error = -EPERM; |
587 | error = ds_validate_access(context, qual); | 667 | if (tracer->ds.context->bts_master) |
588 | if (error < 0) | 668 | goto out_put_tracer; |
589 | goto out; | 669 | tracer->ds.context->bts_master = tracer; |
590 | 670 | ||
591 | base = ds_get(context->ds, qual, ds_buffer_base); | 671 | spin_unlock_irqrestore(&ds_lock, irq); |
592 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
593 | 672 | ||
594 | error = -EINVAL; | ||
595 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
596 | goto out; | ||
597 | 673 | ||
598 | *record = (const void *)idx; | 674 | tracer->trace.read = bts_read; |
599 | error = ds_cfg.sizeof_rec[qual]; | 675 | tracer->trace.write = bts_write; |
600 | out: | ||
601 | ds_put_context(context); | ||
602 | return error; | ||
603 | } | ||
604 | 676 | ||
605 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) | 677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
606 | { | 678 | ds_resume_bts(tracer); |
607 | return ds_access(task, index, record, ds_bts); | ||
608 | } | ||
609 | 679 | ||
610 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) | 680 | return tracer; |
611 | { | 681 | |
612 | return ds_access(task, index, record, ds_pebs); | 682 | out_put_tracer: |
683 | put_tracer(task); | ||
684 | out_unlock: | ||
685 | spin_unlock_irqrestore(&ds_lock, irq); | ||
686 | ds_put_context(tracer->ds.context); | ||
687 | out_tracer: | ||
688 | kfree(tracer); | ||
689 | out: | ||
690 | return ERR_PTR(error); | ||
613 | } | 691 | } |
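The API shape changes here: callers no longer pass a task plus raw buffer and get an int back, they receive an opaque tracer handle or an ERR_PTR(). A hedged caller-side sketch using only the signature and flags visible in this diff (the wrapper name and buffer are illustrative, and this is kernel-context code, not standalone):

    static int example_start_bts(struct task_struct *task, void *buf, size_t size)
    {
            struct bts_tracer *tracer;

            tracer = ds_request_bts(task, buf, size,
                                    NULL,          /* ovfl: not implemented yet */
                                    (size_t)-1,    /* no interrupt threshold */
                                    BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS);
            if (IS_ERR(tracer))
                    return PTR_ERR(tracer);

            /* ... later: const struct bts_trace *t = ds_read_bts(tracer); ... */

            ds_release_bts(tracer);
            return 0;
    }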
614 | 692 | ||
615 | static int ds_write(struct task_struct *task, const void *record, size_t size, | 693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
616 | enum ds_qualifier qual, int force) | 694 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | ||
696 | unsigned int flags) | ||
617 | { | 697 | { |
618 | struct ds_context *context; | 698 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
619 | int error; | 700 | int error; |
620 | 701 | ||
621 | if (!record) | 702 | /* buffer overflow notification is not yet implemented */ |
622 | return -EINVAL; | 703 | error = -EOPNOTSUPP; |
704 | if (ovfl) | ||
705 | goto out; | ||
623 | 706 | ||
624 | error = -EPERM; | 707 | error = -ENOMEM; |
625 | context = ds_get_context(task); | 708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
626 | if (!context) | 709 | if (!tracer) |
627 | goto out; | 710 | goto out; |
711 | tracer->ovfl = ovfl; | ||
628 | 712 | ||
629 | if (!force) { | 713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
630 | error = ds_validate_access(context, qual); | 714 | ds_pebs, task, base, size, th, flags); |
631 | if (error < 0) | 715 | if (error < 0) |
632 | goto out; | 716 | goto out_tracer; |
633 | } | ||
634 | 717 | ||
635 | error = 0; | 718 | spin_lock_irqsave(&ds_lock, irq); |
636 | while (size) { | ||
637 | unsigned long base, index, end, write_end, int_th; | ||
638 | unsigned long write_size, adj_write_size; | ||
639 | 719 | ||
640 | /* | 720 | error = -EPERM; |
641 | * write as much as possible without producing an | 721 | if (!check_tracer(task)) |
642 | * overflow interrupt. | 722 | goto out_unlock; |
643 | * | 723 | get_tracer(task); |
644 | * interrupt_threshold must either be | ||
645 | * - bigger than absolute_maximum or | ||
646 | * - point to a record between buffer_base and absolute_maximum | ||
647 | * | ||
648 | * index points to a valid record. | ||
649 | */ | ||
650 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
651 | index = ds_get(context->ds, qual, ds_index); | ||
652 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
653 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
654 | 724 | ||
655 | write_end = min(end, int_th); | 725 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | ||
727 | goto out_put_tracer; | ||
728 | tracer->ds.context->pebs_master = tracer; | ||
656 | 729 | ||
657 | /* if we are already beyond the interrupt threshold, | 730 | spin_unlock_irqrestore(&ds_lock, irq); |
658 | * we fill the entire buffer */ | ||
659 | if (write_end <= index) | ||
660 | write_end = end; | ||
661 | 731 | ||
662 | if (write_end <= index) | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
663 | goto out; | 733 | ds_resume_pebs(tracer); |
664 | 734 | ||
665 | write_size = min((unsigned long) size, write_end - index); | 735 | return tracer; |
666 | memcpy((void *)index, record, write_size); | ||
667 | 736 | ||
668 | record = (const char *)record + write_size; | 737 | out_put_tracer: |
669 | size -= write_size; | 738 | put_tracer(task); |
670 | error += write_size; | 739 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | ||
741 | ds_put_context(tracer->ds.context); | ||
742 | out_tracer: | ||
743 | kfree(tracer); | ||
744 | out: | ||
745 | return ERR_PTR(error); | ||
746 | } | ||
671 | 747 | ||
672 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 748 | void ds_release_bts(struct bts_tracer *tracer) |
673 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 749 | { |
750 | if (!tracer) | ||
751 | return; | ||
674 | 752 | ||
675 | /* zero out trailing bytes */ | 753 | ds_suspend_bts(tracer); |
676 | memset((char *)index + write_size, 0, | ||
677 | adj_write_size - write_size); | ||
678 | index += adj_write_size; | ||
679 | 754 | ||
680 | if (index >= end) | 755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
681 | index = base; | 756 | tracer->ds.context->bts_master = NULL; |
682 | ds_set(context->ds, qual, ds_index, index); | ||
683 | 757 | ||
684 | if (index >= int_th) | 758 | put_tracer(tracer->ds.context->task); |
685 | ds_overflow(task, context, qual); | 759 | ds_put_context(tracer->ds.context); |
686 | } | ||
687 | 760 | ||
688 | out: | 761 | kfree(tracer); |
689 | ds_put_context(context); | ||
690 | return error; | ||
691 | } | 762 | } |
692 | 763 | ||
693 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
694 | { | 765 | { |
695 | return ds_write(task, record, size, ds_bts, /* force = */ 0); | 766 | struct task_struct *task; |
696 | } | ||
697 | 767 | ||
698 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) | 768 | if (!tracer) |
699 | { | 769 | return; |
700 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); | ||
701 | } | ||
702 | 770 | ||
703 | int ds_unchecked_write_bts(struct task_struct *task, | 771 | task = tracer->ds.context->task; |
704 | const void *record, size_t size) | ||
705 | { | ||
706 | return ds_write(task, record, size, ds_bts, /* force = */ 1); | ||
707 | } | ||
708 | 772 | ||
709 | int ds_unchecked_write_pebs(struct task_struct *task, | 773 | if (!task || (task == current)) |
710 | const void *record, size_t size) | 774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); |
711 | { | 775 | |
712 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); | 776 | if (task) { |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | ||
778 | |||
779 | if (!task->thread.debugctlmsr) | ||
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
781 | } | ||
713 | } | 782 | } |
714 | 783 | ||
715 | static int ds_reset_or_clear(struct task_struct *task, | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
716 | enum ds_qualifier qual, int clear) | ||
717 | { | 785 | { |
718 | struct ds_context *context; | 786 | struct task_struct *task; |
719 | unsigned long base, end; | 787 | unsigned long control; |
720 | int error; | ||
721 | 788 | ||
722 | context = ds_get_context(task); | 789 | if (!tracer) |
723 | error = ds_validate_access(context, qual); | 790 | return; |
724 | if (error < 0) | ||
725 | goto out; | ||
726 | 791 | ||
727 | base = ds_get(context->ds, qual, ds_buffer_base); | 792 | task = tracer->ds.context->task; |
728 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
729 | 793 | ||
730 | if (clear) | 794 | control = ds_cfg.ctl[dsf_bts]; |
731 | memset((void *)base, 0, end - base); | 795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
798 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
732 | 799 | ||
733 | ds_set(context->ds, qual, ds_index, base); | 800 | if (task) { |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
734 | 804 | ||
735 | error = 0; | 805 | if (!task || (task == current)) |
736 | out: | 806 | update_debugctlmsr(get_debugctlmsr() | control); |
737 | ds_put_context(context); | ||
738 | return error; | ||
739 | } | 807 | } |
740 | 808 | ||
741 | int ds_reset_bts(struct task_struct *task) | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
742 | { | 810 | { |
743 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); | 811 | if (!tracer) |
812 | return; | ||
813 | |||
814 | ds_suspend_pebs(tracer); | ||
815 | |||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
817 | tracer->ds.context->pebs_master = NULL; | ||
818 | |||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | ||
821 | |||
822 | kfree(tracer); | ||
744 | } | 823 | } |
745 | 824 | ||
746 | int ds_reset_pebs(struct task_struct *task) | 825 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
747 | { | 826 | { |
748 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | 827 | |
749 | } | 828 | } |
750 | 829 | ||
751 | int ds_clear_bts(struct task_struct *task) | 830 | void ds_resume_pebs(struct pebs_tracer *tracer) |
752 | { | 831 | { |
753 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); | 832 | |
754 | } | 833 | } |
755 | 834 | ||
756 | int ds_clear_pebs(struct task_struct *task) | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
757 | { | 836 | { |
758 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); | 837 | if (!tracer) |
838 | return NULL; | ||
839 | |||
840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
841 | return &tracer->trace; | ||
759 | } | 842 | } |
760 | 843 | ||
761 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
762 | { | 845 | { |
763 | struct ds_context *context; | 846 | if (!tracer) |
764 | int error; | 847 | return NULL; |
848 | |||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
850 | tracer->trace.reset_value = | ||
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | ||
765 | 852 | ||
766 | if (!value) | 853 | return &tracer->trace; |
854 | } | ||
855 | |||
856 | int ds_reset_bts(struct bts_tracer *tracer) | ||
857 | { | ||
858 | if (!tracer) | ||
767 | return -EINVAL; | 859 | return -EINVAL; |
768 | 860 | ||
769 | context = ds_get_context(task); | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
770 | error = ds_validate_access(context, ds_pebs); | ||
771 | if (error < 0) | ||
772 | goto out; | ||
773 | 862 | ||
774 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); | 863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
864 | (unsigned long)tracer->trace.ds.top); | ||
775 | 865 | ||
776 | error = 0; | 866 | return 0; |
777 | out: | ||
778 | ds_put_context(context); | ||
779 | return error; | ||
780 | } | 867 | } |
781 | 868 | ||
782 | int ds_set_pebs_reset(struct task_struct *task, u64 value) | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
783 | { | 870 | { |
784 | struct ds_context *context; | 871 | if (!tracer) |
785 | int error; | 872 | return -EINVAL; |
786 | 873 | ||
787 | context = ds_get_context(task); | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
788 | error = ds_validate_access(context, ds_pebs); | ||
789 | if (error < 0) | ||
790 | goto out; | ||
791 | 875 | ||
792 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; | 876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | ||
793 | 878 | ||
794 | error = 0; | 879 | return 0; |
795 | out: | 880 | } |
796 | ds_put_context(context); | 881 | |
797 | return error; | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
883 | { | ||
884 | if (!tracer) | ||
885 | return -EINVAL; | ||
886 | |||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | ||
888 | |||
889 | return 0; | ||
798 | } | 890 | } |
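The "(ds_cfg.sizeof_field * 8)" offset used by ds_read_pebs() and ds_set_pebs_reset() above is field 8 of the DS save area. For reference, the layout per the Intel SDM, each field sizeof_field wide (this enum is illustrative, not kernel API):

    enum ds_area_field_sketch {
            sk_bts_buffer_base,             /* 0 */
            sk_bts_index,                   /* 1 */
            sk_bts_absolute_maximum,        /* 2 */
            sk_bts_interrupt_threshold,     /* 3 */
            sk_pebs_buffer_base,            /* 4 */
            sk_pebs_index,                  /* 5 */
            sk_pebs_absolute_maximum,       /* 6 */
            sk_pebs_interrupt_threshold,    /* 7 */
            sk_pebs_counter_reset           /* 8: the u64 accessed above */
    };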
799 | 891 | ||
800 | static const struct ds_configuration ds_cfg_var = { | 892 | static const struct ds_configuration ds_cfg_netburst = { |
801 | .sizeof_ds = sizeof(long) * 12, | 893 | .name = "Netburst", |
802 | .sizeof_field = sizeof(long), | 894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
803 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | 895 | .ctl[dsf_bts_kernel] = (1 << 5), |
804 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | 896 | .ctl[dsf_bts_user] = (1 << 6), |
897 | |||
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
900 | #ifdef __i386__ | ||
901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
902 | #else | ||
903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
904 | #endif | ||
905 | }; | ||
906 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
907 | .name = "Pentium M", | ||
908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
909 | |||
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
912 | #ifdef __i386__ | ||
913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
914 | #else | ||
915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
916 | #endif | ||
805 | }; | 917 | }; |
806 | static const struct ds_configuration ds_cfg_64 = { | 918 | static const struct ds_configuration ds_cfg_core2_atom = { |
807 | .sizeof_ds = 8 * 12, | 919 | .name = "Core 2/Atom", |
808 | .sizeof_field = 8, | 920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
809 | .sizeof_rec[ds_bts] = 8 * 3, | 921 | .ctl[dsf_bts_kernel] = (1 << 9), |
810 | .sizeof_rec[ds_pebs] = 8 * 10 | 922 | .ctl[dsf_bts_user] = (1 << 10), |
923 | |||
924 | .sizeof_field = 8, | ||
925 | .sizeof_rec[ds_bts] = 8 * 3, | ||
926 | .sizeof_rec[ds_pebs] = 8 * 18, | ||
811 | }; | 927 | }; |
812 | 928 | ||
813 | static inline void | 929 | static void |
814 | ds_configure(const struct ds_configuration *cfg) | 930 | ds_configure(const struct ds_configuration *cfg) |
815 | { | 931 | { |
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
816 | ds_cfg = *cfg; | 933 | ds_cfg = *cfg; |
934 | |||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | ||
936 | |||
937 | if (!cpu_has_bts) { | ||
938 | ds_cfg.ctl[dsf_bts] = 0; | ||
939 | printk(KERN_INFO "[ds] bts not available\n"); | ||
940 | } | ||
941 | if (!cpu_has_pebs) | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
943 | |||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | ||
817 | } | 945 | } |
818 | 946 | ||
819 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
@@ -821,25 +949,27 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
821 | switch (c->x86) { | 949 | switch (c->x86) { |
822 | case 0x6: | 950 | case 0x6: |
823 | switch (c->x86_model) { | 951 | switch (c->x86_model) { |
824 | case 0xD: | 952 | case 0x9: |
825 | case 0xE: /* Pentium M */ | 953 | case 0xd: /* Pentium M */ |
826 | ds_configure(&ds_cfg_var); | 954 | ds_configure(&ds_cfg_pentium_m); |
827 | break; | 955 | break; |
828 | case 0xF: /* Core2 */ | 956 | case 0xf: |
829 | case 0x1C: /* Atom */ | 957 | case 0x17: /* Core2 */ |
830 | ds_configure(&ds_cfg_64); | 958 | case 0x1c: /* Atom */ |
959 | ds_configure(&ds_cfg_core2_atom); | ||
831 | break; | 960 | break; |
961 | case 0x1a: /* i7 */ | ||
832 | default: | 962 | default: |
833 | /* sorry, don't know about them */ | 963 | /* sorry, don't know about them */ |
834 | break; | 964 | break; |
835 | } | 965 | } |
836 | break; | 966 | break; |
837 | case 0xF: | 967 | case 0xf: |
838 | switch (c->x86_model) { | 968 | switch (c->x86_model) { |
839 | case 0x0: | 969 | case 0x0: |
840 | case 0x1: | 970 | case 0x1: |
841 | case 0x2: /* Netburst */ | 971 | case 0x2: /* Netburst */ |
842 | ds_configure(&ds_cfg_var); | 972 | ds_configure(&ds_cfg_netburst); |
843 | break; | 973 | break; |
844 | default: | 974 | default: |
845 | /* sorry, don't know about them */ | 975 | /* sorry, don't know about them */ |
@@ -852,13 +982,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
852 | } | 982 | } |
853 | } | 983 | } |
854 | 984 | ||
855 | void ds_free(struct ds_context *context) | 985 | /* |
986 | * Change the DS configuration from tracing prev to tracing next. | ||
987 | */ | ||
988 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
989 | { | ||
990 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
991 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
992 | |||
993 | if (prev_ctx) { | ||
994 | update_debugctlmsr(0); | ||
995 | |||
996 | if (prev_ctx->bts_master && | ||
997 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
998 | struct bts_struct ts = { | ||
999 | .qualifier = bts_task_departs, | ||
1000 | .variant.timestamp.jiffies = jiffies_64, | ||
1001 | .variant.timestamp.pid = prev->pid | ||
1002 | }; | ||
1003 | bts_write(prev_ctx->bts_master, &ts); | ||
1004 | } | ||
1005 | } | ||
1006 | |||
1007 | if (next_ctx) { | ||
1008 | if (next_ctx->bts_master && | ||
1009 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1010 | struct bts_struct ts = { | ||
1011 | .qualifier = bts_task_arrives, | ||
1012 | .variant.timestamp.jiffies = jiffies_64, | ||
1013 | .variant.timestamp.pid = next->pid | ||
1014 | }; | ||
1015 | bts_write(next_ctx->bts_master, &ts); | ||
1016 | } | ||
1017 | |||
1018 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1019 | } | ||
1020 | |||
1021 | update_debugctlmsr(next->thread.debugctlmsr); | ||
1022 | } | ||
1023 | |||
1024 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | ||
1025 | { | ||
1026 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | ||
1027 | tsk->thread.ds_ctx = NULL; | ||
1028 | } | ||
1029 | |||
1030 | void ds_exit_thread(struct task_struct *tsk) | ||
856 | { | 1031 | { |
857 | /* This is called when the task owning the parameter context | 1032 | WARN_ON(tsk->thread.ds_ctx); |
858 | * is dying. There should not be any user of that context left | ||
859 | * to disturb us, anymore. */ | ||
860 | unsigned long leftovers = context->count; | ||
861 | while (leftovers--) | ||
862 | ds_put_context(context); | ||
863 | } | 1033 | } |
864 | #endif /* CONFIG_X86_DS */ | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..87d103ded1c3 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <linux/sysfs.h> | ||
17 | |||
18 | #include <asm/stacktrace.h> | ||
19 | |||
20 | #include "dumpstack.h" | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | unsigned int code_bytes = 64; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
34 | static void | ||
35 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
36 | const struct stacktrace_ops *ops, | ||
37 | struct thread_info *tinfo, int *graph) | ||
38 | { | ||
39 | struct task_struct *task = tinfo->task; | ||
40 | unsigned long ret_addr; | ||
41 | int index = task->curr_ret_stack; | ||
42 | |||
43 | if (addr != (unsigned long)return_to_handler) | ||
44 | return; | ||
45 | |||
46 | if (!task->ret_stack || index < *graph) | ||
47 | return; | ||
48 | |||
49 | index -= *graph; | ||
50 | ret_addr = task->ret_stack[index].ret; | ||
51 | |||
52 | ops->address(data, ret_addr, 1); | ||
53 | |||
54 | (*graph)++; | ||
55 | } | ||
56 | #else | ||
57 | static inline void | ||
58 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
59 | const struct stacktrace_ops *ops, | ||
60 | struct thread_info *tinfo, int *graph) | ||
61 | { } | ||
62 | #endif | ||
63 | |||
64 | /* | ||
65 | * x86-64 can have up to three kernel stacks: | ||
66 | * process stack | ||
67 | * interrupt stack | ||
68 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
69 | */ | ||
70 | |||
71 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
72 | void *p, unsigned int size, void *end) | ||
73 | { | ||
74 | void *t = tinfo; | ||
75 | if (end) { | ||
76 | if (p < end && p >= (end-THREAD_SIZE)) | ||
77 | return 1; | ||
78 | else | ||
79 | return 0; | ||
80 | } | ||
81 | return p > t && p < t + THREAD_SIZE - size; | ||
82 | } | ||
83 | |||
84 | unsigned long | ||
85 | print_context_stack(struct thread_info *tinfo, | ||
86 | unsigned long *stack, unsigned long bp, | ||
87 | const struct stacktrace_ops *ops, void *data, | ||
88 | unsigned long *end, int *graph) | ||
89 | { | ||
90 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
91 | |||
92 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
93 | unsigned long addr; | ||
94 | |||
95 | addr = *stack; | ||
96 | if (__kernel_text_address(addr)) { | ||
97 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
98 | ops->address(data, addr, 1); | ||
99 | frame = frame->next_frame; | ||
100 | bp = (unsigned long) frame; | ||
101 | } else { | ||
102 | ops->address(data, addr, 0); | ||
103 | } | ||
104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
105 | } | ||
106 | stack++; | ||
107 | } | ||
108 | return bp; | ||
109 | } | ||
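print_context_stack() walks the classic saved-frame-pointer chain: each frame begins with the previous frame pointer followed by the return address, and an address is reported reliable only when it sits exactly one word above the current bp. A userspace analogue of the walk (build with -fno-omit-frame-pointer; the fixed depth bound is a crude stand-in for the kernel's valid_stack_ptr() check):

    #include <stdio.h>

    struct stack_frame {                 /* same shape as in dumpstack.h */
            struct stack_frame *next_frame;
            unsigned long return_address;
    };

    static void __attribute__((noinline)) walk(void)
    {
            struct stack_frame *frame = __builtin_frame_address(0);
            int depth;

            for (depth = 0; frame && depth < 4; depth++) {
                    printf("ret=%#lx\n", frame->return_address);
                    frame = frame->next_frame;
            }
    }

    int main(void)
    {
            walk();
            return 0;
    }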
110 | |||
111 | |||
112 | static void | ||
113 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
114 | { | ||
115 | printk(data); | ||
116 | print_symbol(msg, symbol); | ||
117 | printk("\n"); | ||
118 | } | ||
119 | |||
120 | static void print_trace_warning(void *data, char *msg) | ||
121 | { | ||
122 | printk("%s%s\n", (char *)data, msg); | ||
123 | } | ||
124 | |||
125 | static int print_trace_stack(void *data, char *name) | ||
126 | { | ||
127 | printk("%s <%s> ", (char *)data, name); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Print one address/symbol entry per line. | ||
133 | */ | ||
134 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
135 | { | ||
136 | touch_nmi_watchdog(); | ||
137 | printk(data); | ||
138 | printk_address(addr, reliable); | ||
139 | } | ||
140 | |||
141 | static const struct stacktrace_ops print_trace_ops = { | ||
142 | .warning = print_trace_warning, | ||
143 | .warning_symbol = print_trace_warning_symbol, | ||
144 | .stack = print_trace_stack, | ||
145 | .address = print_trace_address, | ||
146 | }; | ||
147 | |||
148 | void | ||
149 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
150 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
151 | { | ||
152 | printk("%sCall Trace:\n", log_lvl); | ||
153 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
154 | } | ||
155 | |||
156 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
157 | unsigned long *stack, unsigned long bp) | ||
158 | { | ||
159 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
160 | } | ||
161 | |||
162 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
163 | { | ||
164 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * The architecture-independent dump_stack generator | ||
169 | */ | ||
170 | void dump_stack(void) | ||
171 | { | ||
172 | unsigned long bp = 0; | ||
173 | unsigned long stack; | ||
174 | |||
175 | #ifdef CONFIG_FRAME_POINTER | ||
176 | if (!bp) | ||
177 | get_bp(bp); | ||
178 | #endif | ||
179 | |||
180 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
181 | current->pid, current->comm, print_tainted(), | ||
182 | init_utsname()->release, | ||
183 | (int)strcspn(init_utsname()->version, " "), | ||
184 | init_utsname()->version); | ||
185 | show_trace(NULL, NULL, &stack, bp); | ||
186 | } | ||
187 | EXPORT_SYMBOL(dump_stack); | ||
188 | |||
189 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
190 | static int die_owner = -1; | ||
191 | static unsigned int die_nest_count; | ||
192 | |||
193 | unsigned __kprobes long oops_begin(void) | ||
194 | { | ||
195 | int cpu; | ||
196 | unsigned long flags; | ||
197 | |||
198 | oops_enter(); | ||
199 | |||
200 | /* racy, but better than risking deadlock. */ | ||
201 | raw_local_irq_save(flags); | ||
202 | cpu = smp_processor_id(); | ||
203 | if (!__raw_spin_trylock(&die_lock)) { | ||
204 | if (cpu == die_owner) | ||
205 | /* nested oops. should stop eventually */; | ||
206 | else | ||
207 | __raw_spin_lock(&die_lock); | ||
208 | } | ||
209 | die_nest_count++; | ||
210 | die_owner = cpu; | ||
211 | console_verbose(); | ||
212 | bust_spinlocks(1); | ||
213 | return flags; | ||
214 | } | ||
215 | |||
216 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
217 | { | ||
218 | if (regs && kexec_should_crash(current)) | ||
219 | crash_kexec(regs); | ||
220 | |||
221 | bust_spinlocks(0); | ||
222 | die_owner = -1; | ||
223 | add_taint(TAINT_DIE); | ||
224 | die_nest_count--; | ||
225 | if (!die_nest_count) | ||
226 | /* Nest count reaches zero, release the lock. */ | ||
227 | __raw_spin_unlock(&die_lock); | ||
228 | raw_local_irq_restore(flags); | ||
229 | oops_exit(); | ||
230 | |||
231 | if (!signr) | ||
232 | return; | ||
233 | if (in_interrupt()) | ||
234 | panic("Fatal exception in interrupt"); | ||
235 | if (panic_on_oops) | ||
236 | panic("Fatal exception"); | ||
237 | do_exit(signr); | ||
238 | } | ||
239 | |||
240 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
241 | { | ||
242 | #ifdef CONFIG_X86_32 | ||
243 | unsigned short ss; | ||
244 | unsigned long sp; | ||
245 | #endif | ||
246 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
247 | #ifdef CONFIG_PREEMPT | ||
248 | printk("PREEMPT "); | ||
249 | #endif | ||
250 | #ifdef CONFIG_SMP | ||
251 | printk("SMP "); | ||
252 | #endif | ||
253 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
254 | printk("DEBUG_PAGEALLOC"); | ||
255 | #endif | ||
256 | printk("\n"); | ||
257 | sysfs_printk_last_file(); | ||
258 | if (notify_die(DIE_OOPS, str, regs, err, | ||
259 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
260 | return 1; | ||
261 | |||
262 | show_registers(regs); | ||
263 | #ifdef CONFIG_X86_32 | ||
264 | sp = (unsigned long) (®s->sp); | ||
265 | savesegment(ss, ss); | ||
266 | if (user_mode(regs)) { | ||
267 | sp = regs->sp; | ||
268 | ss = regs->ss & 0xffff; | ||
269 | } | ||
270 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
271 | print_symbol("%s", regs->ip); | ||
272 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
273 | #else | ||
274 | /* Executive summary in case the oops scrolled away */ | ||
275 | printk(KERN_ALERT "RIP "); | ||
276 | printk_address(regs->ip, 1); | ||
277 | printk(" RSP <%016lx>\n", regs->sp); | ||
278 | #endif | ||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * This path is taken when something in the kernel has done something bad | ||
284 | * and is about to be terminated: | ||
285 | */ | ||
286 | void die(const char *str, struct pt_regs *regs, long err) | ||
287 | { | ||
288 | unsigned long flags = oops_begin(); | ||
289 | int sig = SIGSEGV; | ||
290 | |||
291 | if (!user_mode_vm(regs)) | ||
292 | report_bug(regs->ip, regs); | ||
293 | |||
294 | if (__die(str, regs, err)) | ||
295 | sig = 0; | ||
296 | oops_end(flags, regs, sig); | ||
297 | } | ||
298 | |||
299 | void notrace __kprobes | ||
300 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
301 | { | ||
302 | unsigned long flags; | ||
303 | |||
304 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
305 | return; | ||
306 | |||
307 | /* | ||
308 | * We are in trouble anyway, let's at least try | ||
309 | * to get a message out. | ||
310 | */ | ||
311 | flags = oops_begin(); | ||
312 | printk(KERN_EMERG "%s", str); | ||
313 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
314 | smp_processor_id(), regs->ip); | ||
315 | show_registers(regs); | ||
316 | oops_end(flags, regs, 0); | ||
317 | if (do_panic || panic_on_oops) | ||
318 | panic("Non maskable interrupt"); | ||
319 | nmi_exit(); | ||
320 | local_irq_enable(); | ||
321 | do_exit(SIGBUS); | ||
322 | } | ||
323 | |||
324 | static int __init oops_setup(char *s) | ||
325 | { | ||
326 | if (!s) | ||
327 | return -EINVAL; | ||
328 | if (!strcmp(s, "panic")) | ||
329 | panic_on_oops = 1; | ||
330 | return 0; | ||
331 | } | ||
332 | early_param("oops", oops_setup); | ||
333 | |||
334 | static int __init kstack_setup(char *s) | ||
335 | { | ||
336 | if (!s) | ||
337 | return -EINVAL; | ||
338 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
339 | return 0; | ||
340 | } | ||
341 | early_param("kstack", kstack_setup); | ||
342 | |||
343 | static int __init code_bytes_setup(char *s) | ||
344 | { | ||
345 | code_bytes = simple_strtoul(s, NULL, 0); | ||
346 | if (code_bytes > 8192) | ||
347 | code_bytes = 8192; | ||
348 | |||
349 | return 1; | ||
350 | } | ||
351 | __setup("code_bytes=", code_bytes_setup); | ||
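All three handlers registered above are command-line knobs: booting with, say, oops=panic kstack=64 code_bytes=128 panics on any oops, prints 64 stack words per trace, and dumps up to 128 code bytes around the faulting instruction (code_bytes is clamped to 8192). Note the asymmetry: oops= and kstack= are early_param() and so are parsed early in boot, while code_bytes= is a plain __setup() handled during normal parameter parsing.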
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | extern unsigned long | ||
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end, int *graph); | ||
22 | |||
23 | extern void | ||
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern void | ||
28 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
29 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
30 | |||
31 | extern unsigned int code_bytes; | ||
32 | extern int kstack_depth_to_print; | ||
33 | |||
34 | /* The form of the top of the frame on the stack */ | ||
35 | struct stack_frame { | ||
36 | struct stack_frame *next_frame; | ||
37 | unsigned long return_address; | ||
38 | }; | ||
39 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,69 +17,14 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 8 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | |||
34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
35 | void *p, unsigned int size, void *end) | ||
36 | { | ||
37 | void *t = tinfo; | ||
38 | if (end) { | ||
39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
40 | return 1; | ||
41 | else | ||
42 | return 0; | ||
43 | } | ||
44 | return p > t && p < t + THREAD_SIZE - size; | ||
45 | } | ||
46 | |||
47 | /* The form of the top of the frame on the stack */ | ||
48 | struct stack_frame { | ||
49 | struct stack_frame *next_frame; | ||
50 | unsigned long return_address; | ||
51 | }; | ||
52 | |||
53 | static inline unsigned long | ||
54 | print_context_stack(struct thread_info *tinfo, | ||
55 | unsigned long *stack, unsigned long bp, | ||
56 | const struct stacktrace_ops *ops, void *data, | ||
57 | unsigned long *end) | ||
58 | { | ||
59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
60 | |||
61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
62 | unsigned long addr; | ||
63 | |||
64 | addr = *stack; | ||
65 | if (__kernel_text_address(addr)) { | ||
66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
67 | ops->address(data, addr, 1); | ||
68 | frame = frame->next_frame; | ||
69 | bp = (unsigned long) frame; | ||
70 | } else { | ||
71 | ops->address(data, addr, bp == 0); | ||
72 | } | ||
73 | } | ||
74 | stack++; | ||
75 | } | ||
76 | return bp; | ||
77 | } | ||
78 | 21 | ||
79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
80 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
81 | const struct stacktrace_ops *ops, void *data) | 24 | const struct stacktrace_ops *ops, void *data) |
82 | { | 25 | { |
26 | int graph = 0; | ||
27 | |||
83 | if (!task) | 28 | if (!task) |
84 | task = current; | 29 | task = current; |
85 | 30 | ||
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
107 | 52 | ||
108 | context = (struct thread_info *) | 53 | context = (struct thread_info *) |
109 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 54 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
110 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | 55 | bp = print_context_stack(context, stack, bp, ops, |
56 | data, NULL, &graph); | ||
111 | 57 | ||
112 | stack = (unsigned long *)context->previous_esp; | 58 | stack = (unsigned long *)context->previous_esp; |
113 | if (!stack) | 59 | if (!stack) |
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
119 | } | 65 | } |
120 | EXPORT_SYMBOL(dump_trace); | 66 | EXPORT_SYMBOL(dump_trace); |
121 | 67 | ||
122 | static void | 68 | void |
123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
124 | { | ||
125 | printk(data); | ||
126 | print_symbol(msg, symbol); | ||
127 | printk("\n"); | ||
128 | } | ||
129 | |||
130 | static void print_trace_warning(void *data, char *msg) | ||
131 | { | ||
132 | printk("%s%s\n", (char *)data, msg); | ||
133 | } | ||
134 | |||
135 | static int print_trace_stack(void *data, char *name) | ||
136 | { | ||
137 | printk("%s <%s> ", (char *)data, name); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Print one address/symbol entries per line. | ||
143 | */ | ||
144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
145 | { | ||
146 | touch_nmi_watchdog(); | ||
147 | printk(data); | ||
148 | printk_address(addr, reliable); | ||
149 | } | ||
150 | |||
151 | static const struct stacktrace_ops print_trace_ops = { | ||
152 | .warning = print_trace_warning, | ||
153 | .warning_symbol = print_trace_warning_symbol, | ||
154 | .stack = print_trace_stack, | ||
155 | .address = print_trace_address, | ||
156 | }; | ||
157 | |||
158 | static void | ||
159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
161 | { | ||
162 | printk("%sCall Trace:\n", log_lvl); | ||
163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
164 | } | ||
165 | |||
166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
167 | unsigned long *stack, unsigned long bp) | ||
168 | { | ||
169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
170 | } | ||
171 | |||
172 | static void | ||
173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 69 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
174 | unsigned long *sp, unsigned long bp, char *log_lvl) | 70 | unsigned long *sp, unsigned long bp, char *log_lvl) |
175 | { | 71 | { |
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 92 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
197 | } | 93 | } |
198 | 94 | ||
199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
200 | { | ||
201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * The architecture-independent dump_stack generator | ||
206 | */ | ||
207 | void dump_stack(void) | ||
208 | { | ||
209 | unsigned long bp = 0; | ||
210 | unsigned long stack; | ||
211 | |||
212 | #ifdef CONFIG_FRAME_POINTER | ||
213 | if (!bp) | ||
214 | get_bp(bp); | ||
215 | #endif | ||
216 | |||
217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
218 | current->pid, current->comm, print_tainted(), | ||
219 | init_utsname()->release, | ||
220 | (int)strcspn(init_utsname()->version, " "), | ||
221 | init_utsname()->version); | ||
222 | show_trace(NULL, NULL, &stack, bp); | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL(dump_stack); | ||
226 | 95 | ||
227 | void show_registers(struct pt_regs *regs) | 96 | void show_registers(struct pt_regs *regs) |
228 | { | 97 | { |
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
283 | return ud2 == 0x0b0f; | 152 | return ud2 == 0x0b0f; |
284 | } | 153 | } |
285 | 154 | ||
286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
287 | static int die_owner = -1; | ||
288 | static unsigned int die_nest_count; | ||
289 | |||
290 | unsigned __kprobes long oops_begin(void) | ||
291 | { | ||
292 | unsigned long flags; | ||
293 | |||
294 | oops_enter(); | ||
295 | |||
296 | if (die_owner != raw_smp_processor_id()) { | ||
297 | console_verbose(); | ||
298 | raw_local_irq_save(flags); | ||
299 | __raw_spin_lock(&die_lock); | ||
300 | die_owner = smp_processor_id(); | ||
301 | die_nest_count = 0; | ||
302 | bust_spinlocks(1); | ||
303 | } else { | ||
304 | raw_local_irq_save(flags); | ||
305 | } | ||
306 | die_nest_count++; | ||
307 | return flags; | ||
308 | } | ||
309 | |||
310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
311 | { | ||
312 | bust_spinlocks(0); | ||
313 | die_owner = -1; | ||
314 | add_taint(TAINT_DIE); | ||
315 | __raw_spin_unlock(&die_lock); | ||
316 | raw_local_irq_restore(flags); | ||
317 | |||
318 | if (!regs) | ||
319 | return; | ||
320 | |||
321 | if (kexec_should_crash(current)) | ||
322 | crash_kexec(regs); | ||
323 | if (in_interrupt()) | ||
324 | panic("Fatal exception in interrupt"); | ||
325 | if (panic_on_oops) | ||
326 | panic("Fatal exception"); | ||
327 | oops_exit(); | ||
328 | do_exit(signr); | ||
329 | } | ||
330 | |||
331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
332 | { | ||
333 | unsigned short ss; | ||
334 | unsigned long sp; | ||
335 | |||
336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
337 | #ifdef CONFIG_PREEMPT | ||
338 | printk("PREEMPT "); | ||
339 | #endif | ||
340 | #ifdef CONFIG_SMP | ||
341 | printk("SMP "); | ||
342 | #endif | ||
343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
344 | printk("DEBUG_PAGEALLOC"); | ||
345 | #endif | ||
346 | printk("\n"); | ||
347 | sysfs_printk_last_file(); | ||
348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
350 | return 1; | ||
351 | |||
352 | show_registers(regs); | ||
353 | /* Executive summary in case the oops scrolled away */ | ||
354 | sp = (unsigned long) (®s->sp); | ||
355 | savesegment(ss, ss); | ||
356 | if (user_mode(regs)) { | ||
357 | sp = regs->sp; | ||
358 | ss = regs->ss & 0xffff; | ||
359 | } | ||
360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
361 | print_symbol("%s", regs->ip); | ||
362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * This is gone through when something in the kernel has done something bad | ||
368 | * and is about to be terminated: | ||
369 | */ | ||
370 | void die(const char *str, struct pt_regs *regs, long err) | ||
371 | { | ||
372 | unsigned long flags = oops_begin(); | ||
373 | |||
374 | if (die_nest_count < 3) { | ||
375 | report_bug(regs->ip, regs); | ||
376 | |||
377 | if (__die(str, regs, err)) | ||
378 | regs = NULL; | ||
379 | } else { | ||
380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
381 | } | ||
382 | |||
383 | oops_end(flags, regs, SIGSEGV); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
387 | |||
388 | void notrace __kprobes | ||
389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
390 | { | ||
391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
392 | return; | ||
393 | |||
394 | spin_lock(&nmi_print_lock); | ||
395 | /* | ||
396 | * We are in trouble anyway, lets at least try | ||
397 | * to get a message out: | ||
398 | */ | ||
399 | bust_spinlocks(1); | ||
400 | printk(KERN_EMERG "%s", str); | ||
401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
402 | smp_processor_id(), regs->ip); | ||
403 | show_registers(regs); | ||
404 | if (do_panic) | ||
405 | panic("Non maskable interrupt"); | ||
406 | console_silent(); | ||
407 | spin_unlock(&nmi_print_lock); | ||
408 | |||
409 | /* | ||
410 | * If we are in kernel we are probably nested up pretty bad | ||
411 | * and might aswell get out now while we still can: | ||
412 | */ | ||
413 | if (!user_mode_vm(regs)) { | ||
414 | current->thread.trap_no = 2; | ||
415 | crash_kexec(regs); | ||
416 | } | ||
417 | |||
418 | bust_spinlocks(0); | ||
419 | do_exit(SIGSEGV); | ||
420 | } | ||
421 | |||
422 | static int __init oops_setup(char *s) | ||
423 | { | ||
424 | if (!s) | ||
425 | return -EINVAL; | ||
426 | if (!strcmp(s, "panic")) | ||
427 | panic_on_oops = 1; | ||
428 | return 0; | ||
429 | } | ||
430 | early_param("oops", oops_setup); | ||
431 | |||
432 | static int __init kstack_setup(char *s) | ||
433 | { | ||
434 | if (!s) | ||
435 | return -EINVAL; | ||
436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
437 | return 0; | ||
438 | } | ||
439 | early_param("kstack", kstack_setup); | ||
440 | |||
441 | static int __init code_bytes_setup(char *s) | ||
442 | { | ||
443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
444 | if (code_bytes > 8192) | ||
445 | code_bytes = 8192; | ||
446 | |||
447 | return 1; | ||
448 | } | ||
449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..d35db5993fd6 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -17,19 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 4 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | 21 | ||
34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
35 | unsigned *usedp, char **idp) | 23 | unsigned *usedp, char **idp) |
@@ -113,59 +101,16 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 101 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
114 | */ | 102 | */ |
115 | 103 | ||
116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
117 | void *p, unsigned int size, void *end) | ||
118 | { | ||
119 | void *t = tinfo; | ||
120 | if (end) { | ||
121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
122 | return 1; | ||
123 | else | ||
124 | return 0; | ||
125 | } | ||
126 | return p > t && p < t + THREAD_SIZE - size; | ||
127 | } | ||
128 | |||
129 | /* The form of the top of the frame on the stack */ | ||
130 | struct stack_frame { | ||
131 | struct stack_frame *next_frame; | ||
132 | unsigned long return_address; | ||
133 | }; | ||
134 | |||
135 | static inline unsigned long | ||
136 | print_context_stack(struct thread_info *tinfo, | ||
137 | unsigned long *stack, unsigned long bp, | ||
138 | const struct stacktrace_ops *ops, void *data, | ||
139 | unsigned long *end) | ||
140 | { | ||
141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
142 | |||
143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
144 | unsigned long addr; | ||
145 | |||
146 | addr = *stack; | ||
147 | if (__kernel_text_address(addr)) { | ||
148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
149 | ops->address(data, addr, 1); | ||
150 | frame = frame->next_frame; | ||
151 | bp = (unsigned long) frame; | ||
152 | } else { | ||
153 | ops->address(data, addr, bp == 0); | ||
154 | } | ||
155 | } | ||
156 | stack++; | ||
157 | } | ||
158 | return bp; | ||
159 | } | ||
160 | |||
161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 104 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
162 | unsigned long *stack, unsigned long bp, | 105 | unsigned long *stack, unsigned long bp, |
163 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
164 | { | 107 | { |
165 | const unsigned cpu = get_cpu(); | 108 | const unsigned cpu = get_cpu(); |
166 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 109 | unsigned long *irq_stack_end = |
110 | (unsigned long *)per_cpu(irq_stack_ptr, cpu); | ||
167 | unsigned used = 0; | 111 | unsigned used = 0; |
168 | struct thread_info *tinfo; | 112 | struct thread_info *tinfo; |
113 | int graph = 0; | ||
169 | 114 | ||
170 | if (!task) | 115 | if (!task) |
171 | task = current; | 116 | task = current; |
@@ -206,7 +151,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
206 | break; | 151 | break; |
207 | 152 | ||
208 | bp = print_context_stack(tinfo, stack, bp, ops, | 153 | bp = print_context_stack(tinfo, stack, bp, ops, |
209 | data, estack_end); | 154 | data, estack_end, &graph); |
210 | ops->stack(data, "<EOE>"); | 155 | ops->stack(data, "<EOE>"); |
211 | /* | 156 | /* |
212 | * We link to the next stack via the | 157 | * We link to the next stack via the |
@@ -216,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
216 | stack = (unsigned long *) estack_end[-2]; | 161 | stack = (unsigned long *) estack_end[-2]; |
217 | continue; | 162 | continue; |
218 | } | 163 | } |
219 | if (irqstack_end) { | 164 | if (irq_stack_end) { |
220 | unsigned long *irqstack; | 165 | unsigned long *irq_stack; |
221 | irqstack = irqstack_end - | 166 | irq_stack = irq_stack_end - |
222 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 167 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); |
223 | 168 | ||
224 | if (stack >= irqstack && stack < irqstack_end) { | 169 | if (stack >= irq_stack && stack < irq_stack_end) { |
225 | if (ops->stack(data, "IRQ") < 0) | 170 | if (ops->stack(data, "IRQ") < 0) |
226 | break; | 171 | break; |
227 | bp = print_context_stack(tinfo, stack, bp, | 172 | bp = print_context_stack(tinfo, stack, bp, |
228 | ops, data, irqstack_end); | 173 | ops, data, irq_stack_end, &graph); |
229 | /* | 174 | /* |
230 | * We link to the next stack (which would be | 175 | * We link to the next stack (which would be |
231 | * the process stack normally) the last | 176 | * the process stack normally) the last |
232 | * pointer (index -1 to end) in the IRQ stack: | 177 | * pointer (index -1 to end) in the IRQ stack: |
233 | */ | 178 | */ |
234 | stack = (unsigned long *) (irqstack_end[-1]); | 179 | stack = (unsigned long *) (irq_stack_end[-1]); |
235 | irqstack_end = NULL; | 180 | irq_stack_end = NULL; |
236 | ops->stack(data, "EOI"); | 181 | ops->stack(data, "EOI"); |
237 | continue; | 182 | continue; |
238 | } | 183 | } |
@@ -243,72 +188,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
243 | /* | 188 | /* |
244 | * This handles the process stack: | 189 | * This handles the process stack: |
245 | */ | 190 | */ |
246 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | 191 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
247 | put_cpu(); | 192 | put_cpu(); |
248 | } | 193 | } |
249 | EXPORT_SYMBOL(dump_trace); | 194 | EXPORT_SYMBOL(dump_trace); |
250 | 195 | ||
251 | static void | 196 | void |
252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
253 | { | ||
254 | printk(data); | ||
255 | print_symbol(msg, symbol); | ||
256 | printk("\n"); | ||
257 | } | ||
258 | |||
259 | static void print_trace_warning(void *data, char *msg) | ||
260 | { | ||
261 | printk("%s%s\n", (char *)data, msg); | ||
262 | } | ||
263 | |||
264 | static int print_trace_stack(void *data, char *name) | ||
265 | { | ||
266 | printk("%s <%s> ", (char *)data, name); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Print one address/symbol entries per line. | ||
272 | */ | ||
273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
274 | { | ||
275 | touch_nmi_watchdog(); | ||
276 | printk(data); | ||
277 | printk_address(addr, reliable); | ||
278 | } | ||
279 | |||
280 | static const struct stacktrace_ops print_trace_ops = { | ||
281 | .warning = print_trace_warning, | ||
282 | .warning_symbol = print_trace_warning_symbol, | ||
283 | .stack = print_trace_stack, | ||
284 | .address = print_trace_address, | ||
285 | }; | ||
286 | |||
287 | static void | ||
288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
290 | { | ||
291 | printk("%sCall Trace:\n", log_lvl); | ||
292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
293 | } | ||
294 | |||
295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
296 | unsigned long *stack, unsigned long bp) | ||
297 | { | ||
298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
299 | } | ||
300 | |||
301 | static void | ||
302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 197 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
303 | unsigned long *sp, unsigned long bp, char *log_lvl) | 198 | unsigned long *sp, unsigned long bp, char *log_lvl) |
304 | { | 199 | { |
305 | unsigned long *stack; | 200 | unsigned long *stack; |
306 | int i; | 201 | int i; |
307 | const int cpu = smp_processor_id(); | 202 | const int cpu = smp_processor_id(); |
308 | unsigned long *irqstack_end = | 203 | unsigned long *irq_stack_end = |
309 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); | 204 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); |
310 | unsigned long *irqstack = | 205 | unsigned long *irq_stack = |
311 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | 206 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); |
312 | 207 | ||
313 | /* | 208 | /* |
314 | * debugging aid: "show_stack(NULL, NULL);" prints the | 209 | * debugging aid: "show_stack(NULL, NULL);" prints the |
@@ -324,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
324 | 219 | ||
325 | stack = sp; | 220 | stack = sp; |
326 | for (i = 0; i < kstack_depth_to_print; i++) { | 221 | for (i = 0; i < kstack_depth_to_print; i++) { |
327 | if (stack >= irqstack && stack <= irqstack_end) { | 222 | if (stack >= irq_stack && stack <= irq_stack_end) { |
328 | if (stack == irqstack_end) { | 223 | if (stack == irq_stack_end) { |
329 | stack = (unsigned long *) (irqstack_end[-1]); | 224 | stack = (unsigned long *) (irq_stack_end[-1]); |
330 | printk(" <EOI> "); | 225 | printk(" <EOI> "); |
331 | } | 226 | } |
332 | } else { | 227 | } else { |
@@ -342,39 +237,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 237 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
343 | } | 238 | } |
344 | 239 | ||
345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
346 | { | ||
347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * The architecture-independent dump_stack generator | ||
352 | */ | ||
353 | void dump_stack(void) | ||
354 | { | ||
355 | unsigned long bp = 0; | ||
356 | unsigned long stack; | ||
357 | |||
358 | #ifdef CONFIG_FRAME_POINTER | ||
359 | if (!bp) | ||
360 | get_bp(bp); | ||
361 | #endif | ||
362 | |||
363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
364 | current->pid, current->comm, print_tainted(), | ||
365 | init_utsname()->release, | ||
366 | (int)strcspn(init_utsname()->version, " "), | ||
367 | init_utsname()->version); | ||
368 | show_trace(NULL, NULL, &stack, bp); | ||
369 | } | ||
370 | EXPORT_SYMBOL(dump_stack); | ||
371 | |||
372 | void show_registers(struct pt_regs *regs) | 240 | void show_registers(struct pt_regs *regs) |
373 | { | 241 | { |
374 | int i; | 242 | int i; |
375 | unsigned long sp; | 243 | unsigned long sp; |
376 | const int cpu = smp_processor_id(); | 244 | const int cpu = smp_processor_id(); |
377 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 245 | struct task_struct *cur = current; |
378 | 246 | ||
379 | sp = regs->sp; | 247 | sp = regs->sp; |
380 | printk("CPU %d ", cpu); | 248 | printk("CPU %d ", cpu); |
@@ -429,147 +297,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
429 | return ud2 == 0x0b0f; | 297 | return ud2 == 0x0b0f; |
430 | } | 298 | } |
431 | 299 | ||
432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
433 | static int die_owner = -1; | ||
434 | static unsigned int die_nest_count; | ||
435 | |||
436 | unsigned __kprobes long oops_begin(void) | ||
437 | { | ||
438 | int cpu; | ||
439 | unsigned long flags; | ||
440 | |||
441 | oops_enter(); | ||
442 | |||
443 | /* racy, but better than risking deadlock. */ | ||
444 | raw_local_irq_save(flags); | ||
445 | cpu = smp_processor_id(); | ||
446 | if (!__raw_spin_trylock(&die_lock)) { | ||
447 | if (cpu == die_owner) | ||
448 | /* nested oops. should stop eventually */; | ||
449 | else | ||
450 | __raw_spin_lock(&die_lock); | ||
451 | } | ||
452 | die_nest_count++; | ||
453 | die_owner = cpu; | ||
454 | console_verbose(); | ||
455 | bust_spinlocks(1); | ||
456 | return flags; | ||
457 | } | ||
458 | |||
459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
460 | { | ||
461 | die_owner = -1; | ||
462 | bust_spinlocks(0); | ||
463 | die_nest_count--; | ||
464 | if (!die_nest_count) | ||
465 | /* Nest count reaches zero, release the lock. */ | ||
466 | __raw_spin_unlock(&die_lock); | ||
467 | raw_local_irq_restore(flags); | ||
468 | if (!regs) { | ||
469 | oops_exit(); | ||
470 | return; | ||
471 | } | ||
472 | if (in_interrupt()) | ||
473 | panic("Fatal exception in interrupt"); | ||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | oops_exit(); | ||
477 | do_exit(signr); | ||
478 | } | ||
479 | |||
480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
481 | { | ||
482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
483 | #ifdef CONFIG_PREEMPT | ||
484 | printk("PREEMPT "); | ||
485 | #endif | ||
486 | #ifdef CONFIG_SMP | ||
487 | printk("SMP "); | ||
488 | #endif | ||
489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
490 | printk("DEBUG_PAGEALLOC"); | ||
491 | #endif | ||
492 | printk("\n"); | ||
493 | sysfs_printk_last_file(); | ||
494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
496 | return 1; | ||
497 | |||
498 | show_registers(regs); | ||
499 | add_taint(TAINT_DIE); | ||
500 | /* Executive summary in case the oops scrolled away */ | ||
501 | printk(KERN_ALERT "RIP "); | ||
502 | printk_address(regs->ip, 1); | ||
503 | printk(" RSP <%016lx>\n", regs->sp); | ||
504 | if (kexec_should_crash(current)) | ||
505 | crash_kexec(regs); | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | void die(const char *str, struct pt_regs *regs, long err) | ||
510 | { | ||
511 | unsigned long flags = oops_begin(); | ||
512 | |||
513 | if (!user_mode(regs)) | ||
514 | report_bug(regs->ip, regs); | ||
515 | |||
516 | if (__die(str, regs, err)) | ||
517 | regs = NULL; | ||
518 | oops_end(flags, regs, SIGSEGV); | ||
519 | } | ||
520 | |||
521 | notrace __kprobes void | ||
522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
523 | { | ||
524 | unsigned long flags; | ||
525 | |||
526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
527 | return; | ||
528 | |||
529 | flags = oops_begin(); | ||
530 | /* | ||
531 | * We are in trouble anyway, let's at least try | ||
532 | * to get a message out. | ||
533 | */ | ||
534 | printk(KERN_EMERG "%s", str); | ||
535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
536 | smp_processor_id(), regs->ip); | ||
537 | show_registers(regs); | ||
538 | if (kexec_should_crash(current)) | ||
539 | crash_kexec(regs); | ||
540 | if (do_panic || panic_on_oops) | ||
541 | panic("Non maskable interrupt"); | ||
542 | oops_end(flags, NULL, SIGBUS); | ||
543 | nmi_exit(); | ||
544 | local_irq_enable(); | ||
545 | do_exit(SIGBUS); | ||
546 | } | ||
547 | |||
548 | static int __init oops_setup(char *s) | ||
549 | { | ||
550 | if (!s) | ||
551 | return -EINVAL; | ||
552 | if (!strcmp(s, "panic")) | ||
553 | panic_on_oops = 1; | ||
554 | return 0; | ||
555 | } | ||
556 | early_param("oops", oops_setup); | ||
557 | |||
558 | static int __init kstack_setup(char *s) | ||
559 | { | ||
560 | if (!s) | ||
561 | return -EINVAL; | ||
562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
563 | return 0; | ||
564 | } | ||
565 | early_param("kstack", kstack_setup); | ||
566 | |||
567 | static int __init code_bytes_setup(char *s) | ||
568 | { | ||
569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
570 | if (code_bytes > 8192) | ||
571 | code_bytes = 8192; | ||
572 | |||
573 | return 1; | ||
574 | } | ||
575 | __setup("code_bytes=", code_bytes_setup); | ||
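
The oops_begin()/oops_end() pair removed above (it now lives in the shared dumpstack.c) is a recursion-tolerant lock: try the lock first, and spin only if some *other* CPU holds it, so a CPU that oopses while already printing an oops nests instead of deadlocking. A user-space pthreads sketch of the same pattern (one lock plus owner and nest count; all names are illustrative, not kernel API):

    #include <pthread.h>

    static pthread_mutex_t die_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_t die_owner;
    static int die_owner_valid;
    static unsigned int die_nest_count;

    static void oops_begin_sketch(void)
    {
            if (pthread_mutex_trylock(&die_lock) != 0) {
                    /* busy: block only when another thread is the owner */
                    if (!die_owner_valid ||
                        !pthread_equal(die_owner, pthread_self()))
                            pthread_mutex_lock(&die_lock);
                    /* else: nested oops on this CPU, carry on regardless */
            }
            die_nest_count++;
            die_owner = pthread_self();
            die_owner_valid = 1;
    }

    static void oops_end_sketch(void)
    {
            if (--die_nest_count == 0) {
                    die_owner_valid = 0;
                    pthread_mutex_unlock(&die_lock);
            }
    }

    int main(void)
    {
            oops_begin_sketch();
            oops_begin_sketch();    /* nested "oops" does not deadlock */
            oops_end_sketch();
            oops_end_sketch();
            return 0;
    }
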
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7aafeb5263ef..e85826829cf2 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -665,6 +665,27 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) | |||
665 | } | 665 | } |
666 | #endif | 666 | #endif |
667 | 667 | ||
668 | #ifdef CONFIG_HIBERNATION | ||
669 | /** | ||
670 | * Mark ACPI NVS memory regions, so that we can save/restore them during | ||
671 | * hibernation and the subsequent resume. | ||
672 | */ | ||
673 | static int __init e820_mark_nvs_memory(void) | ||
674 | { | ||
675 | int i; | ||
676 | |||
677 | for (i = 0; i < e820.nr_map; i++) { | ||
678 | struct e820entry *ei = &e820.map[i]; | ||
679 | |||
680 | if (ei->type == E820_NVS) | ||
681 | hibernate_nvs_register(ei->addr, ei->size); | ||
682 | } | ||
683 | |||
684 | return 0; | ||
685 | } | ||
686 | core_initcall(e820_mark_nvs_memory); | ||
687 | #endif | ||
688 | |||
668 | /* | 689 | /* |
669 | * Early reserved memory areas. | 690 | * Early reserved memory areas. |
670 | */ | 691 | */ |
@@ -677,22 +698,6 @@ struct early_res { | |||
677 | }; | 698 | }; |
678 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | 699 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
679 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | 700 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ |
680 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
681 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
682 | #endif | ||
683 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
684 | /* | ||
685 | * But first pinch a few for the stack/trampoline stuff | ||
686 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
687 | * trampoline before removing it. (see the GDT stuff) | ||
688 | */ | ||
689 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
690 | /* | ||
691 | * Has to be in very low memory so we can execute | ||
692 | * real-mode AP code. | ||
693 | */ | ||
694 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
695 | #endif | ||
696 | {} | 701 | {} |
697 | }; | 702 | }; |
698 | 703 | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3ce029ffaa55..76b8cd953dee 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
20 | #include <asm/gart.h> | ||
20 | 21 | ||
21 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
22 | { | 23 | { |
@@ -188,20 +189,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
188 | } | 189 | } |
189 | #endif | 190 | #endif |
190 | 191 | ||
191 | #ifdef CONFIG_DMAR | ||
192 | static void __init intel_g33_dmar(int num, int slot, int func) | ||
193 | { | ||
194 | struct acpi_table_header *dmar_tbl; | ||
195 | acpi_status status; | ||
196 | |||
197 | status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); | ||
198 | if (ACPI_SUCCESS(status)) { | ||
199 | printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); | ||
200 | dmar_disabled = 1; | ||
201 | } | ||
202 | } | ||
203 | #endif | ||
204 | |||
205 | #define QFLAG_APPLY_ONCE 0x1 | 192 | #define QFLAG_APPLY_ONCE 0x1 |
206 | #define QFLAG_APPLIED 0x2 | 193 | #define QFLAG_APPLIED 0x2 |
207 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 194 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
@@ -214,6 +201,12 @@ struct chipset { | |||
214 | void (*f)(int num, int slot, int func); | 201 | void (*f)(int num, int slot, int func); |
215 | }; | 202 | }; |
216 | 203 | ||
204 | /* | ||
205 | * Only works for devices on the root bus. If you add any devices | ||
206 | * not on bus 0, re-add another loop level in early_quirks(). But | ||
207 | * be careful, because at least the Nvidia quirk here relies on | ||
208 | * matching only on bus 0. | ||
209 | */ | ||
217 | static struct chipset early_qrk[] __initdata = { | 210 | static struct chipset early_qrk[] __initdata = { |
218 | { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, | 211 | { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, |
219 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, | 212 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, |
@@ -225,10 +218,6 @@ static struct chipset early_qrk[] __initdata = { | |||
225 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, | 218 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, |
226 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 219 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
227 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | 220 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, |
228 | #ifdef CONFIG_DMAR | ||
229 | { PCI_VENDOR_ID_INTEL, 0x29c0, | ||
230 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, | ||
231 | #endif | ||
232 | {} | 221 | {} |
233 | }; | 222 | }; |
234 | 223 | ||
@@ -284,17 +273,17 @@ static int __init check_dev_quirk(int num, int slot, int func) | |||
284 | 273 | ||
285 | void __init early_quirks(void) | 274 | void __init early_quirks(void) |
286 | { | 275 | { |
287 | int num, slot, func; | 276 | int slot, func; |
288 | 277 | ||
289 | if (!early_pci_allowed()) | 278 | if (!early_pci_allowed()) |
290 | return; | 279 | return; |
291 | 280 | ||
292 | /* Poor man's PCI discovery */ | 281 | /* Poor man's PCI discovery */ |
293 | for (num = 0; num < 32; num++) | 282 | /* Only scan the root bus */ |
294 | for (slot = 0; slot < 32; slot++) | 283 | for (slot = 0; slot < 32; slot++) |
295 | for (func = 0; func < 8; func++) { | 284 | for (func = 0; func < 8; func++) { |
296 | /* Only probe function 0 on single fn devices */ | 285 | /* Only probe function 0 on single fn devices */ |
297 | if (check_dev_quirk(num, slot, func)) | 286 | if (check_dev_quirk(0, slot, func)) |
298 | break; | 287 | break; |
299 | } | 288 | } |
300 | } | 289 | } |
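
With the outer bus loop gone, the scan touches only bus 0, matching the warning added to early_qrk[] above. A condensed kernel-context sketch of the resulting walk (read_pci_config()/read_pci_config_byte() are the <asm/pci-direct.h> helpers the real code already uses; the quirk matching itself is elided):

    static void __init scan_root_bus_sketch(void)
    {
            int slot, func;

            for (slot = 0; slot < 32; slot++) {
                    for (func = 0; func < 8; func++) {
                            u32 class = read_pci_config(0, slot, func,
                                                        PCI_CLASS_REVISION);
                            if (class == 0xffffffff)
                                    break;          /* nothing at this address */

                            /* ... match vendor/class against early_qrk[] ... */

                            if (func == 0) {
                                    u8 type = read_pci_config_byte(0, slot, func,
                                                                   PCI_HEADER_TYPE);
                                    if (!(type & 0x80))
                                            break;  /* single-function device */
                            }
                    }
            }
    }
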
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 34ad997d3834..639ad98238a2 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -13,8 +13,8 @@ | |||
13 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
14 | #include <xen/hvc-console.h> | 14 | #include <xen/hvc-console.h> |
15 | #include <asm/pci-direct.h> | 15 | #include <asm/pci-direct.h> |
16 | #include <asm/pgtable.h> | ||
17 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/pgtable.h> | ||
18 | #include <linux/usb/ehci_def.h> | 18 | #include <linux/usb/ehci_def.h> |
19 | 19 | ||
20 | /* Simple VGA output */ | 20 | /* Simple VGA output */ |
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = { | |||
875 | }; | 875 | }; |
876 | #endif | 876 | #endif |
877 | 877 | ||
878 | /* Console interface to a host file on AMD's SimNow! */ | ||
879 | |||
880 | static int simnow_fd; | ||
881 | |||
882 | enum { | ||
883 | MAGIC1 = 0xBACCD00A, | ||
884 | MAGIC2 = 0xCA110000, | ||
885 | XOPEN = 5, | ||
886 | XWRITE = 4, | ||
887 | }; | ||
888 | |||
889 | static noinline long simnow(long cmd, long a, long b, long c) | ||
890 | { | ||
891 | long ret; | ||
892 | |||
893 | asm volatile("cpuid" : | ||
894 | "=a" (ret) : | ||
895 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); | ||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | static void __init simnow_init(char *str) | ||
900 | { | ||
901 | char *fn = "klog"; | ||
902 | |||
903 | if (*str == '=') | ||
904 | fn = ++str; | ||
905 | /* error ignored */ | ||
906 | simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); | ||
907 | } | ||
908 | |||
909 | static void simnow_write(struct console *con, const char *s, unsigned n) | ||
910 | { | ||
911 | simnow(XWRITE, simnow_fd, (unsigned long)s, n); | ||
912 | } | ||
913 | |||
914 | static struct console simnow_console = { | ||
915 | .name = "simnow", | ||
916 | .write = simnow_write, | ||
917 | .flags = CON_PRINTBUFFER, | ||
918 | .index = -1, | ||
919 | }; | ||
920 | |||
921 | /* Direct interface for emergencies */ | 878 | /* Direct interface for emergencies */ |
922 | static struct console *early_console = &early_vga_console; | 879 | static struct console *early_console = &early_vga_console; |
923 | static int __initdata early_console_initialized; | 880 | static int __initdata early_console_initialized; |
@@ -929,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...) | |||
929 | va_list ap; | 886 | va_list ap; |
930 | 887 | ||
931 | va_start(ap, fmt); | 888 | va_start(ap, fmt); |
932 | n = vscnprintf(buf, 512, fmt, ap); | 889 | n = vscnprintf(buf, sizeof(buf), fmt, ap); |
933 | early_console->write(early_console, buf, n); | 890 | early_console->write(early_console, buf, n); |
934 | va_end(ap); | 891 | va_end(ap); |
935 | } | 892 | } |
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf) | |||
960 | max_ypos = boot_params.screen_info.orig_video_lines; | 917 | max_ypos = boot_params.screen_info.orig_video_lines; |
961 | current_ypos = boot_params.screen_info.orig_y; | 918 | current_ypos = boot_params.screen_info.orig_y; |
962 | early_console = &early_vga_console; | 919 | early_console = &early_vga_console; |
963 | } else if (!strncmp(buf, "simnow", 6)) { | ||
964 | simnow_init(buf + 6); | ||
965 | early_console = &simnow_console; | ||
966 | keep_early = 1; | ||
967 | #ifdef CONFIG_EARLY_PRINTK_DBGP | 920 | #ifdef CONFIG_EARLY_PRINTK_DBGP |
968 | } else if (!strncmp(buf, "dbgp", 4)) { | 921 | } else if (!strncmp(buf, "dbgp", 4)) { |
969 | if (early_dbgp_init(buf+4) < 0) | 922 | if (early_dbgp_init(buf+4) < 0) |
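
The vscnprintf() change above replaces a magic 512 with sizeof(buf), so the bound keeps tracking the array if it is ever resized. A user-space analogue of the corrected pattern (vsnprintf() standing in for the kernel's vscnprintf(), which returns bytes actually written rather than the would-be length):

    #include <stdarg.h>
    #include <stdio.h>

    static void early_printk_sketch(const char *fmt, ...)
    {
            static char buf[512];
            va_list ap;
            int n;

            va_start(ap, fmt);
            n = vsnprintf(buf, sizeof(buf), fmt, ap); /* bound follows buf[] */
            va_end(ap);

            if (n < 0)
                    return;
            if (n > (int)sizeof(buf) - 1)
                    n = sizeof(buf) - 1;    /* clamp the would-be length */
            fwrite(buf, 1, n, stderr);
    }

    int main(void)
    {
            early_printk_sketch("hello %s #%d\n", "console", 1);
            return 0;
    }
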
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe11..b205272ad394 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -366,10 +366,12 @@ void __init efi_init(void) | |||
366 | SMBIOS_TABLE_GUID)) { | 366 | SMBIOS_TABLE_GUID)) { |
367 | efi.smbios = config_tables[i].table; | 367 | efi.smbios = config_tables[i].table; |
368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); | 368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); |
369 | #ifdef CONFIG_X86_UV | ||
369 | } else if (!efi_guidcmp(config_tables[i].guid, | 370 | } else if (!efi_guidcmp(config_tables[i].guid, |
370 | UV_SYSTEM_TABLE_GUID)) { | 371 | UV_SYSTEM_TABLE_GUID)) { |
371 | efi.uv_systab = config_tables[i].table; | 372 | efi.uv_systab = config_tables[i].table; |
372 | printk(" UVsystab=0x%lx ", config_tables[i].table); | 373 | printk(" UVsystab=0x%lx ", config_tables[i].table); |
374 | #endif | ||
373 | } else if (!efi_guidcmp(config_tables[i].guid, | 375 | } else if (!efi_guidcmp(config_tables[i].guid, |
374 | HCDP_TABLE_GUID)) { | 376 | HCDP_TABLE_GUID)) { |
375 | efi.hcdp = config_tables[i].table; | 377 | efi.hcdp = config_tables[i].table; |
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215f..a4ee29127fdf 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
37 | #include <asm/efi.h> | 37 | #include <asm/efi.h> |
38 | #include <asm/cacheflush.h> | 38 | #include <asm/cacheflush.h> |
39 | #include <asm/fixmap.h> | ||
39 | 40 | ||
40 | static pgd_t save_pgd __initdata; | 41 | static pgd_t save_pgd __initdata; |
41 | static unsigned long efi_flags __initdata; | 42 | static unsigned long efi_flags __initdata; |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..e99206831459 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -30,12 +30,13 @@ | |||
30 | * 1C(%esp) - %ds | 30 | * 1C(%esp) - %ds |
31 | * 20(%esp) - %es | 31 | * 20(%esp) - %es |
32 | * 24(%esp) - %fs | 32 | * 24(%esp) - %fs |
33 | * 28(%esp) - orig_eax | 33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS |
34 | * 2C(%esp) - %eip | 34 | * 2C(%esp) - orig_eax |
35 | * 30(%esp) - %cs | 35 | * 30(%esp) - %eip |
36 | * 34(%esp) - %eflags | 36 | * 34(%esp) - %cs |
37 | * 38(%esp) - %oldesp | 37 | * 38(%esp) - %eflags |
38 | * 3C(%esp) - %oldss | 38 | * 3C(%esp) - %oldesp |
39 | * 40(%esp) - %oldss | ||
39 | * | 40 | * |
40 | * "current" is in register %ebx during any slow entries. | 41 | * "current" is in register %ebx during any slow entries. |
41 | */ | 42 | */ |
@@ -101,121 +102,221 @@ | |||
101 | #define resume_userspace_sig resume_userspace | 102 | #define resume_userspace_sig resume_userspace |
102 | #endif | 103 | #endif |
103 | 104 | ||
104 | #define SAVE_ALL \ | 105 | /* |
105 | cld; \ | 106 | * User gs save/restore |
106 | pushl %fs; \ | 107 | * |
107 | CFI_ADJUST_CFA_OFFSET 4;\ | 108 | * %gs is used for userland TLS and kernel only uses it for stack |
108 | /*CFI_REL_OFFSET fs, 0;*/\ | 109 | * canary which is required to be at %gs:20 by gcc. Read the comment |
109 | pushl %es; \ | 110 | * at the top of stackprotector.h for more info. |
110 | CFI_ADJUST_CFA_OFFSET 4;\ | 111 | * |
111 | /*CFI_REL_OFFSET es, 0;*/\ | 112 | * Local labels 98 and 99 are used. |
112 | pushl %ds; \ | 113 | */ |
113 | CFI_ADJUST_CFA_OFFSET 4;\ | 114 | #ifdef CONFIG_X86_32_LAZY_GS |
114 | /*CFI_REL_OFFSET ds, 0;*/\ | 115 | |
115 | pushl %eax; \ | 116 | /* unfortunately push/pop can't be no-op */ |
116 | CFI_ADJUST_CFA_OFFSET 4;\ | 117 | .macro PUSH_GS |
117 | CFI_REL_OFFSET eax, 0;\ | 118 | pushl $0 |
118 | pushl %ebp; \ | 119 | CFI_ADJUST_CFA_OFFSET 4 |
119 | CFI_ADJUST_CFA_OFFSET 4;\ | 120 | .endm |
120 | CFI_REL_OFFSET ebp, 0;\ | 121 | .macro POP_GS pop=0 |
121 | pushl %edi; \ | 122 | addl $(4 + \pop), %esp |
122 | CFI_ADJUST_CFA_OFFSET 4;\ | 123 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) |
123 | CFI_REL_OFFSET edi, 0;\ | 124 | .endm |
124 | pushl %esi; \ | 125 | .macro POP_GS_EX |
125 | CFI_ADJUST_CFA_OFFSET 4;\ | 126 | .endm |
126 | CFI_REL_OFFSET esi, 0;\ | 127 | |
127 | pushl %edx; \ | 128 | /* all the rest are no-op */ |
128 | CFI_ADJUST_CFA_OFFSET 4;\ | 129 | .macro PTGS_TO_GS |
129 | CFI_REL_OFFSET edx, 0;\ | 130 | .endm |
130 | pushl %ecx; \ | 131 | .macro PTGS_TO_GS_EX |
131 | CFI_ADJUST_CFA_OFFSET 4;\ | 132 | .endm |
132 | CFI_REL_OFFSET ecx, 0;\ | 133 | .macro GS_TO_REG reg |
133 | pushl %ebx; \ | 134 | .endm |
134 | CFI_ADJUST_CFA_OFFSET 4;\ | 135 | .macro REG_TO_PTGS reg |
135 | CFI_REL_OFFSET ebx, 0;\ | 136 | .endm |
136 | movl $(__USER_DS), %edx; \ | 137 | .macro SET_KERNEL_GS reg |
137 | movl %edx, %ds; \ | 138 | .endm |
138 | movl %edx, %es; \ | 139 | |
139 | movl $(__KERNEL_PERCPU), %edx; \ | 140 | #else /* CONFIG_X86_32_LAZY_GS */ |
141 | |||
142 | .macro PUSH_GS | ||
143 | pushl %gs | ||
144 | CFI_ADJUST_CFA_OFFSET 4 | ||
145 | /*CFI_REL_OFFSET gs, 0*/ | ||
146 | .endm | ||
147 | |||
148 | .macro POP_GS pop=0 | ||
149 | 98: popl %gs | ||
150 | CFI_ADJUST_CFA_OFFSET -4 | ||
151 | /*CFI_RESTORE gs*/ | ||
152 | .if \pop <> 0 | ||
153 | add $\pop, %esp | ||
154 | CFI_ADJUST_CFA_OFFSET -\pop | ||
155 | .endif | ||
156 | .endm | ||
157 | .macro POP_GS_EX | ||
158 | .pushsection .fixup, "ax" | ||
159 | 99: movl $0, (%esp) | ||
160 | jmp 98b | ||
161 | .section __ex_table, "a" | ||
162 | .align 4 | ||
163 | .long 98b, 99b | ||
164 | .popsection | ||
165 | .endm | ||
166 | |||
167 | .macro PTGS_TO_GS | ||
168 | 98: mov PT_GS(%esp), %gs | ||
169 | .endm | ||
170 | .macro PTGS_TO_GS_EX | ||
171 | .pushsection .fixup, "ax" | ||
172 | 99: movl $0, PT_GS(%esp) | ||
173 | jmp 98b | ||
174 | .section __ex_table, "a" | ||
175 | .align 4 | ||
176 | .long 98b, 99b | ||
177 | .popsection | ||
178 | .endm | ||
179 | |||
180 | .macro GS_TO_REG reg | ||
181 | movl %gs, \reg | ||
182 | /*CFI_REGISTER gs, \reg*/ | ||
183 | .endm | ||
184 | .macro REG_TO_PTGS reg | ||
185 | movl \reg, PT_GS(%esp) | ||
186 | /*CFI_REL_OFFSET gs, PT_GS*/ | ||
187 | .endm | ||
188 | .macro SET_KERNEL_GS reg | ||
189 | movl $(__KERNEL_STACK_CANARY), \reg | ||
190 | movl \reg, %gs | ||
191 | .endm | ||
192 | |||
193 | #endif /* CONFIG_X86_32_LAZY_GS */ | ||
194 | |||
195 | .macro SAVE_ALL | ||
196 | cld | ||
197 | PUSH_GS | ||
198 | pushl %fs | ||
199 | CFI_ADJUST_CFA_OFFSET 4 | ||
200 | /*CFI_REL_OFFSET fs, 0;*/ | ||
201 | pushl %es | ||
202 | CFI_ADJUST_CFA_OFFSET 4 | ||
203 | /*CFI_REL_OFFSET es, 0;*/ | ||
204 | pushl %ds | ||
205 | CFI_ADJUST_CFA_OFFSET 4 | ||
206 | /*CFI_REL_OFFSET ds, 0;*/ | ||
207 | pushl %eax | ||
208 | CFI_ADJUST_CFA_OFFSET 4 | ||
209 | CFI_REL_OFFSET eax, 0 | ||
210 | pushl %ebp | ||
211 | CFI_ADJUST_CFA_OFFSET 4 | ||
212 | CFI_REL_OFFSET ebp, 0 | ||
213 | pushl %edi | ||
214 | CFI_ADJUST_CFA_OFFSET 4 | ||
215 | CFI_REL_OFFSET edi, 0 | ||
216 | pushl %esi | ||
217 | CFI_ADJUST_CFA_OFFSET 4 | ||
218 | CFI_REL_OFFSET esi, 0 | ||
219 | pushl %edx | ||
220 | CFI_ADJUST_CFA_OFFSET 4 | ||
221 | CFI_REL_OFFSET edx, 0 | ||
222 | pushl %ecx | ||
223 | CFI_ADJUST_CFA_OFFSET 4 | ||
224 | CFI_REL_OFFSET ecx, 0 | ||
225 | pushl %ebx | ||
226 | CFI_ADJUST_CFA_OFFSET 4 | ||
227 | CFI_REL_OFFSET ebx, 0 | ||
228 | movl $(__USER_DS), %edx | ||
229 | movl %edx, %ds | ||
230 | movl %edx, %es | ||
231 | movl $(__KERNEL_PERCPU), %edx | ||
140 | movl %edx, %fs | 232 | movl %edx, %fs |
233 | SET_KERNEL_GS %edx | ||
234 | .endm | ||
141 | 235 | ||
142 | #define RESTORE_INT_REGS \ | 236 | .macro RESTORE_INT_REGS |
143 | popl %ebx; \ | 237 | popl %ebx |
144 | CFI_ADJUST_CFA_OFFSET -4;\ | 238 | CFI_ADJUST_CFA_OFFSET -4 |
145 | CFI_RESTORE ebx;\ | 239 | CFI_RESTORE ebx |
146 | popl %ecx; \ | 240 | popl %ecx |
147 | CFI_ADJUST_CFA_OFFSET -4;\ | 241 | CFI_ADJUST_CFA_OFFSET -4 |
148 | CFI_RESTORE ecx;\ | 242 | CFI_RESTORE ecx |
149 | popl %edx; \ | 243 | popl %edx |
150 | CFI_ADJUST_CFA_OFFSET -4;\ | 244 | CFI_ADJUST_CFA_OFFSET -4 |
151 | CFI_RESTORE edx;\ | 245 | CFI_RESTORE edx |
152 | popl %esi; \ | 246 | popl %esi |
153 | CFI_ADJUST_CFA_OFFSET -4;\ | 247 | CFI_ADJUST_CFA_OFFSET -4 |
154 | CFI_RESTORE esi;\ | 248 | CFI_RESTORE esi |
155 | popl %edi; \ | 249 | popl %edi |
156 | CFI_ADJUST_CFA_OFFSET -4;\ | 250 | CFI_ADJUST_CFA_OFFSET -4 |
157 | CFI_RESTORE edi;\ | 251 | CFI_RESTORE edi |
158 | popl %ebp; \ | 252 | popl %ebp |
159 | CFI_ADJUST_CFA_OFFSET -4;\ | 253 | CFI_ADJUST_CFA_OFFSET -4 |
160 | CFI_RESTORE ebp;\ | 254 | CFI_RESTORE ebp |
161 | popl %eax; \ | 255 | popl %eax |
162 | CFI_ADJUST_CFA_OFFSET -4;\ | 256 | CFI_ADJUST_CFA_OFFSET -4 |
163 | CFI_RESTORE eax | 257 | CFI_RESTORE eax |
258 | .endm | ||
164 | 259 | ||
165 | #define RESTORE_REGS \ | 260 | .macro RESTORE_REGS pop=0 |
166 | RESTORE_INT_REGS; \ | 261 | RESTORE_INT_REGS |
167 | 1: popl %ds; \ | 262 | 1: popl %ds |
168 | CFI_ADJUST_CFA_OFFSET -4;\ | 263 | CFI_ADJUST_CFA_OFFSET -4 |
169 | /*CFI_RESTORE ds;*/\ | 264 | /*CFI_RESTORE ds;*/ |
170 | 2: popl %es; \ | 265 | 2: popl %es |
171 | CFI_ADJUST_CFA_OFFSET -4;\ | 266 | CFI_ADJUST_CFA_OFFSET -4 |
172 | /*CFI_RESTORE es;*/\ | 267 | /*CFI_RESTORE es;*/ |
173 | 3: popl %fs; \ | 268 | 3: popl %fs |
174 | CFI_ADJUST_CFA_OFFSET -4;\ | 269 | CFI_ADJUST_CFA_OFFSET -4 |
175 | /*CFI_RESTORE fs;*/\ | 270 | /*CFI_RESTORE fs;*/ |
176 | .pushsection .fixup,"ax"; \ | 271 | POP_GS \pop |
177 | 4: movl $0,(%esp); \ | 272 | .pushsection .fixup, "ax" |
178 | jmp 1b; \ | 273 | 4: movl $0, (%esp) |
179 | 5: movl $0,(%esp); \ | 274 | jmp 1b |
180 | jmp 2b; \ | 275 | 5: movl $0, (%esp) |
181 | 6: movl $0,(%esp); \ | 276 | jmp 2b |
182 | jmp 3b; \ | 277 | 6: movl $0, (%esp) |
183 | .section __ex_table,"a";\ | 278 | jmp 3b |
184 | .align 4; \ | 279 | .section __ex_table, "a" |
185 | .long 1b,4b; \ | 280 | .align 4 |
186 | .long 2b,5b; \ | 281 | .long 1b, 4b |
187 | .long 3b,6b; \ | 282 | .long 2b, 5b |
283 | .long 3b, 6b | ||
188 | .popsection | 284 | .popsection |
285 | POP_GS_EX | ||
286 | .endm | ||
189 | 287 | ||
190 | #define RING0_INT_FRAME \ | 288 | .macro RING0_INT_FRAME |
191 | CFI_STARTPROC simple;\ | 289 | CFI_STARTPROC simple |
192 | CFI_SIGNAL_FRAME;\ | 290 | CFI_SIGNAL_FRAME |
193 | CFI_DEF_CFA esp, 3*4;\ | 291 | CFI_DEF_CFA esp, 3*4 |
194 | /*CFI_OFFSET cs, -2*4;*/\ | 292 | /*CFI_OFFSET cs, -2*4;*/ |
195 | CFI_OFFSET eip, -3*4 | 293 | CFI_OFFSET eip, -3*4 |
294 | .endm | ||
196 | 295 | ||
197 | #define RING0_EC_FRAME \ | 296 | .macro RING0_EC_FRAME |
198 | CFI_STARTPROC simple;\ | 297 | CFI_STARTPROC simple |
199 | CFI_SIGNAL_FRAME;\ | 298 | CFI_SIGNAL_FRAME |
200 | CFI_DEF_CFA esp, 4*4;\ | 299 | CFI_DEF_CFA esp, 4*4 |
201 | /*CFI_OFFSET cs, -2*4;*/\ | 300 | /*CFI_OFFSET cs, -2*4;*/ |
202 | CFI_OFFSET eip, -3*4 | 301 | CFI_OFFSET eip, -3*4 |
302 | .endm | ||
203 | 303 | ||
204 | #define RING0_PTREGS_FRAME \ | 304 | .macro RING0_PTREGS_FRAME |
205 | CFI_STARTPROC simple;\ | 305 | CFI_STARTPROC simple |
206 | CFI_SIGNAL_FRAME;\ | 306 | CFI_SIGNAL_FRAME |
207 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ | 307 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX |
208 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ | 308 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ |
209 | CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ | 309 | CFI_OFFSET eip, PT_EIP-PT_OLDESP |
210 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ | 310 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ |
211 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ | 311 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ |
212 | CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ | 312 | CFI_OFFSET eax, PT_EAX-PT_OLDESP |
213 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ | 313 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP |
214 | CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ | 314 | CFI_OFFSET edi, PT_EDI-PT_OLDESP |
215 | CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ | 315 | CFI_OFFSET esi, PT_ESI-PT_OLDESP |
216 | CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ | 316 | CFI_OFFSET edx, PT_EDX-PT_OLDESP |
217 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ | 317 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP |
218 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP | 318 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP |
319 | .endm | ||
219 | 320 | ||
220 | ENTRY(ret_from_fork) | 321 | ENTRY(ret_from_fork) |
221 | CFI_STARTPROC | 322 | CFI_STARTPROC |
@@ -362,6 +463,7 @@ sysenter_exit: | |||
362 | xorl %ebp,%ebp | 463 | xorl %ebp,%ebp |
363 | TRACE_IRQS_ON | 464 | TRACE_IRQS_ON |
364 | 1: mov PT_FS(%esp), %fs | 465 | 1: mov PT_FS(%esp), %fs |
466 | PTGS_TO_GS | ||
365 | ENABLE_INTERRUPTS_SYSEXIT | 467 | ENABLE_INTERRUPTS_SYSEXIT |
366 | 468 | ||
367 | #ifdef CONFIG_AUDITSYSCALL | 469 | #ifdef CONFIG_AUDITSYSCALL |
@@ -410,6 +512,7 @@ sysexit_audit: | |||
410 | .align 4 | 512 | .align 4 |
411 | .long 1b,2b | 513 | .long 1b,2b |
412 | .popsection | 514 | .popsection |
515 | PTGS_TO_GS_EX | ||
413 | ENDPROC(ia32_sysenter_target) | 516 | ENDPROC(ia32_sysenter_target) |
414 | 517 | ||
415 | # system call handler stub | 518 | # system call handler stub |
@@ -452,8 +555,7 @@ restore_all: | |||
452 | restore_nocheck: | 555 | restore_nocheck: |
453 | TRACE_IRQS_IRET | 556 | TRACE_IRQS_IRET |
454 | restore_nocheck_notrace: | 557 | restore_nocheck_notrace: |
455 | RESTORE_REGS | 558 | RESTORE_REGS 4 # skip orig_eax/error_code |
456 | addl $4, %esp # skip orig_eax/error_code | ||
457 | CFI_ADJUST_CFA_OFFSET -4 | 559 | CFI_ADJUST_CFA_OFFSET -4 |
458 | irq_return: | 560 | irq_return: |
459 | INTERRUPT_RETURN | 561 | INTERRUPT_RETURN |
@@ -595,52 +697,83 @@ syscall_badsys: | |||
595 | END(syscall_badsys) | 697 | END(syscall_badsys) |
596 | CFI_ENDPROC | 698 | CFI_ENDPROC |
597 | 699 | ||
598 | #define FIXUP_ESPFIX_STACK \ | 700 | /* |
599 | /* since we are on a wrong stack, we can't write this in C :( */ \ | 701 | * System calls that need a pt_regs pointer. |
600 | PER_CPU(gdt_page, %ebx); \ | 702 | */ |
601 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 703 | #define PTREGSCALL(name) \ |
602 | addl %esp, %eax; \ | 704 | ALIGN; \ |
603 | pushl $__KERNEL_DS; \ | 705 | ptregs_##name: \ |
604 | CFI_ADJUST_CFA_OFFSET 4; \ | 706 | leal 4(%esp),%eax; \ |
605 | pushl %eax; \ | 707 | jmp sys_##name; |
606 | CFI_ADJUST_CFA_OFFSET 4; \ | 708 | |
607 | lss (%esp), %esp; \ | 709 | PTREGSCALL(iopl) |
608 | CFI_ADJUST_CFA_OFFSET -8; | 710 | PTREGSCALL(fork) |
609 | #define UNWIND_ESPFIX_STACK \ | 711 | PTREGSCALL(clone) |
610 | movl %ss, %eax; \ | 712 | PTREGSCALL(vfork) |
611 | /* see if on espfix stack */ \ | 713 | PTREGSCALL(execve) |
612 | cmpw $__ESPFIX_SS, %ax; \ | 714 | PTREGSCALL(sigaltstack) |
613 | jne 27f; \ | 715 | PTREGSCALL(sigreturn) |
614 | movl $__KERNEL_DS, %eax; \ | 716 | PTREGSCALL(rt_sigreturn) |
615 | movl %eax, %ds; \ | 717 | PTREGSCALL(vm86) |
616 | movl %eax, %es; \ | 718 | PTREGSCALL(vm86old) |
617 | /* switch to normal stack */ \ | 719 | |
618 | FIXUP_ESPFIX_STACK; \ | 720 | .macro FIXUP_ESPFIX_STACK |
619 | 27:; | 721 | /* since we are on a wrong stack, we can't write this in C :( */ |
722 | PER_CPU(gdt_page, %ebx) | ||
723 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
724 | addl %esp, %eax | ||
725 | pushl $__KERNEL_DS | ||
726 | CFI_ADJUST_CFA_OFFSET 4 | ||
727 | pushl %eax | ||
728 | CFI_ADJUST_CFA_OFFSET 4 | ||
729 | lss (%esp), %esp | ||
730 | CFI_ADJUST_CFA_OFFSET -8 | ||
731 | .endm | ||
732 | .macro UNWIND_ESPFIX_STACK | ||
733 | movl %ss, %eax | ||
734 | /* see if on espfix stack */ | ||
735 | cmpw $__ESPFIX_SS, %ax | ||
736 | jne 27f | ||
737 | movl $__KERNEL_DS, %eax | ||
738 | movl %eax, %ds | ||
739 | movl %eax, %es | ||
740 | /* switch to normal stack */ | ||
741 | FIXUP_ESPFIX_STACK | ||
742 | 27: | ||
743 | .endm | ||
620 | 744 | ||
621 | /* | 745 | /* |
622 | * Build the entry stubs and pointer table with | 746 | * Build the entry stubs and pointer table with some assembler magic. |
623 | * some assembler magic. | 747 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
748 | * single cache line on all modern x86 implementations. | ||
624 | */ | 749 | */ |
625 | .section .rodata,"a" | 750 | .section .init.rodata,"a" |
626 | ENTRY(interrupt) | 751 | ENTRY(interrupt) |
627 | .text | 752 | .text |
628 | 753 | .p2align 5 | |
754 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
629 | ENTRY(irq_entries_start) | 755 | ENTRY(irq_entries_start) |
630 | RING0_INT_FRAME | 756 | RING0_INT_FRAME |
631 | vector=0 | 757 | vector=FIRST_EXTERNAL_VECTOR |
632 | .rept NR_VECTORS | 758 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
633 | ALIGN | 759 | .balign 32 |
634 | .if vector | 760 | .rept 7 |
761 | .if vector < NR_VECTORS | ||
762 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
635 | CFI_ADJUST_CFA_OFFSET -4 | 763 | CFI_ADJUST_CFA_OFFSET -4 |
636 | .endif | 764 | .endif |
637 | 1: pushl $~(vector) | 765 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ |
638 | CFI_ADJUST_CFA_OFFSET 4 | 766 | CFI_ADJUST_CFA_OFFSET 4 |
639 | jmp common_interrupt | 767 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
640 | .previous | 768 | jmp 2f |
769 | .endif | ||
770 | .previous | ||
641 | .long 1b | 771 | .long 1b |
642 | .text | 772 | .text |
643 | vector=vector+1 | 773 | vector=vector+1 |
774 | .endif | ||
775 | .endr | ||
776 | 2: jmp common_interrupt | ||
644 | .endr | 777 | .endr |
645 | END(irq_entries_start) | 778 | END(irq_entries_start) |
646 | 779 | ||
@@ -652,8 +785,9 @@ END(interrupt) | |||
652 | * the CPU automatically disables interrupts when executing an IRQ vector, | 785 | * the CPU automatically disables interrupts when executing an IRQ vector, |
653 | * so IRQ-flags tracing has to follow that: | 786 | * so IRQ-flags tracing has to follow that: |
654 | */ | 787 | */ |
655 | ALIGN | 788 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
656 | common_interrupt: | 789 | common_interrupt: |
790 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | ||
657 | SAVE_ALL | 791 | SAVE_ALL |
658 | TRACE_IRQS_OFF | 792 | TRACE_IRQS_OFF |
659 | movl %esp,%eax | 793 | movl %esp,%eax |
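
The stub and common_interrupt changes above are a matched pair: each stub now pushes ~vector + 0x80 (i.e. 0x7f - vector) instead of ~vector, and common_interrupt subtracts 0x80 again. Because 0x7f - vector fits in a signed byte for every vector in 0..255, each push needs only a two-byte opcode+imm8 encoding, which is what lets seven stubs share one 32-byte chunk; the addl recovers ~vector = -(vector + 1) in the [-256, -1] range the IRQ code expects. A quick C check of the arithmetic:

    #include <assert.h>

    int main(void)
    {
            int vector;

            for (vector = 0; vector < 256; vector++) {
                    int pushed = ~vector + 0x80;    /* == 0x7f - vector */

                    assert(pushed >= -128 && pushed <= 127);  /* imm8 fits */
                    assert(pushed - 0x80 == ~vector);  /* addl $-0x80 undoes it */
                    assert(~vector >= -256 && ~vector <= -1);
            }
            return 0;
    }
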
@@ -662,7 +796,7 @@ common_interrupt: | |||
662 | ENDPROC(common_interrupt) | 796 | ENDPROC(common_interrupt) |
663 | CFI_ENDPROC | 797 | CFI_ENDPROC |
664 | 798 | ||
665 | #define BUILD_INTERRUPT(name, nr) \ | 799 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
666 | ENTRY(name) \ | 800 | ENTRY(name) \ |
667 | RING0_INT_FRAME; \ | 801 | RING0_INT_FRAME; \ |
668 | pushl $~(nr); \ | 802 | pushl $~(nr); \ |
@@ -670,72 +804,15 @@ ENTRY(name) \ | |||
670 | SAVE_ALL; \ | 804 | SAVE_ALL; \ |
671 | TRACE_IRQS_OFF \ | 805 | TRACE_IRQS_OFF \ |
672 | movl %esp,%eax; \ | 806 | movl %esp,%eax; \ |
673 | call smp_##name; \ | 807 | call fn; \ |
674 | jmp ret_from_intr; \ | 808 | jmp ret_from_intr; \ |
675 | CFI_ENDPROC; \ | 809 | CFI_ENDPROC; \ |
676 | ENDPROC(name) | 810 | ENDPROC(name) |
677 | 811 | ||
678 | /* The include is where all of the SMP etc. interrupts come from */ | 812 | #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) |
679 | #include "entry_arch.h" | ||
680 | 813 | ||
681 | KPROBE_ENTRY(page_fault) | 814 | /* The include is where all of the SMP etc. interrupts come from */ |
682 | RING0_EC_FRAME | 815 | #include <asm/entry_arch.h> |
683 | pushl $do_page_fault | ||
684 | CFI_ADJUST_CFA_OFFSET 4 | ||
685 | ALIGN | ||
686 | error_code: | ||
687 | /* the function address is in %fs's slot on the stack */ | ||
688 | pushl %es | ||
689 | CFI_ADJUST_CFA_OFFSET 4 | ||
690 | /*CFI_REL_OFFSET es, 0*/ | ||
691 | pushl %ds | ||
692 | CFI_ADJUST_CFA_OFFSET 4 | ||
693 | /*CFI_REL_OFFSET ds, 0*/ | ||
694 | pushl %eax | ||
695 | CFI_ADJUST_CFA_OFFSET 4 | ||
696 | CFI_REL_OFFSET eax, 0 | ||
697 | pushl %ebp | ||
698 | CFI_ADJUST_CFA_OFFSET 4 | ||
699 | CFI_REL_OFFSET ebp, 0 | ||
700 | pushl %edi | ||
701 | CFI_ADJUST_CFA_OFFSET 4 | ||
702 | CFI_REL_OFFSET edi, 0 | ||
703 | pushl %esi | ||
704 | CFI_ADJUST_CFA_OFFSET 4 | ||
705 | CFI_REL_OFFSET esi, 0 | ||
706 | pushl %edx | ||
707 | CFI_ADJUST_CFA_OFFSET 4 | ||
708 | CFI_REL_OFFSET edx, 0 | ||
709 | pushl %ecx | ||
710 | CFI_ADJUST_CFA_OFFSET 4 | ||
711 | CFI_REL_OFFSET ecx, 0 | ||
712 | pushl %ebx | ||
713 | CFI_ADJUST_CFA_OFFSET 4 | ||
714 | CFI_REL_OFFSET ebx, 0 | ||
715 | cld | ||
716 | pushl %fs | ||
717 | CFI_ADJUST_CFA_OFFSET 4 | ||
718 | /*CFI_REL_OFFSET fs, 0*/ | ||
719 | movl $(__KERNEL_PERCPU), %ecx | ||
720 | movl %ecx, %fs | ||
721 | UNWIND_ESPFIX_STACK | ||
722 | popl %ecx | ||
723 | CFI_ADJUST_CFA_OFFSET -4 | ||
724 | /*CFI_REGISTER es, ecx*/ | ||
725 | movl PT_FS(%esp), %edi # get the function address | ||
726 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
727 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
728 | mov %ecx, PT_FS(%esp) | ||
729 | /*CFI_REL_OFFSET fs, ES*/ | ||
730 | movl $(__USER_DS), %ecx | ||
731 | movl %ecx, %ds | ||
732 | movl %ecx, %es | ||
733 | TRACE_IRQS_OFF | ||
734 | movl %esp,%eax # pt_regs pointer | ||
735 | call *%edi | ||
736 | jmp ret_from_exception | ||
737 | CFI_ENDPROC | ||
738 | KPROBE_END(page_fault) | ||
739 | 816 | ||
740 | ENTRY(coprocessor_error) | 817 | ENTRY(coprocessor_error) |
741 | RING0_INT_FRAME | 818 | RING0_INT_FRAME |
@@ -767,140 +844,6 @@ ENTRY(device_not_available) | |||
767 | CFI_ENDPROC | 844 | CFI_ENDPROC |
768 | END(device_not_available) | 845 | END(device_not_available) |
769 | 846 | ||
770 | /* | ||
771 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
772 | * that sets up the real kernel stack. Check here, since we can't | ||
773 | * allow the wrong stack to be used. | ||
774 | * | ||
775 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
776 | * already pushed 3 words if it hits on the sysenter instruction: | ||
777 | * eflags, cs and eip. | ||
778 | * | ||
779 | * We just load the right stack, and push the three (known) values | ||
780 | * by hand onto the new stack - while updating the return eip past | ||
781 | * the instruction that would have done it for sysenter. | ||
782 | */ | ||
783 | #define FIX_STACK(offset, ok, label) \ | ||
784 | cmpw $__KERNEL_CS,4(%esp); \ | ||
785 | jne ok; \ | ||
786 | label: \ | ||
787 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
788 | CFI_DEF_CFA esp, 0; \ | ||
789 | CFI_UNDEFINED eip; \ | ||
790 | pushfl; \ | ||
791 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
792 | pushl $__KERNEL_CS; \ | ||
793 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
794 | pushl $sysenter_past_esp; \ | ||
795 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
796 | CFI_REL_OFFSET eip, 0 | ||
797 | |||
798 | KPROBE_ENTRY(debug) | ||
799 | RING0_INT_FRAME | ||
800 | cmpl $ia32_sysenter_target,(%esp) | ||
801 | jne debug_stack_correct | ||
802 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
803 | debug_stack_correct: | ||
804 | pushl $-1 # mark this as an int | ||
805 | CFI_ADJUST_CFA_OFFSET 4 | ||
806 | SAVE_ALL | ||
807 | TRACE_IRQS_OFF | ||
808 | xorl %edx,%edx # error code 0 | ||
809 | movl %esp,%eax # pt_regs pointer | ||
810 | call do_debug | ||
811 | jmp ret_from_exception | ||
812 | CFI_ENDPROC | ||
813 | KPROBE_END(debug) | ||
814 | |||
815 | /* | ||
816 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
817 | * a debug fault, and the debug fault hasn't yet been able to | ||
818 | * clear up the stack. So we first check whether we got an | ||
819 | * NMI on the sysenter entry path, but after that we need to | ||
820 | * check whether we got an NMI on the debug path where the debug | ||
821 | * fault happened on the sysenter path. | ||
822 | */ | ||
823 | KPROBE_ENTRY(nmi) | ||
824 | RING0_INT_FRAME | ||
825 | pushl %eax | ||
826 | CFI_ADJUST_CFA_OFFSET 4 | ||
827 | movl %ss, %eax | ||
828 | cmpw $__ESPFIX_SS, %ax | ||
829 | popl %eax | ||
830 | CFI_ADJUST_CFA_OFFSET -4 | ||
831 | je nmi_espfix_stack | ||
832 | cmpl $ia32_sysenter_target,(%esp) | ||
833 | je nmi_stack_fixup | ||
834 | pushl %eax | ||
835 | CFI_ADJUST_CFA_OFFSET 4 | ||
836 | movl %esp,%eax | ||
837 | /* Do not access memory above the end of our stack page, | ||
838 | * it might not exist. | ||
839 | */ | ||
840 | andl $(THREAD_SIZE-1),%eax | ||
841 | cmpl $(THREAD_SIZE-20),%eax | ||
842 | popl %eax | ||
843 | CFI_ADJUST_CFA_OFFSET -4 | ||
844 | jae nmi_stack_correct | ||
845 | cmpl $ia32_sysenter_target,12(%esp) | ||
846 | je nmi_debug_stack_check | ||
847 | nmi_stack_correct: | ||
848 | /* We have a RING0_INT_FRAME here */ | ||
849 | pushl %eax | ||
850 | CFI_ADJUST_CFA_OFFSET 4 | ||
851 | SAVE_ALL | ||
852 | TRACE_IRQS_OFF | ||
853 | xorl %edx,%edx # zero error code | ||
854 | movl %esp,%eax # pt_regs pointer | ||
855 | call do_nmi | ||
856 | jmp restore_nocheck_notrace | ||
857 | CFI_ENDPROC | ||
858 | |||
859 | nmi_stack_fixup: | ||
860 | RING0_INT_FRAME | ||
861 | FIX_STACK(12,nmi_stack_correct, 1) | ||
862 | jmp nmi_stack_correct | ||
863 | |||
864 | nmi_debug_stack_check: | ||
865 | /* We have a RING0_INT_FRAME here */ | ||
866 | cmpw $__KERNEL_CS,16(%esp) | ||
867 | jne nmi_stack_correct | ||
868 | cmpl $debug,(%esp) | ||
869 | jb nmi_stack_correct | ||
870 | cmpl $debug_esp_fix_insn,(%esp) | ||
871 | ja nmi_stack_correct | ||
872 | FIX_STACK(24,nmi_stack_correct, 1) | ||
873 | jmp nmi_stack_correct | ||
874 | |||
875 | nmi_espfix_stack: | ||
876 | /* We have a RING0_INT_FRAME here. | ||
877 | * | ||
878 | * create the pointer to lss back | ||
879 | */ | ||
880 | pushl %ss | ||
881 | CFI_ADJUST_CFA_OFFSET 4 | ||
882 | pushl %esp | ||
883 | CFI_ADJUST_CFA_OFFSET 4 | ||
884 | addw $4, (%esp) | ||
885 | /* copy the iret frame of 12 bytes */ | ||
886 | .rept 3 | ||
887 | pushl 16(%esp) | ||
888 | CFI_ADJUST_CFA_OFFSET 4 | ||
889 | .endr | ||
890 | pushl %eax | ||
891 | CFI_ADJUST_CFA_OFFSET 4 | ||
892 | SAVE_ALL | ||
893 | TRACE_IRQS_OFF | ||
894 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
895 | xorl %edx,%edx # zero error code | ||
896 | call do_nmi | ||
897 | RESTORE_REGS | ||
898 | lss 12+4(%esp), %esp # back to espfix stack | ||
899 | CFI_ADJUST_CFA_OFFSET -24 | ||
900 | jmp irq_return | ||
901 | CFI_ENDPROC | ||
902 | KPROBE_END(nmi) | ||
903 | |||
904 | #ifdef CONFIG_PARAVIRT | 847 | #ifdef CONFIG_PARAVIRT |
905 | ENTRY(native_iret) | 848 | ENTRY(native_iret) |
906 | iret | 849 | iret |
@@ -916,19 +859,6 @@ ENTRY(native_irq_enable_sysexit) | |||
916 | END(native_irq_enable_sysexit) | 859 | END(native_irq_enable_sysexit) |
917 | #endif | 860 | #endif |
918 | 861 | ||
919 | KPROBE_ENTRY(int3) | ||
920 | RING0_INT_FRAME | ||
921 | pushl $-1 # mark this as an int | ||
922 | CFI_ADJUST_CFA_OFFSET 4 | ||
923 | SAVE_ALL | ||
924 | TRACE_IRQS_OFF | ||
925 | xorl %edx,%edx # zero error code | ||
926 | movl %esp,%eax # pt_regs pointer | ||
927 | call do_int3 | ||
928 | jmp ret_from_exception | ||
929 | CFI_ENDPROC | ||
930 | KPROBE_END(int3) | ||
931 | |||
932 | ENTRY(overflow) | 862 | ENTRY(overflow) |
933 | RING0_INT_FRAME | 863 | RING0_INT_FRAME |
934 | pushl $0 | 864 | pushl $0 |
@@ -993,14 +923,6 @@ ENTRY(stack_segment) | |||
993 | CFI_ENDPROC | 923 | CFI_ENDPROC |
994 | END(stack_segment) | 924 | END(stack_segment) |
995 | 925 | ||
996 | KPROBE_ENTRY(general_protection) | ||
997 | RING0_EC_FRAME | ||
998 | pushl $do_general_protection | ||
999 | CFI_ADJUST_CFA_OFFSET 4 | ||
1000 | jmp error_code | ||
1001 | CFI_ENDPROC | ||
1002 | KPROBE_END(general_protection) | ||
1003 | |||
1004 | ENTRY(alignment_check) | 926 | ENTRY(alignment_check) |
1005 | RING0_EC_FRAME | 927 | RING0_EC_FRAME |
1006 | pushl $do_alignment_check | 928 | pushl $do_alignment_check |
@@ -1051,6 +973,7 @@ ENTRY(kernel_thread_helper) | |||
1051 | push %eax | 973 | push %eax |
1052 | CFI_ADJUST_CFA_OFFSET 4 | 974 | CFI_ADJUST_CFA_OFFSET 4 |
1053 | call do_exit | 975 | call do_exit |
976 | ud2 # padding for call trace | ||
1054 | CFI_ENDPROC | 977 | CFI_ENDPROC |
1055 | ENDPROC(kernel_thread_helper) | 978 | ENDPROC(kernel_thread_helper) |
1056 | 979 | ||
@@ -1157,6 +1080,9 @@ ENTRY(mcount) | |||
1157 | END(mcount) | 1080 | END(mcount) |
1158 | 1081 | ||
1159 | ENTRY(ftrace_caller) | 1082 | ENTRY(ftrace_caller) |
1083 | cmpl $0, function_trace_stop | ||
1084 | jne ftrace_stub | ||
1085 | |||
1160 | pushl %eax | 1086 | pushl %eax |
1161 | pushl %ecx | 1087 | pushl %ecx |
1162 | pushl %edx | 1088 | pushl %edx |
@@ -1171,6 +1097,11 @@ ftrace_call: | |||
1171 | popl %edx | 1097 | popl %edx |
1172 | popl %ecx | 1098 | popl %ecx |
1173 | popl %eax | 1099 | popl %eax |
1100 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1101 | .globl ftrace_graph_call | ||
1102 | ftrace_graph_call: | ||
1103 | jmp ftrace_stub | ||
1104 | #endif | ||
1174 | 1105 | ||
1175 | .globl ftrace_stub | 1106 | .globl ftrace_stub |
1176 | ftrace_stub: | 1107 | ftrace_stub: |
@@ -1180,8 +1111,18 @@ END(ftrace_caller) | |||
1180 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1111 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1181 | 1112 | ||
1182 | ENTRY(mcount) | 1113 | ENTRY(mcount) |
1114 | cmpl $0, function_trace_stop | ||
1115 | jne ftrace_stub | ||
1116 | |||
1183 | cmpl $ftrace_stub, ftrace_trace_function | 1117 | cmpl $ftrace_stub, ftrace_trace_function |
1184 | jnz trace | 1118 | jnz trace |
1119 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1120 | cmpl $ftrace_stub, ftrace_graph_return | ||
1121 | jnz ftrace_graph_caller | ||
1122 | |||
1123 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry | ||
1124 | jnz ftrace_graph_caller | ||
1125 | #endif | ||
1185 | .globl ftrace_stub | 1126 | .globl ftrace_stub |
1186 | ftrace_stub: | 1127 | ftrace_stub: |
1187 | ret | 1128 | ret |
@@ -1200,13 +1141,265 @@ trace: | |||
1200 | popl %edx | 1141 | popl %edx |
1201 | popl %ecx | 1142 | popl %ecx |
1202 | popl %eax | 1143 | popl %eax |
1203 | |||
1204 | jmp ftrace_stub | 1144 | jmp ftrace_stub |
1205 | END(mcount) | 1145 | END(mcount) |
1206 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1146 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1207 | #endif /* CONFIG_FUNCTION_TRACER */ | 1147 | #endif /* CONFIG_FUNCTION_TRACER */ |
1208 | 1148 | ||
1149 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1150 | ENTRY(ftrace_graph_caller) | ||
1151 | cmpl $0, function_trace_stop | ||
1152 | jne ftrace_stub | ||
1153 | |||
1154 | pushl %eax | ||
1155 | pushl %ecx | ||
1156 | pushl %edx | ||
1157 | movl 0xc(%esp), %edx | ||
1158 | lea 0x4(%ebp), %eax | ||
1159 | subl $MCOUNT_INSN_SIZE, %edx | ||
1160 | call prepare_ftrace_return | ||
1161 | popl %edx | ||
1162 | popl %ecx | ||
1163 | popl %eax | ||
1164 | ret | ||
1165 | END(ftrace_graph_caller) | ||
1166 | |||
1167 | .globl return_to_handler | ||
1168 | return_to_handler: | ||
1169 | pushl $0 | ||
1170 | pushl %eax | ||
1171 | pushl %ecx | ||
1172 | pushl %edx | ||
1173 | call ftrace_return_to_handler | ||
1174 | movl %eax, 0xc(%esp) | ||
1175 | popl %edx | ||
1176 | popl %ecx | ||
1177 | popl %eax | ||
1178 | ret | ||
1179 | #endif | ||
1180 | |||
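
ftrace_graph_caller and return_to_handler above implement return-address hijacking: prepare_ftrace_return() stashes the real return address on a per-task shadow stack and patches the on-stack slot to point at return_to_handler, so the traced function "returns" into the trampoline, which asks ftrace_return_to_handler() for the saved address and jumps home through it. A toy user-space model of the bookkeeping (entirely illustrative; the real shadow stack lives in task_struct):

    #include <stdio.h>

    static unsigned long shadow[32];        /* per-task return stack */
    static int depth;

    static void prepare_return_model(unsigned long real_ret)
    {
            shadow[depth++] = real_ret;     /* entry hook: stash the caller */
            /* kernel: overwrite the stack slot with return_to_handler */
    }

    static unsigned long return_to_handler_model(void)
    {
            unsigned long ret = shadow[--depth];

            printf("traced function returned, resuming at %#lx\n", ret);
            return ret;                     /* kernel: jump via this value */
    }

    int main(void)
    {
            prepare_return_model(0x1234);
            return_to_handler_model();
            return 0;
    }
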
1209 | .section .rodata,"a" | 1181 | .section .rodata,"a" |
1210 | #include "syscall_table_32.S" | 1182 | #include "syscall_table_32.S" |
1211 | 1183 | ||
1212 | syscall_table_size=(.-sys_call_table) | 1184 | syscall_table_size=(.-sys_call_table) |
1185 | |||
1186 | /* | ||
1187 | * Some functions should be protected against kprobes | ||
1188 | */ | ||
1189 | .pushsection .kprobes.text, "ax" | ||
1190 | |||
1191 | ENTRY(page_fault) | ||
1192 | RING0_EC_FRAME | ||
1193 | pushl $do_page_fault | ||
1194 | CFI_ADJUST_CFA_OFFSET 4 | ||
1195 | ALIGN | ||
1196 | error_code: | ||
1197 | /* the function address is in %gs's slot on the stack */ | ||
1198 | pushl %fs | ||
1199 | CFI_ADJUST_CFA_OFFSET 4 | ||
1200 | /*CFI_REL_OFFSET fs, 0*/ | ||
1201 | pushl %es | ||
1202 | CFI_ADJUST_CFA_OFFSET 4 | ||
1203 | /*CFI_REL_OFFSET es, 0*/ | ||
1204 | pushl %ds | ||
1205 | CFI_ADJUST_CFA_OFFSET 4 | ||
1206 | /*CFI_REL_OFFSET ds, 0*/ | ||
1207 | pushl %eax | ||
1208 | CFI_ADJUST_CFA_OFFSET 4 | ||
1209 | CFI_REL_OFFSET eax, 0 | ||
1210 | pushl %ebp | ||
1211 | CFI_ADJUST_CFA_OFFSET 4 | ||
1212 | CFI_REL_OFFSET ebp, 0 | ||
1213 | pushl %edi | ||
1214 | CFI_ADJUST_CFA_OFFSET 4 | ||
1215 | CFI_REL_OFFSET edi, 0 | ||
1216 | pushl %esi | ||
1217 | CFI_ADJUST_CFA_OFFSET 4 | ||
1218 | CFI_REL_OFFSET esi, 0 | ||
1219 | pushl %edx | ||
1220 | CFI_ADJUST_CFA_OFFSET 4 | ||
1221 | CFI_REL_OFFSET edx, 0 | ||
1222 | pushl %ecx | ||
1223 | CFI_ADJUST_CFA_OFFSET 4 | ||
1224 | CFI_REL_OFFSET ecx, 0 | ||
1225 | pushl %ebx | ||
1226 | CFI_ADJUST_CFA_OFFSET 4 | ||
1227 | CFI_REL_OFFSET ebx, 0 | ||
1228 | cld | ||
1229 | movl $(__KERNEL_PERCPU), %ecx | ||
1230 | movl %ecx, %fs | ||
1231 | UNWIND_ESPFIX_STACK | ||
1232 | GS_TO_REG %ecx | ||
1233 | movl PT_GS(%esp), %edi # get the function address | ||
1234 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
1235 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
1236 | REG_TO_PTGS %ecx | ||
1237 | SET_KERNEL_GS %ecx | ||
1238 | movl $(__USER_DS), %ecx | ||
1239 | movl %ecx, %ds | ||
1240 | movl %ecx, %es | ||
1241 | TRACE_IRQS_OFF | ||
1242 | movl %esp,%eax # pt_regs pointer | ||
1243 | call *%edi | ||
1244 | jmp ret_from_exception | ||
1245 | CFI_ENDPROC | ||
1246 | END(page_fault) | ||
1247 | |||
1248 | /* | ||
1249 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
1250 | * that sets up the real kernel stack. Check here, since we can't | ||
1251 | * allow the wrong stack to be used. | ||
1252 | * | ||
1253 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
1254 | * already pushed 3 words if it hits on the sysenter instruction: | ||
1255 | * eflags, cs and eip. | ||
1256 | * | ||
1257 | * We just load the right stack, and push the three (known) values | ||
1258 | * by hand onto the new stack - while updating the return eip past | ||
1259 | * the instruction that would have done it for sysenter. | ||
1260 | */ | ||
1261 | .macro FIX_STACK offset ok label | ||
1262 | cmpw $__KERNEL_CS, 4(%esp) | ||
1263 | jne \ok | ||
1264 | \label: | ||
1265 | movl TSS_sysenter_sp0 + \offset(%esp), %esp | ||
1266 | CFI_DEF_CFA esp, 0 | ||
1267 | CFI_UNDEFINED eip | ||
1268 | pushfl | ||
1269 | CFI_ADJUST_CFA_OFFSET 4 | ||
1270 | pushl $__KERNEL_CS | ||
1271 | CFI_ADJUST_CFA_OFFSET 4 | ||
1272 | pushl $sysenter_past_esp | ||
1273 | CFI_ADJUST_CFA_OFFSET 4 | ||
1274 | CFI_REL_OFFSET eip, 0 | ||
1275 | .endm | ||
1276 | |||
1277 | ENTRY(debug) | ||
1278 | RING0_INT_FRAME | ||
1279 | cmpl $ia32_sysenter_target,(%esp) | ||
1280 | jne debug_stack_correct | ||
1281 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | ||
1282 | debug_stack_correct: | ||
1283 | pushl $-1 # mark this as an int | ||
1284 | CFI_ADJUST_CFA_OFFSET 4 | ||
1285 | SAVE_ALL | ||
1286 | TRACE_IRQS_OFF | ||
1287 | xorl %edx,%edx # error code 0 | ||
1288 | movl %esp,%eax # pt_regs pointer | ||
1289 | call do_debug | ||
1290 | jmp ret_from_exception | ||
1291 | CFI_ENDPROC | ||
1292 | END(debug) | ||
1293 | |||
1294 | /* | ||
1295 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
1296 | * a debug fault, and the debug fault hasn't yet been able to | ||
1297 | * clear up the stack. So we first check whether we got an | ||
1298 | * NMI on the sysenter entry path, but after that we need to | ||
1299 | * check whether we got an NMI on the debug path where the debug | ||
1300 | * fault happened on the sysenter path. | ||
1301 | */ | ||
1302 | ENTRY(nmi) | ||
1303 | RING0_INT_FRAME | ||
1304 | pushl %eax | ||
1305 | CFI_ADJUST_CFA_OFFSET 4 | ||
1306 | movl %ss, %eax | ||
1307 | cmpw $__ESPFIX_SS, %ax | ||
1308 | popl %eax | ||
1309 | CFI_ADJUST_CFA_OFFSET -4 | ||
1310 | je nmi_espfix_stack | ||
1311 | cmpl $ia32_sysenter_target,(%esp) | ||
1312 | je nmi_stack_fixup | ||
1313 | pushl %eax | ||
1314 | CFI_ADJUST_CFA_OFFSET 4 | ||
1315 | movl %esp,%eax | ||
1316 | /* Do not access memory above the end of our stack page, | ||
1317 | * it might not exist. | ||
1318 | */ | ||
1319 | andl $(THREAD_SIZE-1),%eax | ||
1320 | cmpl $(THREAD_SIZE-20),%eax | ||
1321 | popl %eax | ||
1322 | CFI_ADJUST_CFA_OFFSET -4 | ||
1323 | jae nmi_stack_correct | ||
1324 | cmpl $ia32_sysenter_target,12(%esp) | ||
1325 | je nmi_debug_stack_check | ||
1326 | nmi_stack_correct: | ||
1327 | /* We have a RING0_INT_FRAME here */ | ||
1328 | pushl %eax | ||
1329 | CFI_ADJUST_CFA_OFFSET 4 | ||
1330 | SAVE_ALL | ||
1331 | xorl %edx,%edx # zero error code | ||
1332 | movl %esp,%eax # pt_regs pointer | ||
1333 | call do_nmi | ||
1334 | jmp restore_nocheck_notrace | ||
1335 | CFI_ENDPROC | ||
1336 | |||
1337 | nmi_stack_fixup: | ||
1338 | RING0_INT_FRAME | ||
1339 | FIX_STACK 12, nmi_stack_correct, 1 | ||
1340 | jmp nmi_stack_correct | ||
1341 | |||
1342 | nmi_debug_stack_check: | ||
1343 | /* We have a RING0_INT_FRAME here */ | ||
1344 | cmpw $__KERNEL_CS,16(%esp) | ||
1345 | jne nmi_stack_correct | ||
1346 | cmpl $debug,(%esp) | ||
1347 | jb nmi_stack_correct | ||
1348 | cmpl $debug_esp_fix_insn,(%esp) | ||
1349 | ja nmi_stack_correct | ||
1350 | FIX_STACK 24, nmi_stack_correct, 1 | ||
1351 | jmp nmi_stack_correct | ||
1352 | |||
1353 | nmi_espfix_stack: | ||
1354 | /* We have a RING0_INT_FRAME here. | ||
1355 | * | ||
1356 | * create the pointer to lss back | ||
1357 | */ | ||
1358 | pushl %ss | ||
1359 | CFI_ADJUST_CFA_OFFSET 4 | ||
1360 | pushl %esp | ||
1361 | CFI_ADJUST_CFA_OFFSET 4 | ||
1362 | addw $4, (%esp) | ||
1363 | /* copy the iret frame of 12 bytes */ | ||
1364 | .rept 3 | ||
1365 | pushl 16(%esp) | ||
1366 | CFI_ADJUST_CFA_OFFSET 4 | ||
1367 | .endr | ||
1368 | pushl %eax | ||
1369 | CFI_ADJUST_CFA_OFFSET 4 | ||
1370 | SAVE_ALL | ||
1371 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
1372 | xorl %edx,%edx # zero error code | ||
1373 | call do_nmi | ||
1374 | RESTORE_REGS | ||
1375 | lss 12+4(%esp), %esp # back to espfix stack | ||
1376 | CFI_ADJUST_CFA_OFFSET -24 | ||
1377 | jmp irq_return | ||
1378 | CFI_ENDPROC | ||
1379 | END(nmi) | ||
1380 | |||
1381 | ENTRY(int3) | ||
1382 | RING0_INT_FRAME | ||
1383 | pushl $-1 # mark this as an int | ||
1384 | CFI_ADJUST_CFA_OFFSET 4 | ||
1385 | SAVE_ALL | ||
1386 | TRACE_IRQS_OFF | ||
1387 | xorl %edx,%edx # zero error code | ||
1388 | movl %esp,%eax # pt_regs pointer | ||
1389 | call do_int3 | ||
1390 | jmp ret_from_exception | ||
1391 | CFI_ENDPROC | ||
1392 | END(int3) | ||
1393 | |||
1394 | ENTRY(general_protection) | ||
1395 | RING0_EC_FRAME | ||
1396 | pushl $do_general_protection | ||
1397 | CFI_ADJUST_CFA_OFFSET 4 | ||
1398 | jmp error_code | ||
1399 | CFI_ENDPROC | ||
1400 | END(general_protection) | ||
1401 | |||
1402 | /* | ||
1403 | * End of kprobes section | ||
1404 | */ | ||
1405 | .popsection | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..fbcf96b295ff 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -11,15 +11,15 @@ | |||
11 | * | 11 | * |
12 | * NOTE: This code handles signal recognition, which happens every time | 12 | * NOTE: This code handles signal recognition, which happens every time |
13 | * after an interrupt and after each system call. | 13 | * after an interrupt and after each system call. |
14 | * | 14 | * |
15 | * Normal syscalls and interrupts don't save a full stack frame; this is | 15 | * Normal syscalls and interrupts don't save a full stack frame; this is |
16 | * only done for syscall tracing, signals or fork/exec et al. | 16 | * only done for syscall tracing, signals or fork/exec et al. |
17 | * | 17 | * |
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers up to R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
22 | * - full stack frame: Like partial stack frame, but all registers saved. | 22 | * - full stack frame: Like partial stack frame, but all registers saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
25 | * - CFI macros are used to generate dwarf2 unwind information for better | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | #include <asm/percpu.h> | ||
55 | 56 | ||
56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
57 | #include <linux/elf-em.h> | 58 | #include <linux/elf-em.h> |
@@ -60,7 +61,6 @@ | |||
60 | #define __AUDIT_ARCH_LE 0x40000000 | 61 | #define __AUDIT_ARCH_LE 0x40000000 |
61 | 62 | ||
62 | .code64 | 63 | .code64 |
63 | |||
64 | #ifdef CONFIG_FUNCTION_TRACER | 64 | #ifdef CONFIG_FUNCTION_TRACER |
65 | #ifdef CONFIG_DYNAMIC_FTRACE | 65 | #ifdef CONFIG_DYNAMIC_FTRACE |
66 | ENTRY(mcount) | 66 | ENTRY(mcount) |
@@ -68,16 +68,10 @@ ENTRY(mcount) | |||
68 | END(mcount) | 68 | END(mcount) |
69 | 69 | ||
70 | ENTRY(ftrace_caller) | 70 | ENTRY(ftrace_caller) |
71 | cmpl $0, function_trace_stop | ||
72 | jne ftrace_stub | ||
71 | 73 | ||
72 | /* taken from glibc */ | 74 | MCOUNT_SAVE_FRAME |
73 | subq $0x38, %rsp | ||
74 | movq %rax, (%rsp) | ||
75 | movq %rcx, 8(%rsp) | ||
76 | movq %rdx, 16(%rsp) | ||
77 | movq %rsi, 24(%rsp) | ||
78 | movq %rdi, 32(%rsp) | ||
79 | movq %r8, 40(%rsp) | ||
80 | movq %r9, 48(%rsp) | ||
81 | 75 | ||
82 | movq 0x38(%rsp), %rdi | 76 | movq 0x38(%rsp), %rdi |
83 | movq 8(%rbp), %rsi | 77 | movq 8(%rbp), %rsi |
@@ -87,14 +81,13 @@ ENTRY(ftrace_caller) | |||
87 | ftrace_call: | 81 | ftrace_call: |
88 | call ftrace_stub | 82 | call ftrace_stub |
89 | 83 | ||
90 | movq 48(%rsp), %r9 | 84 | MCOUNT_RESTORE_FRAME |
91 | movq 40(%rsp), %r8 | 85 | |
92 | movq 32(%rsp), %rdi | 86 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
93 | movq 24(%rsp), %rsi | 87 | .globl ftrace_graph_call |
94 | movq 16(%rsp), %rdx | 88 | ftrace_graph_call: |
95 | movq 8(%rsp), %rcx | 89 | jmp ftrace_stub |
96 | movq (%rsp), %rax | 90 | #endif |
97 | addq $0x38, %rsp | ||
98 | 91 | ||
99 | .globl ftrace_stub | 92 | .globl ftrace_stub |
100 | ftrace_stub: | 93 | ftrace_stub: |
@@ -103,15 +96,63 @@ END(ftrace_caller) | |||
103 | 96 | ||
104 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 97 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
105 | ENTRY(mcount) | 98 | ENTRY(mcount) |
99 | cmpl $0, function_trace_stop | ||
100 | jne ftrace_stub | ||
101 | |||
106 | cmpq $ftrace_stub, ftrace_trace_function | 102 | cmpq $ftrace_stub, ftrace_trace_function |
107 | jnz trace | 103 | jnz trace |
104 | |||
105 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
106 | cmpq $ftrace_stub, ftrace_graph_return | ||
107 | jnz ftrace_graph_caller | ||
108 | |||
109 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | ||
110 | jnz ftrace_graph_caller | ||
111 | #endif | ||
112 | |||
108 | .globl ftrace_stub | 113 | .globl ftrace_stub |
109 | ftrace_stub: | 114 | ftrace_stub: |
110 | retq | 115 | retq |
111 | 116 | ||
112 | trace: | 117 | trace: |
113 | /* taken from glibc */ | 118 | MCOUNT_SAVE_FRAME |
114 | subq $0x38, %rsp | 119 | |
120 | movq 0x38(%rsp), %rdi | ||
121 | movq 8(%rbp), %rsi | ||
122 | subq $MCOUNT_INSN_SIZE, %rdi | ||
123 | |||
124 | call *ftrace_trace_function | ||
125 | |||
126 | MCOUNT_RESTORE_FRAME | ||
127 | |||
128 | jmp ftrace_stub | ||
129 | END(mcount) | ||
130 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
131 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
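The !CONFIG_DYNAMIC_FTRACE mcount on the new side is a compare-and-dispatch routine. A C sketch of the same logic (the variable names mirror kernel/trace/ftrace.c; the 5-byte MCOUNT_INSN_SIZE is an assumption):

    /* Sketch of the non-dynamic mcount dispatch above. */
    #define MCOUNT_INSN_SIZE 5   /* assumed size of the "call mcount" insn */

    typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);

    extern int function_trace_stop;
    extern ftrace_func_t ftrace_trace_function;

    void ftrace_stub(unsigned long ip, unsigned long parent_ip) { }

    static void mcount_model(unsigned long self_ret, unsigned long parent_ip)
    {
            if (function_trace_stop)
                    return;                 /* tracing globally stopped */
            if (ftrace_trace_function != ftrace_stub)
                    /* report the call site, not mcount's return address */
                    ftrace_trace_function(self_ret - MCOUNT_INSN_SIZE,
                                          parent_ip);
            /* else: fall through to the graph-tracer checks / plain ret */
    }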
132 | |||
133 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
134 | ENTRY(ftrace_graph_caller) | ||
135 | cmpl $0, function_trace_stop | ||
136 | jne ftrace_stub | ||
137 | |||
138 | MCOUNT_SAVE_FRAME | ||
139 | |||
140 | leaq 8(%rbp), %rdi | ||
141 | movq 0x38(%rsp), %rsi | ||
142 | subq $MCOUNT_INSN_SIZE, %rsi | ||
143 | |||
144 | call prepare_ftrace_return | ||
145 | |||
146 | MCOUNT_RESTORE_FRAME | ||
147 | |||
148 | retq | ||
149 | END(ftrace_graph_caller) | ||
150 | |||
151 | |||
152 | .globl return_to_handler | ||
153 | return_to_handler: | ||
154 | subq $80, %rsp | ||
155 | |||
115 | movq %rax, (%rsp) | 156 | movq %rax, (%rsp) |
116 | movq %rcx, 8(%rsp) | 157 | movq %rcx, 8(%rsp) |
117 | movq %rdx, 16(%rsp) | 158 | movq %rdx, 16(%rsp) |
@@ -119,13 +160,14 @@ trace: | |||
119 | movq %rdi, 32(%rsp) | 160 | movq %rdi, 32(%rsp) |
120 | movq %r8, 40(%rsp) | 161 | movq %r8, 40(%rsp) |
121 | movq %r9, 48(%rsp) | 162 | movq %r9, 48(%rsp) |
163 | movq %r10, 56(%rsp) | ||
164 | movq %r11, 64(%rsp) | ||
122 | 165 | ||
123 | movq 0x38(%rsp), %rdi | 166 | call ftrace_return_to_handler |
124 | movq 8(%rbp), %rsi | ||
125 | subq $MCOUNT_INSN_SIZE, %rdi | ||
126 | |||
127 | call *ftrace_trace_function | ||
128 | 167 | ||
168 | movq %rax, 72(%rsp) | ||
169 | movq 64(%rsp), %r11 | ||
170 | movq 56(%rsp), %r10 | ||
129 | movq 48(%rsp), %r9 | 171 | movq 48(%rsp), %r9 |
130 | movq 40(%rsp), %r8 | 172 | movq 40(%rsp), %r8 |
131 | movq 32(%rsp), %rdi | 173 | movq 32(%rsp), %rdi |
@@ -133,16 +175,14 @@ trace: | |||
133 | movq 16(%rsp), %rdx | 175 | movq 16(%rsp), %rdx |
134 | movq 8(%rsp), %rcx | 176 | movq 8(%rsp), %rcx |
135 | movq (%rsp), %rax | 177 | movq (%rsp), %rax |
136 | addq $0x38, %rsp | 178 | addq $72, %rsp |
179 | retq | ||
180 | #endif | ||
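return_to_handler above is the back half of a return-address hijack. In C terms the pair behaves roughly as follows; this is a sketch, and push_return_trace()/pop_return_trace() are hypothetical stand-ins for the real bookkeeping in kernel/trace/:

    /* Sketch of the graph tracer's return-address hijack. */
    extern void return_to_handler(void);
    extern void push_return_trace(unsigned long ret, unsigned long func);
    extern unsigned long pop_return_trace(void);

    void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
    {
            /* Called via ftrace_graph_caller: remember where the traced
             * function would have returned, then redirect it so that it
             * "returns" into return_to_handler instead.                 */
            push_return_trace(*parent, self_addr);
            *parent = (unsigned long)return_to_handler;
    }

    unsigned long ftrace_return_to_handler(void)
    {
            /* Called from return_to_handler above; the stub stores the
             * result at 72(%rsp) and reaches it via the final retq.     */
            return pop_return_trace();
    }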
137 | 181 | ||
138 | jmp ftrace_stub | ||
139 | END(mcount) | ||
140 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
141 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
142 | 182 | ||
143 | #ifndef CONFIG_PREEMPT | 183 | #ifndef CONFIG_PREEMPT |
144 | #define retint_kernel retint_restore_args | 184 | #define retint_kernel retint_restore_args |
145 | #endif | 185 | #endif |
146 | 186 | ||
147 | #ifdef CONFIG_PARAVIRT | 187 | #ifdef CONFIG_PARAVIRT |
148 | ENTRY(native_usergs_sysret64) | 188 | ENTRY(native_usergs_sysret64) |
@@ -161,29 +201,29 @@ ENTRY(native_usergs_sysret64) | |||
161 | .endm | 201 | .endm |
162 | 202 | ||
163 | /* | 203 | /* |
164 | * C code is not supposed to know about undefined top of stack. Every time | 204 | * C code is not supposed to know about undefined top of stack. Every time |
165 | * a C function with a pt_regs argument is called from the SYSCALL based | 205 | * a C function with a pt_regs argument is called from the SYSCALL based |
166 | * fast path FIXUP_TOP_OF_STACK is needed. | 206 | * fast path FIXUP_TOP_OF_STACK is needed. |
167 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 207 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
168 | * manipulation. | 208 | * manipulation. |
169 | */ | 209 | */ |
170 | 210 | ||
171 | /* %rsp:at FRAMEEND */ | 211 | /* %rsp:at FRAMEEND */ |
172 | .macro FIXUP_TOP_OF_STACK tmp | 212 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
173 | movq %gs:pda_oldrsp,\tmp | 213 | movq PER_CPU_VAR(old_rsp),\tmp |
174 | movq \tmp,RSP(%rsp) | 214 | movq \tmp,RSP+\offset(%rsp) |
175 | movq $__USER_DS,SS(%rsp) | 215 | movq $__USER_DS,SS+\offset(%rsp) |
176 | movq $__USER_CS,CS(%rsp) | 216 | movq $__USER_CS,CS+\offset(%rsp) |
177 | movq $-1,RCX(%rsp) | 217 | movq $-1,RCX+\offset(%rsp) |
178 | movq R11(%rsp),\tmp /* get eflags */ | 218 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
179 | movq \tmp,EFLAGS(%rsp) | 219 | movq \tmp,EFLAGS+\offset(%rsp) |
180 | .endm | 220 | .endm |
181 | 221 | ||
182 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | 222 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
183 | movq RSP-\offset(%rsp),\tmp | 223 | movq RSP+\offset(%rsp),\tmp |
184 | movq \tmp,%gs:pda_oldrsp | 224 | movq \tmp,PER_CPU_VAR(old_rsp) |
185 | movq EFLAGS-\offset(%rsp),\tmp | 225 | movq EFLAGS+\offset(%rsp),\tmp |
186 | movq \tmp,R11-\offset(%rsp) | 226 | movq \tmp,R11+\offset(%rsp) |
187 | .endm | 227 | .endm |
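In C terms these two macros just synthesize, and later write back, the pt_regs fields that the SYSCALL fast path never saved. A sketch, assuming the selector values and using the unified pt_regs field names (old_rsp is the per-cpu slot this patch introduces):

    /* Sketch of FIXUP_TOP_OF_STACK / RESTORE_TOP_OF_STACK. */
    struct pt_regs_sketch {            /* only the fields touched here */
            unsigned long r11, cx, flags, sp, cs, ss;
    };
    #define __USER_DS 0x2b             /* assumed selector values */
    #define __USER_CS 0x33

    static void fixup_top_of_stack(struct pt_regs_sketch *regs,
                                   unsigned long old_rsp)
    {
            regs->sp    = old_rsp;       /* user rsp stashed at entry     */
            regs->ss    = __USER_DS;
            regs->cs    = __USER_CS;
            regs->cx    = (unsigned long)-1; /* rcx is clobbered by sysret */
            regs->flags = regs->r11;     /* r11 carries eflags for sysret */
    }

    static void restore_top_of_stack(struct pt_regs_sketch *regs,
                                     unsigned long *old_rsp)
    {
            *old_rsp  = regs->sp;        /* back into the per-cpu slot    */
            regs->r11 = regs->flags;     /* back into sysret's registers  */
    }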
188 | 228 | ||
189 | .macro FAKE_STACK_FRAME child_rip | 229 | .macro FAKE_STACK_FRAME child_rip |
@@ -195,7 +235,7 @@ ENTRY(native_usergs_sysret64) | |||
195 | pushq %rax /* rsp */ | 235 | pushq %rax /* rsp */ |
196 | CFI_ADJUST_CFA_OFFSET 8 | 236 | CFI_ADJUST_CFA_OFFSET 8 |
197 | CFI_REL_OFFSET rsp,0 | 237 | CFI_REL_OFFSET rsp,0 |
198 | pushq $(1<<9) /* eflags - interrupts on */ | 238 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ |
199 | CFI_ADJUST_CFA_OFFSET 8 | 239 | CFI_ADJUST_CFA_OFFSET 8 |
200 | /*CFI_REL_OFFSET rflags,0*/ | 240 | /*CFI_REL_OFFSET rflags,0*/ |
201 | pushq $__KERNEL_CS /* cs */ | 241 | pushq $__KERNEL_CS /* cs */ |
@@ -213,62 +253,187 @@ ENTRY(native_usergs_sysret64) | |||
213 | CFI_ADJUST_CFA_OFFSET -(6*8) | 253 | CFI_ADJUST_CFA_OFFSET -(6*8) |
214 | .endm | 254 | .endm |
215 | 255 | ||
216 | .macro CFI_DEFAULT_STACK start=1 | 256 | /* |
257 | * initial frame state for interrupts (and exceptions without error code) | ||
258 | */ | ||
259 | .macro EMPTY_FRAME start=1 offset=0 | ||
217 | .if \start | 260 | .if \start |
218 | CFI_STARTPROC simple | 261 | CFI_STARTPROC simple |
219 | CFI_SIGNAL_FRAME | 262 | CFI_SIGNAL_FRAME |
220 | CFI_DEF_CFA rsp,SS+8 | 263 | CFI_DEF_CFA rsp,8+\offset |
221 | .else | 264 | .else |
222 | CFI_DEF_CFA_OFFSET SS+8 | 265 | CFI_DEF_CFA_OFFSET 8+\offset |
223 | .endif | 266 | .endif |
224 | CFI_REL_OFFSET r15,R15 | ||
225 | CFI_REL_OFFSET r14,R14 | ||
226 | CFI_REL_OFFSET r13,R13 | ||
227 | CFI_REL_OFFSET r12,R12 | ||
228 | CFI_REL_OFFSET rbp,RBP | ||
229 | CFI_REL_OFFSET rbx,RBX | ||
230 | CFI_REL_OFFSET r11,R11 | ||
231 | CFI_REL_OFFSET r10,R10 | ||
232 | CFI_REL_OFFSET r9,R9 | ||
233 | CFI_REL_OFFSET r8,R8 | ||
234 | CFI_REL_OFFSET rax,RAX | ||
235 | CFI_REL_OFFSET rcx,RCX | ||
236 | CFI_REL_OFFSET rdx,RDX | ||
237 | CFI_REL_OFFSET rsi,RSI | ||
238 | CFI_REL_OFFSET rdi,RDI | ||
239 | CFI_REL_OFFSET rip,RIP | ||
240 | /*CFI_REL_OFFSET cs,CS*/ | ||
241 | /*CFI_REL_OFFSET rflags,EFLAGS*/ | ||
242 | CFI_REL_OFFSET rsp,RSP | ||
243 | /*CFI_REL_OFFSET ss,SS*/ | ||
244 | .endm | 267 | .endm |
268 | |||
269 | /* | ||
270 | * initial frame state for interrupts (and exceptions without error code) | ||
271 | */ | ||
272 | .macro INTR_FRAME start=1 offset=0 | ||
273 | EMPTY_FRAME \start, SS+8+\offset-RIP | ||
274 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | ||
275 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | ||
276 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | ||
277 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | ||
278 | CFI_REL_OFFSET rip, RIP+\offset-RIP | ||
279 | .endm | ||
280 | |||
281 | /* | ||
282 | * initial frame state for exceptions with error code (and interrupts | ||
283 | * with vector already pushed) | ||
284 | */ | ||
285 | .macro XCPT_FRAME start=1 offset=0 | ||
286 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | ||
287 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | ||
288 | .endm | ||
289 | |||
290 | /* | ||
291 | * frame that enables calling into C. | ||
292 | */ | ||
293 | .macro PARTIAL_FRAME start=1 offset=0 | ||
294 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | ||
295 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | ||
296 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | ||
297 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | ||
298 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | ||
299 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | ||
300 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | ||
301 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | ||
302 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | ||
303 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | ||
304 | .endm | ||
305 | |||
245 | /* | 306 | /* |
246 | * A newly forked process directly context switches into this. | 307 | * frame that enables passing a complete pt_regs to a C function. |
247 | */ | 308 | */ |
248 | /* rdi: prev */ | 309 | .macro DEFAULT_FRAME start=1 offset=0 |
310 | PARTIAL_FRAME \start, R11+\offset-R15 | ||
311 | CFI_REL_OFFSET rbx, RBX+\offset | ||
312 | CFI_REL_OFFSET rbp, RBP+\offset | ||
313 | CFI_REL_OFFSET r12, R12+\offset | ||
314 | CFI_REL_OFFSET r13, R13+\offset | ||
315 | CFI_REL_OFFSET r14, R14+\offset | ||
316 | CFI_REL_OFFSET r15, R15+\offset | ||
317 | .endm | ||
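All the offset arithmetic in these frame macros (e.g. SS+8+\offset-RIP) is relative to the x86-64 pt_regs save order, which for reference is:

    /* Reference sketch of the frame the offsets above index into. */
    struct pt_regs_layout {
            /* "rest", saved only for full frames (DEFAULT_FRAME)   */
            unsigned long r15, r14, r13, r12, bp, bx;
            /* partial frame, saved by save_args (PARTIAL_FRAME)    */
            unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
            /* error code or negated vector number (XCPT_FRAME)     */
            unsigned long orig_ax;
            /* hardware iret frame (INTR_FRAME)                     */
            unsigned long ip, cs, flags, sp, ss;
    };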
318 | |||
319 | /* save partial stack frame */ | ||
320 | ENTRY(save_args) | ||
321 | XCPT_FRAME | ||
322 | cld | ||
323 | movq_cfi rdi, RDI+16-ARGOFFSET | ||
324 | movq_cfi rsi, RSI+16-ARGOFFSET | ||
325 | movq_cfi rdx, RDX+16-ARGOFFSET | ||
326 | movq_cfi rcx, RCX+16-ARGOFFSET | ||
327 | movq_cfi rax, RAX+16-ARGOFFSET | ||
328 | movq_cfi r8, R8+16-ARGOFFSET | ||
329 | movq_cfi r9, R9+16-ARGOFFSET | ||
330 | movq_cfi r10, R10+16-ARGOFFSET | ||
331 | movq_cfi r11, R11+16-ARGOFFSET | ||
332 | |||
333 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | ||
334 | movq_cfi rbp, 8 /* push %rbp */ | ||
335 | leaq 8(%rsp), %rbp /* mov %rsp, %rbp */ | ||
336 | testl $3, CS(%rdi) | ||
337 | je 1f | ||
338 | SWAPGS | ||
339 | /* | ||
340 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
341 | * or not. While this is essentially redundant with preempt_count it is | ||
342 | * a little cheaper to use a separate per-cpu counter (short of | ||
343 | * moving irq_enter into assembly, which would be too much work) | ||
344 | */ | ||
345 | 1: incl PER_CPU_VAR(irq_count) | ||
346 | jne 2f | ||
347 | popq_cfi %rax /* move return address... */ | ||
348 | mov PER_CPU_VAR(irq_stack_ptr),%rsp | ||
349 | EMPTY_FRAME 0 | ||
350 | pushq_cfi %rbp /* backlink for unwinder */ | ||
351 | pushq_cfi %rax /* ... to the new stack */ | ||
352 | /* | ||
353 | * We entered an interrupt context - irqs are off: | ||
354 | */ | ||
355 | 2: TRACE_IRQS_OFF | ||
356 | ret | ||
357 | CFI_ENDPROC | ||
358 | END(save_args) | ||
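The incl/jne pair above works because irq_count starts at -1, so the increment yields zero exactly on the first, non-nested interrupt. A C sketch with hypothetical per-cpu accessors:

    /* Sketch of save_args' interrupt-stack switch. */
    struct cpu_state {                 /* hypothetical per-cpu bookkeeping */
            long irq_count;            /* assumed to start at -1           */
            unsigned long sp, saved_sp, irq_stack_ptr;
    };

    static void enter_irq_stack(struct cpu_state *cpu)
    {
            if (++cpu->irq_count == 0) {
                    cpu->saved_sp = cpu->sp;      /* backlink for unwinder */
                    cpu->sp = cpu->irq_stack_ptr; /* move to the IRQ stack */
            }
            /* irqs are off from here on (TRACE_IRQS_OFF above) */
    }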
359 | |||
360 | ENTRY(save_rest) | ||
361 | PARTIAL_FRAME 1 REST_SKIP+8 | ||
362 | movq 5*8+16(%rsp), %r11 /* save return address */ | ||
363 | movq_cfi rbx, RBX+16 | ||
364 | movq_cfi rbp, RBP+16 | ||
365 | movq_cfi r12, R12+16 | ||
366 | movq_cfi r13, R13+16 | ||
367 | movq_cfi r14, R14+16 | ||
368 | movq_cfi r15, R15+16 | ||
369 | movq %r11, 8(%rsp) /* return address */ | ||
370 | FIXUP_TOP_OF_STACK %r11, 16 | ||
371 | ret | ||
372 | CFI_ENDPROC | ||
373 | END(save_rest) | ||
374 | |||
375 | /* save complete stack frame */ | ||
376 | ENTRY(save_paranoid) | ||
377 | XCPT_FRAME 1 RDI+8 | ||
378 | cld | ||
379 | movq_cfi rdi, RDI+8 | ||
380 | movq_cfi rsi, RSI+8 | ||
381 | movq_cfi rdx, RDX+8 | ||
382 | movq_cfi rcx, RCX+8 | ||
383 | movq_cfi rax, RAX+8 | ||
384 | movq_cfi r8, R8+8 | ||
385 | movq_cfi r9, R9+8 | ||
386 | movq_cfi r10, R10+8 | ||
387 | movq_cfi r11, R11+8 | ||
388 | movq_cfi rbx, RBX+8 | ||
389 | movq_cfi rbp, RBP+8 | ||
390 | movq_cfi r12, R12+8 | ||
391 | movq_cfi r13, R13+8 | ||
392 | movq_cfi r14, R14+8 | ||
393 | movq_cfi r15, R15+8 | ||
394 | movl $1,%ebx | ||
395 | movl $MSR_GS_BASE,%ecx | ||
396 | rdmsr | ||
397 | testl %edx,%edx | ||
398 | js 1f /* negative -> in kernel */ | ||
399 | SWAPGS | ||
400 | xorl %ebx,%ebx | ||
401 | 1: ret | ||
402 | CFI_ENDPROC | ||
403 | END(save_paranoid) | ||
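save_paranoid's rdmsr test works because kernel GS bases are top-half (negative) canonical addresses. A C sketch of the same probe, using rdmsr() as declared in <asm/msr.h>:

    /* Sketch: decide whether swapgs is needed, as save_paranoid does. */
    static int kernel_gs_already_live(void)
    {
            unsigned int lo, hi;

            rdmsr(MSR_GS_BASE, lo, hi);
            /* Kernel addresses have the top bit set, so a negative high
             * half means the kernel gs base is already loaded.          */
            return (int)hi < 0;
    }
    /* caller: if (!kernel_gs_already_live()) { swapgs; %ebx = 0; } */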
404 | |||
405 | /* | ||
406 | * A newly forked process directly context switches into this address. | ||
407 | * | ||
408 | * rdi: prev task we switched from | ||
409 | */ | ||
249 | ENTRY(ret_from_fork) | 410 | ENTRY(ret_from_fork) |
250 | CFI_DEFAULT_STACK | 411 | DEFAULT_FRAME |
412 | |||
413 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | ||
414 | |||
251 | push kernel_eflags(%rip) | 415 | push kernel_eflags(%rip) |
252 | CFI_ADJUST_CFA_OFFSET 8 | 416 | CFI_ADJUST_CFA_OFFSET 8 |
253 | popf # reset kernel eflags | 417 | popf # reset kernel eflags |
254 | CFI_ADJUST_CFA_OFFSET -8 | 418 | CFI_ADJUST_CFA_OFFSET -8 |
255 | call schedule_tail | 419 | |
420 | call schedule_tail # rdi: 'prev' task parameter | ||
421 | |||
256 | GET_THREAD_INFO(%rcx) | 422 | GET_THREAD_INFO(%rcx) |
257 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 423 | |
258 | jnz rff_trace | 424 | CFI_REMEMBER_STATE |
259 | rff_action: | ||
260 | RESTORE_REST | 425 | RESTORE_REST |
261 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 426 | |
427 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
262 | je int_ret_from_sys_call | 428 | je int_ret_from_sys_call |
263 | testl $_TIF_IA32,TI_flags(%rcx) | 429 | |
430 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | ||
264 | jnz int_ret_from_sys_call | 431 | jnz int_ret_from_sys_call |
265 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 432 | |
266 | jmp ret_from_sys_call | 433 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
267 | rff_trace: | 434 | jmp ret_from_sys_call # go to the SYSRET fastpath |
268 | movq %rsp,%rdi | 435 | |
269 | call syscall_trace_leave | 436 | CFI_RESTORE_STATE |
270 | GET_THREAD_INFO(%rcx) | ||
271 | jmp rff_action | ||
272 | CFI_ENDPROC | 437 | CFI_ENDPROC |
273 | END(ret_from_fork) | 438 | END(ret_from_fork) |
274 | 439 | ||
@@ -278,20 +443,20 @@ END(ret_from_fork) | |||
278 | * SYSCALL does not save anything on the stack and does not change the | 443 | * SYSCALL does not save anything on the stack and does not change the |
279 | * stack pointer. | 444 | * stack pointer. |
280 | */ | 445 | */ |
281 | 446 | ||
282 | /* | 447 | /* |
283 | * Register setup: | 448 | * Register setup: |
284 | * rax system call number | 449 | * rax system call number |
285 | * rdi arg0 | 450 | * rdi arg0 |
286 | * rcx return address for syscall/sysret, C arg3 | 451 | * rcx return address for syscall/sysret, C arg3 |
287 | * rsi arg1 | 452 | * rsi arg1 |
288 | * rdx arg2 | 453 | * rdx arg2 |
289 | * r10 arg3 (--> moved to rcx for C) | 454 | * r10 arg3 (--> moved to rcx for C) |
290 | * r8 arg4 | 455 | * r8 arg4 |
291 | * r9 arg5 | 456 | * r9 arg5 |
292 | * r11 eflags for syscall/sysret, temporary for C | 457 | * r11 eflags for syscall/sysret, temporary for C |
293 | * r12-r15,rbp,rbx saved by C code, not touched. | 458 | * r12-r15,rbp,rbx saved by C code, not touched. |
294 | * | 459 | * |
295 | * Interrupts are off on entry. | 460 | * Interrupts are off on entry. |
296 | * Only called from user space. | 461 | * Only called from user space. |
297 | * | 462 | * |
@@ -301,12 +466,12 @@ END(ret_from_fork) | |||
301 | * When the user can change the frames, always force IRET. That is | 466 | * When the user can change the frames, always force IRET. That is |
302 | * because IRET deals with non-canonical addresses better. SYSRET has | 467 | * because IRET deals with non-canonical addresses better. SYSRET has |
303 | * trouble with them due to bugs in both AMD and Intel CPUs. | 468 | * trouble with them due to bugs in both AMD and Intel CPUs. |
304 | */ | 469 | */ |
305 | 470 | ||
306 | ENTRY(system_call) | 471 | ENTRY(system_call) |
307 | CFI_STARTPROC simple | 472 | CFI_STARTPROC simple |
308 | CFI_SIGNAL_FRAME | 473 | CFI_SIGNAL_FRAME |
309 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 474 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET |
310 | CFI_REGISTER rip,rcx | 475 | CFI_REGISTER rip,rcx |
311 | /*CFI_REGISTER rflags,r11*/ | 476 | /*CFI_REGISTER rflags,r11*/ |
312 | SWAPGS_UNSAFE_STACK | 477 | SWAPGS_UNSAFE_STACK |
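The register convention documented above is exactly what user space supplies before SYSCALL. A minimal standalone illustration, assuming only that __NR_write is 1 on x86-64:

    /* build: cc -o demo demo.c ; writes "hi\n" via a raw SYSCALL */
    static long raw_write(int fd, const void *buf, unsigned long len)
    {
            long ret;
            asm volatile("syscall"
                         : "=a" (ret)
                         : "a" (1L),              /* rax: __NR_write  */
                           "D" ((long)fd),        /* rdi: arg0        */
                           "S" (buf),             /* rsi: arg1        */
                           "d" (len)              /* rdx: arg2        */
                         : "rcx", "r11", "memory"); /* kernel clobbers */
            return ret;
    }

    int main(void)
    {
            return raw_write(1, "hi\n", 3) == 3 ? 0 : 1;
    }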
@@ -317,15 +482,15 @@ ENTRY(system_call) | |||
317 | */ | 482 | */ |
318 | ENTRY(system_call_after_swapgs) | 483 | ENTRY(system_call_after_swapgs) |
319 | 484 | ||
320 | movq %rsp,%gs:pda_oldrsp | 485 | movq %rsp,PER_CPU_VAR(old_rsp) |
321 | movq %gs:pda_kernelstack,%rsp | 486 | movq PER_CPU_VAR(kernel_stack),%rsp |
322 | /* | 487 | /* |
323 | * No need to follow this irqs off/on section - it's straight | 488 | * No need to follow this irqs off/on section - it's straight |
324 | * and short: | 489 | * and short: |
325 | */ | 490 | */ |
326 | ENABLE_INTERRUPTS(CLBR_NONE) | 491 | ENABLE_INTERRUPTS(CLBR_NONE) |
327 | SAVE_ARGS 8,1 | 492 | SAVE_ARGS 8,1 |
328 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 493 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
329 | movq %rcx,RIP-ARGOFFSET(%rsp) | 494 | movq %rcx,RIP-ARGOFFSET(%rsp) |
330 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 495 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
331 | GET_THREAD_INFO(%rcx) | 496 | GET_THREAD_INFO(%rcx) |
@@ -339,19 +504,19 @@ system_call_fastpath: | |||
339 | movq %rax,RAX-ARGOFFSET(%rsp) | 504 | movq %rax,RAX-ARGOFFSET(%rsp) |
340 | /* | 505 | /* |
341 | * Syscall return path ending with SYSRET (fast path) | 506 | * Syscall return path ending with SYSRET (fast path) |
342 | * Has incomplete stack frame and undefined top of stack. | 507 | * Has incomplete stack frame and undefined top of stack. |
343 | */ | 508 | */ |
344 | ret_from_sys_call: | 509 | ret_from_sys_call: |
345 | movl $_TIF_ALLWORK_MASK,%edi | 510 | movl $_TIF_ALLWORK_MASK,%edi |
346 | /* edi: flagmask */ | 511 | /* edi: flagmask */ |
347 | sysret_check: | 512 | sysret_check: |
348 | LOCKDEP_SYS_EXIT | 513 | LOCKDEP_SYS_EXIT |
349 | GET_THREAD_INFO(%rcx) | 514 | GET_THREAD_INFO(%rcx) |
350 | DISABLE_INTERRUPTS(CLBR_NONE) | 515 | DISABLE_INTERRUPTS(CLBR_NONE) |
351 | TRACE_IRQS_OFF | 516 | TRACE_IRQS_OFF |
352 | movl TI_flags(%rcx),%edx | 517 | movl TI_flags(%rcx),%edx |
353 | andl %edi,%edx | 518 | andl %edi,%edx |
354 | jnz sysret_careful | 519 | jnz sysret_careful |
355 | CFI_REMEMBER_STATE | 520 | CFI_REMEMBER_STATE |
356 | /* | 521 | /* |
357 | * sysretq will re-enable interrupts: | 522 | * sysretq will re-enable interrupts: |
@@ -361,12 +526,12 @@ sysret_check: | |||
361 | CFI_REGISTER rip,rcx | 526 | CFI_REGISTER rip,rcx |
362 | RESTORE_ARGS 0,-ARG_SKIP,1 | 527 | RESTORE_ARGS 0,-ARG_SKIP,1 |
363 | /*CFI_REGISTER rflags,r11*/ | 528 | /*CFI_REGISTER rflags,r11*/ |
364 | movq %gs:pda_oldrsp, %rsp | 529 | movq PER_CPU_VAR(old_rsp), %rsp |
365 | USERGS_SYSRET64 | 530 | USERGS_SYSRET64 |
366 | 531 | ||
367 | CFI_RESTORE_STATE | 532 | CFI_RESTORE_STATE |
368 | /* Handle reschedules */ | 533 | /* Handle reschedules */ |
369 | /* edx: work, edi: workmask */ | 534 | /* edx: work, edi: workmask */ |
370 | sysret_careful: | 535 | sysret_careful: |
371 | bt $TIF_NEED_RESCHED,%edx | 536 | bt $TIF_NEED_RESCHED,%edx |
372 | jnc sysret_signal | 537 | jnc sysret_signal |
@@ -379,7 +544,7 @@ sysret_careful: | |||
379 | CFI_ADJUST_CFA_OFFSET -8 | 544 | CFI_ADJUST_CFA_OFFSET -8 |
380 | jmp sysret_check | 545 | jmp sysret_check |
381 | 546 | ||
382 | /* Handle a signal */ | 547 | /* Handle a signal */ |
383 | sysret_signal: | 548 | sysret_signal: |
384 | TRACE_IRQS_ON | 549 | TRACE_IRQS_ON |
385 | ENABLE_INTERRUPTS(CLBR_NONE) | 550 | ENABLE_INTERRUPTS(CLBR_NONE) |
@@ -388,17 +553,20 @@ sysret_signal: | |||
388 | jc sysret_audit | 553 | jc sysret_audit |
389 | #endif | 554 | #endif |
390 | /* edx: work flags (arg3) */ | 555 | /* edx: work flags (arg3) */ |
391 | leaq do_notify_resume(%rip),%rax | ||
392 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 556 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
393 | xorl %esi,%esi # oldset -> arg2 | 557 | xorl %esi,%esi # oldset -> arg2 |
394 | call ptregscall_common | 558 | SAVE_REST |
559 | FIXUP_TOP_OF_STACK %r11 | ||
560 | call do_notify_resume | ||
561 | RESTORE_TOP_OF_STACK %r11 | ||
562 | RESTORE_REST | ||
395 | movl $_TIF_WORK_MASK,%edi | 563 | movl $_TIF_WORK_MASK,%edi |
396 | /* Use IRET because user could have changed frame. This | 564 | /* Use IRET because user could have changed frame. This |
397 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 565 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
398 | DISABLE_INTERRUPTS(CLBR_NONE) | 566 | DISABLE_INTERRUPTS(CLBR_NONE) |
399 | TRACE_IRQS_OFF | 567 | TRACE_IRQS_OFF |
400 | jmp int_with_check | 568 | jmp int_with_check |
401 | 569 | ||
402 | badsys: | 570 | badsys: |
403 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 571 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
404 | jmp ret_from_sys_call | 572 | jmp ret_from_sys_call |
@@ -437,7 +605,7 @@ sysret_audit: | |||
437 | #endif /* CONFIG_AUDITSYSCALL */ | 605 | #endif /* CONFIG_AUDITSYSCALL */ |
438 | 606 | ||
439 | /* Do syscall tracing */ | 607 | /* Do syscall tracing */ |
440 | tracesys: | 608 | tracesys: |
441 | #ifdef CONFIG_AUDITSYSCALL | 609 | #ifdef CONFIG_AUDITSYSCALL |
442 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 610 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
443 | jz auditsys | 611 | jz auditsys |
@@ -460,8 +628,8 @@ tracesys: | |||
460 | call *sys_call_table(,%rax,8) | 628 | call *sys_call_table(,%rax,8) |
461 | movq %rax,RAX-ARGOFFSET(%rsp) | 629 | movq %rax,RAX-ARGOFFSET(%rsp) |
462 | /* Use IRET because user could have changed frame */ | 630 | /* Use IRET because user could have changed frame */ |
463 | 631 | ||
464 | /* | 632 | /* |
465 | * Syscall return path ending with IRET. | 633 | * Syscall return path ending with IRET. |
466 | * Has correct top of stack, but partial stack frame. | 634 | * Has correct top of stack, but partial stack frame. |
467 | */ | 635 | */ |
@@ -505,18 +673,18 @@ int_very_careful: | |||
505 | TRACE_IRQS_ON | 673 | TRACE_IRQS_ON |
506 | ENABLE_INTERRUPTS(CLBR_NONE) | 674 | ENABLE_INTERRUPTS(CLBR_NONE) |
507 | SAVE_REST | 675 | SAVE_REST |
508 | /* Check for syscall exit trace */ | 676 | /* Check for syscall exit trace */ |
509 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 677 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
510 | jz int_signal | 678 | jz int_signal |
511 | pushq %rdi | 679 | pushq %rdi |
512 | CFI_ADJUST_CFA_OFFSET 8 | 680 | CFI_ADJUST_CFA_OFFSET 8 |
513 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 681 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
514 | call syscall_trace_leave | 682 | call syscall_trace_leave |
515 | popq %rdi | 683 | popq %rdi |
516 | CFI_ADJUST_CFA_OFFSET -8 | 684 | CFI_ADJUST_CFA_OFFSET -8 |
517 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 685 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
518 | jmp int_restore_rest | 686 | jmp int_restore_rest |
519 | 687 | ||
520 | int_signal: | 688 | int_signal: |
521 | testl $_TIF_DO_NOTIFY_MASK,%edx | 689 | testl $_TIF_DO_NOTIFY_MASK,%edx |
522 | jz 1f | 690 | jz 1f |
@@ -531,22 +699,24 @@ int_restore_rest: | |||
531 | jmp int_with_check | 699 | jmp int_with_check |
532 | CFI_ENDPROC | 700 | CFI_ENDPROC |
533 | END(system_call) | 701 | END(system_call) |
534 | 702 | ||
535 | /* | 703 | /* |
536 | * Certain special system calls need to save a complete stack frame. | 704 | * Certain special system calls need to save a complete stack frame. |
537 | */ | 705 | */ |
538 | |||
539 | .macro PTREGSCALL label,func,arg | 706 | .macro PTREGSCALL label,func,arg |
540 | .globl \label | 707 | ENTRY(\label) |
541 | \label: | 708 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
542 | leaq \func(%rip),%rax | 709 | subq $REST_SKIP, %rsp |
543 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 710 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
544 | jmp ptregscall_common | 711 | call save_rest |
712 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
713 | leaq 8(%rsp), \arg /* pt_regs pointer */ | ||
714 | call \func | ||
715 | jmp ptregscall_common | ||
716 | CFI_ENDPROC | ||
545 | END(\label) | 717 | END(\label) |
546 | .endm | 718 | .endm |
547 | 719 | ||
548 | CFI_STARTPROC | ||
549 | |||
550 | PTREGSCALL stub_clone, sys_clone, %r8 | 720 | PTREGSCALL stub_clone, sys_clone, %r8 |
551 | PTREGSCALL stub_fork, sys_fork, %rdi | 721 | PTREGSCALL stub_fork, sys_fork, %rdi |
552 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 722 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
@@ -554,25 +724,18 @@ END(\label) | |||
554 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 724 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
555 | 725 | ||
556 | ENTRY(ptregscall_common) | 726 | ENTRY(ptregscall_common) |
557 | popq %r11 | 727 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
558 | CFI_ADJUST_CFA_OFFSET -8 | 728 | RESTORE_TOP_OF_STACK %r11, 8 |
559 | CFI_REGISTER rip, r11 | 729 | movq_cfi_restore R15+8, r15 |
560 | SAVE_REST | 730 | movq_cfi_restore R14+8, r14 |
561 | movq %r11, %r15 | 731 | movq_cfi_restore R13+8, r13 |
562 | CFI_REGISTER rip, r15 | 732 | movq_cfi_restore R12+8, r12 |
563 | FIXUP_TOP_OF_STACK %r11 | 733 | movq_cfi_restore RBP+8, rbp |
564 | call *%rax | 734 | movq_cfi_restore RBX+8, rbx |
565 | RESTORE_TOP_OF_STACK %r11 | 735 | ret $REST_SKIP /* pop extended registers */ |
566 | movq %r15, %r11 | ||
567 | CFI_REGISTER rip, r11 | ||
568 | RESTORE_REST | ||
569 | pushq %r11 | ||
570 | CFI_ADJUST_CFA_OFFSET 8 | ||
571 | CFI_REL_OFFSET rip, 0 | ||
572 | ret | ||
573 | CFI_ENDPROC | 736 | CFI_ENDPROC |
574 | END(ptregscall_common) | 737 | END(ptregscall_common) |
575 | 738 | ||
576 | ENTRY(stub_execve) | 739 | ENTRY(stub_execve) |
577 | CFI_STARTPROC | 740 | CFI_STARTPROC |
578 | popq %r11 | 741 | popq %r11 |
@@ -588,11 +751,11 @@ ENTRY(stub_execve) | |||
588 | jmp int_ret_from_sys_call | 751 | jmp int_ret_from_sys_call |
589 | CFI_ENDPROC | 752 | CFI_ENDPROC |
590 | END(stub_execve) | 753 | END(stub_execve) |
591 | 754 | ||
592 | /* | 755 | /* |
593 | * sigreturn is special because it needs to restore all registers on return. | 756 | * sigreturn is special because it needs to restore all registers on return. |
594 | * This cannot be done with SYSRET, so use the IRET return path instead. | 757 | * This cannot be done with SYSRET, so use the IRET return path instead. |
595 | */ | 758 | */ |
596 | ENTRY(stub_rt_sigreturn) | 759 | ENTRY(stub_rt_sigreturn) |
597 | CFI_STARTPROC | 760 | CFI_STARTPROC |
598 | addq $8, %rsp | 761 | addq $8, %rsp |
@@ -608,76 +771,76 @@ ENTRY(stub_rt_sigreturn) | |||
608 | END(stub_rt_sigreturn) | 771 | END(stub_rt_sigreturn) |
609 | 772 | ||
610 | /* | 773 | /* |
611 | * initial frame state for interrupts and exceptions | 774 | * Build the entry stubs and pointer table with some assembler magic. |
775 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | ||
776 | * single cache line on all modern x86 implementations. | ||
612 | */ | 777 | */ |
613 | .macro _frame ref | 778 | .section .init.rodata,"a" |
614 | CFI_STARTPROC simple | 779 | ENTRY(interrupt) |
615 | CFI_SIGNAL_FRAME | 780 | .text |
616 | CFI_DEF_CFA rsp,SS+8-\ref | 781 | .p2align 5 |
617 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 782 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
618 | CFI_REL_OFFSET rsp,RSP-\ref | 783 | ENTRY(irq_entries_start) |
619 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | 784 | INTR_FRAME |
620 | /*CFI_REL_OFFSET cs,CS-\ref*/ | 785 | vector=FIRST_EXTERNAL_VECTOR |
621 | CFI_REL_OFFSET rip,RIP-\ref | 786 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
622 | .endm | 787 | .balign 32 |
788 | .rept 7 | ||
789 | .if vector < NR_VECTORS | ||
790 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
791 | CFI_ADJUST_CFA_OFFSET -8 | ||
792 | .endif | ||
793 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | ||
794 | CFI_ADJUST_CFA_OFFSET 8 | ||
795 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
796 | jmp 2f | ||
797 | .endif | ||
798 | .previous | ||
799 | .quad 1b | ||
800 | .text | ||
801 | vector=vector+1 | ||
802 | .endif | ||
803 | .endr | ||
804 | 2: jmp common_interrupt | ||
805 | .endr | ||
806 | CFI_ENDPROC | ||
807 | END(irq_entries_start) | ||
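The ~vector+0x80 trick keeps every pushed immediate in signed-byte range, so pushq gets the short two-byte encoding. A standalone check of the round trip, including the addq $-0x80 adjustment done by common_interrupt below and the final decode by the handler:

    /* Round-trip test of the vector encoding used by the stubs above. */
    #include <assert.h>

    int main(void)
    {
            for (long vector = 0; vector < 256; vector++) {
                    long pushed = ~vector + 0x80;        /* stub's pushq */
                    assert(pushed >= -128 && pushed <= 127); /* imm8 fits */

                    long orig_ax = pushed - 0x80;        /* addq $-0x80  */
                    assert(orig_ax >= -256 && orig_ax <= -1);

                    assert(~orig_ax == vector);          /* handler decode */
            }
            return 0;
    }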
623 | 808 | ||
624 | /* initial frame state for interrupts (and exceptions without error code) */ | 809 | .previous |
625 | #define INTR_FRAME _frame RIP | 810 | END(interrupt) |
626 | /* initial frame state for exceptions with error code (and interrupts with | 811 | .previous |
627 | vector already pushed) */ | ||
628 | #define XCPT_FRAME _frame ORIG_RAX | ||
629 | 812 | ||
630 | /* | 813 | /* |
631 | * Interrupt entry/exit. | 814 | * Interrupt entry/exit. |
632 | * | 815 | * |
633 | * Interrupt entry points save only callee-clobbered registers in the fast path. | 816 | * Interrupt entry points save only callee-clobbered registers in the fast path. |
634 | * | 817 | * |
635 | * Entry runs with interrupts off. | 818 | * Entry runs with interrupts off. |
636 | */ | 819 | */ |
637 | 820 | ||
638 | /* 0(%rsp): interrupt number */ | 821 | /* 0(%rsp): ~(interrupt number) */ |
639 | .macro interrupt func | 822 | .macro interrupt func |
640 | cld | 823 | subq $10*8, %rsp |
641 | SAVE_ARGS | 824 | CFI_ADJUST_CFA_OFFSET 10*8 |
642 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 825 | call save_args |
643 | pushq %rbp | 826 | PARTIAL_FRAME 0 |
644 | /* | ||
645 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
646 | * other, to fill pt_regs properly. This is because bx comes right | ||
647 | * before the last saved register in that structure, and not bp. If the | ||
648 | * base pointer were in the place bx is today, this would not be needed. | ||
649 | */ | ||
650 | movq %rbp, -8(%rsp) | ||
651 | CFI_ADJUST_CFA_OFFSET 8 | ||
652 | CFI_REL_OFFSET rbp, 0 | ||
653 | movq %rsp,%rbp | ||
654 | CFI_DEF_CFA_REGISTER rbp | ||
655 | testl $3,CS(%rdi) | ||
656 | je 1f | ||
657 | SWAPGS | ||
658 | /* irqcount is used to check if a CPU is already on an interrupt | ||
659 | stack or not. While this is essentially redundant with preempt_count | ||
660 | it is a little cheaper to use a separate counter in the PDA | ||
661 | (short of moving irq_enter into assembly, which would be too | ||
662 | much work) */ | ||
663 | 1: incl %gs:pda_irqcount | ||
664 | cmoveq %gs:pda_irqstackptr,%rsp | ||
665 | push %rbp # backlink for old unwinder | ||
666 | /* | ||
667 | * We entered an interrupt context - irqs are off: | ||
668 | */ | ||
669 | TRACE_IRQS_OFF | ||
670 | call \func | 827 | call \func |
671 | .endm | 828 | .endm |
672 | 829 | ||
673 | ENTRY(common_interrupt) | 830 | /* |
831 | * The interrupt stubs push (~vector+0x80) onto the stack and | ||
832 | * then jump to common_interrupt. | ||
833 | */ | ||
834 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
835 | common_interrupt: | ||
674 | XCPT_FRAME | 836 | XCPT_FRAME |
837 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | ||
675 | interrupt do_IRQ | 838 | interrupt do_IRQ |
676 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 839 | /* 0(%rsp): old_rsp-ARGOFFSET */ |
677 | ret_from_intr: | 840 | ret_from_intr: |
678 | DISABLE_INTERRUPTS(CLBR_NONE) | 841 | DISABLE_INTERRUPTS(CLBR_NONE) |
679 | TRACE_IRQS_OFF | 842 | TRACE_IRQS_OFF |
680 | decl %gs:pda_irqcount | 843 | decl PER_CPU_VAR(irq_count) |
681 | leaveq | 844 | leaveq |
682 | CFI_DEF_CFA_REGISTER rsp | 845 | CFI_DEF_CFA_REGISTER rsp |
683 | CFI_ADJUST_CFA_OFFSET -8 | 846 | CFI_ADJUST_CFA_OFFSET -8 |
@@ -685,12 +848,12 @@ exit_intr: | |||
685 | GET_THREAD_INFO(%rcx) | 848 | GET_THREAD_INFO(%rcx) |
686 | testl $3,CS-ARGOFFSET(%rsp) | 849 | testl $3,CS-ARGOFFSET(%rsp) |
687 | je retint_kernel | 850 | je retint_kernel |
688 | 851 | ||
689 | /* Interrupt came from user space */ | 852 | /* Interrupt came from user space */ |
690 | /* | 853 | /* |
691 | * Has a correct top of stack, but a partial stack frame | 854 | * Has a correct top of stack, but a partial stack frame |
692 | * %rcx: thread info. Interrupts off. | 855 | * %rcx: thread info. Interrupts off. |
693 | */ | 856 | */ |
694 | retint_with_reschedule: | 857 | retint_with_reschedule: |
695 | movl $_TIF_WORK_MASK,%edi | 858 | movl $_TIF_WORK_MASK,%edi |
696 | retint_check: | 859 | retint_check: |
@@ -763,20 +926,20 @@ retint_careful: | |||
763 | pushq %rdi | 926 | pushq %rdi |
764 | CFI_ADJUST_CFA_OFFSET 8 | 927 | CFI_ADJUST_CFA_OFFSET 8 |
765 | call schedule | 928 | call schedule |
766 | popq %rdi | 929 | popq %rdi |
767 | CFI_ADJUST_CFA_OFFSET -8 | 930 | CFI_ADJUST_CFA_OFFSET -8 |
768 | GET_THREAD_INFO(%rcx) | 931 | GET_THREAD_INFO(%rcx) |
769 | DISABLE_INTERRUPTS(CLBR_NONE) | 932 | DISABLE_INTERRUPTS(CLBR_NONE) |
770 | TRACE_IRQS_OFF | 933 | TRACE_IRQS_OFF |
771 | jmp retint_check | 934 | jmp retint_check |
772 | 935 | ||
773 | retint_signal: | 936 | retint_signal: |
774 | testl $_TIF_DO_NOTIFY_MASK,%edx | 937 | testl $_TIF_DO_NOTIFY_MASK,%edx |
775 | jz retint_swapgs | 938 | jz retint_swapgs |
776 | TRACE_IRQS_ON | 939 | TRACE_IRQS_ON |
777 | ENABLE_INTERRUPTS(CLBR_NONE) | 940 | ENABLE_INTERRUPTS(CLBR_NONE) |
778 | SAVE_REST | 941 | SAVE_REST |
779 | movq $-1,ORIG_RAX(%rsp) | 942 | movq $-1,ORIG_RAX(%rsp) |
780 | xorl %esi,%esi # oldset | 943 | xorl %esi,%esi # oldset |
781 | movq %rsp,%rdi # &pt_regs | 944 | movq %rsp,%rdi # &pt_regs |
782 | call do_notify_resume | 945 | call do_notify_resume |
@@ -798,324 +961,213 @@ ENTRY(retint_kernel) | |||
798 | jnc retint_restore_args | 961 | jnc retint_restore_args |
799 | call preempt_schedule_irq | 962 | call preempt_schedule_irq |
800 | jmp exit_intr | 963 | jmp exit_intr |
801 | #endif | 964 | #endif |
802 | 965 | ||
803 | CFI_ENDPROC | 966 | CFI_ENDPROC |
804 | END(common_interrupt) | 967 | END(common_interrupt) |
805 | 968 | ||
806 | /* | 969 | /* |
807 | * APIC interrupts. | 970 | * APIC interrupts. |
808 | */ | 971 | */ |
809 | .macro apicinterrupt num,func | 972 | .macro apicinterrupt num sym do_sym |
973 | ENTRY(\sym) | ||
810 | INTR_FRAME | 974 | INTR_FRAME |
811 | pushq $~(\num) | 975 | pushq $~(\num) |
812 | CFI_ADJUST_CFA_OFFSET 8 | 976 | CFI_ADJUST_CFA_OFFSET 8 |
813 | interrupt \func | 977 | interrupt \do_sym |
814 | jmp ret_from_intr | 978 | jmp ret_from_intr |
815 | CFI_ENDPROC | 979 | CFI_ENDPROC |
816 | .endm | 980 | END(\sym) |
817 | 981 | .endm | |
818 | ENTRY(thermal_interrupt) | ||
819 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | ||
820 | END(thermal_interrupt) | ||
821 | |||
822 | ENTRY(threshold_interrupt) | ||
823 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | ||
824 | END(threshold_interrupt) | ||
825 | |||
826 | #ifdef CONFIG_SMP | ||
827 | ENTRY(reschedule_interrupt) | ||
828 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | ||
829 | END(reschedule_interrupt) | ||
830 | |||
831 | .macro INVALIDATE_ENTRY num | ||
832 | ENTRY(invalidate_interrupt\num) | ||
833 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | ||
834 | END(invalidate_interrupt\num) | ||
835 | .endm | ||
836 | 982 | ||
837 | INVALIDATE_ENTRY 0 | 983 | #ifdef CONFIG_SMP |
838 | INVALIDATE_ENTRY 1 | 984 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
839 | INVALIDATE_ENTRY 2 | 985 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
840 | INVALIDATE_ENTRY 3 | ||
841 | INVALIDATE_ENTRY 4 | ||
842 | INVALIDATE_ENTRY 5 | ||
843 | INVALIDATE_ENTRY 6 | ||
844 | INVALIDATE_ENTRY 7 | ||
845 | |||
846 | ENTRY(call_function_interrupt) | ||
847 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | ||
848 | END(call_function_interrupt) | ||
849 | ENTRY(call_function_single_interrupt) | ||
850 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | ||
851 | END(call_function_single_interrupt) | ||
852 | ENTRY(irq_move_cleanup_interrupt) | ||
853 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | ||
854 | END(irq_move_cleanup_interrupt) | ||
855 | #endif | 986 | #endif |
856 | 987 | ||
857 | ENTRY(apic_timer_interrupt) | 988 | #ifdef CONFIG_X86_UV |
858 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 989 | apicinterrupt UV_BAU_MESSAGE \ |
859 | END(apic_timer_interrupt) | 990 | uv_bau_message_intr1 uv_bau_message_interrupt |
991 | #endif | ||
992 | apicinterrupt LOCAL_TIMER_VECTOR \ | ||
993 | apic_timer_interrupt smp_apic_timer_interrupt | ||
994 | |||
995 | #ifdef CONFIG_SMP | ||
996 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | ||
997 | invalidate_interrupt0 smp_invalidate_interrupt | ||
998 | apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ | ||
999 | invalidate_interrupt1 smp_invalidate_interrupt | ||
1000 | apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ | ||
1001 | invalidate_interrupt2 smp_invalidate_interrupt | ||
1002 | apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ | ||
1003 | invalidate_interrupt3 smp_invalidate_interrupt | ||
1004 | apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ | ||
1005 | invalidate_interrupt4 smp_invalidate_interrupt | ||
1006 | apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ | ||
1007 | invalidate_interrupt5 smp_invalidate_interrupt | ||
1008 | apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ | ||
1009 | invalidate_interrupt6 smp_invalidate_interrupt | ||
1010 | apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | ||
1011 | invalidate_interrupt7 smp_invalidate_interrupt | ||
1012 | #endif | ||
860 | 1013 | ||
861 | ENTRY(uv_bau_message_intr1) | 1014 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
862 | apicinterrupt 220,uv_bau_message_interrupt | 1015 | threshold_interrupt mce_threshold_interrupt |
863 | END(uv_bau_message_intr1) | 1016 | apicinterrupt THERMAL_APIC_VECTOR \ |
1017 | thermal_interrupt smp_thermal_interrupt | ||
1018 | |||
1019 | #ifdef CONFIG_SMP | ||
1020 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | ||
1021 | call_function_single_interrupt smp_call_function_single_interrupt | ||
1022 | apicinterrupt CALL_FUNCTION_VECTOR \ | ||
1023 | call_function_interrupt smp_call_function_interrupt | ||
1024 | apicinterrupt RESCHEDULE_VECTOR \ | ||
1025 | reschedule_interrupt smp_reschedule_interrupt | ||
1026 | #endif | ||
864 | 1027 | ||
865 | ENTRY(error_interrupt) | 1028 | apicinterrupt ERROR_APIC_VECTOR \ |
866 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 1029 | error_interrupt smp_error_interrupt |
867 | END(error_interrupt) | 1030 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1031 | spurious_interrupt smp_spurious_interrupt | ||
868 | 1032 | ||
869 | ENTRY(spurious_interrupt) | ||
870 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | ||
871 | END(spurious_interrupt) | ||
872 | |||
873 | /* | 1033 | /* |
874 | * Exception entry points. | 1034 | * Exception entry points. |
875 | */ | 1035 | */ |
876 | .macro zeroentry sym | 1036 | .macro zeroentry sym do_sym |
1037 | ENTRY(\sym) | ||
877 | INTR_FRAME | 1038 | INTR_FRAME |
878 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1039 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
879 | pushq $0 /* push error code/oldrax */ | 1040 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
880 | CFI_ADJUST_CFA_OFFSET 8 | 1041 | subq $15*8,%rsp |
881 | pushq %rax /* push real oldrax to the rdi slot */ | 1042 | CFI_ADJUST_CFA_OFFSET 15*8 |
882 | CFI_ADJUST_CFA_OFFSET 8 | 1043 | call error_entry |
883 | CFI_REL_OFFSET rax,0 | 1044 | DEFAULT_FRAME 0 |
884 | leaq \sym(%rip),%rax | 1045 | movq %rsp,%rdi /* pt_regs pointer */ |
885 | jmp error_entry | 1046 | xorl %esi,%esi /* no error code */ |
1047 | call \do_sym | ||
1048 | jmp error_exit /* %ebx: no swapgs flag */ | ||
886 | CFI_ENDPROC | 1049 | CFI_ENDPROC |
887 | .endm | 1050 | END(\sym) |
1051 | .endm | ||
888 | 1052 | ||
889 | .macro errorentry sym | 1053 | .macro paranoidzeroentry sym do_sym |
890 | XCPT_FRAME | 1054 | ENTRY(\sym) |
1055 | INTR_FRAME | ||
891 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1056 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
892 | pushq %rax | 1057 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
893 | CFI_ADJUST_CFA_OFFSET 8 | 1058 | CFI_ADJUST_CFA_OFFSET 8 |
894 | CFI_REL_OFFSET rax,0 | 1059 | subq $15*8, %rsp |
895 | leaq \sym(%rip),%rax | 1060 | call save_paranoid |
896 | jmp error_entry | 1061 | TRACE_IRQS_OFF |
1062 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1063 | xorl %esi,%esi /* no error code */ | ||
1064 | call \do_sym | ||
1065 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
897 | CFI_ENDPROC | 1066 | CFI_ENDPROC |
898 | .endm | 1067 | END(\sym) |
1068 | .endm | ||
899 | 1069 | ||
900 | /* error code is on the stack already */ | 1070 | .macro paranoidzeroentry_ist sym do_sym ist |
901 | /* handle NMI like exceptions that can happen everywhere */ | 1071 | ENTRY(\sym) |
902 | .macro paranoidentry sym, ist=0, irqtrace=1 | 1072 | INTR_FRAME |
903 | SAVE_ALL | 1073 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
904 | cld | 1074 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
905 | movl $1,%ebx | 1075 | CFI_ADJUST_CFA_OFFSET 8 |
906 | movl $MSR_GS_BASE,%ecx | 1076 | subq $15*8, %rsp |
907 | rdmsr | 1077 | call save_paranoid |
908 | testl %edx,%edx | ||
909 | js 1f | ||
910 | SWAPGS | ||
911 | xorl %ebx,%ebx | ||
912 | 1: | ||
913 | .if \ist | ||
914 | movq %gs:pda_data_offset, %rbp | ||
915 | .endif | ||
916 | .if \irqtrace | ||
917 | TRACE_IRQS_OFF | ||
918 | .endif | ||
919 | movq %rsp,%rdi | ||
920 | movq ORIG_RAX(%rsp),%rsi | ||
921 | movq $-1,ORIG_RAX(%rsp) | ||
922 | .if \ist | ||
923 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
924 | .endif | ||
925 | call \sym | ||
926 | .if \ist | ||
927 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
928 | .endif | ||
929 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
930 | .if \irqtrace | ||
931 | TRACE_IRQS_OFF | 1078 | TRACE_IRQS_OFF |
932 | .endif | 1079 | movq %rsp,%rdi /* pt_regs pointer */ |
933 | .endm | 1080 | xorl %esi,%esi /* no error code */ |
1081 | PER_CPU(init_tss, %rbp) | ||
1082 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | ||
1083 | call \do_sym | ||
1084 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | ||
1085 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
1086 | CFI_ENDPROC | ||
1087 | END(\sym) | ||
1088 | .endm | ||
934 | 1089 | ||
935 | /* | 1090 | .macro errorentry sym do_sym |
936 | * "Paranoid" exit path from exception stack. | 1091 | ENTRY(\sym) |
937 | * Paranoid because this is used by NMIs and cannot take | 1092 | XCPT_FRAME |
938 | * any kernel state for granted. | 1093 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
939 | * We don't do kernel preemption checks here, because only | 1094 | subq $15*8,%rsp |
940 | * NMI should be common and it does not enable IRQs and | 1095 | CFI_ADJUST_CFA_OFFSET 15*8 |
941 | * cannot get reschedule ticks. | 1096 | call error_entry |
942 | * | 1097 | DEFAULT_FRAME 0 |
943 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1098 | movq %rsp,%rdi /* pt_regs pointer */ |
944 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1099 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
945 | * hard flags at once, atomically) | 1100 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
946 | */ | 1101 | call \do_sym |
947 | .macro paranoidexit trace=1 | 1102 | jmp error_exit /* %ebx: no swapgs flag */ |
948 | /* ebx: no swapgs flag */ | ||
949 | paranoid_exit\trace: | ||
950 | testl %ebx,%ebx /* swapgs needed? */ | ||
951 | jnz paranoid_restore\trace | ||
952 | testl $3,CS(%rsp) | ||
953 | jnz paranoid_userspace\trace | ||
954 | paranoid_swapgs\trace: | ||
955 | .if \trace | ||
956 | TRACE_IRQS_IRETQ 0 | ||
957 | .endif | ||
958 | SWAPGS_UNSAFE_STACK | ||
959 | paranoid_restore\trace: | ||
960 | RESTORE_ALL 8 | ||
961 | jmp irq_return | ||
962 | paranoid_userspace\trace: | ||
963 | GET_THREAD_INFO(%rcx) | ||
964 | movl TI_flags(%rcx),%ebx | ||
965 | andl $_TIF_WORK_MASK,%ebx | ||
966 | jz paranoid_swapgs\trace | ||
967 | movq %rsp,%rdi /* &pt_regs */ | ||
968 | call sync_regs | ||
969 | movq %rax,%rsp /* switch stack for scheduling */ | ||
970 | testl $_TIF_NEED_RESCHED,%ebx | ||
971 | jnz paranoid_schedule\trace | ||
972 | movl %ebx,%edx /* arg3: thread flags */ | ||
973 | .if \trace | ||
974 | TRACE_IRQS_ON | ||
975 | .endif | ||
976 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
977 | xorl %esi,%esi /* arg2: oldset */ | ||
978 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
979 | call do_notify_resume | ||
980 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
981 | .if \trace | ||
982 | TRACE_IRQS_OFF | ||
983 | .endif | ||
984 | jmp paranoid_userspace\trace | ||
985 | paranoid_schedule\trace: | ||
986 | .if \trace | ||
987 | TRACE_IRQS_ON | ||
988 | .endif | ||
989 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
990 | call schedule | ||
991 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
992 | .if \trace | ||
993 | TRACE_IRQS_OFF | ||
994 | .endif | ||
995 | jmp paranoid_userspace\trace | ||
996 | CFI_ENDPROC | 1103 | CFI_ENDPROC |
997 | .endm | 1104 | END(\sym) |
1105 | .endm | ||
998 | 1106 | ||
999 | /* | 1107 | /* error code is on the stack already */ |
1000 | * Exception entry point. This expects an error code/orig_rax on the stack | 1108 | .macro paranoiderrorentry sym do_sym |
1001 | * and the exception handler in %rax. | 1109 | ENTRY(\sym) |
1002 | */ | 1110 | XCPT_FRAME |
1003 | KPROBE_ENTRY(error_entry) | 1111 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1004 | _frame RDI | 1112 | subq $15*8,%rsp |
1005 | CFI_REL_OFFSET rax,0 | 1113 | CFI_ADJUST_CFA_OFFSET 15*8 |
1006 | /* rdi slot contains rax, oldrax contains error code */ | 1114 | call save_paranoid |
1007 | cld | 1115 | DEFAULT_FRAME 0 |
1008 | subq $14*8,%rsp | ||
1009 | CFI_ADJUST_CFA_OFFSET (14*8) | ||
1010 | movq %rsi,13*8(%rsp) | ||
1011 | CFI_REL_OFFSET rsi,RSI | ||
1012 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | ||
1013 | CFI_REGISTER rax,rsi | ||
1014 | movq %rdx,12*8(%rsp) | ||
1015 | CFI_REL_OFFSET rdx,RDX | ||
1016 | movq %rcx,11*8(%rsp) | ||
1017 | CFI_REL_OFFSET rcx,RCX | ||
1018 | movq %rsi,10*8(%rsp) /* store rax */ | ||
1019 | CFI_REL_OFFSET rax,RAX | ||
1020 | movq %r8, 9*8(%rsp) | ||
1021 | CFI_REL_OFFSET r8,R8 | ||
1022 | movq %r9, 8*8(%rsp) | ||
1023 | CFI_REL_OFFSET r9,R9 | ||
1024 | movq %r10,7*8(%rsp) | ||
1025 | CFI_REL_OFFSET r10,R10 | ||
1026 | movq %r11,6*8(%rsp) | ||
1027 | CFI_REL_OFFSET r11,R11 | ||
1028 | movq %rbx,5*8(%rsp) | ||
1029 | CFI_REL_OFFSET rbx,RBX | ||
1030 | movq %rbp,4*8(%rsp) | ||
1031 | CFI_REL_OFFSET rbp,RBP | ||
1032 | movq %r12,3*8(%rsp) | ||
1033 | CFI_REL_OFFSET r12,R12 | ||
1034 | movq %r13,2*8(%rsp) | ||
1035 | CFI_REL_OFFSET r13,R13 | ||
1036 | movq %r14,1*8(%rsp) | ||
1037 | CFI_REL_OFFSET r14,R14 | ||
1038 | movq %r15,(%rsp) | ||
1039 | CFI_REL_OFFSET r15,R15 | ||
1040 | xorl %ebx,%ebx | ||
1041 | testl $3,CS(%rsp) | ||
1042 | je error_kernelspace | ||
1043 | error_swapgs: | ||
1044 | SWAPGS | ||
1045 | error_sti: | ||
1046 | TRACE_IRQS_OFF | ||
1047 | movq %rdi,RDI(%rsp) | ||
1048 | CFI_REL_OFFSET rdi,RDI | ||
1049 | movq %rsp,%rdi | ||
1050 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
1051 | movq $-1,ORIG_RAX(%rsp) | ||
1052 | call *%rax | ||
1053 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1054 | error_exit: | ||
1055 | movl %ebx,%eax | ||
1056 | RESTORE_REST | ||
1057 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1058 | TRACE_IRQS_OFF | 1116 | TRACE_IRQS_OFF |
1059 | GET_THREAD_INFO(%rcx) | 1117 | movq %rsp,%rdi /* pt_regs pointer */ |
1060 | testl %eax,%eax | 1118 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1061 | jne retint_kernel | 1119 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1062 | LOCKDEP_SYS_EXIT_IRQ | 1120 | call \do_sym |
1063 | movl TI_flags(%rcx),%edx | 1121 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1064 | movl $_TIF_WORK_MASK,%edi | ||
1065 | andl %edi,%edx | ||
1066 | jnz retint_careful | ||
1067 | jmp retint_swapgs | ||
1068 | CFI_ENDPROC | 1122 | CFI_ENDPROC |
1123 | END(\sym) | ||
1124 | .endm | ||
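All of the macro flavours above end up calling a C handler of the same shape; the only difference is where the error code comes from. A sketch of the resulting signatures:

    /* The C handlers that \do_sym resolves to all look like this. */
    struct pt_regs;

    void do_divide_error(struct pt_regs *regs, long error_code);
    /*   zeroentry:       error_code == 0, ORIG_RAX set to -1        */
    void do_invalid_TSS(struct pt_regs *regs, long error_code);
    /*   errorentry:      error_code == the hardware-pushed word,    */
    /*                    read from ORIG_RAX and then replaced by -1 */
    void do_double_fault(struct pt_regs *regs, long error_code);
    /*   paranoid*entry:  same shape, but entered via save_paranoid  */
    /*                    so the handler may run on an IST stack     */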
1069 | 1125 | ||
1070 | error_kernelspace: | 1126 | zeroentry divide_error do_divide_error |
1071 | incl %ebx | 1127 | zeroentry overflow do_overflow |
1072 | /* There are two places in the kernel that can potentially fault with | 1128 | zeroentry bounds do_bounds |
1073 | usergs. Handle them here. The exception handlers after | 1129 | zeroentry invalid_op do_invalid_op |
1074 | iret run with kernel gs again, so don't set the user space flag. | 1130 | zeroentry device_not_available do_device_not_available |
1075 | B stepping K8s sometimes report a truncated RIP for IRET | 1131 | paranoiderrorentry double_fault do_double_fault |
1076 | exceptions returning to compat mode. Check for these here too. */ | 1132 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
1077 | leaq irq_return(%rip),%rcx | 1133 | errorentry invalid_TSS do_invalid_TSS |
1078 | cmpq %rcx,RIP(%rsp) | 1134 | errorentry segment_not_present do_segment_not_present |
1079 | je error_swapgs | 1135 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
1080 | movl %ecx,%ecx /* zero extend */ | 1136 | zeroentry coprocessor_error do_coprocessor_error |
1081 | cmpq %rcx,RIP(%rsp) | 1137 | errorentry alignment_check do_alignment_check |
1082 | je error_swapgs | 1138 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1083 | cmpq $gs_change,RIP(%rsp) | 1139 | |
1084 | je error_swapgs | 1140 | /* Reload gs selector with exception handling */ |
1085 | jmp error_sti | 1141 | /* edi: new selector */ |
1086 | KPROBE_END(error_entry) | ||
1087 | |||
1088 | /* Reload gs selector with exception handling */ | ||
1089 | /* edi: new selector */ | ||
1090 | ENTRY(native_load_gs_index) | 1142 | ENTRY(native_load_gs_index) |
1091 | CFI_STARTPROC | 1143 | CFI_STARTPROC |
1092 | pushf | 1144 | pushf |
1093 | CFI_ADJUST_CFA_OFFSET 8 | 1145 | CFI_ADJUST_CFA_OFFSET 8 |
1094 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 1146 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
1095 | SWAPGS | 1147 | SWAPGS |
1096 | gs_change: | 1148 | gs_change: |
1097 | movl %edi,%gs | 1149 | movl %edi,%gs |
1098 | 2: mfence /* workaround */ | 1150 | 2: mfence /* workaround */ |
1099 | SWAPGS | 1151 | SWAPGS |
1100 | popf | 1152 | popf |
1101 | CFI_ADJUST_CFA_OFFSET -8 | 1153 | CFI_ADJUST_CFA_OFFSET -8 |
1102 | ret | 1154 | ret |
1103 | CFI_ENDPROC | 1155 | CFI_ENDPROC |
1104 | ENDPROC(native_load_gs_index) | 1156 | END(native_load_gs_index) |
1105 | 1157 | ||
1106 | .section __ex_table,"a" | 1158 | .section __ex_table,"a" |
1107 | .align 8 | 1159 | .align 8 |
1108 | .quad gs_change,bad_gs | 1160 | .quad gs_change,bad_gs |
1109 | .previous | 1161 | .previous |
1110 | .section .fixup,"ax" | 1162 | .section .fixup,"ax" |
1111 | /* running with kernelgs */ | 1163 | /* running with kernelgs */ |
1112 | bad_gs: | 1164 | bad_gs: |
1113 | SWAPGS /* switch back to user gs */ | 1165 | SWAPGS /* switch back to user gs */ |
1114 | xorl %eax,%eax | 1166 | xorl %eax,%eax |
1115 | movl %eax,%gs | 1167 | movl %eax,%gs |
1116 | jmp 2b | 1168 | jmp 2b |
1117 | .previous | 1169 | .previous |
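The __ex_table pair above plugs into the kernel's generic fault-fixup scheme: on a fault at gs_change, the trap handler looks the faulting address up and resumes at bad_gs instead of oopsing. Conceptually (a sketch; the real lookup is a sorted binary search over a linker-generated table):

    /* Sketch of the fixup search triggered when gs_change faults. */
    struct exception_table_entry {
            unsigned long insn;    /* address that may fault (gs_change) */
            unsigned long fixup;   /* where to resume instead (bad_gs)   */
    };

    static unsigned long search_exception_table(
                    const struct exception_table_entry *tbl,
                    unsigned long n, unsigned long faulting_ip)
    {
            for (unsigned long i = 0; i < n; i++)
                    if (tbl[i].insn == faulting_ip)
                            return tbl[i].fixup;  /* resume at the fixup */
            return 0;                             /* no entry: real oops */
    }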
1118 | 1170 | ||
1119 | /* | 1171 | /* |
1120 | * Create a kernel thread. | 1172 | * Create a kernel thread. |
1121 | * | 1173 | * |
@@ -1138,7 +1190,7 @@ ENTRY(kernel_thread) | |||
1138 | 1190 | ||
1139 | xorl %r8d,%r8d | 1191 | xorl %r8d,%r8d |
1140 | xorl %r9d,%r9d | 1192 | xorl %r9d,%r9d |
1141 | 1193 | ||
1142 | # clone now | 1194 | # clone now |
1143 | call do_fork | 1195 | call do_fork |
1144 | movq %rax,RAX(%rsp) | 1196 | movq %rax,RAX(%rsp) |
@@ -1149,15 +1201,15 @@ ENTRY(kernel_thread) | |||
1149 | * so internally to the x86_64 port you can rely on kernel_thread() | 1201 | * so internally to the x86_64 port you can rely on kernel_thread() |
1150 | * not to reschedule the child before returning, this avoids the need | 1202 | * not to reschedule the child before returning, this avoids the need |
1151 | * of hacks for example to fork off the per-CPU idle tasks. | 1203 | * of hacks for example to fork off the per-CPU idle tasks. |
1152 | * [Hopefully no generic code relies on the reschedule -AK] | 1204 | * [Hopefully no generic code relies on the reschedule -AK] |
1153 | */ | 1205 | */ |
1154 | RESTORE_ALL | 1206 | RESTORE_ALL |
1155 | UNFAKE_STACK_FRAME | 1207 | UNFAKE_STACK_FRAME |
1156 | ret | 1208 | ret |
1157 | CFI_ENDPROC | 1209 | CFI_ENDPROC |
1158 | ENDPROC(kernel_thread) | 1210 | END(kernel_thread) |
1159 | 1211 | ||
1160 | child_rip: | 1212 | ENTRY(child_rip) |
1161 | pushq $0 # fake return address | 1213 | pushq $0 # fake return address |
1162 | CFI_STARTPROC | 1214 | CFI_STARTPROC |
1163 | /* | 1215 | /* |
@@ -1170,8 +1222,9 @@ child_rip: | |||
1170 | # exit | 1222 | # exit |
1171 | mov %eax, %edi | 1223 | mov %eax, %edi |
1172 | call do_exit | 1224 | call do_exit |
1225 | ud2 # padding for call trace | ||
1173 | CFI_ENDPROC | 1226 | CFI_ENDPROC |
1174 | ENDPROC(child_rip) | 1227 | END(child_rip) |
1175 | 1228 | ||
1176 | /* | 1229 | /* |
1177 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1230 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -1191,10 +1244,10 @@ ENDPROC(child_rip) | |||
1191 | ENTRY(kernel_execve) | 1244 | ENTRY(kernel_execve) |
1192 | CFI_STARTPROC | 1245 | CFI_STARTPROC |
1193 | FAKE_STACK_FRAME $0 | 1246 | FAKE_STACK_FRAME $0 |
1194 | SAVE_ALL | 1247 | SAVE_ALL |
1195 | movq %rsp,%rcx | 1248 | movq %rsp,%rcx |
1196 | call sys_execve | 1249 | call sys_execve |
1197 | movq %rax, RAX(%rsp) | 1250 | movq %rax, RAX(%rsp) |
1198 | RESTORE_REST | 1251 | RESTORE_REST |
1199 | testq %rax,%rax | 1252 | testq %rax,%rax |
1200 | je int_ret_from_sys_call | 1253 | je int_ret_from_sys_call |
@@ -1202,129 +1255,7 @@ ENTRY(kernel_execve) | |||
1202 | UNFAKE_STACK_FRAME | 1255 | UNFAKE_STACK_FRAME |
1203 | ret | 1256 | ret |
1204 | CFI_ENDPROC | 1257 | CFI_ENDPROC |
1205 | ENDPROC(kernel_execve) | 1258 | END(kernel_execve) |
1206 | |||
1207 | KPROBE_ENTRY(page_fault) | ||
1208 | errorentry do_page_fault | ||
1209 | KPROBE_END(page_fault) | ||
1210 | |||
1211 | ENTRY(coprocessor_error) | ||
1212 | zeroentry do_coprocessor_error | ||
1213 | END(coprocessor_error) | ||
1214 | |||
1215 | ENTRY(simd_coprocessor_error) | ||
1216 | zeroentry do_simd_coprocessor_error | ||
1217 | END(simd_coprocessor_error) | ||
1218 | |||
1219 | ENTRY(device_not_available) | ||
1220 | zeroentry do_device_not_available | ||
1221 | END(device_not_available) | ||
1222 | |||
1223 | /* runs on exception stack */ | ||
1224 | KPROBE_ENTRY(debug) | ||
1225 | INTR_FRAME | ||
1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1227 | pushq $0 | ||
1228 | CFI_ADJUST_CFA_OFFSET 8 | ||
1229 | paranoidentry do_debug, DEBUG_STACK | ||
1230 | paranoidexit | ||
1231 | KPROBE_END(debug) | ||
1232 | |||
1233 | /* runs on exception stack */ | ||
1234 | KPROBE_ENTRY(nmi) | ||
1235 | INTR_FRAME | ||
1236 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1237 | pushq $-1 | ||
1238 | CFI_ADJUST_CFA_OFFSET 8 | ||
1239 | paranoidentry do_nmi, 0, 0 | ||
1240 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1241 | paranoidexit 0 | ||
1242 | #else | ||
1243 | jmp paranoid_exit1 | ||
1244 | CFI_ENDPROC | ||
1245 | #endif | ||
1246 | KPROBE_END(nmi) | ||
1247 | |||
1248 | KPROBE_ENTRY(int3) | ||
1249 | INTR_FRAME | ||
1250 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1251 | pushq $0 | ||
1252 | CFI_ADJUST_CFA_OFFSET 8 | ||
1253 | paranoidentry do_int3, DEBUG_STACK | ||
1254 | jmp paranoid_exit1 | ||
1255 | CFI_ENDPROC | ||
1256 | KPROBE_END(int3) | ||
1257 | |||
1258 | ENTRY(overflow) | ||
1259 | zeroentry do_overflow | ||
1260 | END(overflow) | ||
1261 | |||
1262 | ENTRY(bounds) | ||
1263 | zeroentry do_bounds | ||
1264 | END(bounds) | ||
1265 | |||
1266 | ENTRY(invalid_op) | ||
1267 | zeroentry do_invalid_op | ||
1268 | END(invalid_op) | ||
1269 | |||
1270 | ENTRY(coprocessor_segment_overrun) | ||
1271 | zeroentry do_coprocessor_segment_overrun | ||
1272 | END(coprocessor_segment_overrun) | ||
1273 | |||
1274 | /* runs on exception stack */ | ||
1275 | ENTRY(double_fault) | ||
1276 | XCPT_FRAME | ||
1277 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1278 | paranoidentry do_double_fault | ||
1279 | jmp paranoid_exit1 | ||
1280 | CFI_ENDPROC | ||
1281 | END(double_fault) | ||
1282 | |||
1283 | ENTRY(invalid_TSS) | ||
1284 | errorentry do_invalid_TSS | ||
1285 | END(invalid_TSS) | ||
1286 | |||
1287 | ENTRY(segment_not_present) | ||
1288 | errorentry do_segment_not_present | ||
1289 | END(segment_not_present) | ||
1290 | |||
1291 | /* runs on exception stack */ | ||
1292 | ENTRY(stack_segment) | ||
1293 | XCPT_FRAME | ||
1294 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1295 | paranoidentry do_stack_segment | ||
1296 | jmp paranoid_exit1 | ||
1297 | CFI_ENDPROC | ||
1298 | END(stack_segment) | ||
1299 | |||
1300 | KPROBE_ENTRY(general_protection) | ||
1301 | errorentry do_general_protection | ||
1302 | KPROBE_END(general_protection) | ||
1303 | |||
1304 | ENTRY(alignment_check) | ||
1305 | errorentry do_alignment_check | ||
1306 | END(alignment_check) | ||
1307 | |||
1308 | ENTRY(divide_error) | ||
1309 | zeroentry do_divide_error | ||
1310 | END(divide_error) | ||
1311 | |||
1312 | ENTRY(spurious_interrupt_bug) | ||
1313 | zeroentry do_spurious_interrupt_bug | ||
1314 | END(spurious_interrupt_bug) | ||
1315 | |||
1316 | #ifdef CONFIG_X86_MCE | ||
1317 | /* runs on exception stack */ | ||
1318 | ENTRY(machine_check) | ||
1319 | INTR_FRAME | ||
1320 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1321 | pushq $0 | ||
1322 | CFI_ADJUST_CFA_OFFSET 8 | ||
1323 | paranoidentry do_machine_check | ||
1324 | jmp paranoid_exit1 | ||
1325 | CFI_ENDPROC | ||
1326 | END(machine_check) | ||
1327 | #endif | ||
1328 | 1259 | ||
1329 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1260 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1330 | ENTRY(call_softirq) | 1261 | ENTRY(call_softirq) |
@@ -1334,81 +1265,77 @@ ENTRY(call_softirq) | |||
1334 | CFI_REL_OFFSET rbp,0 | 1265 | CFI_REL_OFFSET rbp,0 |
1335 | mov %rsp,%rbp | 1266 | mov %rsp,%rbp |
1336 | CFI_DEF_CFA_REGISTER rbp | 1267 | CFI_DEF_CFA_REGISTER rbp |
1337 | incl %gs:pda_irqcount | 1268 | incl PER_CPU_VAR(irq_count) |
1338 | cmove %gs:pda_irqstackptr,%rsp | 1269 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp |
1339 | push %rbp # backlink for old unwinder | 1270 | push %rbp # backlink for old unwinder |
1340 | call __do_softirq | 1271 | call __do_softirq |
1341 | leaveq | 1272 | leaveq |
1342 | CFI_DEF_CFA_REGISTER rsp | 1273 | CFI_DEF_CFA_REGISTER rsp |
1343 | CFI_ADJUST_CFA_OFFSET -8 | 1274 | CFI_ADJUST_CFA_OFFSET -8 |
1344 | decl %gs:pda_irqcount | 1275 | decl PER_CPU_VAR(irq_count) |
1345 | ret | 1276 | ret |
1346 | CFI_ENDPROC | 1277 | CFI_ENDPROC |
1347 | ENDPROC(call_softirq) | 1278 | END(call_softirq) |
1348 | |||
1349 | KPROBE_ENTRY(ignore_sysret) | ||
1350 | CFI_STARTPROC | ||
1351 | mov $-ENOSYS,%eax | ||
1352 | sysret | ||
1353 | CFI_ENDPROC | ||
1354 | ENDPROC(ignore_sysret) | ||
1355 | 1279 | ||
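
The incl/cmove pair in call_softirq is a branchless, reentrancy-safe stack switch: the per-CPU irq_count idles at -1, so only the outermost entry (whose increment lands exactly on zero, setting ZF for the cmove) moves %rsp onto the dedicated IRQ stack; nested entries keep the stack they are already on. A small C model of the same counting scheme, with invented names and assuming the -1 idle value:

#include <stdio.h>

static int irq_count = -1;      /* idles at -1, like the per-CPU field */
static char irq_stack[8192];
static char *stack_ptr;         /* stands in for %rsp */

static void enter_irq(void)
{
        int switched = (++irq_count == 0);   /* incl sets ZF exactly here */
        if (switched)                        /* ...which is what cmove tests */
                stack_ptr = irq_stack + sizeof(irq_stack);
        printf("depth %d, switched to irq stack: %d\n", irq_count, switched);
}

static void leave_irq(void)
{
        irq_count--;                         /* decl PER_CPU_VAR(irq_count) */
}

int main(void)
{
        enter_irq();    /* outermost entry: switches stacks */
        enter_irq();    /* nested entry: keeps the current stack */
        leave_irq();
        leave_irq();
        return 0;
}
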
1356 | #ifdef CONFIG_XEN | 1280 | #ifdef CONFIG_XEN |
1357 | ENTRY(xen_hypervisor_callback) | 1281 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
1358 | zeroentry xen_do_hypervisor_callback | ||
1359 | END(xen_hypervisor_callback) | ||
1360 | 1282 | ||
1361 | /* | 1283 | /* |
1362 | # A note on the "critical region" in our callback handler. | 1284 | * A note on the "critical region" in our callback handler. |
1363 | # We want to avoid stacking callback handlers due to events occurring | 1285 | * We want to avoid stacking callback handlers due to events occurring |
1364 | # during handling of the last event. To do this, we keep events disabled | 1286 | * during handling of the last event. To do this, we keep events disabled |
1365 | # until we've done all processing. HOWEVER, we must enable events before | 1287 | * until we've done all processing. HOWEVER, we must enable events before |
1366 | # popping the stack frame (can't be done atomically) and so it would still | 1288 | * popping the stack frame (can't be done atomically) and so it would still |
1367 | # be possible to get enough handler activations to overflow the stack. | 1289 | * be possible to get enough handler activations to overflow the stack. |
1368 | # Although unlikely, bugs of that kind are hard to track down, so we'd | 1290 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
1369 | # like to avoid the possibility. | 1291 | * like to avoid the possibility. |
1370 | # So, on entry to the handler we detect whether we interrupted an | 1292 | * So, on entry to the handler we detect whether we interrupted an |
1371 | # existing activation in its critical region -- if so, we pop the current | 1293 | * existing activation in its critical region -- if so, we pop the current |
1372 | # activation and restart the handler using the previous one. | 1294 | * activation and restart the handler using the previous one. |
1373 | */ | 1295 | */ |
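
Schematically, the detection the comment describes is a range check on the interrupted RIP: if it falls inside the critical region of a previous activation, the handler discards the current frame and restarts from the previous one. A hedged C sketch of just that test, with made-up region bounds:

#include <stdint.h>
#include <stdio.h>

#define CRITICAL_START 0x4000u  /* assumed start of the critical region */
#define CRITICAL_END   0x4040u  /* assumed end (exclusive) */

static int in_critical_region(uintptr_t saved_rip)
{
        return saved_rip >= CRITICAL_START && saved_rip < CRITICAL_END;
}

int main(void)
{
        /* If true, the handler would pop the current activation and
         * restart using the previous one, as the comment describes. */
        printf("interrupted critical region: %d\n",
               in_critical_region(0x4010));
        return 0;
}
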
1374 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *) | 1296 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *) |
1375 | CFI_STARTPROC | 1297 | CFI_STARTPROC |
1376 | /* Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will | 1298 | /* |
1377 | see the correct pointer to the pt_regs */ | 1299 | * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will |
1300 | * see the correct pointer to the pt_regs | ||
1301 | */ | ||
1378 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1302 | movq %rdi, %rsp # we don't return, adjust the stack frame |
1379 | CFI_ENDPROC | 1303 | CFI_ENDPROC |
1380 | CFI_DEFAULT_STACK | 1304 | DEFAULT_FRAME |
1381 | 11: incl %gs:pda_irqcount | 1305 | 11: incl PER_CPU_VAR(irq_count) |
1382 | movq %rsp,%rbp | 1306 | movq %rsp,%rbp |
1383 | CFI_DEF_CFA_REGISTER rbp | 1307 | CFI_DEF_CFA_REGISTER rbp |
1384 | cmovzq %gs:pda_irqstackptr,%rsp | 1308 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
1385 | pushq %rbp # backlink for old unwinder | 1309 | pushq %rbp # backlink for old unwinder |
1386 | call xen_evtchn_do_upcall | 1310 | call xen_evtchn_do_upcall |
1387 | popq %rsp | 1311 | popq %rsp |
1388 | CFI_DEF_CFA_REGISTER rsp | 1312 | CFI_DEF_CFA_REGISTER rsp |
1389 | decl %gs:pda_irqcount | 1313 | decl PER_CPU_VAR(irq_count) |
1390 | jmp error_exit | 1314 | jmp error_exit |
1391 | CFI_ENDPROC | 1315 | CFI_ENDPROC |
1392 | END(do_hypervisor_callback) | 1316 | END(do_hypervisor_callback) |
1393 | 1317 | ||
1394 | /* | 1318 | /* |
1395 | # Hypervisor uses this for application faults while it executes. | 1319 | * Hypervisor uses this for application faults while it executes. |
1396 | # We get here for two reasons: | 1320 | * We get here for two reasons: |
1397 | # 1. Fault while reloading DS, ES, FS or GS | 1321 | * 1. Fault while reloading DS, ES, FS or GS |
1398 | # 2. Fault while executing IRET | 1322 | * 2. Fault while executing IRET |
1399 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | 1323 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
1400 | # registers that could be reloaded and zeroed the others. | 1324 | * registers that could be reloaded and zeroed the others. |
1401 | # Category 2 we fix up by killing the current process. We cannot use the | 1325 | * Category 2 we fix up by killing the current process. We cannot use the |
1402 | # normal Linux return path in this case because if we use the IRET hypercall | 1326 | * normal Linux return path in this case because if we use the IRET hypercall |
1403 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1327 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1404 | # We distinguish between categories by comparing each saved segment register | 1328 | * We distinguish between categories by comparing each saved segment register |
1405 | # with its current contents: any discrepancy means we are in category 1. | 1329 | * with its current contents: any discrepancy means we are in category 1. |
1406 | */ | 1330 | */ |
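
The category test reduces to comparing each selector Xen saved against the live one, as in this standalone sketch; the struct and field names are invented (the real code compares %ds/%es/%fs/%gs against words at fixed stack offsets):

#include <stdio.h>

struct saved_segs { unsigned short ds, es, fs, gs; };

static int classify_failsafe(const struct saved_segs *saved,
                             const struct saved_segs *live)
{
        if (saved->ds != live->ds || saved->es != live->es ||
            saved->fs != live->fs || saved->gs != live->gs)
                return 1;       /* a segment reload faulted: retry the IRET */
        return 2;               /* IRET itself faulted: kill the process */
}

int main(void)
{
        struct saved_segs saved = { 0x2b, 0x2b, 0, 0 };
        struct saved_segs live  = { 0x2b, 0x00, 0, 0 };  /* es was zeroed */

        printf("category %d\n", classify_failsafe(&saved, &live));
        return 0;
}
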
1407 | ENTRY(xen_failsafe_callback) | 1331 | ENTRY(xen_failsafe_callback) |
1408 | framesz = (RIP-0x30) /* workaround buggy gas */ | 1332 | INTR_FRAME 1 (6*8) |
1409 | _frame framesz | 1333 | /*CFI_REL_OFFSET gs,GS*/ |
1410 | CFI_REL_OFFSET rcx, 0 | 1334 | /*CFI_REL_OFFSET fs,FS*/ |
1411 | CFI_REL_OFFSET r11, 8 | 1335 | /*CFI_REL_OFFSET es,ES*/ |
1336 | /*CFI_REL_OFFSET ds,DS*/ | ||
1337 | CFI_REL_OFFSET r11,8 | ||
1338 | CFI_REL_OFFSET rcx,0 | ||
1412 | movw %ds,%cx | 1339 | movw %ds,%cx |
1413 | cmpw %cx,0x10(%rsp) | 1340 | cmpw %cx,0x10(%rsp) |
1414 | CFI_REMEMBER_STATE | 1341 | CFI_REMEMBER_STATE |
@@ -1429,12 +1356,9 @@ ENTRY(xen_failsafe_callback) | |||
1429 | CFI_RESTORE r11 | 1356 | CFI_RESTORE r11 |
1430 | addq $0x30,%rsp | 1357 | addq $0x30,%rsp |
1431 | CFI_ADJUST_CFA_OFFSET -0x30 | 1358 | CFI_ADJUST_CFA_OFFSET -0x30 |
1432 | pushq $0 | 1359 | pushq_cfi $0 /* RIP */ |
1433 | CFI_ADJUST_CFA_OFFSET 8 | 1360 | pushq_cfi %r11 |
1434 | pushq %r11 | 1361 | pushq_cfi %rcx |
1435 | CFI_ADJUST_CFA_OFFSET 8 | ||
1436 | pushq %rcx | ||
1437 | CFI_ADJUST_CFA_OFFSET 8 | ||
1438 | jmp general_protection | 1362 | jmp general_protection |
1439 | CFI_RESTORE_STATE | 1363 | CFI_RESTORE_STATE |
1440 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1364 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
@@ -1444,11 +1368,223 @@ ENTRY(xen_failsafe_callback) | |||
1444 | CFI_RESTORE r11 | 1368 | CFI_RESTORE r11 |
1445 | addq $0x30,%rsp | 1369 | addq $0x30,%rsp |
1446 | CFI_ADJUST_CFA_OFFSET -0x30 | 1370 | CFI_ADJUST_CFA_OFFSET -0x30 |
1447 | pushq $0 | 1371 | pushq_cfi $0 |
1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
1449 | SAVE_ALL | 1372 | SAVE_ALL |
1450 | jmp error_exit | 1373 | jmp error_exit |
1451 | CFI_ENDPROC | 1374 | CFI_ENDPROC |
1452 | END(xen_failsafe_callback) | 1375 | END(xen_failsafe_callback) |
1453 | 1376 | ||
1454 | #endif /* CONFIG_XEN */ | 1377 | #endif /* CONFIG_XEN */ |
1378 | |||
1379 | /* | ||
1380 | * Some functions should be protected against kprobes | ||
1381 | */ | ||
1382 | .pushsection .kprobes.text, "ax" | ||
1383 | |||
1384 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | ||
1385 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | ||
1386 | paranoiderrorentry stack_segment do_stack_segment | ||
1387 | errorentry general_protection do_general_protection | ||
1388 | errorentry page_fault do_page_fault | ||
1389 | #ifdef CONFIG_X86_MCE | ||
1390 | paranoidzeroentry machine_check do_machine_check | ||
1391 | #endif | ||
1392 | |||
1393 | /* | ||
1394 | * "Paranoid" exit path from exception stack. | ||
1395 | * Paranoid because this is used by NMIs and cannot take | ||
1396 | * any kernel state for granted. | ||
1397 | * We don't do kernel preemption checks here, because only | ||
1398 | * NMIs should be common, and they do not enable IRQs and | ||
1399 | * cannot get reschedule ticks. | ||
1400 | * | ||
1401 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
1402 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
1403 | * hard flags at once, atomically) | ||
1404 | */ | ||
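
For orientation, here is the control flow the assembly below implements, rendered as schematic C; the function is not real kernel code, and the comments stand in for the macros and calls it models (compile with -c to check it as a unit):

void paranoid_exit_model(int no_swapgs, int from_user, unsigned work_flags)
{
        if (no_swapgs)          /* %ebx != 0: no swapgs needed */
                goto restore;
        if (!from_user)         /* CS(%rsp) says kernel mode */
                goto swapgs;
        while (work_flags) {    /* the _TIF_WORK_MASK loop */
                /* sync_regs(), then schedule() or do_notify_resume(),
                 * with IRQs enabled only around those calls */
                work_flags = 0; /* pretend the work drained */
        }
swapgs:
        ;                       /* TRACE_IRQS_IRETQ 0; SWAPGS_UNSAFE_STACK */
restore:
        ;                       /* RESTORE_ALL 8; jmp irq_return */
}
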
1405 | |||
1406 | /* ebx: no swapgs flag */ | ||
1407 | ENTRY(paranoid_exit) | ||
1408 | INTR_FRAME | ||
1409 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1410 | TRACE_IRQS_OFF | ||
1411 | testl %ebx,%ebx /* swapgs needed? */ | ||
1412 | jnz paranoid_restore | ||
1413 | testl $3,CS(%rsp) | ||
1414 | jnz paranoid_userspace | ||
1415 | paranoid_swapgs: | ||
1416 | TRACE_IRQS_IRETQ 0 | ||
1417 | SWAPGS_UNSAFE_STACK | ||
1418 | paranoid_restore: | ||
1419 | RESTORE_ALL 8 | ||
1420 | jmp irq_return | ||
1421 | paranoid_userspace: | ||
1422 | GET_THREAD_INFO(%rcx) | ||
1423 | movl TI_flags(%rcx),%ebx | ||
1424 | andl $_TIF_WORK_MASK,%ebx | ||
1425 | jz paranoid_swapgs | ||
1426 | movq %rsp,%rdi /* &pt_regs */ | ||
1427 | call sync_regs | ||
1428 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1429 | testl $_TIF_NEED_RESCHED,%ebx | ||
1430 | jnz paranoid_schedule | ||
1431 | movl %ebx,%edx /* arg3: thread flags */ | ||
1432 | TRACE_IRQS_ON | ||
1433 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1434 | xorl %esi,%esi /* arg2: oldset */ | ||
1435 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1436 | call do_notify_resume | ||
1437 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1438 | TRACE_IRQS_OFF | ||
1439 | jmp paranoid_userspace | ||
1440 | paranoid_schedule: | ||
1441 | TRACE_IRQS_ON | ||
1442 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1443 | call schedule | ||
1444 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1445 | TRACE_IRQS_OFF | ||
1446 | jmp paranoid_userspace | ||
1447 | CFI_ENDPROC | ||
1448 | END(paranoid_exit) | ||
1449 | |||
1450 | /* | ||
1451 | * Exception entry point. This expects an error code/orig_rax on the stack. | ||
1452 | * returns in "no swapgs flag" in %ebx. | ||
1453 | */ | ||
1454 | ENTRY(error_entry) | ||
1455 | XCPT_FRAME | ||
1456 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1457 | /* oldrax contains error code */ | ||
1458 | cld | ||
1459 | movq_cfi rdi, RDI+8 | ||
1460 | movq_cfi rsi, RSI+8 | ||
1461 | movq_cfi rdx, RDX+8 | ||
1462 | movq_cfi rcx, RCX+8 | ||
1463 | movq_cfi rax, RAX+8 | ||
1464 | movq_cfi r8, R8+8 | ||
1465 | movq_cfi r9, R9+8 | ||
1466 | movq_cfi r10, R10+8 | ||
1467 | movq_cfi r11, R11+8 | ||
1468 | movq_cfi rbx, RBX+8 | ||
1469 | movq_cfi rbp, RBP+8 | ||
1470 | movq_cfi r12, R12+8 | ||
1471 | movq_cfi r13, R13+8 | ||
1472 | movq_cfi r14, R14+8 | ||
1473 | movq_cfi r15, R15+8 | ||
1474 | xorl %ebx,%ebx | ||
1475 | testl $3,CS+8(%rsp) | ||
1476 | je error_kernelspace | ||
1477 | error_swapgs: | ||
1478 | SWAPGS | ||
1479 | error_sti: | ||
1480 | TRACE_IRQS_OFF | ||
1481 | ret | ||
1482 | CFI_ENDPROC | ||
1483 | |||
1484 | /* | ||
1485 | * There are two places in the kernel that can potentially fault with | ||
1486 | * usergs. Handle them here. The exception handlers after iret run with | ||
1487 | * kernel gs again, so don't set the user space flag. B stepping K8s | ||
1488 | * sometimes report a truncated RIP for IRET exceptions returning to | ||
1489 | * compat mode. Check for these here too. | ||
1490 | */ | ||
1491 | error_kernelspace: | ||
1492 | incl %ebx | ||
1493 | leaq irq_return(%rip),%rcx | ||
1494 | cmpq %rcx,RIP+8(%rsp) | ||
1495 | je error_swapgs | ||
1496 | movl %ecx,%ecx /* zero extend */ | ||
1497 | cmpq %rcx,RIP+8(%rsp) | ||
1498 | je error_swapgs | ||
1499 | cmpq $gs_change,RIP+8(%rsp) | ||
1500 | je error_swapgs | ||
1501 | jmp error_sti | ||
1502 | END(error_entry) | ||
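
The "zero extend" step above is worth spelling out: movl %ecx,%ecx clears the upper 32 bits of %rcx, producing the truncated form of irq_return's address that a B-stepping K8 may report, so the saved RIP is compared against both the full and the truncated value. A standalone demo of the arithmetic, with an invented address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t irq_return = 0xffffffff80201234ULL; /* hypothetical address */
        uint64_t truncated  = (uint32_t)irq_return;  /* movl %ecx,%ecx */

        uint64_t reported_rip = 0x80201234ULL;       /* RIP truncated by CPU */
        printf("match full: %d, match truncated: %d\n",
               reported_rip == irq_return, reported_rip == truncated);
        return 0;
}
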
1503 | |||
1504 | |||
1505 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1506 | ENTRY(error_exit) | ||
1507 | DEFAULT_FRAME | ||
1508 | movl %ebx,%eax | ||
1509 | RESTORE_REST | ||
1510 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1511 | TRACE_IRQS_OFF | ||
1512 | GET_THREAD_INFO(%rcx) | ||
1513 | testl %eax,%eax | ||
1514 | jne retint_kernel | ||
1515 | LOCKDEP_SYS_EXIT_IRQ | ||
1516 | movl TI_flags(%rcx),%edx | ||
1517 | movl $_TIF_WORK_MASK,%edi | ||
1518 | andl %edi,%edx | ||
1519 | jnz retint_careful | ||
1520 | jmp retint_swapgs | ||
1521 | CFI_ENDPROC | ||
1522 | END(error_exit) | ||
1523 | |||
1524 | |||
1525 | /* runs on exception stack */ | ||
1526 | ENTRY(nmi) | ||
1527 | INTR_FRAME | ||
1528 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1529 | pushq_cfi $-1 | ||
1530 | subq $15*8, %rsp | ||
1531 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1532 | call save_paranoid | ||
1533 | DEFAULT_FRAME 0 | ||
1534 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | ||
1535 | movq %rsp,%rdi | ||
1536 | movq $-1,%rsi | ||
1537 | call do_nmi | ||
1538 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1539 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1540 | /* ebx: no swapgs flag */ | ||
1541 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1542 | testl %ebx,%ebx /* swapgs needed? */ | ||
1543 | jnz nmi_restore | ||
1544 | testl $3,CS(%rsp) | ||
1545 | jnz nmi_userspace | ||
1546 | nmi_swapgs: | ||
1547 | SWAPGS_UNSAFE_STACK | ||
1548 | nmi_restore: | ||
1549 | RESTORE_ALL 8 | ||
1550 | jmp irq_return | ||
1551 | nmi_userspace: | ||
1552 | GET_THREAD_INFO(%rcx) | ||
1553 | movl TI_flags(%rcx),%ebx | ||
1554 | andl $_TIF_WORK_MASK,%ebx | ||
1555 | jz nmi_swapgs | ||
1556 | movq %rsp,%rdi /* &pt_regs */ | ||
1557 | call sync_regs | ||
1558 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1559 | testl $_TIF_NEED_RESCHED,%ebx | ||
1560 | jnz nmi_schedule | ||
1561 | movl %ebx,%edx /* arg3: thread flags */ | ||
1562 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1563 | xorl %esi,%esi /* arg2: oldset */ | ||
1564 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1565 | call do_notify_resume | ||
1566 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1567 | jmp nmi_userspace | ||
1568 | nmi_schedule: | ||
1569 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1570 | call schedule | ||
1571 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1572 | jmp nmi_userspace | ||
1573 | CFI_ENDPROC | ||
1574 | #else | ||
1575 | jmp paranoid_exit | ||
1576 | CFI_ENDPROC | ||
1577 | #endif | ||
1578 | END(nmi) | ||
1579 | |||
1580 | ENTRY(ignore_sysret) | ||
1581 | CFI_STARTPROC | ||
1582 | mov $-ENOSYS,%eax | ||
1583 | sysret | ||
1584 | CFI_ENDPROC | ||
1585 | END(ignore_sysret) | ||
1586 | |||
1587 | /* | ||
1588 | * End of kprobes section | ||
1589 | */ | ||
1590 | .popsection | ||
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index f454c78fcef6..55515d73d9c2 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c | |||
@@ -38,8 +38,10 @@ | |||
38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
39 | #include <asm/nmi.h> | 39 | #include <asm/nmi.h> |
40 | #include <asm/smp.h> | 40 | #include <asm/smp.h> |
41 | #include <asm/atomic.h> | ||
41 | #include <asm/apicdef.h> | 42 | #include <asm/apicdef.h> |
42 | #include <mach_mpparse.h> | 43 | #include <asm/genapic.h> |
44 | #include <asm/setup.h> | ||
43 | 45 | ||
44 | /* | 46 | /* |
45 | * ES7000 chipsets | 47 | * ES7000 chipsets |
@@ -161,6 +163,39 @@ es7000_rename_gsi(int ioapic, int gsi) | |||
161 | return gsi; | 163 | return gsi; |
162 | } | 164 | } |
163 | 165 | ||
166 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
167 | { | ||
168 | unsigned long vect = 0, psaival = 0; | ||
169 | |||
170 | if (psai == NULL) | ||
171 | return -1; | ||
172 | |||
173 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
174 | psaival = (0x1000000 | vect | cpu); | ||
175 | |||
176 | while (*psai & 0x1000000) | ||
177 | ; | ||
178 | |||
179 | *psai = psaival; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
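
The mailbox word stored to *psai packs three fields: bit 24 is the busy flag, bits 16 and up carry the page frame of the startup code, and the low bits name the CPU. This small demo reproduces the encoding; the physical address is invented, and the real code first spins while bit 24 is set so the previous command can drain:

#include <stdio.h>

int main(void)
{
        unsigned long eip_phys = 0x9f000;               /* assumed phys addr */
        unsigned long vect = (eip_phys / 0x1000) << 16; /* page-frame field */
        unsigned long psaival = 0x1000000 | vect | 3;   /* busy | vect | cpu 3 */

        printf("mailbox word: 0x%lx\n", psaival);
        return 0;
}
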
184 | static int __init es7000_update_genapic(void) | ||
185 | { | ||
186 | apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
187 | |||
188 | /* MPENTIUMIII */ | ||
189 | if (boot_cpu_data.x86 == 6 && | ||
190 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) { | ||
191 | es7000_update_genapic_to_cluster(); | ||
192 | apic->wait_for_init_deassert = NULL; | ||
193 | apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
194 | } | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
164 | void __init | 199 | void __init |
165 | setup_unisys(void) | 200 | setup_unisys(void) |
166 | { | 201 | { |
@@ -176,6 +211,8 @@ setup_unisys(void) | |||
176 | else | 211 | else |
177 | es7000_plat = ES7000_CLASSIC; | 212 | es7000_plat = ES7000_CLASSIC; |
178 | ioapic_renumber_irq = es7000_rename_gsi; | 213 | ioapic_renumber_irq = es7000_rename_gsi; |
214 | |||
215 | x86_quirks->update_genapic = es7000_update_genapic; | ||
179 | } | 216 | } |
180 | 217 | ||
181 | /* | 218 | /* |
@@ -324,40 +361,449 @@ es7000_mip_write(struct mip_reg *mip_reg) | |||
324 | return status; | 361 | return status; |
325 | } | 362 | } |
326 | 363 | ||
327 | int | 364 | void __init es7000_enable_apic_mode(void) |
328 | es7000_start_cpu(int cpu, unsigned long eip) | ||
329 | { | 365 | { |
330 | unsigned long vect = 0, psaival = 0; | 366 | struct mip_reg es7000_mip_reg; |
367 | int mip_status; | ||
331 | 368 | ||
332 | if (psai == NULL) | 369 | if (!es7000_plat) |
333 | return -1; | 370 | return; |
334 | 371 | ||
335 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | 372 | printk("ES7000: Enabling APIC mode.\n"); |
336 | psaival = (0x1000000 | vect | cpu); | 373 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); |
374 | es7000_mip_reg.off_0 = MIP_SW_APIC; | ||
375 | es7000_mip_reg.off_38 = MIP_VALID; | ||
337 | 376 | ||
338 | while (*psai & 0x1000000) | 377 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { |
339 | ; | 378 | printk("es7000_enable_apic_mode: command failed, status = %x\n", |
379 | mip_status); | ||
380 | } | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * APIC driver for the Unisys ES7000 chipset. | ||
385 | */ | ||
386 | #define APIC_DEFINITION 1 | ||
387 | #include <linux/threads.h> | ||
388 | #include <linux/cpumask.h> | ||
389 | #include <asm/mpspec.h> | ||
390 | #include <asm/genapic.h> | ||
391 | #include <asm/fixmap.h> | ||
392 | #include <asm/apicdef.h> | ||
393 | #include <linux/kernel.h> | ||
394 | #include <linux/string.h> | ||
395 | #include <linux/init.h> | ||
396 | #include <linux/acpi.h> | ||
397 | #include <linux/smp.h> | ||
398 | #include <asm/ipi.h> | ||
399 | |||
400 | #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) | ||
401 | #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) | ||
402 | #define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ | ||
403 | |||
404 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | ||
405 | |||
406 | extern void es7000_enable_apic_mode(void); | ||
407 | extern int apic_version [MAX_APICS]; | ||
408 | extern u8 cpu_2_logical_apicid[]; | ||
409 | extern unsigned int boot_cpu_physical_apicid; | ||
410 | |||
411 | extern int parse_unisys_oem (char *oemptr); | ||
412 | extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); | ||
413 | extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); | ||
414 | extern void setup_unisys(void); | ||
415 | |||
416 | #define apicid_cluster(apicid) (apicid & 0xF0) | ||
417 | #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) | ||
418 | |||
419 | static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) | ||
420 | { | ||
421 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
422 | * specified in the interrupt destination when using lowest | ||
423 | * priority interrupt delivery mode. | ||
424 | * | ||
425 | * In particular there was a hyperthreading cpu observed to | ||
426 | * deliver interrupts to the wrong hyperthread when only one | ||
427 | * hyperthread was specified in the interrupt destination. | ||
428 | */ | ||
429 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; | ||
430 | } | ||
340 | 431 | ||
341 | *psai = psaival; | ||
342 | 432 | ||
433 | static void es7000_wait_for_init_deassert(atomic_t *deassert) | ||
434 | { | ||
435 | #ifndef CONFIG_ES7000_CLUSTERED_APIC | ||
436 | while (!atomic_read(deassert)) | ||
437 | cpu_relax(); | ||
438 | #endif | ||
439 | return; | ||
440 | } | ||
441 | |||
442 | static unsigned int es7000_get_apic_id(unsigned long x) | ||
443 | { | ||
444 | return (x >> 24) & 0xFF; | ||
445 | } | ||
446 | |||
447 | #ifdef CONFIG_ACPI | ||
448 | static int es7000_check_dsdt(void) | ||
449 | { | ||
450 | struct acpi_table_header header; | ||
451 | |||
452 | if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && | ||
453 | !strncmp(header.oem_id, "UNISYS", 6)) | ||
454 | return 1; | ||
343 | return 0; | 455 | return 0; |
456 | } | ||
457 | #endif | ||
344 | 458 | ||
459 | static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) | ||
460 | { | ||
461 | default_send_IPI_mask_sequence_phys(mask, vector); | ||
345 | } | 462 | } |
346 | 463 | ||
347 | void __init | 464 | static void es7000_send_IPI_allbutself(int vector) |
348 | es7000_sw_apic(void) | 465 | { |
349 | { | 466 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); |
350 | if (es7000_plat) { | 467 | } |
351 | int mip_status; | 468 | |
352 | struct mip_reg es7000_mip_reg; | 469 | static void es7000_send_IPI_all(int vector) |
353 | 470 | { | |
354 | printk("ES7000: Enabling APIC mode.\n"); | 471 | es7000_send_IPI_mask(cpu_online_mask, vector); |
355 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | 472 | } |
356 | es7000_mip_reg.off_0 = MIP_SW_APIC; | 473 | |
357 | es7000_mip_reg.off_38 = (MIP_VALID); | 474 | static int es7000_apic_id_registered(void) |
358 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) | 475 | { |
359 | printk("es7000_sw_apic: command failed, status = %x\n", | 476 | return 1; |
360 | mip_status); | 477 | } |
361 | return; | 478 | |
479 | static const cpumask_t *target_cpus_cluster(void) | ||
480 | { | ||
481 | return &CPU_MASK_ALL; | ||
482 | } | ||
483 | |||
484 | static const cpumask_t *es7000_target_cpus(void) | ||
485 | { | ||
486 | return &cpumask_of_cpu(smp_processor_id()); | ||
487 | } | ||
488 | |||
489 | static unsigned long | ||
490 | es7000_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
491 | { | ||
492 | return 0; | ||
493 | } | ||
494 | static unsigned long es7000_check_apicid_present(int bit) | ||
495 | { | ||
496 | return physid_isset(bit, phys_cpu_present_map); | ||
497 | } | ||
498 | |||
499 | static unsigned long calculate_ldr(int cpu) | ||
500 | { | ||
501 | unsigned long id = xapic_phys_to_log_apicid(cpu); | ||
502 | |||
503 | return (SET_APIC_LOGICAL_ID(id)); | ||
504 | } | ||
505 | |||
506 | /* | ||
507 | * Set up the logical destination ID. | ||
508 | * | ||
509 | * Intel recommends to set DFR, LdR and TPR before enabling | ||
510 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
511 | * document number 292116). So here it goes... | ||
512 | */ | ||
513 | static void es7000_init_apic_ldr_cluster(void) | ||
514 | { | ||
515 | unsigned long val; | ||
516 | int cpu = smp_processor_id(); | ||
517 | |||
518 | apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); | ||
519 | val = calculate_ldr(cpu); | ||
520 | apic_write(APIC_LDR, val); | ||
521 | } | ||
522 | |||
523 | static void es7000_init_apic_ldr(void) | ||
524 | { | ||
525 | unsigned long val; | ||
526 | int cpu = smp_processor_id(); | ||
527 | |||
528 | apic_write(APIC_DFR, APIC_DFR_VALUE); | ||
529 | val = calculate_ldr(cpu); | ||
530 | apic_write(APIC_LDR, val); | ||
531 | } | ||
532 | |||
533 | static void es7000_setup_apic_routing(void) | ||
534 | { | ||
535 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | ||
536 | printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | ||
537 | (apic_version[apic] == 0x14) ? | ||
538 | "Physical Cluster" : "Logical Cluster", | ||
539 | nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); | ||
540 | } | ||
541 | |||
542 | static int es7000_apicid_to_node(int logical_apicid) | ||
543 | { | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | |||
548 | static int es7000_cpu_present_to_apicid(int mps_cpu) | ||
549 | { | ||
550 | if (!mps_cpu) | ||
551 | return boot_cpu_physical_apicid; | ||
552 | else if (mps_cpu < nr_cpu_ids) | ||
553 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
554 | else | ||
555 | return BAD_APICID; | ||
556 | } | ||
557 | |||
558 | static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) | ||
559 | { | ||
560 | static int id = 0; | ||
561 | physid_mask_t mask; | ||
562 | |||
563 | mask = physid_mask_of_physid(id); | ||
564 | ++id; | ||
565 | |||
566 | return mask; | ||
567 | } | ||
568 | |||
569 | /* Mapping from cpu number to logical apicid */ | ||
570 | static int es7000_cpu_to_logical_apicid(int cpu) | ||
571 | { | ||
572 | #ifdef CONFIG_SMP | ||
573 | if (cpu >= nr_cpu_ids) | ||
574 | return BAD_APICID; | ||
575 | return (int)cpu_2_logical_apicid[cpu]; | ||
576 | #else | ||
577 | return logical_smp_processor_id(); | ||
578 | #endif | ||
579 | } | ||
580 | |||
581 | static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) | ||
582 | { | ||
583 | /* For clustered we don't have a good way to do this yet - hack */ | ||
584 | return physids_promote(0xff); | ||
585 | } | ||
586 | |||
587 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | ||
588 | { | ||
589 | boot_cpu_physical_apicid = read_apic_id(); | ||
590 | return (1); | ||
591 | } | ||
592 | |||
593 | static unsigned int | ||
594 | es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) | ||
595 | { | ||
596 | int cpus_found = 0; | ||
597 | int num_bits_set; | ||
598 | int apicid; | ||
599 | int cpu; | ||
600 | |||
601 | num_bits_set = cpumask_weight(cpumask); | ||
602 | /* Return id to all */ | ||
603 | if (num_bits_set == nr_cpu_ids) | ||
604 | return 0xFF; | ||
605 | /* | ||
606 | * The cpus in the mask must all be on the same apicid cluster. If | ||
607 | * they are not, return the default value of target_cpus(): | ||
608 | */ | ||
609 | cpu = cpumask_first(cpumask); | ||
610 | apicid = es7000_cpu_to_logical_apicid(cpu); | ||
611 | |||
612 | while (cpus_found < num_bits_set) { | ||
613 | if (cpumask_test_cpu(cpu, cpumask)) { | ||
614 | int new_apicid = es7000_cpu_to_logical_apicid(cpu); | ||
615 | |||
616 | if (apicid_cluster(apicid) != | ||
617 | apicid_cluster(new_apicid)) { | ||
618 | printk ("%s: Not a valid mask!\n", __func__); | ||
619 | |||
620 | return 0xFF; | ||
621 | } | ||
622 | apicid = new_apicid; | ||
623 | cpus_found++; | ||
624 | } | ||
625 | cpu++; | ||
626 | } | ||
627 | return apicid; | ||
628 | } | ||
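
The loop above enforces a single invariant: every logical APIC ID in the mask must share the same 0xF0 cluster nibble, otherwise the broadcast ID 0xFF is returned. The same check as a self-contained demo over an invented ID array:

#include <stdio.h>

#define apicid_cluster(id) ((id) & 0xF0)

int main(void)
{
        int ids[] = { 0x51, 0x52, 0x53 };       /* all in cluster 0x50 */
        int n = sizeof(ids) / sizeof(ids[0]);
        int apicid = ids[0];

        for (int i = 1; i < n; i++) {
                if (apicid_cluster(ids[i]) != apicid_cluster(apicid)) {
                        apicid = 0xFF;          /* "Not a valid mask!" */
                        break;
                }
                apicid = ids[i];
        }
        printf("result: 0x%x\n", apicid);
        return 0;
}
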
629 | |||
630 | static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) | ||
631 | { | ||
632 | int cpus_found = 0; | ||
633 | int num_bits_set; | ||
634 | int apicid; | ||
635 | int cpu; | ||
636 | |||
637 | num_bits_set = cpus_weight(*cpumask); | ||
638 | /* Return id to all */ | ||
639 | if (num_bits_set == nr_cpu_ids) | ||
640 | return es7000_cpu_to_logical_apicid(0); | ||
641 | /* | ||
642 | * The cpus in the mask must all be on the same apicid cluster. If | ||
643 | * they are not, return the default value of target_cpus(): | ||
644 | */ | ||
645 | cpu = first_cpu(*cpumask); | ||
646 | apicid = es7000_cpu_to_logical_apicid(cpu); | ||
647 | while (cpus_found < num_bits_set) { | ||
648 | if (cpu_isset(cpu, *cpumask)) { | ||
649 | int new_apicid = es7000_cpu_to_logical_apicid(cpu); | ||
650 | |||
651 | if (apicid_cluster(apicid) != | ||
652 | apicid_cluster(new_apicid)) { | ||
653 | printk ("%s: Not a valid mask!\n", __func__); | ||
654 | |||
655 | return es7000_cpu_to_logical_apicid(0); | ||
656 | } | ||
657 | apicid = new_apicid; | ||
658 | cpus_found++; | ||
659 | } | ||
660 | cpu++; | ||
362 | } | 661 | } |
662 | return apicid; | ||
363 | } | 663 | } |
664 | |||
665 | static unsigned int | ||
666 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
667 | const struct cpumask *andmask) | ||
668 | { | ||
669 | int apicid = es7000_cpu_to_logical_apicid(0); | ||
670 | cpumask_var_t cpumask; | ||
671 | |||
672 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
673 | return apicid; | ||
674 | |||
675 | cpumask_and(cpumask, inmask, andmask); | ||
676 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
677 | apicid = es7000_cpu_mask_to_apicid(cpumask); | ||
678 | |||
679 | free_cpumask_var(cpumask); | ||
680 | |||
681 | return apicid; | ||
682 | } | ||
683 | |||
684 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) | ||
685 | { | ||
686 | return cpuid_apic >> index_msb; | ||
687 | } | ||
688 | |||
689 | void __init es7000_update_genapic_to_cluster(void) | ||
690 | { | ||
691 | apic->target_cpus = target_cpus_cluster; | ||
692 | apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; | ||
693 | apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; | ||
694 | |||
695 | apic->init_apic_ldr = es7000_init_apic_ldr_cluster; | ||
696 | |||
697 | apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; | ||
698 | } | ||
699 | |||
700 | static int probe_es7000(void) | ||
701 | { | ||
702 | /* probed later in mptable/ACPI hooks */ | ||
703 | return 0; | ||
704 | } | ||
705 | |||
706 | static __init int | ||
707 | es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
708 | { | ||
709 | if (mpc->oemptr) { | ||
710 | struct mpc_oemtable *oem_table = | ||
711 | (struct mpc_oemtable *)mpc->oemptr; | ||
712 | |||
713 | if (!strncmp(oem, "UNISYS", 6)) | ||
714 | return parse_unisys_oem((char *)oem_table); | ||
715 | } | ||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | #ifdef CONFIG_ACPI | ||
720 | /* Hook from generic ACPI tables.c */ | ||
721 | static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
722 | { | ||
723 | unsigned long oem_addr = 0; | ||
724 | int check_dsdt; | ||
725 | int ret = 0; | ||
726 | |||
727 | /* check the DSDT first, to avoid clearing the fixmap for oem_addr */ | ||
728 | check_dsdt = es7000_check_dsdt(); | ||
729 | |||
730 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
731 | if (check_dsdt) | ||
732 | ret = parse_unisys_oem((char *)oem_addr); | ||
733 | else { | ||
734 | setup_unisys(); | ||
735 | ret = 1; | ||
736 | } | ||
737 | /* | ||
738 | * we need to unmap it | ||
739 | */ | ||
740 | unmap_unisys_acpi_oem_table(oem_addr); | ||
741 | } | ||
742 | return ret; | ||
743 | } | ||
744 | #else | ||
745 | static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
746 | { | ||
747 | return 0; | ||
748 | } | ||
749 | #endif | ||
750 | |||
751 | |||
752 | struct genapic apic_es7000 = { | ||
753 | |||
754 | .name = "es7000", | ||
755 | .probe = probe_es7000, | ||
756 | .acpi_madt_oem_check = es7000_acpi_madt_oem_check, | ||
757 | .apic_id_registered = es7000_apic_id_registered, | ||
758 | |||
759 | .irq_delivery_mode = dest_Fixed, | ||
760 | /* phys delivery to target CPUs: */ | ||
761 | .irq_dest_mode = 0, | ||
762 | |||
763 | .target_cpus = es7000_target_cpus, | ||
764 | .disable_esr = 1, | ||
765 | .dest_logical = 0, | ||
766 | .check_apicid_used = es7000_check_apicid_used, | ||
767 | .check_apicid_present = es7000_check_apicid_present, | ||
768 | |||
769 | .vector_allocation_domain = es7000_vector_allocation_domain, | ||
770 | .init_apic_ldr = es7000_init_apic_ldr, | ||
771 | |||
772 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | ||
773 | .setup_apic_routing = es7000_setup_apic_routing, | ||
774 | .multi_timer_check = NULL, | ||
775 | .apicid_to_node = es7000_apicid_to_node, | ||
776 | .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, | ||
777 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | ||
778 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | ||
779 | .setup_portio_remap = NULL, | ||
780 | .check_phys_apicid_present = es7000_check_phys_apicid_present, | ||
781 | .enable_apic_mode = es7000_enable_apic_mode, | ||
782 | .phys_pkg_id = es7000_phys_pkg_id, | ||
783 | .mps_oem_check = es7000_mps_oem_check, | ||
784 | |||
785 | .get_apic_id = es7000_get_apic_id, | ||
786 | .set_apic_id = NULL, | ||
787 | .apic_id_mask = 0xFF << 24, | ||
788 | |||
789 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
790 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | ||
791 | |||
792 | .send_IPI_mask = es7000_send_IPI_mask, | ||
793 | .send_IPI_mask_allbutself = NULL, | ||
794 | .send_IPI_allbutself = es7000_send_IPI_allbutself, | ||
795 | .send_IPI_all = es7000_send_IPI_all, | ||
796 | .send_IPI_self = default_send_IPI_self, | ||
797 | |||
798 | .wakeup_cpu = NULL, | ||
799 | |||
800 | .trampoline_phys_low = 0x467, | ||
801 | .trampoline_phys_high = 0x469, | ||
802 | |||
803 | .wait_for_init_deassert = es7000_wait_for_init_deassert, | ||
804 | |||
805 | /* Nothing to do for most platforms, since cleared by the INIT cycle: */ | ||
806 | .smp_callin_clear_local_apic = NULL, | ||
807 | .store_NMI_vector = NULL, | ||
808 | .inquire_remote_apic = default_inquire_remote_apic, | ||
809 | }; | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..231bdd3c5b1c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -14,14 +14,17 @@ | |||
14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/sched.h> | ||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
19 | 20 | ||
20 | #include <asm/ftrace.h> | 21 | #include <asm/ftrace.h> |
22 | #include <linux/ftrace.h> | ||
21 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
24 | #include <asm/nmi.h> | ||
22 | 25 | ||
23 | 26 | ||
24 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | 27 | #ifdef CONFIG_DYNAMIC_FTRACE |
25 | 28 | ||
26 | union ftrace_code_union { | 29 | union ftrace_code_union { |
27 | char code[MCOUNT_INSN_SIZE]; | 30 | char code[MCOUNT_INSN_SIZE]; |
@@ -31,18 +34,12 @@ union ftrace_code_union { | |||
31 | } __attribute__((packed)); | 34 | } __attribute__((packed)); |
32 | }; | 35 | }; |
33 | 36 | ||
34 | |||
35 | static int ftrace_calc_offset(long ip, long addr) | 37 | static int ftrace_calc_offset(long ip, long addr) |
36 | { | 38 | { |
37 | return (int)(addr - ip); | 39 | return (int)(addr - ip); |
38 | } | 40 | } |
39 | 41 | ||
40 | unsigned char *ftrace_nop_replace(void) | 42 | static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) |
41 | { | ||
42 | return ftrace_nop; | ||
43 | } | ||
44 | |||
45 | unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | ||
46 | { | 43 | { |
47 | static union ftrace_code_union calc; | 44 | static union ftrace_code_union calc; |
48 | 45 | ||
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
56 | return calc.code; | 53 | return calc.code; |
57 | } | 54 | } |
58 | 55 | ||
59 | int | 56 | /* |
57 | * Modifying code must take extra care. On an SMP machine, if | ||
58 | * the code being modified is also being executed on another CPU | ||
59 | * that CPU will have undefined results and possibly take a GPF. | ||
60 | * We use kstop_machine to stop other CPUs from executing code. | ||
61 | * But this does not stop NMIs from happening. We still need | ||
62 | * to protect against that. We separate out the modification of | ||
63 | * the code to take care of this. | ||
64 | * | ||
65 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
66 | * | ||
67 | * 1) Put the instruction pointer into the IP buffer | ||
68 | * and the new code into the "code" buffer. | ||
69 | * 2) Set a flag that says we are modifying code | ||
70 | * 3) Wait for any running NMIs to finish. | ||
71 | * 4) Write the code | ||
72 | * 5) Clear the flag. | ||
73 | * 6) Wait for any running NMIs to finish. | ||
74 | * | ||
75 | * If an NMI is executed, the first thing it does is to call | ||
76 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
77 | * and if it is, it will write what is in the IP and "code" buffers. | ||
78 | * | ||
79 | * The trick is, it does not matter if everyone is writing the same | ||
80 | * content to the code location. Also, if a CPU is executing code | ||
81 | * it is OK to write to that code location if the contents being written | ||
82 | * are the same as what exists. | ||
83 | */ | ||
84 | |||
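
A userspace rendering of steps 1-6 with C11 atomics standing in for the kernel's explicit barriers. This is a sketch, not the kernel implementation: the seq_cst operations supply the orderings that smp_mb()/smp_wmb() provide below, the names mirror the kernel's but the types differ, and it compiles as a translation unit (wire threads around it to experiment).

#include <stdatomic.h>
#include <string.h>

static _Atomic int in_nmi;
static _Atomic int mod_code_write;
static unsigned char *mod_code_ip;
static unsigned char mod_code_newcode[5];

static void nmi_enter_model(void)
{
        atomic_fetch_add(&in_nmi, 1);        /* in_nmi visible before flag read */
        if (atomic_load(&mod_code_write))    /* NMI performs the write itself */
                memcpy(mod_code_ip, mod_code_newcode, 5);
}

static void nmi_exit_model(void)
{
        atomic_fetch_sub(&in_nmi, 1);
}

static void do_mod_code_model(unsigned char *ip, const unsigned char *new_code)
{
        mod_code_ip = ip;                    /* step 1: fill the buffers */
        memcpy(mod_code_newcode, new_code, 5);
        atomic_store(&mod_code_write, 1);    /* step 2: publish the flag */
        while (atomic_load(&in_nmi))         /* step 3: wait for running NMIs */
                ;
        memcpy(ip, new_code, 5);             /* step 4: write the code */
        atomic_store(&mod_code_write, 0);    /* step 5: clear the flag */
        while (atomic_load(&in_nmi))         /* step 6: let late NMIs drain */
                ;
}
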
85 | static atomic_t in_nmi = ATOMIC_INIT(0); | ||
86 | static int mod_code_status; /* holds return value of text write */ | ||
87 | static int mod_code_write; /* set when NMI should do the write */ | ||
88 | static void *mod_code_ip; /* holds the IP to write to */ | ||
89 | static void *mod_code_newcode; /* holds the text to write to the IP */ | ||
90 | |||
91 | static unsigned nmi_wait_count; | ||
92 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
93 | |||
94 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
95 | { | ||
96 | int r; | ||
97 | |||
98 | r = snprintf(buf, size, "%u %u", | ||
99 | nmi_wait_count, | ||
100 | atomic_read(&nmi_update_count)); | ||
101 | return r; | ||
102 | } | ||
103 | |||
104 | static void ftrace_mod_code(void) | ||
105 | { | ||
106 | /* | ||
107 | * Yes, more than one CPU can be writing to mod_code_status | ||
108 | * (and to the code itself). | ||
109 | * But if one write were to fail, then they all should fail, and | ||
110 | * if one were to succeed, then they all should succeed. | ||
111 | */ | ||
112 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
113 | MCOUNT_INSN_SIZE); | ||
114 | } | ||
115 | |||
116 | void ftrace_nmi_enter(void) | ||
117 | { | ||
118 | atomic_inc(&in_nmi); | ||
119 | /* in_nmi must be visible before we read the write flag */ | ||
120 | smp_mb(); | ||
121 | if (mod_code_write) { | ||
122 | ftrace_mod_code(); | ||
123 | atomic_inc(&nmi_update_count); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | void ftrace_nmi_exit(void) | ||
128 | { | ||
129 | /* Finish all executions before clearing in_nmi */ | ||
130 | smp_wmb(); | ||
131 | atomic_dec(&in_nmi); | ||
132 | } | ||
133 | |||
134 | static void wait_for_nmi(void) | ||
135 | { | ||
136 | int waited = 0; | ||
137 | |||
138 | while (atomic_read(&in_nmi)) { | ||
139 | waited = 1; | ||
140 | cpu_relax(); | ||
141 | } | ||
142 | |||
143 | if (waited) | ||
144 | nmi_wait_count++; | ||
145 | } | ||
146 | |||
147 | static int | ||
148 | do_ftrace_mod_code(unsigned long ip, void *new_code) | ||
149 | { | ||
150 | mod_code_ip = (void *)ip; | ||
151 | mod_code_newcode = new_code; | ||
152 | |||
153 | /* The buffers need to be visible before we let NMIs write from them */ | ||
154 | smp_wmb(); | ||
155 | |||
156 | mod_code_write = 1; | ||
157 | |||
158 | /* Make sure write bit is visible before we wait on NMIs */ | ||
159 | smp_mb(); | ||
160 | |||
161 | wait_for_nmi(); | ||
162 | |||
163 | /* Make sure all running NMIs have finished before we write the code */ | ||
164 | smp_mb(); | ||
165 | |||
166 | ftrace_mod_code(); | ||
167 | |||
168 | /* Make sure the write happens before clearing the bit */ | ||
169 | smp_wmb(); | ||
170 | |||
171 | mod_code_write = 0; | ||
172 | |||
173 | /* make sure NMIs see the cleared bit */ | ||
174 | smp_mb(); | ||
175 | |||
176 | wait_for_nmi(); | ||
177 | |||
178 | return mod_code_status; | ||
179 | } | ||
180 | |||
181 | |||
182 | |||
183 | |||
184 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
185 | |||
186 | static unsigned char *ftrace_nop_replace(void) | ||
187 | { | ||
188 | return ftrace_nop; | ||
189 | } | ||
190 | |||
191 | static int | ||
60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | 192 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, |
61 | unsigned char *new_code) | 193 | unsigned char *new_code) |
62 | { | 194 | { |
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
81 | return -EINVAL; | 213 | return -EINVAL; |
82 | 214 | ||
83 | /* replace the text with the new text */ | 215 | /* replace the text with the new text */ |
84 | if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) | 216 | if (do_ftrace_mod_code(ip, new_code)) |
85 | return -EPERM; | 217 | return -EPERM; |
86 | 218 | ||
87 | sync_core(); | 219 | sync_core(); |
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
89 | return 0; | 221 | return 0; |
90 | } | 222 | } |
91 | 223 | ||
224 | int ftrace_make_nop(struct module *mod, | ||
225 | struct dyn_ftrace *rec, unsigned long addr) | ||
226 | { | ||
227 | unsigned char *new, *old; | ||
228 | unsigned long ip = rec->ip; | ||
229 | |||
230 | old = ftrace_call_replace(ip, addr); | ||
231 | new = ftrace_nop_replace(); | ||
232 | |||
233 | return ftrace_modify_code(rec->ip, old, new); | ||
234 | } | ||
235 | |||
236 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | ||
237 | { | ||
238 | unsigned char *new, *old; | ||
239 | unsigned long ip = rec->ip; | ||
240 | |||
241 | old = ftrace_nop_replace(); | ||
242 | new = ftrace_call_replace(ip, addr); | ||
243 | |||
244 | return ftrace_modify_code(rec->ip, old, new); | ||
245 | } | ||
246 | |||
92 | int ftrace_update_ftrace_func(ftrace_func_t func) | 247 | int ftrace_update_ftrace_func(ftrace_func_t func) |
93 | { | 248 | { |
94 | unsigned long ip = (unsigned long)(&ftrace_call); | 249 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -165,3 +320,219 @@ int __init ftrace_dyn_arch_init(void *data) | |||
165 | 320 | ||
166 | return 0; | 321 | return 0; |
167 | } | 322 | } |
323 | #endif | ||
324 | |||
325 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
326 | |||
327 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
328 | extern void ftrace_graph_call(void); | ||
329 | |||
330 | static int ftrace_mod_jmp(unsigned long ip, | ||
331 | int old_offset, int new_offset) | ||
332 | { | ||
333 | unsigned char code[MCOUNT_INSN_SIZE]; | ||
334 | |||
335 | if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) | ||
336 | return -EFAULT; | ||
337 | |||
338 | if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) | ||
339 | return -EINVAL; | ||
340 | |||
341 | *(int *)(&code[1]) = new_offset; | ||
342 | |||
343 | if (do_ftrace_mod_code(ip, &code)) | ||
344 | return -EPERM; | ||
345 | |||
346 | return 0; | ||
347 | } | ||
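
The old/new offsets fed to ftrace_mod_jmp are ordinary rel32 displacements for the 5-byte 0xe9 jmp, measured from the end of the instruction (ip + MCOUNT_INSN_SIZE), exactly as the enable/disable helpers below compute them. A small demo of the arithmetic with invented addresses:

#include <stdio.h>

#define MCOUNT_INSN_SIZE 5

int main(void)
{
        unsigned long ip     = 0x1000;  /* site of the jmp (hypothetical) */
        unsigned long target = 0x2000;  /* e.g. ftrace_graph_caller */

        int rel32 = (int)(target - (ip + MCOUNT_INSN_SIZE));
        printf("e9 %08x -> jumps to 0x%lx\n",
               (unsigned)rel32, ip + MCOUNT_INSN_SIZE + rel32);
        return 0;
}
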
348 | |||
349 | int ftrace_enable_ftrace_graph_caller(void) | ||
350 | { | ||
351 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
352 | int old_offset, new_offset; | ||
353 | |||
354 | old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
355 | new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
356 | |||
357 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
358 | } | ||
359 | |||
360 | int ftrace_disable_ftrace_graph_caller(void) | ||
361 | { | ||
362 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
363 | int old_offset, new_offset; | ||
364 | |||
365 | old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
366 | new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
367 | |||
368 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
369 | } | ||
370 | |||
371 | #else /* CONFIG_DYNAMIC_FTRACE */ | ||
372 | |||
373 | /* | ||
374 | * These functions are picked from those used on | ||
375 | * this page for dynamic ftrace. They have been | ||
376 | * simplified to ignore all traces in NMI context. | ||
377 | */ | ||
378 | static atomic_t in_nmi; | ||
379 | |||
380 | void ftrace_nmi_enter(void) | ||
381 | { | ||
382 | atomic_inc(&in_nmi); | ||
383 | } | ||
384 | |||
385 | void ftrace_nmi_exit(void) | ||
386 | { | ||
387 | atomic_dec(&in_nmi); | ||
388 | } | ||
389 | |||
390 | #endif /* !CONFIG_DYNAMIC_FTRACE */ | ||
391 | |||
392 | /* Add a function return address to the current task's trace stack. */ | ||
393 | static int push_return_trace(unsigned long ret, unsigned long long time, | ||
394 | unsigned long func, int *depth) | ||
395 | { | ||
396 | int index; | ||
397 | |||
398 | if (!current->ret_stack) | ||
399 | return -EBUSY; | ||
400 | |||
401 | /* The return trace stack is full */ | ||
402 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { | ||
403 | atomic_inc(¤t->trace_overrun); | ||
404 | return -EBUSY; | ||
405 | } | ||
406 | |||
407 | index = ++current->curr_ret_stack; | ||
408 | barrier(); | ||
409 | current->ret_stack[index].ret = ret; | ||
410 | current->ret_stack[index].func = func; | ||
411 | current->ret_stack[index].calltime = time; | ||
412 | *depth = index; | ||
413 | |||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | /* Retrieve a function return address from the current task's trace stack. */ | ||
418 | static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | ||
419 | { | ||
420 | int index; | ||
421 | |||
422 | index = current->curr_ret_stack; | ||
423 | |||
424 | if (unlikely(index < 0)) { | ||
425 | ftrace_graph_stop(); | ||
426 | WARN_ON(1); | ||
427 | /* Might as well panic, otherwise we have nowhere to go */ | ||
428 | *ret = (unsigned long)panic; | ||
429 | return; | ||
430 | } | ||
431 | |||
432 | *ret = current->ret_stack[index].ret; | ||
433 | trace->func = current->ret_stack[index].func; | ||
434 | trace->calltime = current->ret_stack[index].calltime; | ||
435 | trace->overrun = atomic_read(¤t->trace_overrun); | ||
436 | trace->depth = index; | ||
437 | barrier(); | ||
438 | current->curr_ret_stack--; | ||
439 | |||
440 | } | ||
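
push_return_trace/pop_return_trace treat ret_stack as a small per-task shadow stack. Here is the pairing in a self-contained model; the depth constant, addresses, and the global (rather than per-task) storage are stand-ins for the kernel's per-task fields:

#include <stdio.h>

#define FTRACE_RETFUNC_DEPTH 4

struct ret_entry { unsigned long ret, func; };

static struct ret_entry ret_stack[FTRACE_RETFUNC_DEPTH];
static int curr_ret_stack = -1;
static int trace_overrun;

static int push_ret(unsigned long ret, unsigned long func)
{
        if (curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
                trace_overrun++;        /* stack full: count the overrun */
                return -1;              /* -EBUSY in the kernel */
        }
        ret_stack[++curr_ret_stack] = (struct ret_entry){ ret, func };
        return 0;
}

static unsigned long pop_ret(void)
{
        return ret_stack[curr_ret_stack--].ret;
}

int main(void)
{
        push_ret(0x1111, 0xaaaa);
        push_ret(0x2222, 0xbbbb);
        unsigned long a = pop_ret();    /* LIFO: 0x2222 comes back first */
        unsigned long b = pop_ret();
        printf("pop: 0x%lx 0x%lx, overruns: %d\n", a, b, trace_overrun);
        return 0;
}
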
441 | |||
442 | /* | ||
443 | * Send the trace to the ring-buffer. | ||
444 | * @return the original return address. | ||
445 | */ | ||
446 | unsigned long ftrace_return_to_handler(void) | ||
447 | { | ||
448 | struct ftrace_graph_ret trace; | ||
449 | unsigned long ret; | ||
450 | |||
451 | pop_return_trace(&trace, &ret); | ||
452 | trace.rettime = cpu_clock(raw_smp_processor_id()); | ||
453 | ftrace_graph_return(&trace); | ||
454 | |||
455 | if (unlikely(!ret)) { | ||
456 | ftrace_graph_stop(); | ||
457 | WARN_ON(1); | ||
458 | /* Might as well panic. What else to do? */ | ||
459 | ret = (unsigned long)panic; | ||
460 | } | ||
461 | |||
462 | return ret; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Hook the return address and push it in the stack of return addrs | ||
467 | * in current thread info. | ||
468 | */ | ||
469 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | ||
470 | { | ||
471 | unsigned long old; | ||
472 | unsigned long long calltime; | ||
473 | int faulted; | ||
474 | struct ftrace_graph_ent trace; | ||
475 | unsigned long return_hooker = (unsigned long) | ||
476 | &return_to_handler; | ||
477 | |||
478 | /* NMIs are currently unsupported */ | ||
479 | if (unlikely(atomic_read(&in_nmi))) | ||
480 | return; | ||
481 | |||
482 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) | ||
483 | return; | ||
484 | |||
485 | /* | ||
486 | * Protect against a fault, even if it shouldn't | ||
487 | * happen. This tool is too intrusive to go | ||
488 | * without such protection. | ||
489 | */ | ||
490 | asm volatile( | ||
491 | "1: " _ASM_MOV " (%[parent]), %[old]\n" | ||
492 | "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" | ||
493 | " movl $0, %[faulted]\n" | ||
494 | "3:\n" | ||
495 | |||
496 | ".section .fixup, \"ax\"\n" | ||
497 | "4: movl $1, %[faulted]\n" | ||
498 | " jmp 3b\n" | ||
499 | ".previous\n" | ||
500 | |||
501 | _ASM_EXTABLE(1b, 4b) | ||
502 | _ASM_EXTABLE(2b, 4b) | ||
503 | |||
504 | : [old] "=r" (old), [faulted] "=r" (faulted) | ||
505 | : [parent] "r" (parent), [return_hooker] "r" (return_hooker) | ||
506 | : "memory" | ||
507 | ); | ||
508 | |||
509 | if (unlikely(faulted)) { | ||
510 | ftrace_graph_stop(); | ||
511 | WARN_ON(1); | ||
512 | return; | ||
513 | } | ||
514 | |||
515 | if (unlikely(!__kernel_text_address(old))) { | ||
516 | ftrace_graph_stop(); | ||
517 | *parent = old; | ||
518 | WARN_ON(1); | ||
519 | return; | ||
520 | } | ||
521 | |||
522 | calltime = cpu_clock(raw_smp_processor_id()); | ||
523 | |||
524 | if (push_return_trace(old, calltime, | ||
525 | self_addr, &trace.depth) == -EBUSY) { | ||
526 | *parent = old; | ||
527 | return; | ||
528 | } | ||
529 | |||
530 | trace.func = self_addr; | ||
531 | |||
532 | /* Only trace if the calling function expects to */ | ||
533 | if (!ftrace_graph_entry(&trace)) { | ||
534 | current->curr_ret_stack--; | ||
535 | *parent = old; | ||
536 | } | ||
537 | } | ||
538 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..820dea5d0ebe 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
22 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
23 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
24 | #include <asm/setup.h> | ||
24 | 25 | ||
25 | extern struct genapic apic_flat; | 26 | extern struct genapic apic_flat; |
26 | extern struct genapic apic_physflat; | 27 | extern struct genapic apic_physflat; |
@@ -28,10 +29,12 @@ extern struct genapic apic_x2xpic_uv_x; | |||
28 | extern struct genapic apic_x2apic_phys; | 29 | extern struct genapic apic_x2apic_phys; |
29 | extern struct genapic apic_x2apic_cluster; | 30 | extern struct genapic apic_x2apic_cluster; |
30 | 31 | ||
31 | struct genapic __read_mostly *genapic = &apic_flat; | 32 | struct genapic __read_mostly *apic = &apic_flat; |
32 | 33 | ||
33 | static struct genapic *apic_probe[] __initdata = { | 34 | static struct genapic *apic_probe[] __initdata = { |
35 | #ifdef CONFIG_X86_UV | ||
34 | &apic_x2apic_uv_x, | 36 | &apic_x2apic_uv_x, |
37 | #endif | ||
35 | &apic_x2apic_phys, | 38 | &apic_x2apic_phys, |
36 | &apic_x2apic_cluster, | 39 | &apic_x2apic_cluster, |
37 | &apic_physflat, | 40 | &apic_physflat, |
@@ -41,36 +44,39 @@ static struct genapic *apic_probe[] __initdata = { | |||
41 | /* | 44 | /* |
42 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 45 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
43 | */ | 46 | */ |
44 | void __init setup_apic_routing(void) | 47 | void __init default_setup_apic_routing(void) |
45 | { | 48 | { |
46 | if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { | 49 | if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { |
47 | if (!intr_remapping_enabled) | 50 | if (!intr_remapping_enabled) |
48 | genapic = &apic_flat; | 51 | apic = &apic_flat; |
49 | } | 52 | } |
50 | 53 | ||
51 | if (genapic == &apic_flat) { | 54 | if (apic == &apic_flat) { |
52 | if (max_physical_apicid >= 8) | 55 | if (max_physical_apicid >= 8) |
53 | genapic = &apic_physflat; | 56 | apic = &apic_physflat; |
54 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 57 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
55 | } | 58 | } |
59 | |||
60 | if (x86_quirks->update_genapic) | ||
61 | x86_quirks->update_genapic(); | ||
56 | } | 62 | } |
57 | 63 | ||
58 | /* Same for both flat and physical. */ | 64 | /* Same for both flat and physical. */ |
59 | 65 | ||
60 | void apic_send_IPI_self(int vector) | 66 | void apic_send_IPI_self(int vector) |
61 | { | 67 | { |
62 | __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | 68 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); |
63 | } | 69 | } |
64 | 70 | ||
65 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 71 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
66 | { | 72 | { |
67 | int i; | 73 | int i; |
68 | 74 | ||
69 | for (i = 0; apic_probe[i]; ++i) { | 75 | for (i = 0; apic_probe[i]; ++i) { |
70 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { | 76 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { |
71 | genapic = apic_probe[i]; | 77 | apic = apic_probe[i]; |
72 | printk(KERN_INFO "Setting APIC routing to %s.\n", | 78 | printk(KERN_INFO "Setting APIC routing to %s.\n", |
73 | genapic->name); | 79 | apic->name); |
74 | return 1; | 80 | return 1; |
75 | } | 81 | } |
76 | } | 82 | } |
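[Annotation] Besides the genapic -> apic rename, this file shows the probe-table pattern the whole series builds on: a NULL-terminated array of candidate drivers is walked and the first acpi_madt_oem_check() hit is installed. A compilable sketch of the same shape, with made-up driver names and checks:

#include <stdio.h>
#include <string.h>

struct apic_driver {
    const char *name;
    int (*oem_check)(const char *oem_id);
};

static int uv_check(const char *id)
{
    return strcmp(id, "SGI") == 0;
}

static int flat_check(const char *id)
{
    (void)id;
    return 1;                        /* accepts anything: the fallback */
}

static struct apic_driver uv   = { "UV large system", uv_check };
static struct apic_driver flat = { "flat", flat_check };

static struct apic_driver *probe[] = { &uv, &flat, NULL };
static struct apic_driver *apic = &flat;     /* boot-time default */

int main(void)
{
    const char *oem_id = "SGI";
    int i;

    for (i = 0; probe[i]; i++) {
        if (probe[i]->oem_check(oem_id)) {
            apic = probe[i];
            printf("Setting APIC routing to %s.\n", apic->name);
            break;
        }
    }
    return 0;
}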
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda4..249d2d3c034c 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | #include <asm/ipi.h> | 20 | #include <asm/ipi.h> |
21 | #include <asm/genapic.h> | 21 | #include <asm/genapic.h> |
22 | #include <mach_apicdef.h> | ||
23 | 22 | ||
24 | #ifdef CONFIG_ACPI | 23 | #ifdef CONFIG_ACPI |
25 | #include <acpi/acpi_bus.h> | 24 | #include <acpi/acpi_bus.h> |
@@ -30,12 +29,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
30 | return 1; | 29 | return 1; |
31 | } | 30 | } |
32 | 31 | ||
33 | static cpumask_t flat_target_cpus(void) | 32 | static const struct cpumask *flat_target_cpus(void) |
34 | { | 33 | { |
35 | return cpu_online_map; | 34 | return cpu_online_mask; |
36 | } | 35 | } |
37 | 36 | ||
38 | static cpumask_t flat_vector_allocation_domain(int cpu) | 37 | static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
39 | { | 38 | { |
40 | /* Careful. Some cpus do not strictly honor the set of cpus | 39 | /* Careful. Some cpus do not strictly honor the set of cpus |
41 | * specified in the interrupt destination when using lowest | 40 | * specified in the interrupt destination when using lowest |
@@ -45,8 +44,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu) | |||
45 | * deliver interrupts to the wrong hyperthread when only one | 44 | * deliver interrupts to the wrong hyperthread when only one |
46 | * hyperthread was specified in the interrupt destination. | 45 | * hyperthread was specified in the interrupt destination. |
47 | */ | 46 | */ |
48 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 47 | cpumask_clear(retmask); |
49 | return domain; | 48 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; |
50 | } | 49 | } |
51 | 50 | ||
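[Annotation] The two hunks above are the core of the cpumask conversion: functions stop returning a cpumask_t by value (512 bytes with NR_CPUS=4096) and instead either hand back a pointer to shared state or fill a caller-supplied mask. A userspace sketch of the before/after cost, using a toy cpumask type:

#include <stdio.h>
#include <string.h>

#define NR_CPUS 4096
struct cpumask { unsigned long bits[NR_CPUS / (8 * sizeof(long))]; };

static struct cpumask cpu_online_mask_storage;

/* old style: the whole 512-byte struct is copied out on return */
static struct cpumask old_target_cpus(void)
{
    return cpu_online_mask_storage;
}

/* new style: expose shared state by pointer ... */
static const struct cpumask *new_target_cpus(void)
{
    return &cpu_online_mask_storage;
}

/* ... or fill a caller-provided mask (vector_allocation_domain style) */
static void vector_domain(int cpu, struct cpumask *retmask)
{
    memset(retmask, 0, sizeof(*retmask));
    retmask->bits[cpu / (8 * sizeof(long))] |= 1UL << (cpu % (8 * sizeof(long)));
}

int main(void)
{
    struct cpumask m;

    vector_domain(3, &m);
    printf("cpu 3 set: %d\n", !!(m.bits[0] & (1UL << 3)));
    printf("by-value: %zu bytes, by-pointer: %zu bytes\n",
           sizeof(old_target_cpus()), sizeof(new_target_cpus()));
    return 0;
}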
52 | /* | 51 | /* |
@@ -69,48 +68,73 @@ static void flat_init_apic_ldr(void) | |||
69 | apic_write(APIC_LDR, val); | 68 | apic_write(APIC_LDR, val); |
70 | } | 69 | } |
71 | 70 | ||
72 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | 71 | static inline void _flat_send_IPI_mask(unsigned long mask, int vector) |
73 | { | 72 | { |
74 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
75 | unsigned long flags; | 73 | unsigned long flags; |
76 | 74 | ||
77 | local_irq_save(flags); | 75 | local_irq_save(flags); |
78 | __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); | 76 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); |
79 | local_irq_restore(flags); | 77 | local_irq_restore(flags); |
80 | } | 78 | } |
81 | 79 | ||
80 | static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | ||
81 | { | ||
82 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
83 | |||
84 | _flat_send_IPI_mask(mask, vector); | ||
85 | } | ||
86 | |||
87 | static void | ||
88 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) | ||
89 | { | ||
90 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
91 | int cpu = smp_processor_id(); | ||
92 | |||
93 | if (cpu < BITS_PER_LONG) | ||
94 | clear_bit(cpu, &mask); | ||
95 | |||
96 | _flat_send_IPI_mask(mask, vector); | ||
97 | } | ||
98 | |||
82 | static void flat_send_IPI_allbutself(int vector) | 99 | static void flat_send_IPI_allbutself(int vector) |
83 | { | 100 | { |
101 | int cpu = smp_processor_id(); | ||
84 | #ifdef CONFIG_HOTPLUG_CPU | 102 | #ifdef CONFIG_HOTPLUG_CPU |
85 | int hotplug = 1; | 103 | int hotplug = 1; |
86 | #else | 104 | #else |
87 | int hotplug = 0; | 105 | int hotplug = 0; |
88 | #endif | 106 | #endif |
89 | if (hotplug || vector == NMI_VECTOR) { | 107 | if (hotplug || vector == NMI_VECTOR) { |
90 | cpumask_t allbutme = cpu_online_map; | 108 | if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { |
109 | unsigned long mask = cpumask_bits(cpu_online_mask)[0]; | ||
91 | 110 | ||
92 | cpu_clear(smp_processor_id(), allbutme); | 111 | if (cpu < BITS_PER_LONG) |
112 | clear_bit(cpu, &mask); | ||
93 | 113 | ||
94 | if (!cpus_empty(allbutme)) | 114 | _flat_send_IPI_mask(mask, vector); |
95 | flat_send_IPI_mask(allbutme, vector); | 115 | } |
96 | } else if (num_online_cpus() > 1) { | 116 | } else if (num_online_cpus() > 1) { |
97 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | 117 | __default_send_IPI_shortcut(APIC_DEST_ALLBUT, |
118 | vector, apic->dest_logical); | ||
98 | } | 119 | } |
99 | } | 120 | } |
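[Annotation] Flat mode can only address CPUs in the first word of the mask, so "all but self" collapses to one word operation plus a guarded bit clear, as in _flat_send_IPI_mask() and its callers above. A userspace model of the trick:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

static void send_ipi_word(unsigned long mask, int vector)
{
    printf("IPI vector %#x to logical mask %#lx\n", vector, mask);
}

int main(void)
{
    unsigned long online_word = 0xff;    /* CPUs 0-7 online, word 0 only */
    unsigned int this_cpu = 3;

    if (this_cpu < BITS_PER_LONG)        /* guard before clearing the bit */
        online_word &= ~(1UL << this_cpu);

    if (online_word)                     /* anyone left to signal? */
        send_ipi_word(online_word, 0x2);
    return 0;
}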
100 | 121 | ||
101 | static void flat_send_IPI_all(int vector) | 122 | static void flat_send_IPI_all(int vector) |
102 | { | 123 | { |
103 | if (vector == NMI_VECTOR) | 124 | if (vector == NMI_VECTOR) { |
104 | flat_send_IPI_mask(cpu_online_map, vector); | 125 | flat_send_IPI_mask(cpu_online_mask, vector); |
105 | else | 126 | } else { |
106 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 127 | __default_send_IPI_shortcut(APIC_DEST_ALLINC, |
128 | vector, apic->dest_logical); | ||
129 | } | ||
107 | } | 130 | } |
108 | 131 | ||
109 | static unsigned int get_apic_id(unsigned long x) | 132 | static unsigned int flat_get_apic_id(unsigned long x) |
110 | { | 133 | { |
111 | unsigned int id; | 134 | unsigned int id; |
112 | 135 | ||
113 | id = (((x)>>24) & 0xFFu); | 136 | id = (((x)>>24) & 0xFFu); |
137 | |||
114 | return id; | 138 | return id; |
115 | } | 139 | } |
116 | 140 | ||
@@ -126,7 +150,7 @@ static unsigned int read_xapic_id(void) | |||
126 | { | 150 | { |
127 | unsigned int id; | 151 | unsigned int id; |
128 | 152 | ||
129 | id = get_apic_id(apic_read(APIC_ID)); | 153 | id = flat_get_apic_id(apic_read(APIC_ID)); |
130 | return id; | 154 | return id; |
131 | } | 155 | } |
132 | 156 | ||
@@ -135,34 +159,76 @@ static int flat_apic_id_registered(void) | |||
135 | return physid_isset(read_xapic_id(), phys_cpu_present_map); | 159 | return physid_isset(read_xapic_id(), phys_cpu_present_map); |
136 | } | 160 | } |
137 | 161 | ||
138 | static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) | 162 | static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
163 | { | ||
164 | return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; | ||
165 | } | ||
166 | |||
167 | static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
168 | const struct cpumask *andmask) | ||
139 | { | 169 | { |
140 | return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; | 170 | unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; |
171 | unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; | ||
172 | |||
173 | return mask1 & mask2; | ||
141 | } | 174 | } |
142 | 175 | ||
143 | static unsigned int phys_pkg_id(int index_msb) | 176 | static int flat_phys_pkg_id(int initial_apic_id, int index_msb) |
144 | { | 177 | { |
145 | return hard_smp_processor_id() >> index_msb; | 178 | return hard_smp_processor_id() >> index_msb; |
146 | } | 179 | } |
147 | 180 | ||
148 | struct genapic apic_flat = { | 181 | struct genapic apic_flat = { |
149 | .name = "flat", | 182 | .name = "flat", |
150 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | 183 | .probe = NULL, |
151 | .int_delivery_mode = dest_LowestPrio, | 184 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, |
152 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 185 | .apic_id_registered = flat_apic_id_registered, |
153 | .target_cpus = flat_target_cpus, | 186 | |
154 | .vector_allocation_domain = flat_vector_allocation_domain, | 187 | .irq_delivery_mode = dest_LowestPrio, |
155 | .apic_id_registered = flat_apic_id_registered, | 188 | .irq_dest_mode = 1, /* logical */ |
156 | .init_apic_ldr = flat_init_apic_ldr, | 189 | |
157 | .send_IPI_all = flat_send_IPI_all, | 190 | .target_cpus = flat_target_cpus, |
158 | .send_IPI_allbutself = flat_send_IPI_allbutself, | 191 | .disable_esr = 0, |
159 | .send_IPI_mask = flat_send_IPI_mask, | 192 | .dest_logical = APIC_DEST_LOGICAL, |
160 | .send_IPI_self = apic_send_IPI_self, | 193 | .check_apicid_used = NULL, |
161 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | 194 | .check_apicid_present = NULL, |
162 | .phys_pkg_id = phys_pkg_id, | 195 | |
163 | .get_apic_id = get_apic_id, | 196 | .vector_allocation_domain = flat_vector_allocation_domain, |
164 | .set_apic_id = set_apic_id, | 197 | .init_apic_ldr = flat_init_apic_ldr, |
165 | .apic_id_mask = (0xFFu<<24), | 198 | |
199 | .ioapic_phys_id_map = NULL, | ||
200 | .setup_apic_routing = NULL, | ||
201 | .multi_timer_check = NULL, | ||
202 | .apicid_to_node = NULL, | ||
203 | .cpu_to_logical_apicid = NULL, | ||
204 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
205 | .apicid_to_cpu_present = NULL, | ||
206 | .setup_portio_remap = NULL, | ||
207 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
208 | .enable_apic_mode = NULL, | ||
209 | .phys_pkg_id = flat_phys_pkg_id, | ||
210 | .mps_oem_check = NULL, | ||
211 | |||
212 | .get_apic_id = flat_get_apic_id, | ||
213 | .set_apic_id = set_apic_id, | ||
214 | .apic_id_mask = 0xFFu << 24, | ||
215 | |||
216 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | ||
217 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | ||
218 | |||
219 | .send_IPI_mask = flat_send_IPI_mask, | ||
220 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | ||
221 | .send_IPI_allbutself = flat_send_IPI_allbutself, | ||
222 | .send_IPI_all = flat_send_IPI_all, | ||
223 | .send_IPI_self = apic_send_IPI_self, | ||
224 | |||
225 | .wakeup_cpu = NULL, | ||
226 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
227 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
228 | .wait_for_init_deassert = NULL, | ||
229 | .smp_callin_clear_local_apic = NULL, | ||
230 | .store_NMI_vector = NULL, | ||
231 | .inquire_remote_apic = NULL, | ||
166 | }; | 232 | }; |
167 | 233 | ||
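[Annotation] The rewritten apic_flat table spells out every operation with designated initializers; hooks this driver does not need are explicit NULLs that callers are expected to guard. The same shape in miniature, with invented names:

#include <stdio.h>

struct ops {
    const char *name;
    void (*wait_for_init_deassert)(void);   /* optional hook */
    int  (*phys_pkg_id)(int apicid, int index_msb);
};

static int flat_pkg_id(int apicid, int index_msb)
{
    return apicid >> index_msb;
}

static struct ops flat_ops = {
    .name                   = "flat",
    .wait_for_init_deassert = NULL,          /* this driver doesn't need it */
    .phys_pkg_id            = flat_pkg_id,
};

int main(void)
{
    if (flat_ops.wait_for_init_deassert)     /* NULL means "skip the step" */
        flat_ops.wait_for_init_deassert();
    printf("%s pkg id: %d\n", flat_ops.name, flat_ops.phys_pkg_id(0x12, 4));
    return 0;
}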
168 | /* | 234 | /* |
@@ -188,35 +254,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
188 | return 0; | 254 | return 0; |
189 | } | 255 | } |
190 | 256 | ||
191 | static cpumask_t physflat_target_cpus(void) | 257 | static const struct cpumask *physflat_target_cpus(void) |
192 | { | 258 | { |
193 | return cpu_online_map; | 259 | return cpu_online_mask; |
194 | } | 260 | } |
195 | 261 | ||
196 | static cpumask_t physflat_vector_allocation_domain(int cpu) | 262 | static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
197 | { | 263 | { |
198 | return cpumask_of_cpu(cpu); | 264 | cpumask_clear(retmask); |
265 | cpumask_set_cpu(cpu, retmask); | ||
199 | } | 266 | } |
200 | 267 | ||
201 | static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) | 268 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
202 | { | 269 | { |
203 | send_IPI_mask_sequence(cpumask, vector); | 270 | default_send_IPI_mask_sequence_phys(cpumask, vector); |
204 | } | 271 | } |
205 | 272 | ||
206 | static void physflat_send_IPI_allbutself(int vector) | 273 | static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, |
274 | int vector) | ||
207 | { | 275 | { |
208 | cpumask_t allbutme = cpu_online_map; | 276 | default_send_IPI_mask_allbutself_phys(cpumask, vector); |
277 | } | ||
209 | 278 | ||
210 | cpu_clear(smp_processor_id(), allbutme); | 279 | static void physflat_send_IPI_allbutself(int vector) |
211 | physflat_send_IPI_mask(allbutme, vector); | 280 | { |
281 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); | ||
212 | } | 282 | } |
213 | 283 | ||
214 | static void physflat_send_IPI_all(int vector) | 284 | static void physflat_send_IPI_all(int vector) |
215 | { | 285 | { |
216 | physflat_send_IPI_mask(cpu_online_map, vector); | 286 | physflat_send_IPI_mask(cpu_online_mask, vector); |
217 | } | 287 | } |
218 | 288 | ||
219 | static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | 289 | static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
220 | { | 290 | { |
221 | int cpu; | 291 | int cpu; |
222 | 292 | ||
@@ -224,29 +294,84 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
224 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 294 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
225 | * May as well be the first. | 295 | * May as well be the first. |
226 | */ | 296 | */ |
227 | cpu = first_cpu(cpumask); | 297 | cpu = cpumask_first(cpumask); |
228 | if ((unsigned)cpu < nr_cpu_ids) | 298 | if ((unsigned)cpu < nr_cpu_ids) |
229 | return per_cpu(x86_cpu_to_apicid, cpu); | 299 | return per_cpu(x86_cpu_to_apicid, cpu); |
230 | else | 300 | else |
231 | return BAD_APICID; | 301 | return BAD_APICID; |
232 | } | 302 | } |
233 | 303 | ||
304 | static unsigned int | ||
305 | physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
306 | const struct cpumask *andmask) | ||
307 | { | ||
308 | int cpu; | ||
309 | |||
310 | /* | ||
311 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
312 | * May as well be the first. | ||
313 | */ | ||
314 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
315 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
316 | break; | ||
317 | } | ||
318 | if (cpu < nr_cpu_ids) | ||
319 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
320 | |||
321 | return BAD_APICID; | ||
322 | } | ||
323 | |||
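[Annotation] physflat_cpu_mask_to_apicid_and() above walks the intersection of two masks and returns the APIC ID of the first online CPU, or BAD_APICID. A word-sized userspace model of that for_each_cpu_and() walk:

#include <stdio.h>

#define NR 32
#define BAD_APICID 0xFFu

static unsigned cpu_to_apicid[NR];

static unsigned mask_to_apicid_and(unsigned long a, unsigned long b,
                                   unsigned long online)
{
    unsigned long both = a & b;
    int cpu;

    for (cpu = 0; cpu < NR; cpu++)
        if ((both & (1UL << cpu)) && (online & (1UL << cpu)))
            return cpu_to_apicid[cpu];   /* first online hit wins */
    return BAD_APICID;
}

int main(void)
{
    int i;

    for (i = 0; i < NR; i++)
        cpu_to_apicid[i] = i * 2;        /* fake APIC IDs */

    /* cpus {1,2,3} AND {2,3,4} = {2,3}; cpu 2 is the first online one */
    printf("apicid %#x\n", mask_to_apicid_and(0xE, 0x1C, 0xFF));
    return 0;
}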
234 | struct genapic apic_physflat = { | 324 | struct genapic apic_physflat = { |
235 | .name = "physical flat", | 325 | |
236 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | 326 | .name = "physical flat", |
237 | .int_delivery_mode = dest_Fixed, | 327 | .probe = NULL, |
238 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 328 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, |
239 | .target_cpus = physflat_target_cpus, | 329 | .apic_id_registered = flat_apic_id_registered, |
240 | .vector_allocation_domain = physflat_vector_allocation_domain, | 330 | |
241 | .apic_id_registered = flat_apic_id_registered, | 331 | .irq_delivery_mode = dest_Fixed, |
242 | .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ | 332 | .irq_dest_mode = 0, /* physical */ |
243 | .send_IPI_all = physflat_send_IPI_all, | 333 | |
244 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | 334 | .target_cpus = physflat_target_cpus, |
245 | .send_IPI_mask = physflat_send_IPI_mask, | 335 | .disable_esr = 0, |
246 | .send_IPI_self = apic_send_IPI_self, | 336 | .dest_logical = 0, |
247 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 337 | .check_apicid_used = NULL, |
248 | .phys_pkg_id = phys_pkg_id, | 338 | .check_apicid_present = NULL, |
249 | .get_apic_id = get_apic_id, | 339 | |
250 | .set_apic_id = set_apic_id, | 340 | .vector_allocation_domain = physflat_vector_allocation_domain, |
251 | .apic_id_mask = (0xFFu<<24), | 341 | /* not needed, but shouldn't hurt: */ |
342 | .init_apic_ldr = flat_init_apic_ldr, | ||
343 | |||
344 | .ioapic_phys_id_map = NULL, | ||
345 | .setup_apic_routing = NULL, | ||
346 | .multi_timer_check = NULL, | ||
347 | .apicid_to_node = NULL, | ||
348 | .cpu_to_logical_apicid = NULL, | ||
349 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
350 | .apicid_to_cpu_present = NULL, | ||
351 | .setup_portio_remap = NULL, | ||
352 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
353 | .enable_apic_mode = NULL, | ||
354 | .phys_pkg_id = flat_phys_pkg_id, | ||
355 | .mps_oem_check = NULL, | ||
356 | |||
357 | .get_apic_id = flat_get_apic_id, | ||
358 | .set_apic_id = set_apic_id, | ||
359 | .apic_id_mask = 0xFFu << 24, | ||
360 | |||
361 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | ||
362 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
363 | |||
364 | .send_IPI_mask = physflat_send_IPI_mask, | ||
365 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | ||
366 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | ||
367 | .send_IPI_all = physflat_send_IPI_all, | ||
368 | .send_IPI_self = apic_send_IPI_self, | ||
369 | |||
370 | .wakeup_cpu = NULL, | ||
371 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
372 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
373 | .wait_for_init_deassert = NULL, | ||
374 | .smp_callin_clear_local_apic = NULL, | ||
375 | .store_NMI_vector = NULL, | ||
376 | .inquire_remote_apic = NULL, | ||
252 | }; | 377 | }; |
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a6..7c87156b6411 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c | |||
@@ -22,23 +22,22 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
22 | 22 | ||
23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
24 | 24 | ||
25 | static cpumask_t x2apic_target_cpus(void) | 25 | static const struct cpumask *x2apic_target_cpus(void) |
26 | { | 26 | { |
27 | return cpumask_of_cpu(0); | 27 | return cpumask_of(0); |
28 | } | 28 | } |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * for now each logical cpu is in its own vector allocation domain. | 31 | * for now each logical cpu is in its own vector allocation domain. |
32 | */ | 32 | */ |
33 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 33 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
34 | { | 34 | { |
35 | cpumask_t domain = CPU_MASK_NONE; | 35 | cpumask_clear(retmask); |
36 | cpu_set(cpu, domain); | 36 | cpumask_set_cpu(cpu, retmask); |
37 | return domain; | ||
38 | } | 37 | } |
39 | 38 | ||
40 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 39 | static void |
41 | unsigned int dest) | 40 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) |
42 | { | 41 | { |
43 | unsigned long cfg; | 42 | unsigned long cfg; |
44 | 43 | ||
@@ -56,32 +55,58 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
56 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register | 55 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register |
57 | * writes. | 56 | * writes. |
58 | */ | 57 | */ |
59 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 58 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
60 | { | 59 | { |
60 | unsigned long query_cpu; | ||
61 | unsigned long flags; | 61 | unsigned long flags; |
62 | |||
63 | local_irq_save(flags); | ||
64 | for_each_cpu(query_cpu, mask) { | ||
65 | __x2apic_send_IPI_dest( | ||
66 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
67 | vector, apic->dest_logical); | ||
68 | } | ||
69 | local_irq_restore(flags); | ||
70 | } | ||
71 | |||
72 | static void | ||
73 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
74 | { | ||
75 | unsigned long this_cpu = smp_processor_id(); | ||
62 | unsigned long query_cpu; | 76 | unsigned long query_cpu; |
77 | unsigned long flags; | ||
63 | 78 | ||
64 | local_irq_save(flags); | 79 | local_irq_save(flags); |
65 | for_each_cpu_mask(query_cpu, mask) { | 80 | for_each_cpu(query_cpu, mask) { |
66 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 81 | if (query_cpu == this_cpu) |
67 | vector, APIC_DEST_LOGICAL); | 82 | continue; |
83 | __x2apic_send_IPI_dest( | ||
84 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
85 | vector, apic->dest_logical); | ||
68 | } | 86 | } |
69 | local_irq_restore(flags); | 87 | local_irq_restore(flags); |
70 | } | 88 | } |
71 | 89 | ||
72 | static void x2apic_send_IPI_allbutself(int vector) | 90 | static void x2apic_send_IPI_allbutself(int vector) |
73 | { | 91 | { |
74 | cpumask_t mask = cpu_online_map; | 92 | unsigned long this_cpu = smp_processor_id(); |
75 | 93 | unsigned long query_cpu; | |
76 | cpu_clear(smp_processor_id(), mask); | 94 | unsigned long flags; |
77 | 95 | ||
78 | if (!cpus_empty(mask)) | 96 | local_irq_save(flags); |
79 | x2apic_send_IPI_mask(mask, vector); | 97 | for_each_online_cpu(query_cpu) { |
98 | if (query_cpu == this_cpu) | ||
99 | continue; | ||
100 | __x2apic_send_IPI_dest( | ||
101 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
102 | vector, apic->dest_logical); | ||
103 | } | ||
104 | local_irq_restore(flags); | ||
80 | } | 105 | } |
81 | 106 | ||
82 | static void x2apic_send_IPI_all(int vector) | 107 | static void x2apic_send_IPI_all(int vector) |
83 | { | 108 | { |
84 | x2apic_send_IPI_mask(cpu_online_map, vector); | 109 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
85 | } | 110 | } |
86 | 111 | ||
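[Annotation] With no usable shorthand here, the x2apic mask sends above are per-CPU loops of ICR writes, run with interrupts off so the write sequence cannot be interleaved. A compilable model; the flag variable merely stands in for local_irq_save/restore:

#include <stdio.h>

static int irqs_enabled = 1;

static unsigned long local_irq_save(void)
{
    unsigned long flags = irqs_enabled;
    irqs_enabled = 0;
    return flags;
}

static void local_irq_restore(unsigned long flags)
{
    irqs_enabled = (int)flags;
}

static void icr_write(int dest_cpu, int vector)
{
    printf("ICR write: cpu %d vector %#x (irqs %s)\n",
           dest_cpu, vector, irqs_enabled ? "on" : "off");
}

static void send_ipi_mask_allbutself(unsigned long mask, int vector, int self)
{
    unsigned long flags = local_irq_save();
    int cpu;

    for (cpu = 0; cpu < 32; cpu++) {
        if (cpu == self || !(mask & (1UL << cpu)))
            continue;            /* skip self and CPUs outside the mask */
        icr_write(cpu, vector);  /* one ICR transaction per target */
    }
    local_irq_restore(flags);
}

int main(void)
{
    send_ipi_mask_allbutself(0x0F, 0xfd, 2);  /* CPUs 0-3, running on 2 */
    return 0;
}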
87 | static int x2apic_apic_id_registered(void) | 112 | static int x2apic_apic_id_registered(void) |
@@ -89,22 +114,42 @@ static int x2apic_apic_id_registered(void) | |||
89 | return 1; | 114 | return 1; |
90 | } | 115 | } |
91 | 116 | ||
92 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 117 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
93 | { | 118 | { |
94 | int cpu; | ||
95 | |||
96 | /* | 119 | /* |
97 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 120 | * We're using fixed IRQ delivery, can only return one logical APIC ID. |
98 | * May as well be the first. | 121 | * May as well be the first. |
99 | */ | 122 | */ |
100 | cpu = first_cpu(cpumask); | 123 | int cpu = cpumask_first(cpumask); |
101 | if ((unsigned)cpu < NR_CPUS) | 124 | |
125 | if ((unsigned)cpu < nr_cpu_ids) | ||
102 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 126 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
103 | else | 127 | else |
104 | return BAD_APICID; | 128 | return BAD_APICID; |
105 | } | 129 | } |
106 | 130 | ||
107 | static unsigned int get_apic_id(unsigned long x) | 131 | static unsigned int |
132 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
133 | const struct cpumask *andmask) | ||
134 | { | ||
135 | int cpu; | ||
136 | |||
137 | /* | ||
138 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | ||
139 | * May as well be the first. | ||
140 | */ | ||
141 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
142 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
143 | break; | ||
144 | } | ||
145 | |||
146 | if (cpu < nr_cpu_ids) | ||
147 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
148 | |||
149 | return BAD_APICID; | ||
150 | } | ||
151 | |||
152 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) | ||
108 | { | 153 | { |
109 | unsigned int id; | 154 | unsigned int id; |
110 | 155 | ||
@@ -120,7 +165,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
120 | return x; | 165 | return x; |
121 | } | 166 | } |
122 | 167 | ||
123 | static unsigned int phys_pkg_id(int index_msb) | 168 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) |
124 | { | 169 | { |
125 | return current_cpu_data.initial_apicid >> index_msb; | 170 | return current_cpu_data.initial_apicid >> index_msb; |
126 | } | 171 | } |
@@ -135,25 +180,58 @@ static void init_x2apic_ldr(void) | |||
135 | int cpu = smp_processor_id(); | 180 | int cpu = smp_processor_id(); |
136 | 181 | ||
137 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); | 182 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); |
138 | return; | ||
139 | } | 183 | } |
140 | 184 | ||
141 | struct genapic apic_x2apic_cluster = { | 185 | struct genapic apic_x2apic_cluster = { |
142 | .name = "cluster x2apic", | 186 | |
143 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 187 | .name = "cluster x2apic", |
144 | .int_delivery_mode = dest_LowestPrio, | 188 | .probe = NULL, |
145 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 189 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
146 | .target_cpus = x2apic_target_cpus, | 190 | .apic_id_registered = x2apic_apic_id_registered, |
147 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 191 | |
148 | .apic_id_registered = x2apic_apic_id_registered, | 192 | .irq_delivery_mode = dest_LowestPrio, |
149 | .init_apic_ldr = init_x2apic_ldr, | 193 | .irq_dest_mode = 1, /* logical */ |
150 | .send_IPI_all = x2apic_send_IPI_all, | 194 | |
151 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 195 | .target_cpus = x2apic_target_cpus, |
152 | .send_IPI_mask = x2apic_send_IPI_mask, | 196 | .disable_esr = 0, |
153 | .send_IPI_self = x2apic_send_IPI_self, | 197 | .dest_logical = APIC_DEST_LOGICAL, |
154 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 198 | .check_apicid_used = NULL, |
155 | .phys_pkg_id = phys_pkg_id, | 199 | .check_apicid_present = NULL, |
156 | .get_apic_id = get_apic_id, | 200 | |
157 | .set_apic_id = set_apic_id, | 201 | .vector_allocation_domain = x2apic_vector_allocation_domain, |
158 | .apic_id_mask = (0xFFFFFFFFu), | 202 | .init_apic_ldr = init_x2apic_ldr, |
203 | |||
204 | .ioapic_phys_id_map = NULL, | ||
205 | .setup_apic_routing = NULL, | ||
206 | .multi_timer_check = NULL, | ||
207 | .apicid_to_node = NULL, | ||
208 | .cpu_to_logical_apicid = NULL, | ||
209 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
210 | .apicid_to_cpu_present = NULL, | ||
211 | .setup_portio_remap = NULL, | ||
212 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
213 | .enable_apic_mode = NULL, | ||
214 | .phys_pkg_id = x2apic_cluster_phys_pkg_id, | ||
215 | .mps_oem_check = NULL, | ||
216 | |||
217 | .get_apic_id = x2apic_cluster_phys_get_apic_id, | ||
218 | .set_apic_id = set_apic_id, | ||
219 | .apic_id_mask = 0xFFFFFFFFu, | ||
220 | |||
221 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
222 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
223 | |||
224 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
225 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
226 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
227 | .send_IPI_all = x2apic_send_IPI_all, | ||
228 | .send_IPI_self = x2apic_send_IPI_self, | ||
229 | |||
230 | .wakeup_cpu = NULL, | ||
231 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
232 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
233 | .wait_for_init_deassert = NULL, | ||
234 | .smp_callin_clear_local_apic = NULL, | ||
235 | .store_NMI_vector = NULL, | ||
236 | .inquire_remote_apic = NULL, | ||
159 | }; | 237 | }; |
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b7..5cbae8aa0408 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c | |||
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
29 | 29 | ||
30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
31 | 31 | ||
32 | static cpumask_t x2apic_target_cpus(void) | 32 | static const struct cpumask *x2apic_target_cpus(void) |
33 | { | 33 | { |
34 | return cpumask_of_cpu(0); | 34 | return cpumask_of(0); |
35 | } | 35 | } |
36 | 36 | ||
37 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 37 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
38 | { | 38 | { |
39 | cpumask_t domain = CPU_MASK_NONE; | 39 | cpumask_clear(retmask); |
40 | cpu_set(cpu, domain); | 40 | cpumask_set_cpu(cpu, retmask); |
41 | return domain; | ||
42 | } | 41 | } |
43 | 42 | ||
44 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 43 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, |
@@ -54,32 +53,55 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
54 | x2apic_icr_write(cfg, apicid); | 53 | x2apic_icr_write(cfg, apicid); |
55 | } | 54 | } |
56 | 55 | ||
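[Annotation] __x2apic_send_IPI_dest() works because x2apic folds the two 32-bit xAPIC ICR halves into one 64-bit MSR write, destination ID in the high word. A sketch of the composition; the MSR mapping shown is the standard x2apic one, and wrmsrl is stubbed out since the real instruction is privileged:

#include <stdio.h>
#include <stdint.h>

#define APIC_BASE_MSR      0x800
#define APIC_ICR           0x300   /* xAPIC MMIO offset of the ICR */
#define APIC_DEST_PHYSICAL 0x0

/* stand-in for the privileged WRMSR instruction */
static void wrmsrl(uint32_t msr, uint64_t val)
{
    printf("wrmsr %#x <- %#llx\n", msr, (unsigned long long)val);
}

static void x2apic_send_ipi_dest(uint32_t apicid, int vector, uint32_t dest)
{
    /* high 32 bits: destination APIC ID; low 32 bits: command word */
    uint64_t cfg = ((uint64_t)apicid << 32) | dest | (uint32_t)vector;

    /* xAPIC MMIO offset 0x300 maps to x2apic MSR 0x800 + (0x300 >> 4) */
    wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), cfg);
}

int main(void)
{
    x2apic_send_ipi_dest(5, 0xfd, APIC_DEST_PHYSICAL);
    return 0;
}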
57 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 56 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
58 | { | 57 | { |
59 | unsigned long flags; | ||
60 | unsigned long query_cpu; | 58 | unsigned long query_cpu; |
59 | unsigned long flags; | ||
61 | 60 | ||
62 | local_irq_save(flags); | 61 | local_irq_save(flags); |
63 | for_each_cpu_mask(query_cpu, mask) { | 62 | for_each_cpu(query_cpu, mask) { |
64 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | 63 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
65 | vector, APIC_DEST_PHYSICAL); | 64 | vector, APIC_DEST_PHYSICAL); |
66 | } | 65 | } |
67 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
68 | } | 67 | } |
69 | 68 | ||
70 | static void x2apic_send_IPI_allbutself(int vector) | 69 | static void |
70 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
71 | { | 71 | { |
72 | cpumask_t mask = cpu_online_map; | 72 | unsigned long this_cpu = smp_processor_id(); |
73 | unsigned long query_cpu; | ||
74 | unsigned long flags; | ||
75 | |||
76 | local_irq_save(flags); | ||
77 | for_each_cpu(query_cpu, mask) { | ||
78 | if (query_cpu != this_cpu) | ||
79 | __x2apic_send_IPI_dest( | ||
80 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
81 | vector, APIC_DEST_PHYSICAL); | ||
82 | } | ||
83 | local_irq_restore(flags); | ||
84 | } | ||
73 | 85 | ||
74 | cpu_clear(smp_processor_id(), mask); | 86 | static void x2apic_send_IPI_allbutself(int vector) |
87 | { | ||
88 | unsigned long this_cpu = smp_processor_id(); | ||
89 | unsigned long query_cpu; | ||
90 | unsigned long flags; | ||
75 | 91 | ||
76 | if (!cpus_empty(mask)) | 92 | local_irq_save(flags); |
77 | x2apic_send_IPI_mask(mask, vector); | 93 | for_each_online_cpu(query_cpu) { |
94 | if (query_cpu == this_cpu) | ||
95 | continue; | ||
96 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | ||
97 | vector, APIC_DEST_PHYSICAL); | ||
98 | } | ||
99 | local_irq_restore(flags); | ||
78 | } | 100 | } |
79 | 101 | ||
80 | static void x2apic_send_IPI_all(int vector) | 102 | static void x2apic_send_IPI_all(int vector) |
81 | { | 103 | { |
82 | x2apic_send_IPI_mask(cpu_online_map, vector); | 104 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
83 | } | 105 | } |
84 | 106 | ||
85 | static int x2apic_apic_id_registered(void) | 107 | static int x2apic_apic_id_registered(void) |
@@ -87,68 +109,115 @@ static int x2apic_apic_id_registered(void) | |||
87 | return 1; | 109 | return 1; |
88 | } | 110 | } |
89 | 111 | ||
90 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 112 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
91 | { | 113 | { |
92 | int cpu; | ||
93 | |||
94 | /* | 114 | /* |
95 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 115 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
96 | * May as well be the first. | 116 | * May as well be the first. |
97 | */ | 117 | */ |
98 | cpu = first_cpu(cpumask); | 118 | int cpu = cpumask_first(cpumask); |
99 | if ((unsigned)cpu < NR_CPUS) | 119 | |
120 | if ((unsigned)cpu < nr_cpu_ids) | ||
100 | return per_cpu(x86_cpu_to_apicid, cpu); | 121 | return per_cpu(x86_cpu_to_apicid, cpu); |
101 | else | 122 | else |
102 | return BAD_APICID; | 123 | return BAD_APICID; |
103 | } | 124 | } |
104 | 125 | ||
105 | static unsigned int get_apic_id(unsigned long x) | 126 | static unsigned int |
127 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
128 | const struct cpumask *andmask) | ||
106 | { | 129 | { |
107 | unsigned int id; | 130 | int cpu; |
108 | 131 | ||
109 | id = x; | 132 | /* |
110 | return id; | 133 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
134 | * May as well be the first. | ||
135 | */ | ||
136 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
137 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
138 | break; | ||
139 | } | ||
140 | |||
141 | if (cpu < nr_cpu_ids) | ||
142 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
143 | |||
144 | return BAD_APICID; | ||
111 | } | 145 | } |
112 | 146 | ||
113 | static unsigned long set_apic_id(unsigned int id) | 147 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) |
114 | { | 148 | { |
115 | unsigned long x; | ||
116 | |||
117 | x = id; | ||
118 | return x; | 149 | return x; |
119 | } | 150 | } |
120 | 151 | ||
121 | static unsigned int phys_pkg_id(int index_msb) | 152 | static unsigned long set_apic_id(unsigned int id) |
153 | { | ||
154 | return id; | ||
155 | } | ||
156 | |||
157 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) | ||
122 | { | 158 | { |
123 | return current_cpu_data.initial_apicid >> index_msb; | 159 | return current_cpu_data.initial_apicid >> index_msb; |
124 | } | 160 | } |
125 | 161 | ||
126 | void x2apic_send_IPI_self(int vector) | 162 | static void x2apic_send_IPI_self(int vector) |
127 | { | 163 | { |
128 | apic_write(APIC_SELF_IPI, vector); | 164 | apic_write(APIC_SELF_IPI, vector); |
129 | } | 165 | } |
130 | 166 | ||
131 | void init_x2apic_ldr(void) | 167 | static void init_x2apic_ldr(void) |
132 | { | 168 | { |
133 | return; | ||
134 | } | 169 | } |
135 | 170 | ||
136 | struct genapic apic_x2apic_phys = { | 171 | struct genapic apic_x2apic_phys = { |
137 | .name = "physical x2apic", | 172 | |
138 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 173 | .name = "physical x2apic", |
139 | .int_delivery_mode = dest_Fixed, | 174 | .probe = NULL, |
140 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 175 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
141 | .target_cpus = x2apic_target_cpus, | 176 | .apic_id_registered = x2apic_apic_id_registered, |
142 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 177 | |
143 | .apic_id_registered = x2apic_apic_id_registered, | 178 | .irq_delivery_mode = dest_Fixed, |
144 | .init_apic_ldr = init_x2apic_ldr, | 179 | .irq_dest_mode = 0, /* physical */ |
145 | .send_IPI_all = x2apic_send_IPI_all, | 180 | |
146 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 181 | .target_cpus = x2apic_target_cpus, |
147 | .send_IPI_mask = x2apic_send_IPI_mask, | 182 | .disable_esr = 0, |
148 | .send_IPI_self = x2apic_send_IPI_self, | 183 | .dest_logical = 0, |
149 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 184 | .check_apicid_used = NULL, |
150 | .phys_pkg_id = phys_pkg_id, | 185 | .check_apicid_present = NULL, |
151 | .get_apic_id = get_apic_id, | 186 | |
152 | .set_apic_id = set_apic_id, | 187 | .vector_allocation_domain = x2apic_vector_allocation_domain, |
153 | .apic_id_mask = (0xFFFFFFFFu), | 188 | .init_apic_ldr = init_x2apic_ldr, |
189 | |||
190 | .ioapic_phys_id_map = NULL, | ||
191 | .setup_apic_routing = NULL, | ||
192 | .multi_timer_check = NULL, | ||
193 | .apicid_to_node = NULL, | ||
194 | .cpu_to_logical_apicid = NULL, | ||
195 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
196 | .apicid_to_cpu_present = NULL, | ||
197 | .setup_portio_remap = NULL, | ||
198 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
199 | .enable_apic_mode = NULL, | ||
200 | .phys_pkg_id = x2apic_phys_pkg_id, | ||
201 | .mps_oem_check = NULL, | ||
202 | |||
203 | .get_apic_id = x2apic_phys_get_apic_id, | ||
204 | .set_apic_id = set_apic_id, | ||
205 | .apic_id_mask = 0xFFFFFFFFu, | ||
206 | |||
207 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
208 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
209 | |||
210 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
211 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
212 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
213 | .send_IPI_all = x2apic_send_IPI_all, | ||
214 | .send_IPI_self = x2apic_send_IPI_self, | ||
215 | |||
216 | .wakeup_cpu = NULL, | ||
217 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
218 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
219 | .wait_for_init_deassert = NULL, | ||
220 | .smp_callin_clear_local_apic = NULL, | ||
221 | .store_NMI_vector = NULL, | ||
222 | .inquire_remote_apic = NULL, | ||
154 | }; | 223 | }; |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2c7dbdb98278..6adb5e6f4d92 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
13 | #include <linux/cpu.h> | ||
13 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
14 | #include <linux/string.h> | 15 | #include <linux/string.h> |
15 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
@@ -17,10 +18,14 @@ | |||
17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
21 | #include <linux/timer.h> | ||
22 | #include <linux/proc_fs.h> | ||
23 | #include <asm/current.h> | ||
20 | #include <asm/smp.h> | 24 | #include <asm/smp.h> |
21 | #include <asm/ipi.h> | 25 | #include <asm/ipi.h> |
22 | #include <asm/genapic.h> | 26 | #include <asm/genapic.h> |
23 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
28 | #include <asm/uv/uv.h> | ||
24 | #include <asm/uv/uv_mmrs.h> | 29 | #include <asm/uv/uv_mmrs.h> |
25 | #include <asm/uv/uv_hub.h> | 30 | #include <asm/uv/uv_hub.h> |
26 | #include <asm/uv/bios.h> | 31 | #include <asm/uv/bios.h> |
@@ -75,16 +80,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); | |||
75 | 80 | ||
76 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 81 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
77 | 82 | ||
78 | static cpumask_t uv_target_cpus(void) | 83 | static const struct cpumask *uv_target_cpus(void) |
79 | { | 84 | { |
80 | return cpumask_of_cpu(0); | 85 | return cpumask_of(0); |
81 | } | 86 | } |
82 | 87 | ||
83 | static cpumask_t uv_vector_allocation_domain(int cpu) | 88 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) |
84 | { | 89 | { |
85 | cpumask_t domain = CPU_MASK_NONE; | 90 | cpumask_clear(retmask); |
86 | cpu_set(cpu, domain); | 91 | cpumask_set_cpu(cpu, retmask); |
87 | return domain; | ||
88 | } | 92 | } |
89 | 93 | ||
90 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) | 94 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) |
@@ -114,37 +118,49 @@ static void uv_send_IPI_one(int cpu, int vector) | |||
114 | int pnode; | 118 | int pnode; |
115 | 119 | ||
116 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | 120 | apicid = per_cpu(x86_cpu_to_apicid, cpu); |
117 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ | 121 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ |
118 | pnode = uv_apicid_to_pnode(apicid); | 122 | pnode = uv_apicid_to_pnode(apicid); |
119 | val = | 123 | |
120 | (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << | 124 | val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | |
121 | UVH_IPI_INT_APIC_ID_SHFT) | | 125 | ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | |
122 | (vector << UVH_IPI_INT_VECTOR_SHFT); | 126 | ( vector << UVH_IPI_INT_VECTOR_SHFT ); |
127 | |||
123 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 128 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
124 | } | 129 | } |
125 | 130 | ||
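[Annotation] uv_send_IPI_one(), reformatted above, packs the send bit, local APIC ID and vector into a single 64-bit hub MMR write. The shift constants below are stand-ins for the real UVH_* values; the packing shape is the point:

#include <stdio.h>
#include <stdint.h>

#define IPI_SEND_SHFT    63   /* "go" bit (illustrative position) */
#define IPI_APIC_ID_SHFT 16
#define IPI_VECTOR_SHFT   0

static void write_global_mmr64(int pnode, uint64_t val)
{
    printf("pnode %d: IPI_INT <- %#llx\n", pnode, (unsigned long long)val);
}

static void uv_send_ipi_one(int apicid, int pnode, int vector)
{
    int lapicid = apicid & 0x3f;  /* low bits address the local APIC */

    uint64_t val = (1ULL << IPI_SEND_SHFT) |
                   ((uint64_t)lapicid << IPI_APIC_ID_SHFT) |
                   ((uint64_t)vector  << IPI_VECTOR_SHFT);

    write_global_mmr64(pnode, val);
}

int main(void)
{
    uv_send_ipi_one(0x47, 1, 0xfd);
    return 0;
}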
126 | static void uv_send_IPI_mask(cpumask_t mask, int vector) | 131 | static void uv_send_IPI_mask(const struct cpumask *mask, int vector) |
127 | { | 132 | { |
128 | unsigned int cpu; | 133 | unsigned int cpu; |
129 | 134 | ||
130 | for_each_possible_cpu(cpu) | 135 | for_each_cpu(cpu, mask) |
131 | if (cpu_isset(cpu, mask)) | 136 | uv_send_IPI_one(cpu, vector); |
137 | } | ||
138 | |||
139 | static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
140 | { | ||
141 | unsigned int this_cpu = smp_processor_id(); | ||
142 | unsigned int cpu; | ||
143 | |||
144 | for_each_cpu(cpu, mask) { | ||
145 | if (cpu != this_cpu) | ||
132 | uv_send_IPI_one(cpu, vector); | 146 | uv_send_IPI_one(cpu, vector); |
147 | } | ||
133 | } | 148 | } |
134 | 149 | ||
135 | static void uv_send_IPI_allbutself(int vector) | 150 | static void uv_send_IPI_allbutself(int vector) |
136 | { | 151 | { |
137 | cpumask_t mask = cpu_online_map; | 152 | unsigned int this_cpu = smp_processor_id(); |
138 | 153 | unsigned int cpu; | |
139 | cpu_clear(smp_processor_id(), mask); | ||
140 | 154 | ||
141 | if (!cpus_empty(mask)) | 155 | for_each_online_cpu(cpu) { |
142 | uv_send_IPI_mask(mask, vector); | 156 | if (cpu != this_cpu) |
157 | uv_send_IPI_one(cpu, vector); | ||
158 | } | ||
143 | } | 159 | } |
144 | 160 | ||
145 | static void uv_send_IPI_all(int vector) | 161 | static void uv_send_IPI_all(int vector) |
146 | { | 162 | { |
147 | uv_send_IPI_mask(cpu_online_map, vector); | 163 | uv_send_IPI_mask(cpu_online_mask, vector); |
148 | } | 164 | } |
149 | 165 | ||
150 | static int uv_apic_id_registered(void) | 166 | static int uv_apic_id_registered(void) |
@@ -156,22 +172,41 @@ static void uv_init_apic_ldr(void) | |||
156 | { | 172 | { |
157 | } | 173 | } |
158 | 174 | ||
159 | static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | 175 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) |
160 | { | 176 | { |
161 | int cpu; | ||
162 | |||
163 | /* | 177 | /* |
164 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 178 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
165 | * May as well be the first. | 179 | * May as well be the first. |
166 | */ | 180 | */ |
167 | cpu = first_cpu(cpumask); | 181 | int cpu = cpumask_first(cpumask); |
182 | |||
168 | if ((unsigned)cpu < nr_cpu_ids) | 183 | if ((unsigned)cpu < nr_cpu_ids) |
169 | return per_cpu(x86_cpu_to_apicid, cpu); | 184 | return per_cpu(x86_cpu_to_apicid, cpu); |
170 | else | 185 | else |
171 | return BAD_APICID; | 186 | return BAD_APICID; |
172 | } | 187 | } |
173 | 188 | ||
174 | static unsigned int get_apic_id(unsigned long x) | 189 | static unsigned int |
190 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
191 | const struct cpumask *andmask) | ||
192 | { | ||
193 | int cpu; | ||
194 | |||
195 | /* | ||
196 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
197 | * May as well be the first. | ||
198 | */ | ||
199 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
200 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
201 | break; | ||
202 | } | ||
203 | if (cpu < nr_cpu_ids) | ||
204 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
205 | |||
206 | return BAD_APICID; | ||
207 | } | ||
208 | |||
209 | static unsigned int x2apic_get_apic_id(unsigned long x) | ||
175 | { | 210 | { |
176 | unsigned int id; | 211 | unsigned int id; |
177 | 212 | ||
@@ -193,10 +228,10 @@ static unsigned long set_apic_id(unsigned int id) | |||
193 | static unsigned int uv_read_apic_id(void) | 228 | static unsigned int uv_read_apic_id(void) |
194 | { | 229 | { |
195 | 230 | ||
196 | return get_apic_id(apic_read(APIC_ID)); | 231 | return x2apic_get_apic_id(apic_read(APIC_ID)); |
197 | } | 232 | } |
198 | 233 | ||
199 | static unsigned int phys_pkg_id(int index_msb) | 234 | static int uv_phys_pkg_id(int initial_apicid, int index_msb) |
200 | { | 235 | { |
201 | return uv_read_apic_id() >> index_msb; | 236 | return uv_read_apic_id() >> index_msb; |
202 | } | 237 | } |
@@ -207,23 +242,57 @@ static void uv_send_IPI_self(int vector) | |||
207 | } | 242 | } |
208 | 243 | ||
209 | struct genapic apic_x2apic_uv_x = { | 244 | struct genapic apic_x2apic_uv_x = { |
210 | .name = "UV large system", | 245 | |
211 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, | 246 | .name = "UV large system", |
212 | .int_delivery_mode = dest_Fixed, | 247 | .probe = NULL, |
213 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 248 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, |
214 | .target_cpus = uv_target_cpus, | 249 | .apic_id_registered = uv_apic_id_registered, |
215 | .vector_allocation_domain = uv_vector_allocation_domain, | 250 | |
216 | .apic_id_registered = uv_apic_id_registered, | 251 | .irq_delivery_mode = dest_Fixed, |
217 | .init_apic_ldr = uv_init_apic_ldr, | 252 | .irq_dest_mode = 1, /* logical */ |
218 | .send_IPI_all = uv_send_IPI_all, | 253 | |
219 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 254 | .target_cpus = uv_target_cpus, |
220 | .send_IPI_mask = uv_send_IPI_mask, | 255 | .disable_esr = 0, |
221 | .send_IPI_self = uv_send_IPI_self, | 256 | .dest_logical = APIC_DEST_LOGICAL, |
222 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 257 | .check_apicid_used = NULL, |
223 | .phys_pkg_id = phys_pkg_id, | 258 | .check_apicid_present = NULL, |
224 | .get_apic_id = get_apic_id, | 259 | |
225 | .set_apic_id = set_apic_id, | 260 | .vector_allocation_domain = uv_vector_allocation_domain, |
226 | .apic_id_mask = (0xFFFFFFFFu), | 261 | .init_apic_ldr = uv_init_apic_ldr, |
262 | |||
263 | .ioapic_phys_id_map = NULL, | ||
264 | .setup_apic_routing = NULL, | ||
265 | .multi_timer_check = NULL, | ||
266 | .apicid_to_node = NULL, | ||
267 | .cpu_to_logical_apicid = NULL, | ||
268 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
269 | .apicid_to_cpu_present = NULL, | ||
270 | .setup_portio_remap = NULL, | ||
271 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
272 | .enable_apic_mode = NULL, | ||
273 | .phys_pkg_id = uv_phys_pkg_id, | ||
274 | .mps_oem_check = NULL, | ||
275 | |||
276 | .get_apic_id = x2apic_get_apic_id, | ||
277 | .set_apic_id = set_apic_id, | ||
278 | .apic_id_mask = 0xFFFFFFFFu, | ||
279 | |||
280 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | ||
281 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | ||
282 | |||
283 | .send_IPI_mask = uv_send_IPI_mask, | ||
284 | .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, | ||
285 | .send_IPI_allbutself = uv_send_IPI_allbutself, | ||
286 | .send_IPI_all = uv_send_IPI_all, | ||
287 | .send_IPI_self = uv_send_IPI_self, | ||
288 | |||
289 | .wakeup_cpu = NULL, | ||
290 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
291 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
292 | .wait_for_init_deassert = NULL, | ||
293 | .smp_callin_clear_local_apic = NULL, | ||
294 | .store_NMI_vector = NULL, | ||
295 | .inquire_remote_apic = NULL, | ||
227 | }; | 296 | }; |
228 | 297 | ||
229 | static __cpuinit void set_x2apic_extra_bits(int pnode) | 298 | static __cpuinit void set_x2apic_extra_bits(int pnode) |
@@ -356,6 +425,103 @@ static __init void uv_rtc_init(void) | |||
356 | } | 425 | } |
357 | 426 | ||
358 | /* | 427 | /* |
428 | * percpu heartbeat timer | ||
429 | */ | ||
430 | static void uv_heartbeat(unsigned long ignored) | ||
431 | { | ||
432 | struct timer_list *timer = &uv_hub_info->scir.timer; | ||
433 | unsigned char bits = uv_hub_info->scir.state; | ||
434 | |||
435 | /* flip heartbeat bit */ | ||
436 | bits ^= SCIR_CPU_HEARTBEAT; | ||
437 | |||
438 | /* is this cpu idle? */ | ||
439 | if (idle_cpu(raw_smp_processor_id())) | ||
440 | bits &= ~SCIR_CPU_ACTIVITY; | ||
441 | else | ||
442 | bits |= SCIR_CPU_ACTIVITY; | ||
443 | |||
444 | /* update system controller interface reg */ | ||
445 | uv_set_scir_bits(bits); | ||
446 | |||
447 | /* enable next timer period */ | ||
448 | mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); | ||
449 | } | ||
450 | |||
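[Annotation] uv_heartbeat() implements a simple liveness protocol: flip one SCIR bit per tick, mirror the CPU's idle state into another, then mod_timer() re-arms the same timer. The bit manipulation, modeled in userspace:

#include <stdio.h>

#define SCIR_CPU_HEARTBEAT 0x01
#define SCIR_CPU_ACTIVITY  0x02

static unsigned char scir_state = SCIR_CPU_HEARTBEAT | SCIR_CPU_ACTIVITY;

static void heartbeat_tick(int cpu_is_idle)
{
    unsigned char bits = scir_state;

    bits ^= SCIR_CPU_HEARTBEAT;        /* flip: proves this CPU is alive */
    if (cpu_is_idle)
        bits &= ~SCIR_CPU_ACTIVITY;    /* report idle to the controller */
    else
        bits |= SCIR_CPU_ACTIVITY;

    scir_state = bits;                 /* uv_set_scir_bits() analogue */
    printf("SCIR = %#x\n", scir_state);
}

int main(void)
{
    heartbeat_tick(0);
    heartbeat_tick(1);   /* the system controller sees the bit toggle */
    heartbeat_tick(1);
    return 0;
}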
451 | static void __cpuinit uv_heartbeat_enable(int cpu) | ||
452 | { | ||
453 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | ||
454 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | ||
455 | |||
456 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | ||
457 | setup_timer(timer, uv_heartbeat, cpu); | ||
458 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | ||
459 | add_timer_on(timer, cpu); | ||
460 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | ||
461 | } | ||
462 | |||
463 | /* check boot cpu */ | ||
464 | if (!uv_cpu_hub_info(0)->scir.enabled) | ||
465 | uv_heartbeat_enable(0); | ||
466 | } | ||
467 | |||
468 | #ifdef CONFIG_HOTPLUG_CPU | ||
469 | static void __cpuinit uv_heartbeat_disable(int cpu) | ||
470 | { | ||
471 | if (uv_cpu_hub_info(cpu)->scir.enabled) { | ||
472 | uv_cpu_hub_info(cpu)->scir.enabled = 0; | ||
473 | del_timer(&uv_cpu_hub_info(cpu)->scir.timer); | ||
474 | } | ||
475 | uv_set_cpu_scir_bits(cpu, 0xff); | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * cpu hotplug notifier | ||
480 | */ | ||
481 | static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, | ||
482 | unsigned long action, void *hcpu) | ||
483 | { | ||
484 | long cpu = (long)hcpu; | ||
485 | |||
486 | switch (action) { | ||
487 | case CPU_ONLINE: | ||
488 | uv_heartbeat_enable(cpu); | ||
489 | break; | ||
490 | case CPU_DOWN_PREPARE: | ||
491 | uv_heartbeat_disable(cpu); | ||
492 | break; | ||
493 | default: | ||
494 | break; | ||
495 | } | ||
496 | return NOTIFY_OK; | ||
497 | } | ||
498 | |||
499 | static __init void uv_scir_register_cpu_notifier(void) | ||
500 | { | ||
501 | hotcpu_notifier(uv_scir_cpu_notify, 0); | ||
502 | } | ||
503 | |||
504 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
505 | |||
506 | static __init void uv_scir_register_cpu_notifier(void) | ||
507 | { | ||
508 | } | ||
509 | |||
510 | static __init int uv_init_heartbeat(void) | ||
511 | { | ||
512 | int cpu; | ||
513 | |||
514 | if (is_uv_system()) | ||
515 | for_each_online_cpu(cpu) | ||
516 | uv_heartbeat_enable(cpu); | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | late_initcall(uv_init_heartbeat); | ||
521 | |||
522 | #endif /* !CONFIG_HOTPLUG_CPU */ | ||
523 | |||
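[Annotation] The notifier above is the standard hotplug pattern: start the per-cpu timer when a CPU comes online, stop it before the CPU goes down. A dispatch model; the action codes here are stand-ins for the kernel's CPU_* constants:

#include <stdio.h>

enum { CPU_ONLINE = 1, CPU_DOWN_PREPARE = 2 };

static void heartbeat_enable(long cpu)  { printf("cpu %ld: timer on\n", cpu); }
static void heartbeat_disable(long cpu) { printf("cpu %ld: timer off\n", cpu); }

static int scir_cpu_notify(unsigned long action, void *hcpu)
{
    long cpu = (long)hcpu;

    switch (action) {
    case CPU_ONLINE:        /* CPU finished booting: start the heartbeat */
        heartbeat_enable(cpu);
        break;
    case CPU_DOWN_PREPARE:  /* CPU about to go away: stop it first */
        heartbeat_disable(cpu);
        break;
    }
    return 0;               /* NOTIFY_OK */
}

int main(void)
{
    scir_cpu_notify(CPU_ONLINE, (void *)3L);
    scir_cpu_notify(CPU_DOWN_PREPARE, (void *)3L);
    return 0;
}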
524 | /* | ||
359 | * Called on each cpu to initialize the per_cpu UV data area. | 525 | * Called on each cpu to initialize the per_cpu UV data area. |
360 | * ZZZ hotplug not supported yet | 526 | * ZZZ hotplug not supported yet |
361 | */ | 527 | */ |
@@ -428,7 +594,7 @@ void __init uv_system_init(void) | |||
428 | 594 | ||
429 | uv_bios_init(); | 595 | uv_bios_init(); |
430 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 596 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, |
431 | &uv_coherency_id, &uv_region_size); | 597 | &sn_coherency_id, &sn_region_size); |
432 | uv_rtc_init(); | 598 | uv_rtc_init(); |
433 | 599 | ||
434 | for_each_present_cpu(cpu) { | 600 | for_each_present_cpu(cpu) { |
@@ -439,8 +605,7 @@ void __init uv_system_init(void) | |||
439 | uv_blade_info[blade].nr_possible_cpus++; | 605 | uv_blade_info[blade].nr_possible_cpus++; |
440 | 606 | ||
441 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 607 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
442 | uv_cpu_hub_info(cpu)->lowmem_remap_top = | 608 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
443 | lowmem_redir_base + lowmem_redir_size; | ||
444 | uv_cpu_hub_info(cpu)->m_val = m_val; | 609 | uv_cpu_hub_info(cpu)->m_val = m_val; |
445 | uv_cpu_hub_info(cpu)->n_val = m_val; | 610 | uv_cpu_hub_info(cpu)->n_val = m_val; |
446 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 611 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
@@ -450,7 +615,8 @@ void __init uv_system_init(void) | |||
450 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 615 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
451 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 616 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
452 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 617 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
453 | uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; | 618 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
619 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | ||
454 | uv_node_to_blade[nid] = blade; | 620 | uv_node_to_blade[nid] = blade; |
455 | uv_cpu_to_blade[cpu] = blade; | 621 | uv_cpu_to_blade[cpu] = blade; |
456 | max_pnode = max(pnode, max_pnode); | 622 | max_pnode = max(pnode, max_pnode); |
@@ -467,4 +633,6 @@ void __init uv_system_init(void) | |||
467 | map_mmioh_high(max_pnode); | 633 | map_mmioh_high(max_pnode); |
468 | 634 | ||
469 | uv_cpu_init(); | 635 | uv_cpu_init(); |
636 | uv_scir_register_cpu_notifier(); | ||
637 | proc_mkdir("sgi_uv", NULL); | ||
470 | } | 638 | } |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 1dcb0f13897e..3e66bd364a9d 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void) | |||
35 | 35 | ||
36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
39 | 38 | ||
40 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 39 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
41 | /* of conventional memory, but does not adjust lowmem. */ | 40 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index fa1d25dd83e3..ac108d1fe182 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -12,9 +12,12 @@ | |||
12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/bios_ebda.h> | 14 | #include <asm/bios_ebda.h> |
15 | #include <asm/trampoline.h> | ||
15 | 16 | ||
16 | void __init i386_start_kernel(void) | 17 | void __init i386_start_kernel(void) |
17 | { | 18 | { |
19 | reserve_trampoline_memory(); | ||
20 | |||
18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 21 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
19 | 22 | ||
20 | #ifdef CONFIG_BLK_DEV_INITRD | 23 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d16084f90649..f5b272247690 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -24,27 +24,7 @@ | |||
24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
27 | 27 | #include <asm/trampoline.h> | |
28 | /* boot cpu pda */ | ||
29 | static struct x8664_pda _boot_cpu_pda __read_mostly; | ||
30 | |||
31 | #ifdef CONFIG_SMP | ||
32 | /* | ||
33 | * We install an empty cpu_pda pointer table to indicate to early users | ||
34 | * (numa_set_node) that the cpu_pda pointer table for cpus other than | ||
35 | * the boot cpu is not yet setup. | ||
36 | */ | ||
37 | static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; | ||
38 | #else | ||
39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; | ||
40 | #endif | ||
41 | |||
42 | void __init x86_64_init_pda(void) | ||
43 | { | ||
44 | _cpu_pda = __cpu_pda; | ||
45 | cpu_pda(0) = &_boot_cpu_pda; | ||
46 | pda_init(0); | ||
47 | } | ||
48 | 28 | ||
49 | static void __init zap_identity_mappings(void) | 29 | static void __init zap_identity_mappings(void) |
50 | { | 30 | { |
@@ -111,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
111 | if (console_loglevel == 10) | 91 | if (console_loglevel == 10) |
112 | early_printk("Kernel alive\n"); | 92 | early_printk("Kernel alive\n"); |
113 | 93 | ||
114 | x86_64_init_pda(); | ||
115 | |||
116 | x86_64_start_reservations(real_mode_data); | 94 | x86_64_start_reservations(real_mode_data); |
117 | } | 95 | } |
118 | 96 | ||
@@ -120,6 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
120 | { | 98 | { |
121 | copy_bootdata(__va(real_mode_data)); | 99 | copy_bootdata(__va(real_mode_data)); |
122 | 100 | ||
101 | reserve_trampoline_memory(); | ||
102 | |||
123 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 103 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
124 | 104 | ||
125 | #ifdef CONFIG_BLK_DEV_INITRD | 105 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70b..2a0aad7718d5 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/asm-offsets.h> | 19 | #include <asm/asm-offsets.h> |
20 | #include <asm/setup.h> | 20 | #include <asm/setup.h> |
21 | #include <asm/processor-flags.h> | 21 | #include <asm/processor-flags.h> |
22 | #include <asm/percpu.h> | ||
22 | 23 | ||
23 | /* Physical address */ | 24 | /* Physical address */ |
24 | #define pa(X) ((X) - __PAGE_OFFSET) | 25 | #define pa(X) ((X) - __PAGE_OFFSET) |
@@ -429,14 +430,34 @@ is386: movl $2,%ecx # set MP | |||
429 | ljmp $(__KERNEL_CS),$1f | 430 | ljmp $(__KERNEL_CS),$1f |
430 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 431 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
431 | movl %eax,%ss # after changing gdt. | 432 | movl %eax,%ss # after changing gdt. |
432 | movl %eax,%fs # gets reset once there's real percpu | ||
433 | 433 | ||
434 | movl $(__USER_DS),%eax # DS/ES contains default USER segment | 434 | movl $(__USER_DS),%eax # DS/ES contains default USER segment |
435 | movl %eax,%ds | 435 | movl %eax,%ds |
436 | movl %eax,%es | 436 | movl %eax,%es |
437 | 437 | ||
438 | xorl %eax,%eax # Clear GS and LDT | 438 | movl $(__KERNEL_PERCPU), %eax |
439 | movl %eax,%fs # set this cpu's percpu | ||
440 | |||
441 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
442 | /* | ||
443 | * The linker can't handle this by relocation. Manually set | ||
444 | * base address in stack canary segment descriptor. | ||
445 | */ | ||
446 | cmpb $0,ready | ||
447 | jne 1f | ||
448 | movl $per_cpu__gdt_page,%eax | ||
449 | movl $per_cpu__stack_canary,%ecx | ||
450 | subl $20, %ecx | ||
451 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | ||
452 | shrl $16, %ecx | ||
453 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | ||
454 | movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) | ||
455 | 1: | ||
456 | #endif | ||
457 | movl $(__KERNEL_STACK_CANARY),%eax | ||
439 | movl %eax,%gs | 458 | movl %eax,%gs |
459 | |||
460 | xorl %eax,%eax # Clear LDT | ||
440 | lldt %ax | 461 | lldt %ax |
441 | 462 | ||
442 | cld # gcc2 wants the direction flag cleared at all times | 463 | cld # gcc2 wants the direction flag cleared at all times |
@@ -446,8 +467,6 @@ is386: movl $2,%ecx # set MP | |||
446 | movb $1, ready | 467 | movb $1, ready |
447 | cmpb $0,%cl # the first CPU calls start_kernel | 468 | cmpb $0,%cl # the first CPU calls start_kernel |
448 | je 1f | 469 | je 1f |
449 | movl $(__KERNEL_PERCPU), %eax | ||
450 | movl %eax,%fs # set this cpu's percpu | ||
451 | movl (stack_start), %esp | 470 | movl (stack_start), %esp |
452 | 1: | 471 | 1: |
453 | #endif /* CONFIG_SMP */ | 472 | #endif /* CONFIG_SMP */ |
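The CONFIG_CC_STACKPROTECTOR block above patches the stack-canary GDT entry by hand because the base field of an x86 segment descriptor is split across non-contiguous bytes, which a linker relocation cannot express; the subl $20 offsets the segment base so the canary value itself lands at %gs:20. A hedged C equivalent of the byte shuffling:

    #include <stdint.h>

    /* Scatter a 32-bit base into descriptor bytes 2, 3, 4 and 7. */
    static void set_desc_base(uint8_t *desc, uint32_t base)
    {
            desc[2] = base & 0xff;           /* base 7:0,   the movw %cx  */
            desc[3] = (base >> 8) & 0xff;    /* base 15:8                 */
            desc[4] = (base >> 16) & 0xff;   /* base 23:16, movb %cl      */
            desc[7] = (base >> 24) & 0xff;   /* base 31:24, movb %ch      */
    }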
@@ -548,12 +567,8 @@ early_fault: | |||
548 | pushl %eax | 567 | pushl %eax |
549 | pushl %edx /* trapno */ | 568 | pushl %edx /* trapno */ |
550 | pushl $fault_msg | 569 | pushl $fault_msg |
551 | #ifdef CONFIG_EARLY_PRINTK | ||
552 | call early_printk | ||
553 | #else | ||
554 | call printk | 570 | call printk |
555 | #endif | 571 | #endif |
556 | #endif | ||
557 | call dump_stack | 572 | call dump_stack |
558 | hlt_loop: | 573 | hlt_loop: |
559 | hlt | 574 | hlt |
@@ -580,11 +595,10 @@ ignore_int: | |||
580 | pushl 32(%esp) | 595 | pushl 32(%esp) |
581 | pushl 40(%esp) | 596 | pushl 40(%esp) |
582 | pushl $int_msg | 597 | pushl $int_msg |
583 | #ifdef CONFIG_EARLY_PRINTK | ||
584 | call early_printk | ||
585 | #else | ||
586 | call printk | 598 | call printk |
587 | #endif | 599 | |
600 | call dump_stack | ||
601 | |||
588 | addl $(5*4),%esp | 602 | addl $(5*4),%esp |
589 | popl %ds | 603 | popl %ds |
590 | popl %es | 604 | popl %es |
@@ -660,7 +674,7 @@ early_recursion_flag: | |||
660 | .long 0 | 674 | .long 0 |
661 | 675 | ||
662 | int_msg: | 676 | int_msg: |
663 | .asciz "Unknown interrupt or fault at EIP %p %p %p\n" | 677 | .asciz "Unknown interrupt or fault at: %p %p %p\n" |
664 | 678 | ||
665 | fault_msg: | 679 | fault_msg: |
666 | /* fault info: */ | 680 | /* fault info: */ |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 26cfdc1d7c7f..2e648e3a5ea4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
20 | #include <asm/cache.h> | 20 | #include <asm/cache.h> |
21 | #include <asm/processor-flags.h> | 21 | #include <asm/processor-flags.h> |
22 | #include <asm/percpu.h> | ||
22 | 23 | ||
23 | #ifdef CONFIG_PARAVIRT | 24 | #ifdef CONFIG_PARAVIRT |
24 | #include <asm/asm-offsets.h> | 25 | #include <asm/asm-offsets.h> |
@@ -226,12 +227,15 @@ ENTRY(secondary_startup_64) | |||
226 | movl %eax,%fs | 227 | movl %eax,%fs |
227 | movl %eax,%gs | 228 | movl %eax,%gs |
228 | 229 | ||
229 | /* | 230 | /* Set up %gs. |
230 | * Setup up a dummy PDA. this is just for some early bootup code | 231 | * |
231 | * that does in_interrupt() | 232 | * The base of %gs always points to the bottom of the irqstack |
232 | */ | 233 | * union. If the stack protector canary is enabled, it is |
234 | * located at %gs:40. Note that, on SMP, the boot cpu uses | ||
235 | * init data section till per cpu areas are set up. | ||
236 | */ | ||
233 | movl $MSR_GS_BASE,%ecx | 237 | movl $MSR_GS_BASE,%ecx |
234 | movq $empty_zero_page,%rax | 238 | movq initial_gs(%rip),%rax |
235 | movq %rax,%rdx | 239 | movq %rax,%rdx |
236 | shrq $32,%rdx | 240 | shrq $32,%rdx |
237 | wrmsr | 241 | wrmsr |
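The wrmsr sequence above loads MSR_GS_BASE with the address kept in initial_gs, split across edx:eax as the instruction requires. A minimal sketch of the same operation from C, assuming MSR_GS_BASE's architectural number 0xc0000101:

    #include <stdint.h>

    #define MSR_GS_BASE_SKETCH 0xc0000101u

    static inline void write_gs_base(uint64_t base)
    {
            uint32_t lo = (uint32_t)base;          /* %eax: low 32 bits  */
            uint32_t hi = (uint32_t)(base >> 32);  /* %edx: high 32 bits */
            asm volatile("wrmsr"
                         : : "c"(MSR_GS_BASE_SKETCH), "a"(lo), "d"(hi));
    }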
@@ -257,6 +261,8 @@ ENTRY(secondary_startup_64) | |||
257 | .align 8 | 261 | .align 8 |
258 | ENTRY(initial_code) | 262 | ENTRY(initial_code) |
259 | .quad x86_64_start_kernel | 263 | .quad x86_64_start_kernel |
264 | ENTRY(initial_gs) | ||
265 | .quad INIT_PER_CPU_VAR(irq_stack_union) | ||
260 | __FINITDATA | 266 | __FINITDATA |
261 | 267 | ||
262 | ENTRY(stack_start) | 268 | ENTRY(stack_start) |
@@ -305,7 +311,7 @@ ENTRY(early_idt_handler) | |||
305 | call dump_stack | 311 | call dump_stack |
306 | #ifdef CONFIG_KALLSYMS | 312 | #ifdef CONFIG_KALLSYMS |
307 | leaq early_idt_ripmsg(%rip),%rdi | 313 | leaq early_idt_ripmsg(%rip),%rdi |
308 | movq 8(%rsp),%rsi # get rip again | 314 | movq 0(%rsp),%rsi # get rip again |
309 | call __print_symbol | 315 | call __print_symbol |
310 | #endif | 316 | #endif |
311 | #endif /* EARLY_PRINTK */ | 317 | #endif /* EARLY_PRINTK */ |
@@ -401,7 +407,8 @@ NEXT_PAGE(level2_spare_pgt) | |||
401 | .globl early_gdt_descr | 407 | .globl early_gdt_descr |
402 | early_gdt_descr: | 408 | early_gdt_descr: |
403 | .word GDT_ENTRIES*8-1 | 409 | .word GDT_ENTRIES*8-1 |
404 | .quad per_cpu__gdt_page | 410 | early_gdt_descr_base: |
411 | .quad INIT_PER_CPU_VAR(gdt_page) | ||
405 | 412 | ||
406 | ENTRY(phys_base) | 413 | ENTRY(phys_base) |
407 | /* This must match the first entry in level2_kernel_pgt */ | 414 | /* This must match the first entry in level2_kernel_pgt */ |
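early_gdt_descr above is the operand for lgdt: a packed 16-bit limit (GDT_ENTRIES*8 - 1) followed by the base address, which this commit redirects to the init-section copy of the per-cpu gdt_page. As a hedged C view of that layout:

    #include <stdint.h>

    struct gdt_ptr {
            uint16_t limit;   /* size of the GDT in bytes, minus one     */
            uint64_t base;    /* linear address of the first descriptor */
    } __attribute__((packed));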
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 067d8de913f6..388254f69a2a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -33,7 +33,9 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 34 | */ |
35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
36 | unsigned long hpet_num_timers; | 36 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | ||
38 | #endif | ||
37 | static void __iomem *hpet_virt_address; | 39 | static void __iomem *hpet_virt_address; |
38 | 40 | ||
39 | struct hpet_dev { | 41 | struct hpet_dev { |
@@ -246,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) | |||
246 | * Start hpet with the boot cpu mask and make it | 248 | * Start hpet with the boot cpu mask and make it |
247 | * global after the IO_APIC has been initialized. | 249 | * global after the IO_APIC has been initialized. |
248 | */ | 250 | */ |
249 | hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 251 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); |
250 | clockevents_register_device(&hpet_clockevent); | 252 | clockevents_register_device(&hpet_clockevent); |
251 | global_clock_event = &hpet_clockevent; | 253 | global_clock_event = &hpet_clockevent; |
252 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 254 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
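This file's cpumask_of_cpu() → cpumask_of() conversions (here and in the hunks below) change by-value masks into pointers to const masks: with a large NR_CPUS a cpumask_t is hundreds of bytes, so returning it by value copies the whole bitmap on every call. A hedged illustration of the difference, using toy types rather than the kernel's:

    #include <string.h>

    #define NR_CPUS_TOY 4096
    #define BPL (8 * sizeof(unsigned long))
    struct mask_toy { unsigned long bits[NR_CPUS_TOY / BPL]; };

    /* Old style: a 512-byte struct is built and copied per call. */
    static struct mask_toy mask_of_byval(int cpu)
    {
            struct mask_toy m;
            memset(&m, 0, sizeof(m));
            m.bits[cpu / BPL] |= 1UL << (cpu % BPL);
            return m;
    }

    /* New style: hand back a pointer into a prebuilt table; no copy. */
    static const struct mask_toy *mask_of(int cpu)
    {
            static struct mask_toy table[NR_CPUS_TOY]; /* assume pre-filled */
            return &table[cpu];
    }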
@@ -301,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
301 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | 303 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); |
302 | hpet_setup_msi_irq(hdev->irq); | 304 | hpet_setup_msi_irq(hdev->irq); |
303 | disable_irq(hdev->irq); | 305 | disable_irq(hdev->irq); |
304 | irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); | 306 | irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); |
305 | enable_irq(hdev->irq); | 307 | enable_irq(hdev->irq); |
306 | } | 308 | } |
307 | break; | 309 | break; |
@@ -449,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
449 | return -1; | 451 | return -1; |
450 | 452 | ||
451 | disable_irq(dev->irq); | 453 | disable_irq(dev->irq); |
452 | irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); | 454 | irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); |
453 | enable_irq(dev->irq); | 455 | enable_irq(dev->irq); |
454 | 456 | ||
455 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", | 457 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", |
@@ -500,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
500 | /* 5 usec minimum reprogramming delta. */ | 502 | /* 5 usec minimum reprogramming delta. */ |
501 | evt->min_delta_ns = 5000; | 503 | evt->min_delta_ns = 5000; |
502 | 504 | ||
503 | evt->cpumask = cpumask_of_cpu(hdev->cpu); | 505 | evt->cpumask = cpumask_of(hdev->cpu); |
504 | clockevents_register_device(evt); | 506 | clockevents_register_device(evt); |
505 | } | 507 | } |
506 | 508 | ||
@@ -626,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n, | |||
626 | 628 | ||
627 | switch (action & 0xf) { | 629 | switch (action & 0xf) { |
628 | case CPU_ONLINE: | 630 | case CPU_ONLINE: |
629 | INIT_DELAYED_WORK(&work.work, hpet_work); | 631 | INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); |
630 | init_completion(&work.complete); | 632 | init_completion(&work.complete); |
631 | /* FIXME: add schedule_work_on() */ | 633 | /* FIXME: add schedule_work_on() */ |
632 | schedule_delayed_work_on(cpu, &work.work, 0); | 634 | schedule_delayed_work_on(cpu, &work.work, 0); |
633 | wait_for_completion(&work.complete); | 635 | wait_for_completion(&work.complete); |
636 | destroy_timer_on_stack(&work.work.timer); | ||
634 | break; | 637 | break; |
635 | case CPU_DEAD: | 638 | case CPU_DEAD: |
636 | if (hdev) { | 639 | if (hdev) { |
@@ -811,7 +814,7 @@ int __init hpet_enable(void) | |||
811 | 814 | ||
812 | out_nohpet: | 815 | out_nohpet: |
813 | hpet_clear_mapping(); | 816 | hpet_clear_mapping(); |
814 | boot_hpet_disable = 1; | 817 | hpet_address = 0; |
815 | return 0; | 818 | return 0; |
816 | } | 819 | } |
817 | 820 | ||
@@ -834,10 +837,11 @@ static __init int hpet_late_init(void) | |||
834 | 837 | ||
835 | hpet_address = force_hpet_address; | 838 | hpet_address = force_hpet_address; |
836 | hpet_enable(); | 839 | hpet_enable(); |
837 | if (!hpet_virt_address) | ||
838 | return -ENODEV; | ||
839 | } | 840 | } |
840 | 841 | ||
842 | if (!hpet_virt_address) | ||
843 | return -ENODEV; | ||
844 | |||
841 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 845 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
842 | 846 | ||
843 | for_each_online_cpu(cpu) { | 847 | for_each_online_cpu(cpu) { |
@@ -893,7 +897,7 @@ static unsigned long hpet_rtc_flags; | |||
893 | static int hpet_prev_update_sec; | 897 | static int hpet_prev_update_sec; |
894 | static struct rtc_time hpet_alarm_time; | 898 | static struct rtc_time hpet_alarm_time; |
895 | static unsigned long hpet_pie_count; | 899 | static unsigned long hpet_pie_count; |
896 | static unsigned long hpet_t1_cmp; | 900 | static u32 hpet_t1_cmp; |
897 | static unsigned long hpet_default_delta; | 901 | static unsigned long hpet_default_delta; |
898 | static unsigned long hpet_pie_delta; | 902 | static unsigned long hpet_pie_delta; |
899 | static unsigned long hpet_pie_limit; | 903 | static unsigned long hpet_pie_limit; |
@@ -901,6 +905,14 @@ static unsigned long hpet_pie_limit; | |||
901 | static rtc_irq_handler irq_handler; | 905 | static rtc_irq_handler irq_handler; |
902 | 906 | ||
903 | /* | 907 | /* |
908 | * Check that the hpet counter c1 is ahead of the c2 | ||
909 | */ | ||
910 | static inline int hpet_cnt_ahead(u32 c1, u32 c2) | ||
911 | { | ||
912 | return (s32)(c2 - c1) < 0; | ||
913 | } | ||
914 | |||
915 | /* | ||
904 | * Registers a IRQ handler. | 916 | * Registers a IRQ handler. |
905 | */ | 917 | */ |
906 | int hpet_register_irq_handler(rtc_irq_handler handler) | 918 | int hpet_register_irq_handler(rtc_irq_handler handler) |
@@ -1071,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void) | |||
1071 | hpet_t1_cmp += delta; | 1083 | hpet_t1_cmp += delta; |
1072 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | 1084 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); |
1073 | lost_ints++; | 1085 | lost_ints++; |
1074 | } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); | 1086 | } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); |
1075 | 1087 | ||
1076 | if (lost_ints) { | 1088 | if (lost_ints) { |
1077 | if (hpet_rtc_flags & RTC_PIE) | 1089 | if (hpet_rtc_flags & RTC_PIE) |
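hpet_cnt_ahead() and the new u32 type for hpet_t1_cmp fix a wraparound bug: the HPET comparator is 32 bits wide, so "ahead" must be decided modulo 2^32. The signed cast makes (s32)(c2 - c1) negative exactly when c1 leads c2 by less than 2^31 ticks, which the old (long) comparison got wrong near the wrap on 64-bit. A stand-alone check of that property, in plain C outside the kernel:

    #include <stdint.h>
    #include <assert.h>

    static int cnt_ahead(uint32_t c1, uint32_t c2)
    {
            return (int32_t)(c2 - c1) < 0;
    }

    int main(void)
    {
            assert(cnt_ahead(10, 5));            /* ahead, no wraparound  */
            assert(cnt_ahead(5, 0xfffffff0u));   /* ahead across the wrap */
            assert(!cnt_ahead(5, 10));           /* behind                */
            return 0;
    }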
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608d4ca8..b0f61f0dcd0a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) | |||
58 | stts(); | 58 | stts(); |
59 | } | 59 | } |
60 | 60 | ||
61 | void __init init_thread_xstate(void) | 61 | void __cpuinit init_thread_xstate(void) |
62 | { | 62 | { |
63 | if (!HAVE_HWFP) { | 63 | if (!HAVE_HWFP) { |
64 | xstate_size = sizeof(struct i387_soft_struct); | 64 | xstate_size = sizeof(struct i387_soft_struct); |
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d1b638..b42ca694dc68 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c | |||
@@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) | |||
28 | 28 | ||
29 | flags = claim_dma_lock(); | 29 | flags = claim_dma_lock(); |
30 | 30 | ||
31 | dma_outb(DMA1_RESET_REG, 0); | 31 | dma_outb(0, DMA1_RESET_REG); |
32 | dma_outb(DMA2_RESET_REG, 0); | 32 | dma_outb(0, DMA2_RESET_REG); |
33 | 33 | ||
34 | for (i = 0;i < 8;i++) { | 34 | for (i = 0; i < 8; i++) { |
35 | set_dma_addr(i, 0x000000); | 35 | set_dma_addr(i, 0x000000); |
36 | /* DMA count is a bit weird so this is not 0 */ | 36 | /* DMA count is a bit weird so this is not 0 */ |
37 | set_dma_count(i, 1); | 37 | set_dma_count(i, 1); |
@@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) | |||
51 | } | 51 | } |
52 | 52 | ||
53 | static struct sysdev_class i8237_sysdev_class = { | 53 | static struct sysdev_class i8237_sysdev_class = { |
54 | .name = "i8237", | 54 | .name = "i8237", |
55 | .suspend = i8237A_suspend, | 55 | .suspend = i8237A_suspend, |
56 | .resume = i8237A_resume, | 56 | .resume = i8237A_resume, |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static struct sys_device device_i8237A = { | 59 | static struct sys_device device_i8237A = { |
60 | .id = 0, | 60 | .id = 0, |
61 | .cls = &i8237_sysdev_class, | 61 | .cls = &i8237_sysdev_class, |
62 | }; | 62 | }; |
63 | 63 | ||
64 | static int __init i8237A_init_sysfs(void) | 64 | static int __init i8237A_init_sysfs(void) |
@@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) | |||
68 | error = sysdev_register(&device_i8237A); | 68 | error = sysdev_register(&device_i8237A); |
69 | return error; | 69 | return error; |
70 | } | 70 | } |
71 | |||
72 | device_initcall(i8237A_init_sysfs); | 71 | device_initcall(i8237A_init_sysfs); |
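The resume hunk above is a plain argument-order fix: dma_outb() follows the outb(value, port) convention, so the old code wrote the register constant to I/O port 0 instead of pulsing the 8237 master-clear registers. Roughly, with the conventional port numbers stated here as an assumption:

    #include <sys/io.h>  /* user-space outb(); kernel code uses <asm/io.h> */

    /* Any value written to the master-clear port resets the controller. */
    static void dma_master_clear(void)
    {
            outb(0, 0x0d);   /* DMA1_RESET_REG (assumed port)  */
            outb(0, 0xda);   /* DMA2_RESET_REG (assumed port)  */
    }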
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c1b5e3ece1f2..10f92fb532f3 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void) | |||
114 | * Start pit with the boot cpu mask and make it global after the | 114 | * Start pit with the boot cpu mask and make it global after the |
115 | * IO_APIC has been initialized. | 115 | * IO_APIC has been initialized. |
116 | */ | 116 | */ |
117 | pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 117 | pit_clockevent.cpumask = cpumask_of(smp_processor_id()); |
118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, | 118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, |
119 | pit_clockevent.shift); | 119 | pit_clockevent.shift); |
120 | pit_clockevent.max_delta_ns = | 120 | pit_clockevent.max_delta_ns = |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4b8a53d841f7..11d5093eb281 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -11,15 +11,15 @@ | |||
11 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
12 | #include <linux/sysdev.h> | 12 | #include <linux/sysdev.h> |
13 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
14 | #include <linux/acpi.h> | ||
15 | #include <linux/io.h> | ||
16 | #include <linux/delay.h> | ||
14 | 17 | ||
15 | #include <asm/acpi.h> | ||
16 | #include <asm/atomic.h> | 18 | #include <asm/atomic.h> |
17 | #include <asm/system.h> | 19 | #include <asm/system.h> |
18 | #include <asm/io.h> | ||
19 | #include <asm/timer.h> | 20 | #include <asm/timer.h> |
20 | #include <asm/hw_irq.h> | 21 | #include <asm/hw_irq.h> |
21 | #include <asm/pgtable.h> | 22 | #include <asm/pgtable.h> |
22 | #include <asm/delay.h> | ||
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/arch_hooks.h> | 25 | #include <asm/arch_hooks.h> |
@@ -323,7 +323,7 @@ void init_8259A(int auto_eoi) | |||
323 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | 323 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ |
324 | 324 | ||
325 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, | 325 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, |
326 | to 0x20-0x27 on i386 */ | 326 | to 0x20-0x27 on i386 */ |
327 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); | 327 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); |
328 | 328 | ||
329 | /* 8259A-1 (the master) has a slave on IR2 */ | 329 | /* 8259A-1 (the master) has a slave on IR2 */ |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..df3bf269beab 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -10,11 +10,9 @@ | |||
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/desc.h> | 11 | #include <asm/desc.h> |
12 | 12 | ||
13 | static struct fs_struct init_fs = INIT_FS; | ||
14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
16 | struct mm_struct init_mm = INIT_MM(init_mm); | 15 | struct mm_struct init_mm = INIT_MM(init_mm); |
17 | EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | ||
18 | 16 | ||
19 | /* | 17 | /* |
20 | * Initial thread structure. | 18 | * Initial thread structure. |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2eb..7248ca11bdcd 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Intel IO-APIC support for multi-Pentium hosts. | 2 | * Intel IO-APIC support for multi-Pentium hosts. |
3 | * | 3 | * |
4 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo | 4 | * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo |
5 | * | 5 | * |
6 | * Many thanks to Stig Venaas for trying out countless experimental | 6 | * Many thanks to Stig Venaas for trying out countless experimental |
7 | * patches and reporting/debugging problems patiently! | 7 | * patches and reporting/debugging problems patiently! |
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/idle.h> | 46 | #include <asm/idle.h> |
47 | #include <asm/io.h> | 47 | #include <asm/io.h> |
48 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
49 | #include <asm/cpu.h> | ||
49 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
50 | #include <asm/proto.h> | 51 | #include <asm/proto.h> |
51 | #include <asm/acpi.h> | 52 | #include <asm/acpi.h> |
@@ -61,9 +62,7 @@ | |||
61 | #include <asm/uv/uv_hub.h> | 62 | #include <asm/uv/uv_hub.h> |
62 | #include <asm/uv/uv_irq.h> | 63 | #include <asm/uv/uv_irq.h> |
63 | 64 | ||
64 | #include <mach_ipi.h> | 65 | #include <asm/genapic.h> |
65 | #include <mach_apic.h> | ||
66 | #include <mach_apicdef.h> | ||
67 | 66 | ||
68 | #define __apicdebuginit(type) static type __init | 67 | #define __apicdebuginit(type) static type __init |
69 | 68 | ||
@@ -82,11 +81,11 @@ static DEFINE_SPINLOCK(vector_lock); | |||
82 | int nr_ioapic_registers[MAX_IO_APICS]; | 81 | int nr_ioapic_registers[MAX_IO_APICS]; |
83 | 82 | ||
84 | /* I/O APIC entries */ | 83 | /* I/O APIC entries */ |
85 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | 84 | struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; |
86 | int nr_ioapics; | 85 | int nr_ioapics; |
87 | 86 | ||
88 | /* MP IRQ source entries */ | 87 | /* MP IRQ source entries */ |
89 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 88 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
90 | 89 | ||
91 | /* # of MP IRQ source entries */ | 90 | /* # of MP IRQ source entries */ |
92 | int mp_irq_entries; | 91 | int mp_irq_entries; |
@@ -99,103 +98,293 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |||
99 | 98 | ||
100 | int skip_ioapic_setup; | 99 | int skip_ioapic_setup; |
101 | 100 | ||
101 | void arch_disable_smp_support(void) | ||
102 | { | ||
103 | #ifdef CONFIG_PCI | ||
104 | noioapicquirk = 1; | ||
105 | noioapicreroute = -1; | ||
106 | #endif | ||
107 | skip_ioapic_setup = 1; | ||
108 | } | ||
109 | |||
102 | static int __init parse_noapic(char *str) | 110 | static int __init parse_noapic(char *str) |
103 | { | 111 | { |
104 | /* disable IO-APIC */ | 112 | /* disable IO-APIC */ |
105 | disable_ioapic_setup(); | 113 | arch_disable_smp_support(); |
106 | return 0; | 114 | return 0; |
107 | } | 115 | } |
108 | early_param("noapic", parse_noapic); | 116 | early_param("noapic", parse_noapic); |
109 | 117 | ||
110 | struct irq_pin_list; | 118 | struct irq_pin_list; |
119 | |||
120 | /* | ||
121 | * This is performance-critical, we want to do it O(1) | ||
122 | * | ||
123 | * the indexing order of this array favors 1:1 mappings | ||
124 | * between pins and IRQs. | ||
125 | */ | ||
126 | |||
127 | struct irq_pin_list { | ||
128 | int apic, pin; | ||
129 | struct irq_pin_list *next; | ||
130 | }; | ||
131 | |||
132 | static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) | ||
133 | { | ||
134 | struct irq_pin_list *pin; | ||
135 | int node; | ||
136 | |||
137 | node = cpu_to_node(cpu); | ||
138 | |||
139 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); | ||
140 | |||
141 | return pin; | ||
142 | } | ||
143 | |||
111 | struct irq_cfg { | 144 | struct irq_cfg { |
112 | unsigned int irq; | ||
113 | struct irq_pin_list *irq_2_pin; | 145 | struct irq_pin_list *irq_2_pin; |
114 | cpumask_t domain; | 146 | cpumask_var_t domain; |
115 | cpumask_t old_domain; | 147 | cpumask_var_t old_domain; |
116 | unsigned move_cleanup_count; | 148 | unsigned move_cleanup_count; |
117 | u8 vector; | 149 | u8 vector; |
118 | u8 move_in_progress : 1; | 150 | u8 move_in_progress : 1; |
151 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
152 | u8 move_desc_pending : 1; | ||
153 | #endif | ||
119 | }; | 154 | }; |
120 | 155 | ||
121 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 156 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
157 | #ifdef CONFIG_SPARSE_IRQ | ||
158 | static struct irq_cfg irq_cfgx[] = { | ||
159 | #else | ||
122 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 160 | static struct irq_cfg irq_cfgx[NR_IRQS] = { |
161 | #endif | ||
123 | [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | 161 | #endif |
124 | [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | 162 | [0] = { .vector = IRQ0_VECTOR, }, |
125 | [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | 163 | [1] = { .vector = IRQ1_VECTOR, }, |
126 | [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, | 164 | [2] = { .vector = IRQ2_VECTOR, }, |
127 | [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, | 165 | [3] = { .vector = IRQ3_VECTOR, }, |
128 | [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, | 166 | [4] = { .vector = IRQ4_VECTOR, }, |
129 | [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, | 167 | [5] = { .vector = IRQ5_VECTOR, }, |
130 | [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, | 168 | [6] = { .vector = IRQ6_VECTOR, }, |
131 | [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, | 169 | [7] = { .vector = IRQ7_VECTOR, }, |
132 | [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, | 170 | [8] = { .vector = IRQ8_VECTOR, }, |
133 | [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, | 171 | [9] = { .vector = IRQ9_VECTOR, }, |
134 | [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, | 172 | [10] = { .vector = IRQ10_VECTOR, }, |
135 | [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, | 173 | [11] = { .vector = IRQ11_VECTOR, }, |
136 | [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, | 174 | [12] = { .vector = IRQ12_VECTOR, }, |
137 | [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, | 175 | [13] = { .vector = IRQ13_VECTOR, }, |
138 | [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, | 176 | [14] = { .vector = IRQ14_VECTOR, }, |
177 | [15] = { .vector = IRQ15_VECTOR, }, | ||
139 | }; | 178 | }; |
140 | 179 | ||
141 | #define for_each_irq_cfg(irq, cfg) \ | 180 | int __init arch_early_irq_init(void) |
142 | for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) | 181 | { |
182 | struct irq_cfg *cfg; | ||
183 | struct irq_desc *desc; | ||
184 | int count; | ||
185 | int i; | ||
186 | |||
187 | cfg = irq_cfgx; | ||
188 | count = ARRAY_SIZE(irq_cfgx); | ||
143 | 189 | ||
190 | for (i = 0; i < count; i++) { | ||
191 | desc = irq_to_desc(i); | ||
192 | desc->chip_data = &cfg[i]; | ||
193 | alloc_bootmem_cpumask_var(&cfg[i].domain); | ||
194 | alloc_bootmem_cpumask_var(&cfg[i].old_domain); | ||
195 | if (i < NR_IRQS_LEGACY) | ||
196 | cpumask_setall(cfg[i].domain); | ||
197 | } | ||
198 | |||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | #ifdef CONFIG_SPARSE_IRQ | ||
144 | static struct irq_cfg *irq_cfg(unsigned int irq) | 203 | static struct irq_cfg *irq_cfg(unsigned int irq) |
145 | { | 204 | { |
146 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 205 | struct irq_cfg *cfg = NULL; |
206 | struct irq_desc *desc; | ||
207 | |||
208 | desc = irq_to_desc(irq); | ||
209 | if (desc) | ||
210 | cfg = desc->chip_data; | ||
211 | |||
212 | return cfg; | ||
147 | } | 213 | } |
148 | 214 | ||
149 | static struct irq_cfg *irq_cfg_alloc(unsigned int irq) | 215 | static struct irq_cfg *get_one_free_irq_cfg(int cpu) |
150 | { | 216 | { |
151 | return irq_cfg(irq); | 217 | struct irq_cfg *cfg; |
218 | int node; | ||
219 | |||
220 | node = cpu_to_node(cpu); | ||
221 | |||
222 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | ||
223 | if (cfg) { | ||
224 | if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { | ||
225 | kfree(cfg); | ||
226 | cfg = NULL; | ||
227 | } else if (!alloc_cpumask_var_node(&cfg->old_domain, | ||
228 | GFP_ATOMIC, node)) { | ||
229 | free_cpumask_var(cfg->domain); | ||
230 | kfree(cfg); | ||
231 | cfg = NULL; | ||
232 | } else { | ||
233 | cpumask_clear(cfg->domain); | ||
234 | cpumask_clear(cfg->old_domain); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | return cfg; | ||
152 | } | 239 | } |
153 | 240 | ||
154 | /* | 241 | int arch_init_chip_data(struct irq_desc *desc, int cpu) |
155 | * Rough estimation of how many shared IRQs there are, can be changed | 242 | { |
156 | * anytime. | 243 | struct irq_cfg *cfg; |
157 | */ | ||
158 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | ||
159 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | ||
160 | 244 | ||
161 | /* | 245 | cfg = desc->chip_data; |
162 | * This is performance-critical, we want to do it O(1) | 246 | if (!cfg) { |
163 | * | 247 | desc->chip_data = get_one_free_irq_cfg(cpu); |
164 | * the indexing order of this array favors 1:1 mappings | 248 | if (!desc->chip_data) { |
165 | * between pins and IRQs. | 249 | printk(KERN_ERR "can not alloc irq_cfg\n"); |
166 | */ | 250 | BUG_ON(1); |
251 | } | ||
252 | } | ||
167 | 253 | ||
168 | struct irq_pin_list { | 254 | return 0; |
169 | int apic, pin; | 255 | } |
170 | struct irq_pin_list *next; | ||
171 | }; | ||
172 | 256 | ||
173 | static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; | 257 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
174 | static struct irq_pin_list *irq_2_pin_ptr; | ||
175 | 258 | ||
176 | static void __init irq_2_pin_init(void) | 259 | static void |
260 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | ||
177 | { | 261 | { |
178 | struct irq_pin_list *pin = irq_2_pin_head; | 262 | struct irq_pin_list *old_entry, *head, *tail, *entry; |
179 | int i; | ||
180 | 263 | ||
181 | for (i = 1; i < PIN_MAP_SIZE; i++) | 264 | cfg->irq_2_pin = NULL; |
182 | pin[i-1].next = &pin[i]; | 265 | old_entry = old_cfg->irq_2_pin; |
266 | if (!old_entry) | ||
267 | return; | ||
183 | 268 | ||
184 | irq_2_pin_ptr = &pin[0]; | 269 | entry = get_one_free_irq_2_pin(cpu); |
270 | if (!entry) | ||
271 | return; | ||
272 | |||
273 | entry->apic = old_entry->apic; | ||
274 | entry->pin = old_entry->pin; | ||
275 | head = entry; | ||
276 | tail = entry; | ||
277 | old_entry = old_entry->next; | ||
278 | while (old_entry) { | ||
279 | entry = get_one_free_irq_2_pin(cpu); | ||
280 | if (!entry) { | ||
281 | entry = head; | ||
282 | while (entry) { | ||
283 | head = entry->next; | ||
284 | kfree(entry); | ||
285 | entry = head; | ||
286 | } | ||
287 | /* still use the old one */ | ||
288 | return; | ||
289 | } | ||
290 | entry->apic = old_entry->apic; | ||
291 | entry->pin = old_entry->pin; | ||
292 | tail->next = entry; | ||
293 | tail = entry; | ||
294 | old_entry = old_entry->next; | ||
295 | } | ||
296 | |||
297 | tail->next = NULL; | ||
298 | cfg->irq_2_pin = head; | ||
185 | } | 299 | } |
186 | 300 | ||
187 | static struct irq_pin_list *get_one_free_irq_2_pin(void) | 301 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) |
188 | { | 302 | { |
189 | struct irq_pin_list *pin = irq_2_pin_ptr; | 303 | struct irq_pin_list *entry, *next; |
190 | 304 | ||
191 | if (!pin) | 305 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) |
192 | panic("can not get more irq_2_pin\n"); | 306 | return; |
193 | 307 | ||
194 | irq_2_pin_ptr = pin->next; | 308 | entry = old_cfg->irq_2_pin; |
195 | pin->next = NULL; | 309 | |
196 | return pin; | 310 | while (entry) { |
311 | next = entry->next; | ||
312 | kfree(entry); | ||
313 | entry = next; | ||
314 | } | ||
315 | old_cfg->irq_2_pin = NULL; | ||
316 | } | ||
317 | |||
318 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | ||
319 | struct irq_desc *desc, int cpu) | ||
320 | { | ||
321 | struct irq_cfg *cfg; | ||
322 | struct irq_cfg *old_cfg; | ||
323 | |||
324 | cfg = get_one_free_irq_cfg(cpu); | ||
325 | |||
326 | if (!cfg) | ||
327 | return; | ||
328 | |||
329 | desc->chip_data = cfg; | ||
330 | |||
331 | old_cfg = old_desc->chip_data; | ||
332 | |||
333 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | ||
334 | |||
335 | init_copy_irq_2_pin(old_cfg, cfg, cpu); | ||
336 | } | ||
337 | |||
338 | static void free_irq_cfg(struct irq_cfg *old_cfg) | ||
339 | { | ||
340 | kfree(old_cfg); | ||
341 | } | ||
342 | |||
343 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | ||
344 | { | ||
345 | struct irq_cfg *old_cfg, *cfg; | ||
346 | |||
347 | old_cfg = old_desc->chip_data; | ||
348 | cfg = desc->chip_data; | ||
349 | |||
350 | if (old_cfg == cfg) | ||
351 | return; | ||
352 | |||
353 | if (old_cfg) { | ||
354 | free_irq_2_pin(old_cfg, cfg); | ||
355 | free_irq_cfg(old_cfg); | ||
356 | old_desc->chip_data = NULL; | ||
357 | } | ||
197 | } | 358 | } |
198 | 359 | ||
360 | static void | ||
361 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
362 | { | ||
363 | struct irq_cfg *cfg = desc->chip_data; | ||
364 | |||
365 | if (!cfg->move_in_progress) { | ||
366 | /* it means that domain is not changed */ | ||
367 | if (!cpumask_intersects(desc->affinity, mask)) | ||
368 | cfg->move_desc_pending = 1; | ||
369 | } | ||
370 | } | ||
371 | #endif | ||
372 | |||
373 | #else | ||
374 | static struct irq_cfg *irq_cfg(unsigned int irq) | ||
375 | { | ||
376 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
377 | } | ||
378 | |||
379 | #endif | ||
380 | |||
381 | #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
382 | static inline void | ||
383 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
384 | { | ||
385 | } | ||
386 | #endif | ||
387 | |||
199 | struct io_apic { | 388 | struct io_apic { |
200 | unsigned int index; | 389 | unsigned int index; |
201 | unsigned int unused[3]; | 390 | unsigned int unused[3]; |
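get_one_free_irq_cfg() above shows the allocation pattern this commit introduces throughout the file: with CONFIG_CPUMASK_OFFSTACK each cpumask_var_t needs its own allocation, so every later failure has to unwind everything the earlier steps acquired. A condensed sketch of the same shape using kernel-style goto cleanup, assuming the same alloc_cpumask_var_node()/free_cpumask_var() API:

    struct cfg_sketch {
            cpumask_var_t domain;
            cpumask_var_t old_domain;
    };

    static struct cfg_sketch *alloc_cfg_sketch(int node)
    {
            struct cfg_sketch *cfg =
                    kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);

            if (!cfg)
                    return NULL;
            if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node))
                    goto free_cfg;
            if (!alloc_cpumask_var_node(&cfg->old_domain, GFP_ATOMIC, node))
                    goto free_domain;
            return cfg;

    free_domain:
            free_cpumask_var(cfg->domain);
    free_cfg:
            kfree(cfg);
            return NULL;
    }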
@@ -205,7 +394,7 @@ struct io_apic { | |||
205 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 394 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
206 | { | 395 | { |
207 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 396 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
208 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); | 397 | + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); |
209 | } | 398 | } |
210 | 399 | ||
211 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 400 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
@@ -237,11 +426,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned | |||
237 | writel(value, &io_apic->data); | 426 | writel(value, &io_apic->data); |
238 | } | 427 | } |
239 | 428 | ||
240 | static bool io_apic_level_ack_pending(unsigned int irq) | 429 | static bool io_apic_level_ack_pending(struct irq_cfg *cfg) |
241 | { | 430 | { |
242 | struct irq_pin_list *entry; | 431 | struct irq_pin_list *entry; |
243 | unsigned long flags; | 432 | unsigned long flags; |
244 | struct irq_cfg *cfg = irq_cfg(irq); | ||
245 | 433 | ||
246 | spin_lock_irqsave(&ioapic_lock, flags); | 434 | spin_lock_irqsave(&ioapic_lock, flags); |
247 | entry = cfg->irq_2_pin; | 435 | entry = cfg->irq_2_pin; |
@@ -298,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
298 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 486 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
299 | } | 487 | } |
300 | 488 | ||
301 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 489 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
302 | { | 490 | { |
303 | unsigned long flags; | 491 | unsigned long flags; |
304 | spin_lock_irqsave(&ioapic_lock, flags); | 492 | spin_lock_irqsave(&ioapic_lock, flags); |
@@ -323,13 +511,32 @@ static void ioapic_mask_entry(int apic, int pin) | |||
323 | } | 511 | } |
324 | 512 | ||
325 | #ifdef CONFIG_SMP | 513 | #ifdef CONFIG_SMP |
326 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | 514 | static void send_cleanup_vector(struct irq_cfg *cfg) |
515 | { | ||
516 | cpumask_var_t cleanup_mask; | ||
517 | |||
518 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | ||
519 | unsigned int i; | ||
520 | cfg->move_cleanup_count = 0; | ||
521 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
522 | cfg->move_cleanup_count++; | ||
523 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
524 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | ||
525 | } else { | ||
526 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | ||
527 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
528 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
529 | free_cpumask_var(cleanup_mask); | ||
530 | } | ||
531 | cfg->move_in_progress = 0; | ||
532 | } | ||
533 | |||
534 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
327 | { | 535 | { |
328 | int apic, pin; | 536 | int apic, pin; |
329 | struct irq_cfg *cfg; | ||
330 | struct irq_pin_list *entry; | 537 | struct irq_pin_list *entry; |
538 | u8 vector = cfg->vector; | ||
331 | 539 | ||
332 | cfg = irq_cfg(irq); | ||
333 | entry = cfg->irq_2_pin; | 540 | entry = cfg->irq_2_pin; |
334 | for (;;) { | 541 | for (;;) { |
335 | unsigned int reg; | 542 | unsigned int reg; |
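send_cleanup_vector() in the hunk above is worth a second look for its failure path: when the temporary cpumask cannot be allocated it falls back to for_each_cpu_and(), trading a little per-iteration work for not needing the scratch mask at all. The shape, reduced to its essentials (visit() is a hypothetical helper, not a kernel function):

    static void visit_online_subset(const struct cpumask *set,
                                    void (*visit)(unsigned int cpu))
    {
            cpumask_var_t tmp;
            unsigned int cpu;

            if (likely(alloc_cpumask_var(&tmp, GFP_ATOMIC))) {
                    cpumask_and(tmp, set, cpu_online_mask);
                    for_each_cpu(cpu, tmp)
                            visit(cpu);
                    free_cpumask_var(tmp);
            } else {
                    /* No scratch mask: intersect lazily on each step. */
                    for_each_cpu_and(cpu, set, cpu_online_mask)
                            visit(cpu);
            }
    }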
@@ -359,36 +566,63 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
359 | } | 566 | } |
360 | } | 567 | } |
361 | 568 | ||
362 | static int assign_irq_vector(int irq, cpumask_t mask); | 569 | static int |
570 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
571 | |||
572 | /* | ||
573 | * Either sets desc->affinity to a valid value, and returns | ||
574 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | ||
575 | * leaves desc->affinity untouched. | ||
576 | */ | ||
577 | static unsigned int | ||
578 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | ||
579 | { | ||
580 | struct irq_cfg *cfg; | ||
581 | unsigned int irq; | ||
363 | 582 | ||
364 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 583 | if (!cpumask_intersects(mask, cpu_online_mask)) |
584 | return BAD_APICID; | ||
585 | |||
586 | irq = desc->irq; | ||
587 | cfg = desc->chip_data; | ||
588 | if (assign_irq_vector(irq, cfg, mask)) | ||
589 | return BAD_APICID; | ||
590 | |||
591 | cpumask_and(desc->affinity, cfg->domain, mask); | ||
592 | set_extra_move_desc(desc, mask); | ||
593 | |||
594 | return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); | ||
595 | } | ||
596 | |||
597 | static void | ||
598 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
365 | { | 599 | { |
366 | struct irq_cfg *cfg; | 600 | struct irq_cfg *cfg; |
367 | unsigned long flags; | 601 | unsigned long flags; |
368 | unsigned int dest; | 602 | unsigned int dest; |
369 | cpumask_t tmp; | 603 | unsigned int irq; |
370 | struct irq_desc *desc; | ||
371 | 604 | ||
372 | cpus_and(tmp, mask, cpu_online_map); | 605 | irq = desc->irq; |
373 | if (cpus_empty(tmp)) | 606 | cfg = desc->chip_data; |
374 | return; | ||
375 | 607 | ||
376 | cfg = irq_cfg(irq); | 608 | spin_lock_irqsave(&ioapic_lock, flags); |
377 | if (assign_irq_vector(irq, mask)) | 609 | dest = set_desc_affinity(desc, mask); |
378 | return; | 610 | if (dest != BAD_APICID) { |
611 | /* Only the high 8 bits are valid. */ | ||
612 | dest = SET_APIC_LOGICAL_ID(dest); | ||
613 | __target_IO_APIC_irq(irq, dest, cfg); | ||
614 | } | ||
615 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
616 | } | ||
379 | 617 | ||
380 | cpus_and(tmp, cfg->domain, mask); | 618 | static void |
381 | dest = cpu_mask_to_apicid(tmp); | 619 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) |
382 | /* | 620 | { |
383 | * Only the high 8 bits are valid. | 621 | struct irq_desc *desc; |
384 | */ | ||
385 | dest = SET_APIC_LOGICAL_ID(dest); | ||
386 | 622 | ||
387 | desc = irq_to_desc(irq); | 623 | desc = irq_to_desc(irq); |
388 | spin_lock_irqsave(&ioapic_lock, flags); | 624 | |
389 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 625 | set_ioapic_affinity_irq_desc(desc, mask); |
390 | desc->affinity = mask; | ||
391 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
392 | } | 626 | } |
393 | #endif /* CONFIG_SMP */ | 627 | #endif /* CONFIG_SMP */ |
394 | 628 | ||
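The "Only the high 8 bits are valid" comment above refers to the xAPIC logical destination format: the 8-bit logical APIC ID lives in bits 31:24 of the destination field, which is all SET_APIC_LOGICAL_ID() encodes. A hedged one-line equivalent:

    #define SET_APIC_LOGICAL_ID_SKETCH(id)  ((unsigned int)(id) << 24)

set_desc_affinity() hands back a value ready for this shift, or BAD_APICID when the requested mask does not intersect the online cpus, which is exactly the contract its comment block states.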
@@ -397,16 +631,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
397 | * shared ISA-space IRQs, so we have to support them. We are super | 631 | * shared ISA-space IRQs, so we have to support them. We are super |
398 | * fast in the common case, and fast for shared ISA-space IRQs. | 632 | * fast in the common case, and fast for shared ISA-space IRQs. |
399 | */ | 633 | */ |
400 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | 634 | static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) |
401 | { | 635 | { |
402 | struct irq_cfg *cfg; | ||
403 | struct irq_pin_list *entry; | 636 | struct irq_pin_list *entry; |
404 | 637 | ||
405 | /* first time to refer irq_cfg, so with new */ | ||
406 | cfg = irq_cfg_alloc(irq); | ||
407 | entry = cfg->irq_2_pin; | 638 | entry = cfg->irq_2_pin; |
408 | if (!entry) { | 639 | if (!entry) { |
409 | entry = get_one_free_irq_2_pin(); | 640 | entry = get_one_free_irq_2_pin(cpu); |
641 | if (!entry) { | ||
642 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | ||
643 | apic, pin); | ||
644 | return; | ||
645 | } | ||
410 | cfg->irq_2_pin = entry; | 646 | cfg->irq_2_pin = entry; |
411 | entry->apic = apic; | 647 | entry->apic = apic; |
412 | entry->pin = pin; | 648 | entry->pin = pin; |
@@ -421,7 +657,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
421 | entry = entry->next; | 657 | entry = entry->next; |
422 | } | 658 | } |
423 | 659 | ||
424 | entry->next = get_one_free_irq_2_pin(); | 660 | entry->next = get_one_free_irq_2_pin(cpu); |
425 | entry = entry->next; | 661 | entry = entry->next; |
426 | entry->apic = apic; | 662 | entry->apic = apic; |
427 | entry->pin = pin; | 663 | entry->pin = pin; |
@@ -430,11 +666,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
430 | /* | 666 | /* |
431 | * Reroute an IRQ to a different pin. | 667 | * Reroute an IRQ to a different pin. |
432 | */ | 668 | */ |
433 | static void __init replace_pin_at_irq(unsigned int irq, | 669 | static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, |
434 | int oldapic, int oldpin, | 670 | int oldapic, int oldpin, |
435 | int newapic, int newpin) | 671 | int newapic, int newpin) |
436 | { | 672 | { |
437 | struct irq_cfg *cfg = irq_cfg(irq); | ||
438 | struct irq_pin_list *entry = cfg->irq_2_pin; | 673 | struct irq_pin_list *entry = cfg->irq_2_pin; |
439 | int replaced = 0; | 674 | int replaced = 0; |
440 | 675 | ||
@@ -451,18 +686,16 @@ static void __init replace_pin_at_irq(unsigned int irq, | |||
451 | 686 | ||
452 | /* why? call replace before add? */ | 687 | /* why? call replace before add? */ |
453 | if (!replaced) | 688 | if (!replaced) |
454 | add_pin_to_irq(irq, newapic, newpin); | 689 | add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); |
455 | } | 690 | } |
456 | 691 | ||
457 | static inline void io_apic_modify_irq(unsigned int irq, | 692 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, |
458 | int mask_and, int mask_or, | 693 | int mask_and, int mask_or, |
459 | void (*final)(struct irq_pin_list *entry)) | 694 | void (*final)(struct irq_pin_list *entry)) |
460 | { | 695 | { |
461 | int pin; | 696 | int pin; |
462 | struct irq_cfg *cfg; | ||
463 | struct irq_pin_list *entry; | 697 | struct irq_pin_list *entry; |
464 | 698 | ||
465 | cfg = irq_cfg(irq); | ||
466 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { | 699 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { |
467 | unsigned int reg; | 700 | unsigned int reg; |
468 | pin = entry->pin; | 701 | pin = entry->pin; |
@@ -475,13 +708,13 @@ static inline void io_apic_modify_irq(unsigned int irq, | |||
475 | } | 708 | } |
476 | } | 709 | } |
477 | 710 | ||
478 | static void __unmask_IO_APIC_irq(unsigned int irq) | 711 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
479 | { | 712 | { |
480 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); | 713 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
481 | } | 714 | } |
482 | 715 | ||
483 | #ifdef CONFIG_X86_64 | 716 | #ifdef CONFIG_X86_64 |
484 | void io_apic_sync(struct irq_pin_list *entry) | 717 | static void io_apic_sync(struct irq_pin_list *entry) |
485 | { | 718 | { |
486 | /* | 719 | /* |
487 | * Synchronize the IO-APIC and the CPU by doing | 720 | * Synchronize the IO-APIC and the CPU by doing |
@@ -492,47 +725,64 @@ void io_apic_sync(struct irq_pin_list *entry) | |||
492 | readl(&io_apic->data); | 725 | readl(&io_apic->data); |
493 | } | 726 | } |
494 | 727 | ||
495 | static void __mask_IO_APIC_irq(unsigned int irq) | 728 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
496 | { | 729 | { |
497 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 730 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
498 | } | 731 | } |
499 | #else /* CONFIG_X86_32 */ | 732 | #else /* CONFIG_X86_32 */ |
500 | static void __mask_IO_APIC_irq(unsigned int irq) | 733 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
501 | { | 734 | { |
502 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); | 735 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); |
503 | } | 736 | } |
504 | 737 | ||
505 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | 738 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) |
506 | { | 739 | { |
507 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, | 740 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
508 | IO_APIC_REDIR_MASKED, NULL); | 741 | IO_APIC_REDIR_MASKED, NULL); |
509 | } | 742 | } |
510 | 743 | ||
511 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | 744 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) |
512 | { | 745 | { |
513 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, | 746 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, |
514 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 747 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
515 | } | 748 | } |
516 | #endif /* CONFIG_X86_32 */ | 749 | #endif /* CONFIG_X86_32 */ |
517 | 750 | ||
518 | static void mask_IO_APIC_irq (unsigned int irq) | 751 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
519 | { | 752 | { |
753 | struct irq_cfg *cfg = desc->chip_data; | ||
520 | unsigned long flags; | 754 | unsigned long flags; |
521 | 755 | ||
756 | BUG_ON(!cfg); | ||
757 | |||
522 | spin_lock_irqsave(&ioapic_lock, flags); | 758 | spin_lock_irqsave(&ioapic_lock, flags); |
523 | __mask_IO_APIC_irq(irq); | 759 | __mask_IO_APIC_irq(cfg); |
524 | spin_unlock_irqrestore(&ioapic_lock, flags); | 760 | spin_unlock_irqrestore(&ioapic_lock, flags); |
525 | } | 761 | } |
526 | 762 | ||
527 | static void unmask_IO_APIC_irq (unsigned int irq) | 763 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
528 | { | 764 | { |
765 | struct irq_cfg *cfg = desc->chip_data; | ||
529 | unsigned long flags; | 766 | unsigned long flags; |
530 | 767 | ||
531 | spin_lock_irqsave(&ioapic_lock, flags); | 768 | spin_lock_irqsave(&ioapic_lock, flags); |
532 | __unmask_IO_APIC_irq(irq); | 769 | __unmask_IO_APIC_irq(cfg); |
533 | spin_unlock_irqrestore(&ioapic_lock, flags); | 770 | spin_unlock_irqrestore(&ioapic_lock, flags); |
534 | } | 771 | } |
535 | 772 | ||
773 | static void mask_IO_APIC_irq(unsigned int irq) | ||
774 | { | ||
775 | struct irq_desc *desc = irq_to_desc(irq); | ||
776 | |||
777 | mask_IO_APIC_irq_desc(desc); | ||
778 | } | ||
779 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
780 | { | ||
781 | struct irq_desc *desc = irq_to_desc(irq); | ||
782 | |||
783 | unmask_IO_APIC_irq_desc(desc); | ||
784 | } | ||
785 | |||
536 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 786 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
537 | { | 787 | { |
538 | struct IO_APIC_route_entry entry; | 788 | struct IO_APIC_route_entry entry; |
@@ -556,23 +806,6 @@ static void clear_IO_APIC (void) | |||
556 | clear_IO_APIC_pin(apic, pin); | 806 | clear_IO_APIC_pin(apic, pin); |
557 | } | 807 | } |
558 | 808 | ||
559 | #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) | ||
560 | void send_IPI_self(int vector) | ||
561 | { | ||
562 | unsigned int cfg; | ||
563 | |||
564 | /* | ||
565 | * Wait for idle. | ||
566 | */ | ||
567 | apic_wait_icr_idle(); | ||
568 | cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; | ||
569 | /* | ||
570 | * Send the IPI. The write to APIC_ICR fires this off. | ||
571 | */ | ||
572 | apic_write(APIC_ICR, cfg); | ||
573 | } | ||
574 | #endif /* !CONFIG_SMP && CONFIG_X86_32*/ | ||
575 | |||
576 | #ifdef CONFIG_X86_32 | 809 | #ifdef CONFIG_X86_32 |
577 | /* | 810 | /* |
578 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to | 811 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to |
@@ -704,10 +937,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
704 | int i; | 937 | int i; |
705 | 938 | ||
706 | for (i = 0; i < mp_irq_entries; i++) | 939 | for (i = 0; i < mp_irq_entries; i++) |
707 | if (mp_irqs[i].mp_irqtype == type && | 940 | if (mp_irqs[i].irqtype == type && |
708 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || | 941 | (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || |
709 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && | 942 | mp_irqs[i].dstapic == MP_APIC_ALL) && |
710 | mp_irqs[i].mp_dstirq == pin) | 943 | mp_irqs[i].dstirq == pin) |
711 | return i; | 944 | return i; |
712 | 945 | ||
713 | return -1; | 946 | return -1; |
@@ -721,13 +954,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
721 | int i; | 954 | int i; |
722 | 955 | ||
723 | for (i = 0; i < mp_irq_entries; i++) { | 956 | for (i = 0; i < mp_irq_entries; i++) { |
724 | int lbus = mp_irqs[i].mp_srcbus; | 957 | int lbus = mp_irqs[i].srcbus; |
725 | 958 | ||
726 | if (test_bit(lbus, mp_bus_not_pci) && | 959 | if (test_bit(lbus, mp_bus_not_pci) && |
727 | (mp_irqs[i].mp_irqtype == type) && | 960 | (mp_irqs[i].irqtype == type) && |
728 | (mp_irqs[i].mp_srcbusirq == irq)) | 961 | (mp_irqs[i].srcbusirq == irq)) |
729 | 962 | ||
730 | return mp_irqs[i].mp_dstirq; | 963 | return mp_irqs[i].dstirq; |
731 | } | 964 | } |
732 | return -1; | 965 | return -1; |
733 | } | 966 | } |
@@ -737,17 +970,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
737 | int i; | 970 | int i; |
738 | 971 | ||
739 | for (i = 0; i < mp_irq_entries; i++) { | 972 | for (i = 0; i < mp_irq_entries; i++) { |
740 | int lbus = mp_irqs[i].mp_srcbus; | 973 | int lbus = mp_irqs[i].srcbus; |
741 | 974 | ||
742 | if (test_bit(lbus, mp_bus_not_pci) && | 975 | if (test_bit(lbus, mp_bus_not_pci) && |
743 | (mp_irqs[i].mp_irqtype == type) && | 976 | (mp_irqs[i].irqtype == type) && |
744 | (mp_irqs[i].mp_srcbusirq == irq)) | 977 | (mp_irqs[i].srcbusirq == irq)) |
745 | break; | 978 | break; |
746 | } | 979 | } |
747 | if (i < mp_irq_entries) { | 980 | if (i < mp_irq_entries) { |
748 | int apic; | 981 | int apic; |
749 | for(apic = 0; apic < nr_ioapics; apic++) { | 982 | for(apic = 0; apic < nr_ioapics; apic++) { |
750 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) | 983 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) |
751 | return apic; | 984 | return apic; |
752 | } | 985 | } |
753 | } | 986 | } |
@@ -772,23 +1005,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
772 | return -1; | 1005 | return -1; |
773 | } | 1006 | } |
774 | for (i = 0; i < mp_irq_entries; i++) { | 1007 | for (i = 0; i < mp_irq_entries; i++) { |
775 | int lbus = mp_irqs[i].mp_srcbus; | 1008 | int lbus = mp_irqs[i].srcbus; |
776 | 1009 | ||
777 | for (apic = 0; apic < nr_ioapics; apic++) | 1010 | for (apic = 0; apic < nr_ioapics; apic++) |
778 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || | 1011 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || |
779 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) | 1012 | mp_irqs[i].dstapic == MP_APIC_ALL) |
780 | break; | 1013 | break; |
781 | 1014 | ||
782 | if (!test_bit(lbus, mp_bus_not_pci) && | 1015 | if (!test_bit(lbus, mp_bus_not_pci) && |
783 | !mp_irqs[i].mp_irqtype && | 1016 | !mp_irqs[i].irqtype && |
784 | (bus == lbus) && | 1017 | (bus == lbus) && |
785 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { | 1018 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { |
786 | int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); | 1019 | int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); |
787 | 1020 | ||
788 | if (!(apic || IO_APIC_IRQ(irq))) | 1021 | if (!(apic || IO_APIC_IRQ(irq))) |
789 | continue; | 1022 | continue; |
790 | 1023 | ||
791 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) | 1024 | if (pin == (mp_irqs[i].srcbusirq & 3)) |
792 | return irq; | 1025 | return irq; |
793 | /* | 1026 | /* |
794 | * Use the first all-but-pin matching entry as a | 1027 | * Use the first all-but-pin matching entry as a |
@@ -809,7 +1042,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | |||
809 | */ | 1042 | */ |
810 | static int EISA_ELCR(unsigned int irq) | 1043 | static int EISA_ELCR(unsigned int irq) |
811 | { | 1044 | { |
812 | if (irq < 16) { | 1045 | if (irq < NR_IRQS_LEGACY) { |
813 | unsigned int port = 0x4d0 + (irq >> 3); | 1046 | unsigned int port = 0x4d0 + (irq >> 3); |
814 | return (inb(port) >> (irq & 7)) & 1; | 1047 | return (inb(port) >> (irq & 7)) & 1; |
815 | } | 1048 | } |
@@ -831,7 +1064,7 @@ static int EISA_ELCR(unsigned int irq) | |||
831 | * EISA conforming in the MP table, that means its trigger type must | 1064 | * EISA conforming in the MP table, that means its trigger type must |
832 | * be read in from the ELCR */ | 1065 | * be read in from the ELCR */ |
833 | 1066 | ||
834 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) | 1067 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) |
835 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | 1068 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) |
836 | 1069 | ||
837 | /* PCI interrupts are always polarity one level triggered, | 1070 | /* PCI interrupts are always polarity one level triggered, |
@@ -848,13 +1081,13 @@ static int EISA_ELCR(unsigned int irq) | |||
848 | 1081 | ||
849 | static int MPBIOS_polarity(int idx) | 1082 | static int MPBIOS_polarity(int idx) |
850 | { | 1083 | { |
851 | int bus = mp_irqs[idx].mp_srcbus; | 1084 | int bus = mp_irqs[idx].srcbus; |
852 | int polarity; | 1085 | int polarity; |
853 | 1086 | ||
854 | /* | 1087 | /* |
855 | * Determine IRQ line polarity (high active or low active): | 1088 | * Determine IRQ line polarity (high active or low active): |
856 | */ | 1089 | */ |
857 | switch (mp_irqs[idx].mp_irqflag & 3) | 1090 | switch (mp_irqs[idx].irqflag & 3) |
858 | { | 1091 | { |
859 | case 0: /* conforms, ie. bus-type dependent polarity */ | 1092 | case 0: /* conforms, ie. bus-type dependent polarity */ |
860 | if (test_bit(bus, mp_bus_not_pci)) | 1093 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -890,13 +1123,13 @@ static int MPBIOS_polarity(int idx) | |||
890 | 1123 | ||
891 | static int MPBIOS_trigger(int idx) | 1124 | static int MPBIOS_trigger(int idx) |
892 | { | 1125 | { |
893 | int bus = mp_irqs[idx].mp_srcbus; | 1126 | int bus = mp_irqs[idx].srcbus; |
894 | int trigger; | 1127 | int trigger; |
895 | 1128 | ||
896 | /* | 1129 | /* |
897 | * Determine IRQ trigger mode (edge or level sensitive): | 1130 | * Determine IRQ trigger mode (edge or level sensitive): |
898 | */ | 1131 | */ |
899 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) | 1132 | switch ((mp_irqs[idx].irqflag>>2) & 3) |
900 | { | 1133 | { |
901 | case 0: /* conforms, ie. bus-type dependent */ | 1134 | case 0: /* conforms, ie. bus-type dependent */ |
902 | if (test_bit(bus, mp_bus_not_pci)) | 1135 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -974,16 +1207,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); | |||
974 | static int pin_2_irq(int idx, int apic, int pin) | 1207 | static int pin_2_irq(int idx, int apic, int pin) |
975 | { | 1208 | { |
976 | int irq, i; | 1209 | int irq, i; |
977 | int bus = mp_irqs[idx].mp_srcbus; | 1210 | int bus = mp_irqs[idx].srcbus; |
978 | 1211 | ||
979 | /* | 1212 | /* |
980 | * Debugging check, we are in big trouble if this message pops up! | 1213 | * Debugging check, we are in big trouble if this message pops up! |
981 | */ | 1214 | */ |
982 | if (mp_irqs[idx].mp_dstirq != pin) | 1215 | if (mp_irqs[idx].dstirq != pin) |
983 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 1216 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
984 | 1217 | ||
985 | if (test_bit(bus, mp_bus_not_pci)) { | 1218 | if (test_bit(bus, mp_bus_not_pci)) { |
986 | irq = mp_irqs[idx].mp_srcbusirq; | 1219 | irq = mp_irqs[idx].srcbusirq; |
987 | } else { | 1220 | } else { |
988 | /* | 1221 | /* |
989 | * PCI IRQs are mapped in order | 1222 | * PCI IRQs are mapped in order |
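
The PCI branch of pin_2_irq() is cut off at this hunk boundary. Under the scheme the comment names, PCI pins are numbered sequentially across IO-APICs, so the global IRQ is the pin index plus the pin counts of all earlier IO-APICs, while the ISA branch above uses the bus-local srcbusirq directly. A hypothetical reconstruction (the nr_ioapic_registers values are made up):

#include <stdio.h>

#define MAX_IOAPICS 4

static int nr_ioapic_registers[MAX_IOAPICS] = { 24, 24, 0, 0 };

/* "PCI IRQs are mapped in order": sum the pins of every IO-APIC below
 * this one, then add the pin index within this IO-APIC. */
static int pci_pin_to_irq(int apic, int pin)
{
    int i, irq = 0;

    for (i = 0; i < apic; i++)
        irq += nr_ioapic_registers[i];
    return irq + pin;
}

int main(void)
{
    printf("IOAPIC 0 pin 19 -> IRQ %d\n", pci_pin_to_irq(0, 19));
    printf("IOAPIC 1 pin  3 -> IRQ %d\n", pci_pin_to_irq(1, 3));
    return 0;
}
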
@@ -1034,7 +1267,8 @@ void unlock_vector_lock(void) | |||
1034 | spin_unlock(&vector_lock); | 1267 | spin_unlock(&vector_lock); |
1035 | } | 1268 | } |
1036 | 1269 | ||
1037 | static int __assign_irq_vector(int irq, cpumask_t mask) | 1270 | static int |
1271 | __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1038 | { | 1272 | { |
1039 | /* | 1273 | /* |
1040 | * NOTE! The local APIC isn't very good at handling | 1274 | * NOTE! The local APIC isn't very good at handling |
@@ -1049,52 +1283,49 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
1049 | */ | 1283 | */ |
1050 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1284 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; |
1051 | unsigned int old_vector; | 1285 | unsigned int old_vector; |
1052 | int cpu; | 1286 | int cpu, err; |
1053 | struct irq_cfg *cfg; | 1287 | cpumask_var_t tmp_mask; |
1054 | |||
1055 | cfg = irq_cfg(irq); | ||
1056 | |||
1057 | /* Only try and allocate irqs on cpus that are present */ | ||
1058 | cpus_and(mask, mask, cpu_online_map); | ||
1059 | 1288 | ||
1060 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1289 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) |
1061 | return -EBUSY; | 1290 | return -EBUSY; |
1062 | 1291 | ||
1292 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | ||
1293 | return -ENOMEM; | ||
1294 | |||
1063 | old_vector = cfg->vector; | 1295 | old_vector = cfg->vector; |
1064 | if (old_vector) { | 1296 | if (old_vector) { |
1065 | cpumask_t tmp; | 1297 | cpumask_and(tmp_mask, mask, cpu_online_mask); |
1066 | cpus_and(tmp, cfg->domain, mask); | 1298 | cpumask_and(tmp_mask, cfg->domain, tmp_mask); |
1067 | if (!cpus_empty(tmp)) | 1299 | if (!cpumask_empty(tmp_mask)) { |
1300 | free_cpumask_var(tmp_mask); | ||
1068 | return 0; | 1301 | return 0; |
1302 | } | ||
1069 | } | 1303 | } |
1070 | 1304 | ||
1071 | for_each_cpu_mask_nr(cpu, mask) { | 1305 | /* Only try and allocate irqs on cpus that are present */ |
1072 | cpumask_t domain, new_mask; | 1306 | err = -ENOSPC; |
1307 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | ||
1073 | int new_cpu; | 1308 | int new_cpu; |
1074 | int vector, offset; | 1309 | int vector, offset; |
1075 | 1310 | ||
1076 | domain = vector_allocation_domain(cpu); | 1311 | apic->vector_allocation_domain(cpu, tmp_mask); |
1077 | cpus_and(new_mask, domain, cpu_online_map); | ||
1078 | 1312 | ||
1079 | vector = current_vector; | 1313 | vector = current_vector; |
1080 | offset = current_offset; | 1314 | offset = current_offset; |
1081 | next: | 1315 | next: |
1082 | vector += 8; | 1316 | vector += 8; |
1083 | if (vector >= first_system_vector) { | 1317 | if (vector >= first_system_vector) { |
1084 | /* If we run out of vectors on large boxen, must share them. */ | 1318 | /* If out of vectors on large boxen, must share them. */ |
1085 | offset = (offset + 1) % 8; | 1319 | offset = (offset + 1) % 8; |
1086 | vector = FIRST_DEVICE_VECTOR + offset; | 1320 | vector = FIRST_DEVICE_VECTOR + offset; |
1087 | } | 1321 | } |
1088 | if (unlikely(current_vector == vector)) | 1322 | if (unlikely(current_vector == vector)) |
1089 | continue; | 1323 | continue; |
1090 | #ifdef CONFIG_X86_64 | 1324 | |
1091 | if (vector == IA32_SYSCALL_VECTOR) | 1325 | if (test_bit(vector, used_vectors)) |
1092 | goto next; | ||
1093 | #else | ||
1094 | if (vector == SYSCALL_VECTOR) | ||
1095 | goto next; | 1326 | goto next; |
1096 | #endif | 1327 | |
1097 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1328 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1098 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1329 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
1099 | goto next; | 1330 | goto next; |
1100 | /* Found one! */ | 1331 | /* Found one! */ |
@@ -1102,44 +1333,56 @@ next: | |||
1102 | current_offset = offset; | 1333 | current_offset = offset; |
1103 | if (old_vector) { | 1334 | if (old_vector) { |
1104 | cfg->move_in_progress = 1; | 1335 | cfg->move_in_progress = 1; |
1105 | cfg->old_domain = cfg->domain; | 1336 | cpumask_copy(cfg->old_domain, cfg->domain); |
1106 | } | 1337 | } |
1107 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1338 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1108 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 1339 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
1109 | cfg->vector = vector; | 1340 | cfg->vector = vector; |
1110 | cfg->domain = domain; | 1341 | cpumask_copy(cfg->domain, tmp_mask); |
1111 | return 0; | 1342 | err = 0; |
1343 | break; | ||
1112 | } | 1344 | } |
1113 | return -ENOSPC; | 1345 | free_cpumask_var(tmp_mask); |
1346 | return err; | ||
1114 | } | 1347 | } |
1115 | 1348 | ||
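
The search loop in __assign_irq_vector() advances candidate vectors in strides of 8 so that consecutive allocations land in different priority classes (the high nibble of a vector is its APIC priority); when a stride overruns first_system_vector, the low 3-bit offset rotates and the scan restarts from FIRST_DEVICE_VECTOR. Note also that the old per-arch IA32_SYSCALL_VECTOR/SYSCALL_VECTOR special cases collapse into a generic used_vectors bitmap test. A user-space model of just that walk, assuming 0x41 for FIRST_DEVICE_VECTOR and 0xef for first_system_vector, with one global used[] table standing in for the per-CPU conflict checks:

#include <stdio.h>

#define FIRST_DEVICE_VECTOR 0x41   /* assumed value */
#define FIRST_SYSTEM_VECTOR 0xef   /* assumed value */
#define NR_VECTORS          256

static int used[NR_VECTORS];

static int alloc_vector(void)
{
    static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
    int vector = current_vector, offset = current_offset;
    int start = current_vector;

    for (;;) {
        vector += 8;                        /* next priority class */
        if (vector >= FIRST_SYSTEM_VECTOR) {
            offset = (offset + 1) % 8;      /* rotate within the class */
            vector = FIRST_DEVICE_VECTOR + offset;
        }
        if (vector == start)                /* wrapped around: nothing free */
            return -1;
        if (used[vector])
            continue;
        used[vector] = 1;
        current_vector = vector;
        current_offset = offset;
        return vector;
    }
}

int main(void)
{
    int i;

    for (i = 0; i < 6; i++)
        printf("allocated vector 0x%02x\n", alloc_vector());
    return 0;
}
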
1116 | static int assign_irq_vector(int irq, cpumask_t mask) | 1349 | static int |
1350 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1117 | { | 1351 | { |
1118 | int err; | 1352 | int err; |
1119 | unsigned long flags; | 1353 | unsigned long flags; |
1120 | 1354 | ||
1121 | spin_lock_irqsave(&vector_lock, flags); | 1355 | spin_lock_irqsave(&vector_lock, flags); |
1122 | err = __assign_irq_vector(irq, mask); | 1356 | err = __assign_irq_vector(irq, cfg, mask); |
1123 | spin_unlock_irqrestore(&vector_lock, flags); | 1357 | spin_unlock_irqrestore(&vector_lock, flags); |
1124 | return err; | 1358 | return err; |
1125 | } | 1359 | } |
1126 | 1360 | ||
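
assign_irq_vector() itself is just a spinlock wrapper, but the hunk above also carries the conversion from on-stack cpumask_t to cpumask_var_t: with NR_CPUS=4096 a mask no longer fits on the kernel stack, so it is allocated up front, failure returns -ENOMEM, and every exit path funnels through one free_cpumask_var(). A user-space analog of the pattern; the helper names only loosely mirror the kernel API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS    4096
#define MASK_LONGS (NR_CPUS / (8 * sizeof(unsigned long)))

typedef unsigned long *cpumask_var_t;

static int alloc_cpumask_var(cpumask_var_t *mask)
{
    *mask = calloc(MASK_LONGS, sizeof(unsigned long));
    return *mask != NULL;
}

static void free_cpumask_var(cpumask_var_t mask)
{
    free(mask);
}

static int do_vector_work(void)
{
    cpumask_var_t tmp_mask;
    int err;

    if (!alloc_cpumask_var(&tmp_mask))
        return -ENOMEM;

    tmp_mask[0] |= 1UL;           /* stand-in for the real mask math */
    err = 0;

    free_cpumask_var(tmp_mask);   /* single exit path, single free */
    return err;
}

int main(void)
{
    printf("do_vector_work() = %d\n", do_vector_work());
    return 0;
}
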
1127 | static void __clear_irq_vector(int irq) | 1361 | static void __clear_irq_vector(int irq, struct irq_cfg *cfg) |
1128 | { | 1362 | { |
1129 | struct irq_cfg *cfg; | ||
1130 | cpumask_t mask; | ||
1131 | int cpu, vector; | 1363 | int cpu, vector; |
1132 | 1364 | ||
1133 | cfg = irq_cfg(irq); | ||
1134 | BUG_ON(!cfg->vector); | 1365 | BUG_ON(!cfg->vector); |
1135 | 1366 | ||
1136 | vector = cfg->vector; | 1367 | vector = cfg->vector; |
1137 | cpus_and(mask, cfg->domain, cpu_online_map); | 1368 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1138 | for_each_cpu_mask_nr(cpu, mask) | ||
1139 | per_cpu(vector_irq, cpu)[vector] = -1; | 1369 | per_cpu(vector_irq, cpu)[vector] = -1; |
1140 | 1370 | ||
1141 | cfg->vector = 0; | 1371 | cfg->vector = 0; |
1142 | cpus_clear(cfg->domain); | 1372 | cpumask_clear(cfg->domain); |
1373 | |||
1374 | if (likely(!cfg->move_in_progress)) | ||
1375 | return; | ||
1376 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { | ||
1377 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | ||
1378 | vector++) { | ||
1379 | if (per_cpu(vector_irq, cpu)[vector] != irq) | ||
1380 | continue; | ||
1381 | per_cpu(vector_irq, cpu)[vector] = -1; | ||
1382 | break; | ||
1383 | } | ||
1384 | } | ||
1385 | cfg->move_in_progress = 0; | ||
1143 | } | 1386 | } |
1144 | 1387 | ||
1145 | void __setup_vector_irq(int cpu) | 1388 | void __setup_vector_irq(int cpu) |
@@ -1148,10 +1391,12 @@ void __setup_vector_irq(int cpu) | |||
1148 | /* This function must be called with vector_lock held */ | 1391 | /* This function must be called with vector_lock held */ |
1149 | int irq, vector; | 1392 | int irq, vector; |
1150 | struct irq_cfg *cfg; | 1393 | struct irq_cfg *cfg; |
1394 | struct irq_desc *desc; | ||
1151 | 1395 | ||
1152 | /* Mark the inuse vectors */ | 1396 | /* Mark the inuse vectors */ |
1153 | for_each_irq_cfg(irq, cfg) { | 1397 | for_each_irq_desc(irq, desc) { |
1154 | if (!cpu_isset(cpu, cfg->domain)) | 1398 | cfg = desc->chip_data; |
1399 | if (!cpumask_test_cpu(cpu, cfg->domain)) | ||
1155 | continue; | 1400 | continue; |
1156 | vector = cfg->vector; | 1401 | vector = cfg->vector; |
1157 | per_cpu(vector_irq, cpu)[vector] = irq; | 1402 | per_cpu(vector_irq, cpu)[vector] = irq; |
@@ -1163,7 +1408,7 @@ void __setup_vector_irq(int cpu) | |||
1163 | continue; | 1408 | continue; |
1164 | 1409 | ||
1165 | cfg = irq_cfg(irq); | 1410 | cfg = irq_cfg(irq); |
1166 | if (!cpu_isset(cpu, cfg->domain)) | 1411 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1167 | per_cpu(vector_irq, cpu)[vector] = -1; | 1412 | per_cpu(vector_irq, cpu)[vector] = -1; |
1168 | } | 1413 | } |
1169 | } | 1414 | } |
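
__setup_vector_irq() rebuilds the per-CPU vector_irq[] table: it maps a hardware vector back to an IRQ number (-1 when free), marks every vector whose domain includes this CPU, then retires stale entries. A compact model, with cpumask domains reduced to plain bitmasks:

#include <stdio.h>

#define NR_VECTORS 256
#define NR_CPUS    4

struct irq_cfg {
    int vector;
    unsigned long domain;   /* bitmask of CPUs serving this IRQ */
};

static int vector_irq[NR_CPUS][NR_VECTORS];

static void setup_vector_irq(int cpu, const struct irq_cfg *cfgs, int nr)
{
    int irq, vector;

    for (vector = 0; vector < NR_VECTORS; vector++)
        vector_irq[cpu][vector] = -1;

    /* mark the in-use vectors whose domain includes this cpu */
    for (irq = 0; irq < nr; irq++) {
        if (!(cfgs[irq].domain & (1UL << cpu)))
            continue;
        vector_irq[cpu][cfgs[irq].vector] = irq;
    }
}

int main(void)
{
    const struct irq_cfg cfgs[2] = { { 0x41, 0x3 }, { 0x49, 0x2 } };

    setup_vector_irq(1, cfgs, 2);
    printf("cpu1: vector 0x41 -> irq %d\n", vector_irq[1][0x41]);
    printf("cpu1: vector 0x49 -> irq %d\n", vector_irq[1][0x49]);
    return 0;
}
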
@@ -1201,11 +1446,8 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1201 | } | 1446 | } |
1202 | #endif | 1447 | #endif |
1203 | 1448 | ||
1204 | static void ioapic_register_intr(int irq, unsigned long trigger) | 1449 | static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) |
1205 | { | 1450 | { |
1206 | struct irq_desc *desc; | ||
1207 | |||
1208 | desc = irq_to_desc(irq); | ||
1209 | 1451 | ||
1210 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1452 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1211 | trigger == IOAPIC_LEVEL) | 1453 | trigger == IOAPIC_LEVEL) |
@@ -1236,10 +1478,10 @@ static void ioapic_register_intr(int irq, unsigned long trigger) | |||
1236 | handle_edge_irq, "edge"); | 1478 | handle_edge_irq, "edge"); |
1237 | } | 1479 | } |
1238 | 1480 | ||
1239 | static int setup_ioapic_entry(int apic, int irq, | 1481 | int setup_ioapic_entry(int apic_id, int irq, |
1240 | struct IO_APIC_route_entry *entry, | 1482 | struct IO_APIC_route_entry *entry, |
1241 | unsigned int destination, int trigger, | 1483 | unsigned int destination, int trigger, |
1242 | int polarity, int vector) | 1484 | int polarity, int vector) |
1243 | { | 1485 | { |
1244 | /* | 1486 | /* |
1245 | * add it to the IO-APIC irq-routing table: | 1487 | * add it to the IO-APIC irq-routing table: |
@@ -1248,25 +1490,25 @@ static int setup_ioapic_entry(int apic, int irq, | |||
1248 | 1490 | ||
1249 | #ifdef CONFIG_INTR_REMAP | 1491 | #ifdef CONFIG_INTR_REMAP |
1250 | if (intr_remapping_enabled) { | 1492 | if (intr_remapping_enabled) { |
1251 | struct intel_iommu *iommu = map_ioapic_to_ir(apic); | 1493 | struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); |
1252 | struct irte irte; | 1494 | struct irte irte; |
1253 | struct IR_IO_APIC_route_entry *ir_entry = | 1495 | struct IR_IO_APIC_route_entry *ir_entry = |
1254 | (struct IR_IO_APIC_route_entry *) entry; | 1496 | (struct IR_IO_APIC_route_entry *) entry; |
1255 | int index; | 1497 | int index; |
1256 | 1498 | ||
1257 | if (!iommu) | 1499 | if (!iommu) |
1258 | panic("No mapping iommu for ioapic %d\n", apic); | 1500 | panic("No mapping iommu for ioapic %d\n", apic_id); |
1259 | 1501 | ||
1260 | index = alloc_irte(iommu, irq, 1); | 1502 | index = alloc_irte(iommu, irq, 1); |
1261 | if (index < 0) | 1503 | if (index < 0) |
1262 | panic("Failed to allocate IRTE for ioapic %d\n", apic); | 1504 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); |
1263 | 1505 | ||
1264 | memset(&irte, 0, sizeof(irte)); | 1506 | memset(&irte, 0, sizeof(irte)); |
1265 | 1507 | ||
1266 | irte.present = 1; | 1508 | irte.present = 1; |
1267 | irte.dst_mode = INT_DEST_MODE; | 1509 | irte.dst_mode = apic->irq_dest_mode; |
1268 | irte.trigger_mode = trigger; | 1510 | irte.trigger_mode = trigger; |
1269 | irte.dlvry_mode = INT_DELIVERY_MODE; | 1511 | irte.dlvry_mode = apic->irq_delivery_mode; |
1270 | irte.vector = vector; | 1512 | irte.vector = vector; |
1271 | irte.dest_id = IRTE_DEST(destination); | 1513 | irte.dest_id = IRTE_DEST(destination); |
1272 | 1514 | ||
@@ -1279,8 +1521,8 @@ static int setup_ioapic_entry(int apic, int irq, | |||
1279 | } else | 1521 | } else |
1280 | #endif | 1522 | #endif |
1281 | { | 1523 | { |
1282 | entry->delivery_mode = INT_DELIVERY_MODE; | 1524 | entry->delivery_mode = apic->irq_delivery_mode; |
1283 | entry->dest_mode = INT_DEST_MODE; | 1525 | entry->dest_mode = apic->irq_dest_mode; |
1284 | entry->dest = destination; | 1526 | entry->dest = destination; |
1285 | } | 1527 | } |
1286 | 1528 | ||
@@ -1297,69 +1539,68 @@ static int setup_ioapic_entry(int apic, int irq, | |||
1297 | return 0; | 1539 | return 0; |
1298 | } | 1540 | } |
1299 | 1541 | ||
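
The non-remapped branch of setup_ioapic_entry() fills an IO-APIC redirection-table entry from the destination, vector, trigger and polarity; the trigger/mask assignments themselves are elided by this hunk. The sketch below assumes the conventional rule that level-triggered entries start masked while edge entries do not, and uses a simplified subset of struct IO_APIC_route_entry:

#include <stdio.h>

struct route_entry {
    unsigned int vector        : 8;
    unsigned int delivery_mode : 3;   /* e.g. 1 = lowest priority */
    unsigned int dest_mode     : 1;   /* 0 = physical, 1 = logical */
    unsigned int polarity      : 1;   /* 0 = active high */
    unsigned int trigger       : 1;   /* 0 = edge, 1 = level */
    unsigned int mask          : 1;
    unsigned int dest          : 8;
};

static struct route_entry make_entry(unsigned int dest, int trigger,
                                     int polarity, int vector)
{
    struct route_entry e = { 0 };

    e.delivery_mode = 1;         /* lowest priority, logical flat mode */
    e.dest_mode     = 1;
    e.dest          = dest;
    e.trigger       = trigger;
    e.polarity      = polarity;
    e.mask          = trigger;   /* assumed: level starts masked, edge does not */
    e.vector        = vector;
    return e;
}

int main(void)
{
    struct route_entry e = make_entry(0x0f, 1, 1, 0x41);

    printf("vector 0x%02x dest 0x%02x trigger=%u mask=%u\n",
           e.vector, e.dest, e.trigger, e.mask);
    return 0;
}
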
1300 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1542 | static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, |
1301 | int trigger, int polarity) | 1543 | int trigger, int polarity) |
1302 | { | 1544 | { |
1303 | struct irq_cfg *cfg; | 1545 | struct irq_cfg *cfg; |
1304 | struct IO_APIC_route_entry entry; | 1546 | struct IO_APIC_route_entry entry; |
1305 | cpumask_t mask; | 1547 | unsigned int dest; |
1306 | 1548 | ||
1307 | if (!IO_APIC_IRQ(irq)) | 1549 | if (!IO_APIC_IRQ(irq)) |
1308 | return; | 1550 | return; |
1309 | 1551 | ||
1310 | cfg = irq_cfg(irq); | 1552 | cfg = desc->chip_data; |
1311 | 1553 | ||
1312 | mask = TARGET_CPUS; | 1554 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1313 | if (assign_irq_vector(irq, mask)) | ||
1314 | return; | 1555 | return; |
1315 | 1556 | ||
1316 | cpus_and(mask, cfg->domain, mask); | 1557 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
1317 | 1558 | ||
1318 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1559 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1319 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1560 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
1320 | "IRQ %d Mode:%i Active:%i)\n", | 1561 | "IRQ %d Mode:%i Active:%i)\n", |
1321 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | 1562 | apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, |
1322 | irq, trigger, polarity); | 1563 | irq, trigger, polarity); |
1323 | 1564 | ||
1324 | 1565 | ||
1325 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, | 1566 | if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, |
1326 | cpu_mask_to_apicid(mask), trigger, polarity, | 1567 | dest, trigger, polarity, cfg->vector)) { |
1327 | cfg->vector)) { | ||
1328 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1568 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1329 | mp_ioapics[apic].mp_apicid, pin); | 1569 | mp_ioapics[apic_id].apicid, pin); |
1330 | __clear_irq_vector(irq); | 1570 | __clear_irq_vector(irq, cfg); |
1331 | return; | 1571 | return; |
1332 | } | 1572 | } |
1333 | 1573 | ||
1334 | ioapic_register_intr(irq, trigger); | 1574 | ioapic_register_intr(irq, desc, trigger); |
1335 | if (irq < 16) | 1575 | if (irq < NR_IRQS_LEGACY) |
1336 | disable_8259A_irq(irq); | 1576 | disable_8259A_irq(irq); |
1337 | 1577 | ||
1338 | ioapic_write_entry(apic, pin, entry); | 1578 | ioapic_write_entry(apic_id, pin, entry); |
1339 | } | 1579 | } |
1340 | 1580 | ||
1341 | static void __init setup_IO_APIC_irqs(void) | 1581 | static void __init setup_IO_APIC_irqs(void) |
1342 | { | 1582 | { |
1343 | int apic, pin, idx, irq; | 1583 | int apic_id, pin, idx, irq; |
1344 | int notcon = 0; | 1584 | int notcon = 0; |
1585 | struct irq_desc *desc; | ||
1586 | struct irq_cfg *cfg; | ||
1587 | int cpu = boot_cpu_id; | ||
1345 | 1588 | ||
1346 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1589 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1347 | 1590 | ||
1348 | for (apic = 0; apic < nr_ioapics; apic++) { | 1591 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
1349 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1592 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1350 | 1593 | ||
1351 | idx = find_irq_entry(apic, pin, mp_INT); | 1594 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1352 | if (idx == -1) { | 1595 | if (idx == -1) { |
1353 | if (!notcon) { | 1596 | if (!notcon) { |
1354 | notcon = 1; | 1597 | notcon = 1; |
1355 | apic_printk(APIC_VERBOSE, | 1598 | apic_printk(APIC_VERBOSE, |
1356 | KERN_DEBUG " %d-%d", | 1599 | KERN_DEBUG " %d-%d", |
1357 | mp_ioapics[apic].mp_apicid, | 1600 | mp_ioapics[apic_id].apicid, pin); |
1358 | pin); | ||
1359 | } else | 1601 | } else |
1360 | apic_printk(APIC_VERBOSE, " %d-%d", | 1602 | apic_printk(APIC_VERBOSE, " %d-%d", |
1361 | mp_ioapics[apic].mp_apicid, | 1603 | mp_ioapics[apic_id].apicid, pin); |
1362 | pin); | ||
1363 | continue; | 1604 | continue; |
1364 | } | 1605 | } |
1365 | if (notcon) { | 1606 | if (notcon) { |
@@ -1368,14 +1609,25 @@ static void __init setup_IO_APIC_irqs(void) | |||
1368 | notcon = 0; | 1609 | notcon = 0; |
1369 | } | 1610 | } |
1370 | 1611 | ||
1371 | irq = pin_2_irq(idx, apic, pin); | 1612 | irq = pin_2_irq(idx, apic_id, pin); |
1372 | #ifdef CONFIG_X86_32 | 1613 | |
1373 | if (multi_timer_check(apic, irq)) | 1614 | /* |
1615 | * Skip the timer IRQ if there's a quirk handler | ||
1616 | * installed and if it returns 1: | ||
1617 | */ | ||
1618 | if (apic->multi_timer_check && | ||
1619 | apic->multi_timer_check(apic_id, irq)) | ||
1374 | continue; | 1620 | continue; |
1375 | #endif | ||
1376 | add_pin_to_irq(irq, apic, pin); | ||
1377 | 1621 | ||
1378 | setup_IO_APIC_irq(apic, pin, irq, | 1622 | desc = irq_to_desc_alloc_cpu(irq, cpu); |
1623 | if (!desc) { | ||
1624 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
1625 | continue; | ||
1626 | } | ||
1627 | cfg = desc->chip_data; | ||
1628 | add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); | ||
1629 | |||
1630 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | ||
1379 | irq_trigger(idx), irq_polarity(idx)); | 1631 | irq_trigger(idx), irq_polarity(idx)); |
1380 | } | 1632 | } |
1381 | } | 1633 | } |
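
The rewritten loop above is the sparse-IRQ conversion in miniature: irq_desc is no longer a static array, descriptors are allocated on first use with irq_to_desc_alloc_cpu(), lookups may return NULL, and chip_data carries the irq_cfg. A user-space model of the lazy allocation:

#include <stdio.h>
#include <stdlib.h>

#define NR_IRQS 1024

struct irq_desc {
    int   irq;
    void *chip_data;   /* holds the irq_cfg in the hunk above */
};

static struct irq_desc *descs[NR_IRQS];

static struct irq_desc *irq_to_desc_alloc(unsigned int irq)
{
    if (irq >= NR_IRQS)
        return NULL;
    if (!descs[irq]) {
        descs[irq] = calloc(1, sizeof(*descs[irq]));
        if (descs[irq])
            descs[irq]->irq = (int)irq;
    }
    return descs[irq];
}

int main(void)
{
    struct irq_desc *desc = irq_to_desc_alloc(19);

    if (!desc) {
        fprintf(stderr, "no irq_desc for 19\n");
        return 1;
    }
    printf("allocated descriptor for irq %d\n", desc->irq);
    return 0;
}
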
@@ -1388,7 +1640,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1388 | /* | 1640 | /* |
1389 | * Set up the timer pin, possibly with the 8259A-master behind. | 1641 | * Set up the timer pin, possibly with the 8259A-master behind. |
1390 | */ | 1642 | */ |
1391 | static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | 1643 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, |
1392 | int vector) | 1644 | int vector) |
1393 | { | 1645 | { |
1394 | struct IO_APIC_route_entry entry; | 1646 | struct IO_APIC_route_entry entry; |
@@ -1404,10 +1656,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
1404 | * We use logical delivery to get the timer IRQ | 1656 | * We use logical delivery to get the timer IRQ |
1405 | * to the first CPU. | 1657 | * to the first CPU. |
1406 | */ | 1658 | */ |
1407 | entry.dest_mode = INT_DEST_MODE; | 1659 | entry.dest_mode = apic->irq_dest_mode; |
1408 | entry.mask = 1; /* mask IRQ now */ | 1660 | entry.mask = 0; /* don't mask IRQ for edge */ |
1409 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); | 1661 | entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); |
1410 | entry.delivery_mode = INT_DELIVERY_MODE; | 1662 | entry.delivery_mode = apic->irq_delivery_mode; |
1411 | entry.polarity = 0; | 1663 | entry.polarity = 0; |
1412 | entry.trigger = 0; | 1664 | entry.trigger = 0; |
1413 | entry.vector = vector; | 1665 | entry.vector = vector; |
@@ -1421,7 +1673,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
1421 | /* | 1673 | /* |
1422 | * Add it to the IO-APIC irq-routing table: | 1674 | * Add it to the IO-APIC irq-routing table: |
1423 | */ | 1675 | */ |
1424 | ioapic_write_entry(apic, pin, entry); | 1676 | ioapic_write_entry(apic_id, pin, entry); |
1425 | } | 1677 | } |
1426 | 1678 | ||
1427 | 1679 | ||
@@ -1434,6 +1686,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1434 | union IO_APIC_reg_03 reg_03; | 1686 | union IO_APIC_reg_03 reg_03; |
1435 | unsigned long flags; | 1687 | unsigned long flags; |
1436 | struct irq_cfg *cfg; | 1688 | struct irq_cfg *cfg; |
1689 | struct irq_desc *desc; | ||
1437 | unsigned int irq; | 1690 | unsigned int irq; |
1438 | 1691 | ||
1439 | if (apic_verbosity == APIC_QUIET) | 1692 | if (apic_verbosity == APIC_QUIET) |
@@ -1442,7 +1695,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1442 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1695 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1443 | for (i = 0; i < nr_ioapics; i++) | 1696 | for (i = 0; i < nr_ioapics; i++) |
1444 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1697 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
1445 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); | 1698 | mp_ioapics[i].apicid, nr_ioapic_registers[i]); |
1446 | 1699 | ||
1447 | /* | 1700 | /* |
1448 | * We are a bit conservative about what we expect. We have to | 1701 | * We are a bit conservative about what we expect. We have to |
@@ -1462,7 +1715,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1462 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1715 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1463 | 1716 | ||
1464 | printk("\n"); | 1717 | printk("\n"); |
1465 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | 1718 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); |
1466 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1719 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1467 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1720 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1468 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1721 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
@@ -1523,8 +1776,11 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1523 | } | 1776 | } |
1524 | } | 1777 | } |
1525 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1778 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
1526 | for_each_irq_cfg(irq, cfg) { | 1779 | for_each_irq_desc(irq, desc) { |
1527 | struct irq_pin_list *entry = cfg->irq_2_pin; | 1780 | struct irq_pin_list *entry; |
1781 | |||
1782 | cfg = desc->chip_data; | ||
1783 | entry = cfg->irq_2_pin; | ||
1528 | if (!entry) | 1784 | if (!entry) |
1529 | continue; | 1785 | continue; |
1530 | printk(KERN_DEBUG "IRQ%d ", irq); | 1786 | printk(KERN_DEBUG "IRQ%d ", irq); |
@@ -1830,7 +2086,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1830 | { | 2086 | { |
1831 | union IO_APIC_reg_00 reg_00; | 2087 | union IO_APIC_reg_00 reg_00; |
1832 | physid_mask_t phys_id_present_map; | 2088 | physid_mask_t phys_id_present_map; |
1833 | int apic; | 2089 | int apic_id; |
1834 | int i; | 2090 | int i; |
1835 | unsigned char old_id; | 2091 | unsigned char old_id; |
1836 | unsigned long flags; | 2092 | unsigned long flags; |
@@ -1849,26 +2105,26 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1849 | * This is broken; anything with a real cpu count has to | 2105 | * This is broken; anything with a real cpu count has to |
1850 | * circumvent this idiocy regardless. | 2106 | * circumvent this idiocy regardless. |
1851 | */ | 2107 | */ |
1852 | phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); | 2108 | phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); |
1853 | 2109 | ||
1854 | /* | 2110 | /* |
1855 | * Set the IOAPIC ID to the value stored in the MPC table. | 2111 | * Set the IOAPIC ID to the value stored in the MPC table. |
1856 | */ | 2112 | */ |
1857 | for (apic = 0; apic < nr_ioapics; apic++) { | 2113 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
1858 | 2114 | ||
1859 | /* Read the register 0 value */ | 2115 | /* Read the register 0 value */ |
1860 | spin_lock_irqsave(&ioapic_lock, flags); | 2116 | spin_lock_irqsave(&ioapic_lock, flags); |
1861 | reg_00.raw = io_apic_read(apic, 0); | 2117 | reg_00.raw = io_apic_read(apic_id, 0); |
1862 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2118 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1863 | 2119 | ||
1864 | old_id = mp_ioapics[apic].mp_apicid; | 2120 | old_id = mp_ioapics[apic_id].apicid; |
1865 | 2121 | ||
1866 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { | 2122 | if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { |
1867 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 2123 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
1868 | apic, mp_ioapics[apic].mp_apicid); | 2124 | apic_id, mp_ioapics[apic_id].apicid); |
1869 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2125 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1870 | reg_00.bits.ID); | 2126 | reg_00.bits.ID); |
1871 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; | 2127 | mp_ioapics[apic_id].apicid = reg_00.bits.ID; |
1872 | } | 2128 | } |
1873 | 2129 | ||
1874 | /* | 2130 | /* |
@@ -1876,10 +2132,10 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1876 | * system must have a unique ID or we get lots of nice | 2132 | * system must have a unique ID or we get lots of nice |
1877 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2133 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
1878 | */ | 2134 | */ |
1879 | if (check_apicid_used(phys_id_present_map, | 2135 | if (apic->check_apicid_used(phys_id_present_map, |
1880 | mp_ioapics[apic].mp_apicid)) { | 2136 | mp_ioapics[apic_id].apicid)) { |
1881 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2137 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
1882 | apic, mp_ioapics[apic].mp_apicid); | 2138 | apic_id, mp_ioapics[apic_id].apicid); |
1883 | for (i = 0; i < get_physical_broadcast(); i++) | 2139 | for (i = 0; i < get_physical_broadcast(); i++) |
1884 | if (!physid_isset(i, phys_id_present_map)) | 2140 | if (!physid_isset(i, phys_id_present_map)) |
1885 | break; | 2141 | break; |
@@ -1888,13 +2144,13 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1888 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2144 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1889 | i); | 2145 | i); |
1890 | physid_set(i, phys_id_present_map); | 2146 | physid_set(i, phys_id_present_map); |
1891 | mp_ioapics[apic].mp_apicid = i; | 2147 | mp_ioapics[apic_id].apicid = i; |
1892 | } else { | 2148 | } else { |
1893 | physid_mask_t tmp; | 2149 | physid_mask_t tmp; |
1894 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); | 2150 | tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); |
1895 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2151 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
1896 | "phys_id_present_map\n", | 2152 | "phys_id_present_map\n", |
1897 | mp_ioapics[apic].mp_apicid); | 2153 | mp_ioapics[apic_id].apicid); |
1898 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 2154 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
1899 | } | 2155 | } |
1900 | 2156 | ||
@@ -1903,11 +2159,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1903 | * We need to adjust the IRQ routing table | 2159 | * We need to adjust the IRQ routing table |
1904 | * if the ID changed. | 2160 | * if the ID changed. |
1905 | */ | 2161 | */ |
1906 | if (old_id != mp_ioapics[apic].mp_apicid) | 2162 | if (old_id != mp_ioapics[apic_id].apicid) |
1907 | for (i = 0; i < mp_irq_entries; i++) | 2163 | for (i = 0; i < mp_irq_entries; i++) |
1908 | if (mp_irqs[i].mp_dstapic == old_id) | 2164 | if (mp_irqs[i].dstapic == old_id) |
1909 | mp_irqs[i].mp_dstapic | 2165 | mp_irqs[i].dstapic |
1910 | = mp_ioapics[apic].mp_apicid; | 2166 | = mp_ioapics[apic_id].apicid; |
1911 | 2167 | ||
1912 | /* | 2168 | /* |
1913 | * Read the right value from the MPC table and | 2169 | * Read the right value from the MPC table and |
@@ -1915,20 +2171,20 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1915 | */ | 2171 | */ |
1916 | apic_printk(APIC_VERBOSE, KERN_INFO | 2172 | apic_printk(APIC_VERBOSE, KERN_INFO |
1917 | "...changing IO-APIC physical APIC ID to %d ...", | 2173 | "...changing IO-APIC physical APIC ID to %d ...", |
1918 | mp_ioapics[apic].mp_apicid); | 2174 | mp_ioapics[apic_id].apicid); |
1919 | 2175 | ||
1920 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; | 2176 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; |
1921 | spin_lock_irqsave(&ioapic_lock, flags); | 2177 | spin_lock_irqsave(&ioapic_lock, flags); |
1922 | io_apic_write(apic, 0, reg_00.raw); | 2178 | io_apic_write(apic_id, 0, reg_00.raw); |
1923 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2179 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1924 | 2180 | ||
1925 | /* | 2181 | /* |
1926 | * Sanity check | 2182 | * Sanity check |
1927 | */ | 2183 | */ |
1928 | spin_lock_irqsave(&ioapic_lock, flags); | 2184 | spin_lock_irqsave(&ioapic_lock, flags); |
1929 | reg_00.raw = io_apic_read(apic, 0); | 2185 | reg_00.raw = io_apic_read(apic_id, 0); |
1930 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2186 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1931 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) | 2187 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) |
1932 | printk("could not set ID!\n"); | 2188 | printk("could not set ID!\n"); |
1933 | else | 2189 | else |
1934 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2190 | apic_printk(APIC_VERBOSE, " ok.\n"); |
@@ -2008,14 +2264,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2008 | { | 2264 | { |
2009 | int was_pending = 0; | 2265 | int was_pending = 0; |
2010 | unsigned long flags; | 2266 | unsigned long flags; |
2267 | struct irq_cfg *cfg; | ||
2011 | 2268 | ||
2012 | spin_lock_irqsave(&ioapic_lock, flags); | 2269 | spin_lock_irqsave(&ioapic_lock, flags); |
2013 | if (irq < 16) { | 2270 | if (irq < NR_IRQS_LEGACY) { |
2014 | disable_8259A_irq(irq); | 2271 | disable_8259A_irq(irq); |
2015 | if (i8259A_irq_pending(irq)) | 2272 | if (i8259A_irq_pending(irq)) |
2016 | was_pending = 1; | 2273 | was_pending = 1; |
2017 | } | 2274 | } |
2018 | __unmask_IO_APIC_irq(irq); | 2275 | cfg = irq_cfg(irq); |
2276 | __unmask_IO_APIC_irq(cfg); | ||
2019 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2277 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2020 | 2278 | ||
2021 | return was_pending; | 2279 | return was_pending; |
@@ -2029,7 +2287,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2029 | unsigned long flags; | 2287 | unsigned long flags; |
2030 | 2288 | ||
2031 | spin_lock_irqsave(&vector_lock, flags); | 2289 | spin_lock_irqsave(&vector_lock, flags); |
2032 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); | 2290 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2033 | spin_unlock_irqrestore(&vector_lock, flags); | 2291 | spin_unlock_irqrestore(&vector_lock, flags); |
2034 | 2292 | ||
2035 | return 1; | 2293 | return 1; |
@@ -2037,7 +2295,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2037 | #else | 2295 | #else |
2038 | static int ioapic_retrigger_irq(unsigned int irq) | 2296 | static int ioapic_retrigger_irq(unsigned int irq) |
2039 | { | 2297 | { |
2040 | send_IPI_self(irq_cfg(irq)->vector); | 2298 | apic->send_IPI_self(irq_cfg(irq)->vector); |
2041 | 2299 | ||
2042 | return 1; | 2300 | return 1; |
2043 | } | 2301 | } |
@@ -2078,35 +2336,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | |||
2078 | * as simple as edge triggered migration and we can do the irq migration | 2336 | * as simple as edge triggered migration and we can do the irq migration |
2079 | * with a simple atomic update to IO-APIC RTE. | 2337 | * with a simple atomic update to IO-APIC RTE. |
2080 | */ | 2338 | */ |
2081 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | 2339 | static void |
2340 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
2082 | { | 2341 | { |
2083 | struct irq_cfg *cfg; | 2342 | struct irq_cfg *cfg; |
2084 | struct irq_desc *desc; | ||
2085 | cpumask_t tmp, cleanup_mask; | ||
2086 | struct irte irte; | 2343 | struct irte irte; |
2087 | int modify_ioapic_rte; | 2344 | int modify_ioapic_rte; |
2088 | unsigned int dest; | 2345 | unsigned int dest; |
2089 | unsigned long flags; | 2346 | unsigned long flags; |
2347 | unsigned int irq; | ||
2090 | 2348 | ||
2091 | cpus_and(tmp, mask, cpu_online_map); | 2349 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2092 | if (cpus_empty(tmp)) | ||
2093 | return; | 2350 | return; |
2094 | 2351 | ||
2352 | irq = desc->irq; | ||
2095 | if (get_irte(irq, &irte)) | 2353 | if (get_irte(irq, &irte)) |
2096 | return; | 2354 | return; |
2097 | 2355 | ||
2098 | if (assign_irq_vector(irq, mask)) | 2356 | cfg = desc->chip_data; |
2357 | if (assign_irq_vector(irq, cfg, mask)) | ||
2099 | return; | 2358 | return; |
2100 | 2359 | ||
2101 | cfg = irq_cfg(irq); | 2360 | set_extra_move_desc(desc, mask); |
2102 | cpus_and(tmp, cfg->domain, mask); | 2361 | |
2103 | dest = cpu_mask_to_apicid(tmp); | 2362 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); |
2104 | 2363 | ||
2105 | desc = irq_to_desc(irq); | ||
2106 | modify_ioapic_rte = desc->status & IRQ_LEVEL; | 2364 | modify_ioapic_rte = desc->status & IRQ_LEVEL; |
2107 | if (modify_ioapic_rte) { | 2365 | if (modify_ioapic_rte) { |
2108 | spin_lock_irqsave(&ioapic_lock, flags); | 2366 | spin_lock_irqsave(&ioapic_lock, flags); |
2109 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 2367 | __target_IO_APIC_irq(irq, dest, cfg); |
2110 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2368 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2111 | } | 2369 | } |
2112 | 2370 | ||
@@ -2118,24 +2376,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) | |||
2118 | */ | 2376 | */ |
2119 | modify_irte(irq, &irte); | 2377 | modify_irte(irq, &irte); |
2120 | 2378 | ||
2121 | if (cfg->move_in_progress) { | 2379 | if (cfg->move_in_progress) |
2122 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2380 | send_cleanup_vector(cfg); |
2123 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
2124 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2125 | cfg->move_in_progress = 0; | ||
2126 | } | ||
2127 | 2381 | ||
2128 | desc->affinity = mask; | 2382 | cpumask_copy(desc->affinity, mask); |
2129 | } | 2383 | } |
2130 | 2384 | ||
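
migrate_ioapic_irq_desc() replaces the open-coded cleanup (the deleted lines just above) with send_cleanup_vector(): the still-online CPUs of the old vector domain are counted into move_cleanup_count and each is sent IRQ_MOVE_CLEANUP_VECTOR so it can retire its stale vector_irq slot. A sketch of that helper, with cpumasks as plain bitmaps and an assumed vector number:

#include <stdio.h>

#define IRQ_MOVE_CLEANUP_VECTOR 0x20   /* assumed value */

struct irq_cfg {
    unsigned long old_domain;          /* CPUs of the pre-move domain */
    int move_cleanup_count;
    int move_in_progress;
};

static unsigned long cpu_online_mask = 0xf;

static void send_ipi_mask(unsigned long mask, int vector)
{
    printf("IPI vector 0x%x -> cpu mask 0x%lx\n", vector, mask);
}

static void send_cleanup_vector(struct irq_cfg *cfg)
{
    unsigned long cleanup_mask = cfg->old_domain & cpu_online_mask;

    cfg->move_cleanup_count = __builtin_popcountl(cleanup_mask);
    send_ipi_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
    cfg->move_in_progress = 0;
}

int main(void)
{
    struct irq_cfg cfg = { .old_domain = 0x6, .move_in_progress = 1 };

    send_cleanup_vector(&cfg);
    printf("move_cleanup_count = %d\n", cfg.move_cleanup_count);
    return 0;
}
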
2131 | static int migrate_irq_remapped_level(int irq) | 2385 | static int migrate_irq_remapped_level_desc(struct irq_desc *desc) |
2132 | { | 2386 | { |
2133 | int ret = -1; | 2387 | int ret = -1; |
2134 | struct irq_desc *desc = irq_to_desc(irq); | 2388 | struct irq_cfg *cfg = desc->chip_data; |
2135 | 2389 | ||
2136 | mask_IO_APIC_irq(irq); | 2390 | mask_IO_APIC_irq_desc(desc); |
2137 | 2391 | ||
2138 | if (io_apic_level_ack_pending(irq)) { | 2392 | if (io_apic_level_ack_pending(cfg)) { |
2139 | /* | 2393 | /* |
2140 | * Interrupt in progress. Migrating irq now will change the | 2394 | * Interrupt in progress. Migrating irq now will change the |
2141 | * vector information in the IO-APIC RTE and that will confuse | 2395 | * vector information in the IO-APIC RTE and that will confuse |
@@ -2147,14 +2401,15 @@ static int migrate_irq_remapped_level(int irq) | |||
2147 | } | 2401 | } |
2148 | 2402 | ||
2149 | /* everything is clear. we have right of way */ | 2403 | /* everything is clear. we have right of way */ |
2150 | migrate_ioapic_irq(irq, desc->pending_mask); | 2404 | migrate_ioapic_irq_desc(desc, desc->pending_mask); |
2151 | 2405 | ||
2152 | ret = 0; | 2406 | ret = 0; |
2153 | desc->status &= ~IRQ_MOVE_PENDING; | 2407 | desc->status &= ~IRQ_MOVE_PENDING; |
2154 | cpus_clear(desc->pending_mask); | 2408 | cpumask_clear(desc->pending_mask); |
2155 | 2409 | ||
2156 | unmask: | 2410 | unmask: |
2157 | unmask_IO_APIC_irq(irq); | 2411 | unmask_IO_APIC_irq_desc(desc); |
2412 | |||
2158 | return ret; | 2413 | return ret; |
2159 | } | 2414 | } |
2160 | 2415 | ||
@@ -2184,28 +2439,33 @@ static void ir_irq_migration(struct work_struct *work) | |||
2184 | /* | 2439 | /* |
2185 | * Migrates the IRQ destination in the process context. | 2440 | * Migrates the IRQ destination in the process context. |
2186 | */ | 2441 | */ |
2187 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 2442 | static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, |
2443 | const struct cpumask *mask) | ||
2188 | { | 2444 | { |
2189 | struct irq_desc *desc = irq_to_desc(irq); | ||
2190 | |||
2191 | if (desc->status & IRQ_LEVEL) { | 2445 | if (desc->status & IRQ_LEVEL) { |
2192 | desc->status |= IRQ_MOVE_PENDING; | 2446 | desc->status |= IRQ_MOVE_PENDING; |
2193 | desc->pending_mask = mask; | 2447 | cpumask_copy(desc->pending_mask, mask); |
2194 | migrate_irq_remapped_level(irq); | 2448 | migrate_irq_remapped_level_desc(desc); |
2195 | return; | 2449 | return; |
2196 | } | 2450 | } |
2197 | 2451 | ||
2198 | migrate_ioapic_irq(irq, mask); | 2452 | migrate_ioapic_irq_desc(desc, mask); |
2453 | } | ||
2454 | static void set_ir_ioapic_affinity_irq(unsigned int irq, | ||
2455 | const struct cpumask *mask) | ||
2456 | { | ||
2457 | struct irq_desc *desc = irq_to_desc(irq); | ||
2458 | |||
2459 | set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
2199 | } | 2460 | } |
2200 | #endif | 2461 | #endif |
2201 | 2462 | ||
2202 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2463 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
2203 | { | 2464 | { |
2204 | unsigned vector, me; | 2465 | unsigned vector, me; |
2466 | |||
2205 | ack_APIC_irq(); | 2467 | ack_APIC_irq(); |
2206 | #ifdef CONFIG_X86_64 | ||
2207 | exit_idle(); | 2468 | exit_idle(); |
2208 | #endif | ||
2209 | irq_enter(); | 2469 | irq_enter(); |
2210 | 2470 | ||
2211 | me = smp_processor_id(); | 2471 | me = smp_processor_id(); |
@@ -2215,6 +2475,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2215 | struct irq_cfg *cfg; | 2475 | struct irq_cfg *cfg; |
2216 | irq = __get_cpu_var(vector_irq)[vector]; | 2476 | irq = __get_cpu_var(vector_irq)[vector]; |
2217 | 2477 | ||
2478 | if (irq == -1) | ||
2479 | continue; | ||
2480 | |||
2218 | desc = irq_to_desc(irq); | 2481 | desc = irq_to_desc(irq); |
2219 | if (!desc) | 2482 | if (!desc) |
2220 | continue; | 2483 | continue; |
@@ -2224,7 +2487,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2224 | if (!cfg->move_cleanup_count) | 2487 | if (!cfg->move_cleanup_count) |
2225 | goto unlock; | 2488 | goto unlock; |
2226 | 2489 | ||
2227 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) | 2490 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2228 | goto unlock; | 2491 | goto unlock; |
2229 | 2492 | ||
2230 | __get_cpu_var(vector_irq)[vector] = -1; | 2493 | __get_cpu_var(vector_irq)[vector] = -1; |
@@ -2236,28 +2499,45 @@ unlock: | |||
2236 | irq_exit(); | 2499 | irq_exit(); |
2237 | } | 2500 | } |
2238 | 2501 | ||
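
smp_irq_move_cleanup_interrupt() is the receiving side: the CPU walks its local vector table and frees every vector that no longer matches its IRQ's current (vector, domain) pair; the new irq == -1 check skips unused slots before any descriptor lookup. A runnable model of the walk:

#include <stdio.h>

#define NR_VECTORS 256

struct irq_cfg {
    int vector;
    unsigned long domain;        /* cpu bitmask */
    int move_cleanup_count;
};

static int vector_irq[NR_VECTORS];

static void irq_move_cleanup(int me, struct irq_cfg *cfgs)
{
    int vector;

    for (vector = 0; vector < NR_VECTORS; vector++) {
        int irq = vector_irq[vector];
        struct irq_cfg *cfg;

        if (irq == -1)           /* unused slot, nothing to look up */
            continue;
        cfg = &cfgs[irq];
        if (!cfg->move_cleanup_count)
            continue;
        /* still the live vector for this IRQ on this CPU: keep it */
        if (vector == cfg->vector && (cfg->domain & (1UL << me)))
            continue;
        vector_irq[vector] = -1;
        cfg->move_cleanup_count--;
    }
}

int main(void)
{
    struct irq_cfg cfgs[1] = { { 0x49, 0x2, 1 } };  /* moved to cpu1, 0x49 */
    int v;

    for (v = 0; v < NR_VECTORS; v++)
        vector_irq[v] = -1;
    vector_irq[0x41] = 0;                           /* stale entry on cpu0 */

    irq_move_cleanup(0, cfgs);
    printf("vector 0x41 -> %d, cleanup_count = %d\n",
           vector_irq[0x41], cfgs[0].move_cleanup_count);
    return 0;
}
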
2239 | static void irq_complete_move(unsigned int irq) | 2502 | static void irq_complete_move(struct irq_desc **descp) |
2240 | { | 2503 | { |
2241 | struct irq_cfg *cfg = irq_cfg(irq); | 2504 | struct irq_desc *desc = *descp; |
2505 | struct irq_cfg *cfg = desc->chip_data; | ||
2242 | unsigned vector, me; | 2506 | unsigned vector, me; |
2243 | 2507 | ||
2244 | if (likely(!cfg->move_in_progress)) | 2508 | if (likely(!cfg->move_in_progress)) { |
2509 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2510 | if (likely(!cfg->move_desc_pending)) | ||
2511 | return; | ||
2512 | |||
2513 | /* domain has not changed, but affinity did */ | ||
2514 | me = smp_processor_id(); | ||
2515 | if (cpumask_test_cpu(me, desc->affinity)) { | ||
2516 | *descp = desc = move_irq_desc(desc, me); | ||
2517 | /* get the new one */ | ||
2518 | cfg = desc->chip_data; | ||
2519 | cfg->move_desc_pending = 0; | ||
2520 | } | ||
2521 | #endif | ||
2245 | return; | 2522 | return; |
2523 | } | ||
2246 | 2524 | ||
2247 | vector = ~get_irq_regs()->orig_ax; | 2525 | vector = ~get_irq_regs()->orig_ax; |
2248 | me = smp_processor_id(); | 2526 | me = smp_processor_id(); |
2249 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | ||
2250 | cpumask_t cleanup_mask; | ||
2251 | 2527 | ||
2252 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2528 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { |
2253 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | 2529 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
2254 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 2530 | *descp = desc = move_irq_desc(desc, me); |
2255 | cfg->move_in_progress = 0; | 2531 | /* get the new one */ |
2532 | cfg = desc->chip_data; | ||
2533 | #endif | ||
2534 | send_cleanup_vector(cfg); | ||
2256 | } | 2535 | } |
2257 | } | 2536 | } |
2258 | #else | 2537 | #else |
2259 | static inline void irq_complete_move(unsigned int irq) {} | 2538 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2260 | #endif | 2539 | #endif |
2540 | |||
2261 | #ifdef CONFIG_INTR_REMAP | 2541 | #ifdef CONFIG_INTR_REMAP |
2262 | static void ack_x2apic_level(unsigned int irq) | 2542 | static void ack_x2apic_level(unsigned int irq) |
2263 | { | 2543 | { |
@@ -2268,11 +2548,14 @@ static void ack_x2apic_edge(unsigned int irq) | |||
2268 | { | 2548 | { |
2269 | ack_x2APIC_irq(); | 2549 | ack_x2APIC_irq(); |
2270 | } | 2550 | } |
2551 | |||
2271 | #endif | 2552 | #endif |
2272 | 2553 | ||
2273 | static void ack_apic_edge(unsigned int irq) | 2554 | static void ack_apic_edge(unsigned int irq) |
2274 | { | 2555 | { |
2275 | irq_complete_move(irq); | 2556 | struct irq_desc *desc = irq_to_desc(irq); |
2557 | |||
2558 | irq_complete_move(&desc); | ||
2276 | move_native_irq(irq); | 2559 | move_native_irq(irq); |
2277 | ack_APIC_irq(); | 2560 | ack_APIC_irq(); |
2278 | } | 2561 | } |
@@ -2281,18 +2564,21 @@ atomic_t irq_mis_count; | |||
2281 | 2564 | ||
2282 | static void ack_apic_level(unsigned int irq) | 2565 | static void ack_apic_level(unsigned int irq) |
2283 | { | 2566 | { |
2567 | struct irq_desc *desc = irq_to_desc(irq); | ||
2568 | |||
2284 | #ifdef CONFIG_X86_32 | 2569 | #ifdef CONFIG_X86_32 |
2285 | unsigned long v; | 2570 | unsigned long v; |
2286 | int i; | 2571 | int i; |
2287 | #endif | 2572 | #endif |
2573 | struct irq_cfg *cfg; | ||
2288 | int do_unmask_irq = 0; | 2574 | int do_unmask_irq = 0; |
2289 | 2575 | ||
2290 | irq_complete_move(irq); | 2576 | irq_complete_move(&desc); |
2291 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2577 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2292 | /* If we are moving the irq we need to mask it */ | 2578 | /* If we are moving the irq we need to mask it */ |
2293 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { | 2579 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { |
2294 | do_unmask_irq = 1; | 2580 | do_unmask_irq = 1; |
2295 | mask_IO_APIC_irq(irq); | 2581 | mask_IO_APIC_irq_desc(desc); |
2296 | } | 2582 | } |
2297 | #endif | 2583 | #endif |
2298 | 2584 | ||
@@ -2316,7 +2602,8 @@ static void ack_apic_level(unsigned int irq) | |||
2316 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2602 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2317 | * The idea is from Manfred Spraul. --macro | 2603 | * The idea is from Manfred Spraul. --macro |
2318 | */ | 2604 | */ |
2319 | i = irq_cfg(irq)->vector; | 2605 | cfg = desc->chip_data; |
2606 | i = cfg->vector; | ||
2320 | 2607 | ||
2321 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2608 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2322 | #endif | 2609 | #endif |
@@ -2355,17 +2642,18 @@ static void ack_apic_level(unsigned int irq) | |||
2355 | * accurate and is causing problems then it is a hardware bug | 2642 | * accurate and is causing problems then it is a hardware bug |
2356 | * and you can go talk to the chipset vendor about it. | 2643 | * and you can go talk to the chipset vendor about it. |
2357 | */ | 2644 | */ |
2358 | if (!io_apic_level_ack_pending(irq)) | 2645 | cfg = desc->chip_data; |
2646 | if (!io_apic_level_ack_pending(cfg)) | ||
2359 | move_masked_irq(irq); | 2647 | move_masked_irq(irq); |
2360 | unmask_IO_APIC_irq(irq); | 2648 | unmask_IO_APIC_irq_desc(desc); |
2361 | } | 2649 | } |
2362 | 2650 | ||
2363 | #ifdef CONFIG_X86_32 | 2651 | #ifdef CONFIG_X86_32 |
2364 | if (!(v & (1 << (i & 0x1f)))) { | 2652 | if (!(v & (1 << (i & 0x1f)))) { |
2365 | atomic_inc(&irq_mis_count); | 2653 | atomic_inc(&irq_mis_count); |
2366 | spin_lock(&ioapic_lock); | 2654 | spin_lock(&ioapic_lock); |
2367 | __mask_and_edge_IO_APIC_irq(irq); | 2655 | __mask_and_edge_IO_APIC_irq(cfg); |
2368 | __unmask_and_level_IO_APIC_irq(irq); | 2656 | __unmask_and_level_IO_APIC_irq(cfg); |
2369 | spin_unlock(&ioapic_lock); | 2657 | spin_unlock(&ioapic_lock); |
2370 | } | 2658 | } |
2371 | #endif | 2659 | #endif |
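
The 32-bit branch of ack_apic_level() probes the local APIC's Trigger Mode Register. The TMR is a 256-bit register file laid out as 32-bit words on 0x10-byte strides, which is what the ((i & ~0x1f) >> 1) word offset and the (1 << (i & 0x1f)) bit test above encode; a clear bit for a supposedly level-triggered IRQ is the mis-route that the irq_mis_count workaround (mask-and-edge, then unmask-and-level) repairs. A model of the indexing:

#include <stdio.h>

/* 8 x 32-bit words model the 256 TMR bits; in hardware each word sits
 * at a 0x10 offset, hence ((i & ~0x1f) >> 1) == (i / 32) * 0x10. */
static unsigned int tmr[8];

static int tmr_is_level(int vector)
{
    unsigned int v = tmr[vector >> 5];

    return (v >> (vector & 0x1f)) & 1;
}

int main(void)
{
    tmr[0x49 >> 5] |= 1u << (0x49 & 0x1f);   /* pretend 0x49 is level */

    printf("vector 0x49: %s\n", tmr_is_level(0x49) ? "level" : "edge");
    printf("vector 0x41: %s\n", tmr_is_level(0x41) ? "level" : "edge");
    return 0;
}
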
@@ -2416,20 +2704,19 @@ static inline void init_IO_APIC_traps(void) | |||
2416 | * Also, we've got to be careful not to trash gate | 2704 | * Also, we've got to be careful not to trash gate |
2417 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2705 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
2418 | */ | 2706 | */ |
2419 | for_each_irq_cfg(irq, cfg) { | 2707 | for_each_irq_desc(irq, desc) { |
2420 | if (IO_APIC_IRQ(irq) && !cfg->vector) { | 2708 | cfg = desc->chip_data; |
2709 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | ||
2421 | /* | 2710 | /* |
2422 | * Hmm.. We don't have an entry for this, | 2711 | * Hmm.. We don't have an entry for this, |
2423 | * so default to an old-fashioned 8259 | 2712 | * so default to an old-fashioned 8259 |
2424 | * interrupt if we can.. | 2713 | * interrupt if we can.. |
2425 | */ | 2714 | */ |
2426 | if (irq < 16) | 2715 | if (irq < NR_IRQS_LEGACY) |
2427 | make_8259A_irq(irq); | 2716 | make_8259A_irq(irq); |
2428 | else { | 2717 | else |
2429 | desc = irq_to_desc(irq); | ||
2430 | /* Strange. Oh, well.. */ | 2718 | /* Strange. Oh, well.. */ |
2431 | desc->chip = &no_irq_chip; | 2719 | desc->chip = &no_irq_chip; |
2432 | } | ||
2433 | } | 2720 | } |
2434 | } | 2721 | } |
2435 | } | 2722 | } |
@@ -2454,7 +2741,7 @@ static void unmask_lapic_irq(unsigned int irq) | |||
2454 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2741 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2455 | } | 2742 | } |
2456 | 2743 | ||
2457 | static void ack_lapic_irq (unsigned int irq) | 2744 | static void ack_lapic_irq(unsigned int irq) |
2458 | { | 2745 | { |
2459 | ack_APIC_irq(); | 2746 | ack_APIC_irq(); |
2460 | } | 2747 | } |
@@ -2466,11 +2753,8 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2466 | .ack = ack_lapic_irq, | 2753 | .ack = ack_lapic_irq, |
2467 | }; | 2754 | }; |
2468 | 2755 | ||
2469 | static void lapic_register_intr(int irq) | 2756 | static void lapic_register_intr(int irq, struct irq_desc *desc) |
2470 | { | 2757 | { |
2471 | struct irq_desc *desc; | ||
2472 | |||
2473 | desc = irq_to_desc(irq); | ||
2474 | desc->status &= ~IRQ_LEVEL; | 2758 | desc->status &= ~IRQ_LEVEL; |
2475 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2759 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
2476 | "edge"); | 2760 | "edge"); |
@@ -2574,22 +2858,20 @@ int timer_through_8259 __initdata; | |||
2574 | */ | 2858 | */ |
2575 | static inline void __init check_timer(void) | 2859 | static inline void __init check_timer(void) |
2576 | { | 2860 | { |
2577 | struct irq_cfg *cfg = irq_cfg(0); | 2861 | struct irq_desc *desc = irq_to_desc(0); |
2862 | struct irq_cfg *cfg = desc->chip_data; | ||
2863 | int cpu = boot_cpu_id; | ||
2578 | int apic1, pin1, apic2, pin2; | 2864 | int apic1, pin1, apic2, pin2; |
2579 | unsigned long flags; | 2865 | unsigned long flags; |
2580 | unsigned int ver; | ||
2581 | int no_pin1 = 0; | 2866 | int no_pin1 = 0; |
2582 | 2867 | ||
2583 | local_irq_save(flags); | 2868 | local_irq_save(flags); |
2584 | 2869 | ||
2585 | ver = apic_read(APIC_LVR); | ||
2586 | ver = GET_APIC_VERSION(ver); | ||
2587 | |||
2588 | /* | 2870 | /* |
2589 | * get/set the timer IRQ vector: | 2871 | * get/set the timer IRQ vector: |
2590 | */ | 2872 | */ |
2591 | disable_8259A_irq(0); | 2873 | disable_8259A_irq(0); |
2592 | assign_irq_vector(0, TARGET_CPUS); | 2874 | assign_irq_vector(0, cfg, apic->target_cpus()); |
2593 | 2875 | ||
2594 | /* | 2876 | /* |
2595 | * As IRQ0 is to be enabled in the 8259A, the virtual | 2877 | * As IRQ0 is to be enabled in the 8259A, the virtual |
@@ -2603,7 +2885,13 @@ static inline void __init check_timer(void) | |||
2603 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2885 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2604 | init_8259A(1); | 2886 | init_8259A(1); |
2605 | #ifdef CONFIG_X86_32 | 2887 | #ifdef CONFIG_X86_32 |
2606 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | 2888 | { |
2889 | unsigned int ver; | ||
2890 | |||
2891 | ver = apic_read(APIC_LVR); | ||
2892 | ver = GET_APIC_VERSION(ver); | ||
2893 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2894 | } | ||
2607 | #endif | 2895 | #endif |
2608 | 2896 | ||
2609 | pin1 = find_isa_irq_pin(0, mp_INT); | 2897 | pin1 = find_isa_irq_pin(0, mp_INT); |
@@ -2640,10 +2928,19 @@ static inline void __init check_timer(void) | |||
2640 | * Ok, does IRQ0 through the IOAPIC work? | 2928 | * Ok, does IRQ0 through the IOAPIC work? |
2641 | */ | 2929 | */ |
2642 | if (no_pin1) { | 2930 | if (no_pin1) { |
2643 | add_pin_to_irq(0, apic1, pin1); | 2931 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); |
2644 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2932 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
2933 | } else { | ||
2934 | /* for edge trigger, setup_IO_APIC_irq already | ||
2935 | * leaves it unmasked, | ||
2936 | * so we only need to unmask if it is level-triggered. | ||
2937 | * do we really have a level-triggered timer? | ||
2938 | */ | ||
2939 | int idx; | ||
2940 | idx = find_irq_entry(apic1, pin1, mp_INT); | ||
2941 | if (idx != -1 && irq_trigger(idx)) | ||
2942 | unmask_IO_APIC_irq_desc(desc); | ||
2645 | } | 2943 | } |
2646 | unmask_IO_APIC_irq(0); | ||
2647 | if (timer_irq_works()) { | 2944 | if (timer_irq_works()) { |
2648 | if (nmi_watchdog == NMI_IO_APIC) { | 2945 | if (nmi_watchdog == NMI_IO_APIC) { |
2649 | setup_nmi(); | 2946 | setup_nmi(); |
@@ -2657,6 +2954,7 @@ static inline void __init check_timer(void) | |||
2657 | if (intr_remapping_enabled) | 2954 | if (intr_remapping_enabled) |
2658 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | 2955 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); |
2659 | #endif | 2956 | #endif |
2957 | local_irq_disable(); | ||
2660 | clear_IO_APIC_pin(apic1, pin1); | 2958 | clear_IO_APIC_pin(apic1, pin1); |
2661 | if (!no_pin1) | 2959 | if (!no_pin1) |
2662 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | 2960 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
@@ -2669,9 +2967,8 @@ static inline void __init check_timer(void) | |||
2669 | /* | 2967 | /* |
2670 | * legacy devices should be connected to IO APIC #0 | 2968 | * legacy devices should be connected to IO APIC #0 |
2671 | */ | 2969 | */ |
2672 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | 2970 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); |
2673 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2971 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
2674 | unmask_IO_APIC_irq(0); | ||
2675 | enable_8259A_irq(0); | 2972 | enable_8259A_irq(0); |
2676 | if (timer_irq_works()) { | 2973 | if (timer_irq_works()) { |
2677 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2974 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
@@ -2686,6 +2983,7 @@ static inline void __init check_timer(void) | |||
2686 | /* | 2983 | /* |
2687 | * Cleanup, just in case ... | 2984 | * Cleanup, just in case ... |
2688 | */ | 2985 | */ |
2986 | local_irq_disable(); | ||
2689 | disable_8259A_irq(0); | 2987 | disable_8259A_irq(0); |
2690 | clear_IO_APIC_pin(apic2, pin2); | 2988 | clear_IO_APIC_pin(apic2, pin2); |
2691 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2989 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
@@ -2703,7 +3001,7 @@ static inline void __init check_timer(void) | |||
2703 | apic_printk(APIC_QUIET, KERN_INFO | 3001 | apic_printk(APIC_QUIET, KERN_INFO |
2704 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 3002 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
2705 | 3003 | ||
2706 | lapic_register_intr(0); | 3004 | lapic_register_intr(0, desc); |
2707 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 3005 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
2708 | enable_8259A_irq(0); | 3006 | enable_8259A_irq(0); |
2709 | 3007 | ||
@@ -2711,6 +3009,7 @@ static inline void __init check_timer(void) | |||
2711 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 3009 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2712 | goto out; | 3010 | goto out; |
2713 | } | 3011 | } |
3012 | local_irq_disable(); | ||
2714 | disable_8259A_irq(0); | 3013 | disable_8259A_irq(0); |
2715 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 3014 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
2716 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 3015 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
@@ -2728,6 +3027,7 @@ static inline void __init check_timer(void) | |||
2728 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 3027 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2729 | goto out; | 3028 | goto out; |
2730 | } | 3029 | } |
3030 | local_irq_disable(); | ||
2731 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | 3031 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2732 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | 3032 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
2733 | "report. Then try booting with the 'noapic' option.\n"); | 3033 | "report. Then try booting with the 'noapic' option.\n"); |
@@ -2828,8 +3128,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
2828 | 3128 | ||
2829 | spin_lock_irqsave(&ioapic_lock, flags); | 3129 | spin_lock_irqsave(&ioapic_lock, flags); |
2830 | reg_00.raw = io_apic_read(dev->id, 0); | 3130 | reg_00.raw = io_apic_read(dev->id, 0); |
2831 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { | 3131 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { |
2832 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; | 3132 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; |
2833 | io_apic_write(dev->id, 0, reg_00.raw); | 3133 | io_apic_write(dev->id, 0, reg_00.raw); |
2834 | } | 3134 | } |
2835 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3135 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2879,6 +3179,7 @@ static int __init ioapic_init_sysfs(void) | |||
2879 | 3179 | ||
2880 | device_initcall(ioapic_init_sysfs); | 3180 | device_initcall(ioapic_init_sysfs); |
2881 | 3181 | ||
3182 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
2882 | /* | 3183 | /* |
2883 | * Dynamic irq allocate and deallocation | 3184 | * Dynamic irq allocate and deallocation |
2884 | */ | 3185 | */ |
@@ -2888,22 +3189,26 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2888 | unsigned int irq; | 3189 | unsigned int irq; |
2889 | unsigned int new; | 3190 | unsigned int new; |
2890 | unsigned long flags; | 3191 | unsigned long flags; |
2891 | struct irq_cfg *cfg_new; | 3192 | struct irq_cfg *cfg_new = NULL; |
2892 | 3193 | int cpu = boot_cpu_id; | |
2893 | irq_want = nr_irqs - 1; | 3194 | struct irq_desc *desc_new = NULL; |
2894 | 3195 | ||
2895 | irq = 0; | 3196 | irq = 0; |
3197 | if (irq_want < nr_irqs_gsi) | ||
3198 | irq_want = nr_irqs_gsi; | ||
3199 | |||
2896 | spin_lock_irqsave(&vector_lock, flags); | 3200 | spin_lock_irqsave(&vector_lock, flags); |
2897 | for (new = irq_want; new > 0; new--) { | 3201 | for (new = irq_want; new < nr_irqs; new++) { |
2898 | if (platform_legacy_irq(new)) | 3202 | desc_new = irq_to_desc_alloc_cpu(new, cpu); |
3203 | if (!desc_new) { | ||
3204 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | ||
2899 | continue; | 3205 | continue; |
2900 | cfg_new = irq_cfg(new); | 3206 | } |
2901 | if (cfg_new && cfg_new->vector != 0) | 3207 | cfg_new = desc_new->chip_data; |
3208 | |||
3209 | if (cfg_new->vector != 0) | ||
2902 | continue; | 3210 | continue; |
2903 | /* check if need to create one */ | 3211 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
2904 | if (!cfg_new) | ||
2905 | cfg_new = irq_cfg_alloc(new); | ||
2906 | if (__assign_irq_vector(new, TARGET_CPUS) == 0) | ||
2907 | irq = new; | 3212 | irq = new; |
2908 | break; | 3213 | break; |
2909 | } | 3214 | } |
@@ -2911,15 +3216,20 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2911 | 3216 | ||
2912 | if (irq > 0) { | 3217 | if (irq > 0) { |
2913 | dynamic_irq_init(irq); | 3218 | dynamic_irq_init(irq); |
3219 | /* restore it, in case dynamic_irq_init cleared it */ | ||
3220 | if (desc_new) | ||
3221 | desc_new->chip_data = cfg_new; | ||
2914 | } | 3222 | } |
2915 | return irq; | 3223 | return irq; |
2916 | } | 3224 | } |
2917 | 3225 | ||
2918 | int create_irq(void) | 3226 | int create_irq(void) |
2919 | { | 3227 | { |
3228 | unsigned int irq_want; | ||
2920 | int irq; | 3229 | int irq; |
2921 | 3230 | ||
2922 | irq = create_irq_nr(nr_irqs - 1); | 3231 | irq_want = nr_irqs_gsi; |
3232 | irq = create_irq_nr(irq_want); | ||
2923 | 3233 | ||
2924 | if (irq == 0) | 3234 | if (irq == 0) |
2925 | irq = -1; | 3235 | irq = -1; |
@@ -2930,14 +3240,22 @@ int create_irq(void) | |||
2930 | void destroy_irq(unsigned int irq) | 3240 | void destroy_irq(unsigned int irq) |
2931 | { | 3241 | { |
2932 | unsigned long flags; | 3242 | unsigned long flags; |
3243 | struct irq_cfg *cfg; | ||
3244 | struct irq_desc *desc; | ||
2933 | 3245 | ||
3246 | /* store it, in case dynamic_irq_cleanup clears it */ | ||
3247 | desc = irq_to_desc(irq); | ||
3248 | cfg = desc->chip_data; | ||
2934 | dynamic_irq_cleanup(irq); | 3249 | dynamic_irq_cleanup(irq); |
3250 | /* connect the irq_cfg back */ | ||
3251 | if (desc) | ||
3252 | desc->chip_data = cfg; | ||
2935 | 3253 | ||
2936 | #ifdef CONFIG_INTR_REMAP | 3254 | #ifdef CONFIG_INTR_REMAP |
2937 | free_irte(irq); | 3255 | free_irte(irq); |
2938 | #endif | 3256 | #endif |
2939 | spin_lock_irqsave(&vector_lock, flags); | 3257 | spin_lock_irqsave(&vector_lock, flags); |
2940 | __clear_irq_vector(irq); | 3258 | __clear_irq_vector(irq, cfg); |
2941 | spin_unlock_irqrestore(&vector_lock, flags); | 3259 | spin_unlock_irqrestore(&vector_lock, flags); |
2942 | } | 3260 | } |
2943 | 3261 | ||
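destroy_irq() now brackets dynamic_irq_cleanup() with a save and restore of desc->chip_data, since the generic cleanup wipes the descriptor and would otherwise lose the irq_cfg that the vector teardown below still needs. A hedged sketch of that pattern, with hypothetical types standing in for irq_desc and irq_cfg:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical model: a cleanup helper that wipes the whole
     * descriptor, so the caller must stash and restore any side-band
     * pointer it owns. */
    struct desc { void *chip_data; int status; };

    static void dynamic_cleanup(struct desc *d)
    {
        memset(d, 0, sizeof(*d));     /* clears chip_data too */
    }

    static void destroy(struct desc *d)
    {
        void *cfg = d->chip_data;     /* store it before the wipe */
        dynamic_cleanup(d);
        d->chip_data = cfg;           /* connect the config back */
    }

    int main(void)
    {
        int cfg = 42;
        struct desc d = { &cfg, 1 };

        destroy(&d);
        printf("chip_data survived: %d\n", *(int *)d.chip_data);  /* 42 */
        return 0;
    }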
@@ -2950,16 +3268,16 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2950 | struct irq_cfg *cfg; | 3268 | struct irq_cfg *cfg; |
2951 | int err; | 3269 | int err; |
2952 | unsigned dest; | 3270 | unsigned dest; |
2953 | cpumask_t tmp; | ||
2954 | 3271 | ||
2955 | tmp = TARGET_CPUS; | 3272 | if (disable_apic) |
2956 | err = assign_irq_vector(irq, tmp); | 3273 | return -ENXIO; |
3274 | |||
3275 | cfg = irq_cfg(irq); | ||
3276 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | ||
2957 | if (err) | 3277 | if (err) |
2958 | return err; | 3278 | return err; |
2959 | 3279 | ||
2960 | cfg = irq_cfg(irq); | 3280 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
2961 | cpus_and(tmp, cfg->domain, tmp); | ||
2962 | dest = cpu_mask_to_apicid(tmp); | ||
2963 | 3281 | ||
2964 | #ifdef CONFIG_INTR_REMAP | 3282 | #ifdef CONFIG_INTR_REMAP |
2965 | if (irq_remapped(irq)) { | 3283 | if (irq_remapped(irq)) { |
@@ -2973,9 +3291,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2973 | memset (&irte, 0, sizeof(irte)); | 3291 | memset (&irte, 0, sizeof(irte)); |
2974 | 3292 | ||
2975 | irte.present = 1; | 3293 | irte.present = 1; |
2976 | irte.dst_mode = INT_DEST_MODE; | 3294 | irte.dst_mode = apic->irq_dest_mode; |
2977 | irte.trigger_mode = 0; /* edge */ | 3295 | irte.trigger_mode = 0; /* edge */ |
2978 | irte.dlvry_mode = INT_DELIVERY_MODE; | 3296 | irte.dlvry_mode = apic->irq_delivery_mode; |
2979 | irte.vector = cfg->vector; | 3297 | irte.vector = cfg->vector; |
2980 | irte.dest_id = IRTE_DEST(dest); | 3298 | irte.dest_id = IRTE_DEST(dest); |
2981 | 3299 | ||
@@ -2993,10 +3311,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2993 | msg->address_hi = MSI_ADDR_BASE_HI; | 3311 | msg->address_hi = MSI_ADDR_BASE_HI; |
2994 | msg->address_lo = | 3312 | msg->address_lo = |
2995 | MSI_ADDR_BASE_LO | | 3313 | MSI_ADDR_BASE_LO | |
2996 | ((INT_DEST_MODE == 0) ? | 3314 | ((apic->irq_dest_mode == 0) ? |
2997 | MSI_ADDR_DEST_MODE_PHYSICAL: | 3315 | MSI_ADDR_DEST_MODE_PHYSICAL: |
2998 | MSI_ADDR_DEST_MODE_LOGICAL) | | 3316 | MSI_ADDR_DEST_MODE_LOGICAL) | |
2999 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3317 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
3000 | MSI_ADDR_REDIRECTION_CPU: | 3318 | MSI_ADDR_REDIRECTION_CPU: |
3001 | MSI_ADDR_REDIRECTION_LOWPRI) | | 3319 | MSI_ADDR_REDIRECTION_LOWPRI) | |
3002 | MSI_ADDR_DEST_ID(dest); | 3320 | MSI_ADDR_DEST_ID(dest); |
@@ -3004,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3004 | msg->data = | 3322 | msg->data = |
3005 | MSI_DATA_TRIGGER_EDGE | | 3323 | MSI_DATA_TRIGGER_EDGE | |
3006 | MSI_DATA_LEVEL_ASSERT | | 3324 | MSI_DATA_LEVEL_ASSERT | |
3007 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3325 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
3008 | MSI_DATA_DELIVERY_FIXED: | 3326 | MSI_DATA_DELIVERY_FIXED: |
3009 | MSI_DATA_DELIVERY_LOWPRI) | | 3327 | MSI_DATA_DELIVERY_LOWPRI) | |
3010 | MSI_DATA_VECTOR(cfg->vector); | 3328 | MSI_DATA_VECTOR(cfg->vector); |
@@ -3013,64 +3331,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3013 | } | 3331 | } |
3014 | 3332 | ||
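msi_compose_msg() now pulls destination and delivery mode from the per-driver apic structure rather than the compile-time INT_DEST_MODE/INT_DELIVERY_MODE macros, but the bit packing is unchanged. A standalone sketch of how those fields land in the two MSI words (constant values follow the usual x86 msidef.h layout; treat them as illustrative rather than authoritative):

    #include <stdio.h>
    #include <stdint.h>

    #define MSI_ADDR_BASE_LO           0xfee00000u
    #define MSI_ADDR_DEST_MODE_LOGICAL (1u << 2)
    #define MSI_ADDR_REDIR_LOWPRI      (1u << 3)
    #define MSI_ADDR_DEST_ID(d)        (((uint32_t)(d) & 0xffu) << 12)

    #define MSI_DATA_LEVEL_ASSERT      (1u << 14)
    #define MSI_DATA_DELIVERY_LOWPRI   (1u << 8)
    #define MSI_DATA_VECTOR(v)         ((uint32_t)(v) & 0xffu)

    int main(void)
    {
        /* Stand-ins for apic->irq_dest_mode, apic->irq_delivery_mode,
         * cpu_mask_to_apicid_and() and cfg->vector. */
        int logical = 1, lowest_prio = 1;
        uint8_t dest = 0x03, vec = 0x61;

        uint32_t addr = MSI_ADDR_BASE_LO
                      | (logical     ? MSI_ADDR_DEST_MODE_LOGICAL : 0)
                      | (lowest_prio ? MSI_ADDR_REDIR_LOWPRI : 0)
                      | MSI_ADDR_DEST_ID(dest);
        uint32_t data = MSI_DATA_LEVEL_ASSERT        /* edge: no trigger bit */
                      | (lowest_prio ? MSI_DATA_DELIVERY_LOWPRI : 0)
                      | MSI_DATA_VECTOR(vec);

        printf("address_lo=%#010x data=%#06x\n",
               (unsigned int)addr, (unsigned int)data);
        /* -> address_lo=0xfee0300c data=0x4161 */
        return 0;
    }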
3015 | #ifdef CONFIG_SMP | 3333 | #ifdef CONFIG_SMP |
3016 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3334 | static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3017 | { | 3335 | { |
3336 | struct irq_desc *desc = irq_to_desc(irq); | ||
3018 | struct irq_cfg *cfg; | 3337 | struct irq_cfg *cfg; |
3019 | struct msi_msg msg; | 3338 | struct msi_msg msg; |
3020 | unsigned int dest; | 3339 | unsigned int dest; |
3021 | cpumask_t tmp; | ||
3022 | struct irq_desc *desc; | ||
3023 | |||
3024 | cpus_and(tmp, mask, cpu_online_map); | ||
3025 | if (cpus_empty(tmp)) | ||
3026 | return; | ||
3027 | 3340 | ||
3028 | if (assign_irq_vector(irq, mask)) | 3341 | dest = set_desc_affinity(desc, mask); |
3342 | if (dest == BAD_APICID) | ||
3029 | return; | 3343 | return; |
3030 | 3344 | ||
3031 | cfg = irq_cfg(irq); | 3345 | cfg = desc->chip_data; |
3032 | cpus_and(tmp, cfg->domain, mask); | ||
3033 | dest = cpu_mask_to_apicid(tmp); | ||
3034 | 3346 | ||
3035 | read_msi_msg(irq, &msg); | 3347 | read_msi_msg_desc(desc, &msg); |
3036 | 3348 | ||
3037 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3349 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
3038 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3350 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
3039 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3351 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
3040 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3352 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3041 | 3353 | ||
3042 | write_msi_msg(irq, &msg); | 3354 | write_msi_msg_desc(desc, &msg); |
3043 | desc = irq_to_desc(irq); | ||
3044 | desc->affinity = mask; | ||
3045 | } | 3355 | } |
3046 | |||
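Each of the set_*_irq_affinity() variants in this file used to open-code the same prologue: intersect the mask with the online CPUs, assign a vector, compute the destination APIC ID, and record desc->affinity. The new set_desc_affinity() helper centralizes that and returns BAD_APICID when the caller should bail before touching hardware. A hypothetical model of the helper's contract, using bitmask cpumasks and invented field names:

    #include <stdio.h>

    #define BAD_APICID 0xffu

    /* Invented stand-in for struct irq_desc: online mask, recorded
     * affinity, and the APIC ID the vector assignment resolved to. */
    struct desc { unsigned int online, affinity, apicid; };

    static unsigned int set_desc_affinity(struct desc *d, unsigned int mask)
    {
        unsigned int eff = mask & d->online;

        if (!eff)
            return BAD_APICID;  /* nothing online in the mask: caller bails */
        d->affinity = eff;      /* bookkeeping every caller used to repeat */
        return d->apicid;       /* destination for the caller to program */
    }

    int main(void)
    {
        struct desc d = { 0x0f, 0, 2 };
        unsigned int dest = set_desc_affinity(&d, 0x06);

        if (dest == BAD_APICID)
            return 1;
        printf("reprogram route to apicid %u (affinity %#x)\n",
               dest, d.affinity);
        return 0;
    }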
3047 | #ifdef CONFIG_INTR_REMAP | 3356 | #ifdef CONFIG_INTR_REMAP |
3048 | /* | 3357 | /* |
3049 | * Migrate the MSI irq to another cpumask. This migration is | 3358 | * Migrate the MSI irq to another cpumask. This migration is |
3050 | * done in the process context using interrupt-remapping hardware. | 3359 | * done in the process context using interrupt-remapping hardware. |
3051 | */ | 3360 | */ |
3052 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3361 | static void |
3362 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
3053 | { | 3363 | { |
3054 | struct irq_cfg *cfg; | 3364 | struct irq_desc *desc = irq_to_desc(irq); |
3365 | struct irq_cfg *cfg = desc->chip_data; | ||
3055 | unsigned int dest; | 3366 | unsigned int dest; |
3056 | cpumask_t tmp, cleanup_mask; | ||
3057 | struct irte irte; | 3367 | struct irte irte; |
3058 | struct irq_desc *desc; | ||
3059 | |||
3060 | cpus_and(tmp, mask, cpu_online_map); | ||
3061 | if (cpus_empty(tmp)) | ||
3062 | return; | ||
3063 | 3368 | ||
3064 | if (get_irte(irq, &irte)) | 3369 | if (get_irte(irq, &irte)) |
3065 | return; | 3370 | return; |
3066 | 3371 | ||
3067 | if (assign_irq_vector(irq, mask)) | 3372 | dest = set_desc_affinity(desc, mask); |
3373 | if (dest == BAD_APICID) | ||
3068 | return; | 3374 | return; |
3069 | 3375 | ||
3070 | cfg = irq_cfg(irq); | ||
3071 | cpus_and(tmp, cfg->domain, mask); | ||
3072 | dest = cpu_mask_to_apicid(tmp); | ||
3073 | |||
3074 | irte.vector = cfg->vector; | 3376 | irte.vector = cfg->vector; |
3075 | irte.dest_id = IRTE_DEST(dest); | 3377 | irte.dest_id = IRTE_DEST(dest); |
3076 | 3378 | ||
@@ -3084,16 +3386,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
3084 | * at the new destination. So, time to cleanup the previous | 3386 | * at the new destination. So, time to cleanup the previous |
3085 | * vector allocation. | 3387 | * vector allocation. |
3086 | */ | 3388 | */ |
3087 | if (cfg->move_in_progress) { | 3389 | if (cfg->move_in_progress) |
3088 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 3390 | send_cleanup_vector(cfg); |
3089 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
3090 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
3091 | cfg->move_in_progress = 0; | ||
3092 | } | ||
3093 | |||
3094 | desc = irq_to_desc(irq); | ||
3095 | desc->affinity = mask; | ||
3096 | } | 3391 | } |
3392 | |||
3097 | #endif | 3393 | #endif |
3098 | #endif /* CONFIG_SMP */ | 3394 | #endif /* CONFIG_SMP */ |
3099 | 3395 | ||
@@ -3152,7 +3448,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
3152 | } | 3448 | } |
3153 | #endif | 3449 | #endif |
3154 | 3450 | ||
3155 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | 3451 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
3156 | { | 3452 | { |
3157 | int ret; | 3453 | int ret; |
3158 | struct msi_msg msg; | 3454 | struct msi_msg msg; |
@@ -3161,7 +3457,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3161 | if (ret < 0) | 3457 | if (ret < 0) |
3162 | return ret; | 3458 | return ret; |
3163 | 3459 | ||
3164 | set_irq_msi(irq, desc); | 3460 | set_irq_msi(irq, msidesc); |
3165 | write_msi_msg(irq, &msg); | 3461 | write_msi_msg(irq, &msg); |
3166 | 3462 | ||
3167 | #ifdef CONFIG_INTR_REMAP | 3463 | #ifdef CONFIG_INTR_REMAP |
@@ -3181,58 +3477,11 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3181 | return 0; | 3477 | return 0; |
3182 | } | 3478 | } |
3183 | 3479 | ||
3184 | static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) | ||
3185 | { | ||
3186 | unsigned int irq; | ||
3187 | |||
3188 | irq = dev->bus->number; | ||
3189 | irq <<= 8; | ||
3190 | irq |= dev->devfn; | ||
3191 | irq <<= 12; | ||
3192 | |||
3193 | return irq; | ||
3194 | } | ||
3195 | |||
3196 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
3197 | { | ||
3198 | unsigned int irq; | ||
3199 | int ret; | ||
3200 | unsigned int irq_want; | ||
3201 | |||
3202 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | ||
3203 | |||
3204 | irq = create_irq_nr(irq_want); | ||
3205 | if (irq == 0) | ||
3206 | return -1; | ||
3207 | |||
3208 | #ifdef CONFIG_INTR_REMAP | ||
3209 | if (!intr_remapping_enabled) | ||
3210 | goto no_ir; | ||
3211 | |||
3212 | ret = msi_alloc_irte(dev, irq, 1); | ||
3213 | if (ret < 0) | ||
3214 | goto error; | ||
3215 | no_ir: | ||
3216 | #endif | ||
3217 | ret = setup_msi_irq(dev, desc, irq); | ||
3218 | if (ret < 0) { | ||
3219 | destroy_irq(irq); | ||
3220 | return ret; | ||
3221 | } | ||
3222 | return 0; | ||
3223 | |||
3224 | #ifdef CONFIG_INTR_REMAP | ||
3225 | error: | ||
3226 | destroy_irq(irq); | ||
3227 | return ret; | ||
3228 | #endif | ||
3229 | } | ||
3230 | |||
3231 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 3480 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
3232 | { | 3481 | { |
3233 | unsigned int irq; | 3482 | unsigned int irq; |
3234 | int ret, sub_handle; | 3483 | int ret, sub_handle; |
3235 | struct msi_desc *desc; | 3484 | struct msi_desc *msidesc; |
3236 | unsigned int irq_want; | 3485 | unsigned int irq_want; |
3237 | 3486 | ||
3238 | #ifdef CONFIG_INTR_REMAP | 3487 | #ifdef CONFIG_INTR_REMAP |
@@ -3240,12 +3489,13 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3240 | int index = 0; | 3489 | int index = 0; |
3241 | #endif | 3490 | #endif |
3242 | 3491 | ||
3243 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | 3492 | irq_want = nr_irqs_gsi; |
3244 | sub_handle = 0; | 3493 | sub_handle = 0; |
3245 | list_for_each_entry(desc, &dev->msi_list, list) { | 3494 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
3246 | irq = create_irq_nr(irq_want--); | 3495 | irq = create_irq_nr(irq_want); |
3247 | if (irq == 0) | 3496 | if (irq == 0) |
3248 | return -1; | 3497 | return -1; |
3498 | irq_want = irq + 1; | ||
3249 | #ifdef CONFIG_INTR_REMAP | 3499 | #ifdef CONFIG_INTR_REMAP |
3250 | if (!intr_remapping_enabled) | 3500 | if (!intr_remapping_enabled) |
3251 | goto no_ir; | 3501 | goto no_ir; |
@@ -3275,7 +3525,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3275 | } | 3525 | } |
3276 | no_ir: | 3526 | no_ir: |
3277 | #endif | 3527 | #endif |
3278 | ret = setup_msi_irq(dev, desc, irq); | 3528 | ret = setup_msi_irq(dev, msidesc, irq); |
3279 | if (ret < 0) | 3529 | if (ret < 0) |
3280 | goto error; | 3530 | goto error; |
3281 | sub_handle++; | 3531 | sub_handle++; |
@@ -3294,24 +3544,18 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
3294 | 3544 | ||
3295 | #ifdef CONFIG_DMAR | 3545 | #ifdef CONFIG_DMAR |
3296 | #ifdef CONFIG_SMP | 3546 | #ifdef CONFIG_SMP |
3297 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3547 | static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3298 | { | 3548 | { |
3549 | struct irq_desc *desc = irq_to_desc(irq); | ||
3299 | struct irq_cfg *cfg; | 3550 | struct irq_cfg *cfg; |
3300 | struct msi_msg msg; | 3551 | struct msi_msg msg; |
3301 | unsigned int dest; | 3552 | unsigned int dest; |
3302 | cpumask_t tmp; | ||
3303 | struct irq_desc *desc; | ||
3304 | 3553 | ||
3305 | cpus_and(tmp, mask, cpu_online_map); | 3554 | dest = set_desc_affinity(desc, mask); |
3306 | if (cpus_empty(tmp)) | 3555 | if (dest == BAD_APICID) |
3307 | return; | 3556 | return; |
3308 | 3557 | ||
3309 | if (assign_irq_vector(irq, mask)) | 3558 | cfg = desc->chip_data; |
3310 | return; | ||
3311 | |||
3312 | cfg = irq_cfg(irq); | ||
3313 | cpus_and(tmp, cfg->domain, mask); | ||
3314 | dest = cpu_mask_to_apicid(tmp); | ||
3315 | 3559 | ||
3316 | dmar_msi_read(irq, &msg); | 3560 | dmar_msi_read(irq, &msg); |
3317 | 3561 | ||
@@ -3321,9 +3565,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3321 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3565 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3322 | 3566 | ||
3323 | dmar_msi_write(irq, &msg); | 3567 | dmar_msi_write(irq, &msg); |
3324 | desc = irq_to_desc(irq); | ||
3325 | desc->affinity = mask; | ||
3326 | } | 3568 | } |
3569 | |||
3327 | #endif /* CONFIG_SMP */ | 3570 | #endif /* CONFIG_SMP */ |
3328 | 3571 | ||
3329 | struct irq_chip dmar_msi_type = { | 3572 | struct irq_chip dmar_msi_type = { |
@@ -3355,24 +3598,18 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3355 | #ifdef CONFIG_HPET_TIMER | 3598 | #ifdef CONFIG_HPET_TIMER |
3356 | 3599 | ||
3357 | #ifdef CONFIG_SMP | 3600 | #ifdef CONFIG_SMP |
3358 | static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3601 | static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3359 | { | 3602 | { |
3603 | struct irq_desc *desc = irq_to_desc(irq); | ||
3360 | struct irq_cfg *cfg; | 3604 | struct irq_cfg *cfg; |
3361 | struct irq_desc *desc; | ||
3362 | struct msi_msg msg; | 3605 | struct msi_msg msg; |
3363 | unsigned int dest; | 3606 | unsigned int dest; |
3364 | cpumask_t tmp; | ||
3365 | 3607 | ||
3366 | cpus_and(tmp, mask, cpu_online_map); | 3608 | dest = set_desc_affinity(desc, mask); |
3367 | if (cpus_empty(tmp)) | 3609 | if (dest == BAD_APICID) |
3368 | return; | 3610 | return; |
3369 | 3611 | ||
3370 | if (assign_irq_vector(irq, mask)) | 3612 | cfg = desc->chip_data; |
3371 | return; | ||
3372 | |||
3373 | cfg = irq_cfg(irq); | ||
3374 | cpus_and(tmp, cfg->domain, mask); | ||
3375 | dest = cpu_mask_to_apicid(tmp); | ||
3376 | 3613 | ||
3377 | hpet_msi_read(irq, &msg); | 3614 | hpet_msi_read(irq, &msg); |
3378 | 3615 | ||
@@ -3382,9 +3619,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3382 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3619 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3383 | 3620 | ||
3384 | hpet_msi_write(irq, &msg); | 3621 | hpet_msi_write(irq, &msg); |
3385 | desc = irq_to_desc(irq); | ||
3386 | desc->affinity = mask; | ||
3387 | } | 3622 | } |
3623 | |||
3388 | #endif /* CONFIG_SMP */ | 3624 | #endif /* CONFIG_SMP */ |
3389 | 3625 | ||
3390 | struct irq_chip hpet_msi_type = { | 3626 | struct irq_chip hpet_msi_type = { |
@@ -3437,28 +3673,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
3437 | write_ht_irq_msg(irq, &msg); | 3673 | write_ht_irq_msg(irq, &msg); |
3438 | } | 3674 | } |
3439 | 3675 | ||
3440 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | 3676 | static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3441 | { | 3677 | { |
3678 | struct irq_desc *desc = irq_to_desc(irq); | ||
3442 | struct irq_cfg *cfg; | 3679 | struct irq_cfg *cfg; |
3443 | unsigned int dest; | 3680 | unsigned int dest; |
3444 | cpumask_t tmp; | ||
3445 | struct irq_desc *desc; | ||
3446 | 3681 | ||
3447 | cpus_and(tmp, mask, cpu_online_map); | 3682 | dest = set_desc_affinity(desc, mask); |
3448 | if (cpus_empty(tmp)) | 3683 | if (dest == BAD_APICID) |
3449 | return; | 3684 | return; |
3450 | 3685 | ||
3451 | if (assign_irq_vector(irq, mask)) | 3686 | cfg = desc->chip_data; |
3452 | return; | ||
3453 | |||
3454 | cfg = irq_cfg(irq); | ||
3455 | cpus_and(tmp, cfg->domain, mask); | ||
3456 | dest = cpu_mask_to_apicid(tmp); | ||
3457 | 3687 | ||
3458 | target_ht_irq(irq, dest, cfg->vector); | 3688 | target_ht_irq(irq, dest, cfg->vector); |
3459 | desc = irq_to_desc(irq); | ||
3460 | desc->affinity = mask; | ||
3461 | } | 3689 | } |
3690 | |||
3462 | #endif | 3691 | #endif |
3463 | 3692 | ||
3464 | static struct irq_chip ht_irq_chip = { | 3693 | static struct irq_chip ht_irq_chip = { |
@@ -3476,17 +3705,18 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3476 | { | 3705 | { |
3477 | struct irq_cfg *cfg; | 3706 | struct irq_cfg *cfg; |
3478 | int err; | 3707 | int err; |
3479 | cpumask_t tmp; | ||
3480 | 3708 | ||
3481 | tmp = TARGET_CPUS; | 3709 | if (disable_apic) |
3482 | err = assign_irq_vector(irq, tmp); | 3710 | return -ENXIO; |
3711 | |||
3712 | cfg = irq_cfg(irq); | ||
3713 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | ||
3483 | if (!err) { | 3714 | if (!err) { |
3484 | struct ht_irq_msg msg; | 3715 | struct ht_irq_msg msg; |
3485 | unsigned dest; | 3716 | unsigned dest; |
3486 | 3717 | ||
3487 | cfg = irq_cfg(irq); | 3718 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, |
3488 | cpus_and(tmp, cfg->domain, tmp); | 3719 | apic->target_cpus()); |
3489 | dest = cpu_mask_to_apicid(tmp); | ||
3490 | 3720 | ||
3491 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3721 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
3492 | 3722 | ||
@@ -3494,11 +3724,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3494 | HT_IRQ_LOW_BASE | | 3724 | HT_IRQ_LOW_BASE | |
3495 | HT_IRQ_LOW_DEST_ID(dest) | | 3725 | HT_IRQ_LOW_DEST_ID(dest) | |
3496 | HT_IRQ_LOW_VECTOR(cfg->vector) | | 3726 | HT_IRQ_LOW_VECTOR(cfg->vector) | |
3497 | ((INT_DEST_MODE == 0) ? | 3727 | ((apic->irq_dest_mode == 0) ? |
3498 | HT_IRQ_LOW_DM_PHYSICAL : | 3728 | HT_IRQ_LOW_DM_PHYSICAL : |
3499 | HT_IRQ_LOW_DM_LOGICAL) | | 3729 | HT_IRQ_LOW_DM_LOGICAL) | |
3500 | HT_IRQ_LOW_RQEOI_EDGE | | 3730 | HT_IRQ_LOW_RQEOI_EDGE | |
3501 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3731 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
3502 | HT_IRQ_LOW_MT_FIXED : | 3732 | HT_IRQ_LOW_MT_FIXED : |
3503 | HT_IRQ_LOW_MT_ARBITRATED) | | 3733 | HT_IRQ_LOW_MT_ARBITRATED) | |
3504 | HT_IRQ_LOW_IRQ_MASKED; | 3734 | HT_IRQ_LOW_IRQ_MASKED; |
@@ -3514,7 +3744,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3514 | } | 3744 | } |
3515 | #endif /* CONFIG_HT_IRQ */ | 3745 | #endif /* CONFIG_HT_IRQ */ |
3516 | 3746 | ||
3517 | #ifdef CONFIG_X86_64 | 3747 | #ifdef CONFIG_X86_UV |
3518 | /* | 3748 | /* |
3519 | * Re-target the irq to the specified CPU and enable the specified MMR located | 3749 | * Re-target the irq to the specified CPU and enable the specified MMR located |
3520 | * on the specified blade to allow the sending of MSIs to the specified CPU. | 3750 | * on the specified blade to allow the sending of MSIs to the specified CPU. |
@@ -3522,7 +3752,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3522 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 3752 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
3523 | unsigned long mmr_offset) | 3753 | unsigned long mmr_offset) |
3524 | { | 3754 | { |
3525 | const cpumask_t *eligible_cpu = get_cpu_mask(cpu); | 3755 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
3526 | struct irq_cfg *cfg; | 3756 | struct irq_cfg *cfg; |
3527 | int mmr_pnode; | 3757 | int mmr_pnode; |
3528 | unsigned long mmr_value; | 3758 | unsigned long mmr_value; |
@@ -3530,7 +3760,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3530 | unsigned long flags; | 3760 | unsigned long flags; |
3531 | int err; | 3761 | int err; |
3532 | 3762 | ||
3533 | err = assign_irq_vector(irq, *eligible_cpu); | 3763 | cfg = irq_cfg(irq); |
3764 | |||
3765 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3534 | if (err != 0) | 3766 | if (err != 0) |
3535 | return err; | 3767 | return err; |
3536 | 3768 | ||
@@ -3539,19 +3771,17 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3539 | irq_name); | 3771 | irq_name); |
3540 | spin_unlock_irqrestore(&vector_lock, flags); | 3772 | spin_unlock_irqrestore(&vector_lock, flags); |
3541 | 3773 | ||
3542 | cfg = irq_cfg(irq); | ||
3543 | |||
3544 | mmr_value = 0; | 3774 | mmr_value = 0; |
3545 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | 3775 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; |
3546 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | 3776 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); |
3547 | 3777 | ||
3548 | entry->vector = cfg->vector; | 3778 | entry->vector = cfg->vector; |
3549 | entry->delivery_mode = INT_DELIVERY_MODE; | 3779 | entry->delivery_mode = apic->irq_delivery_mode; |
3550 | entry->dest_mode = INT_DEST_MODE; | 3780 | entry->dest_mode = apic->irq_dest_mode; |
3551 | entry->polarity = 0; | 3781 | entry->polarity = 0; |
3552 | entry->trigger = 0; | 3782 | entry->trigger = 0; |
3553 | entry->mask = 0; | 3783 | entry->mask = 0; |
3554 | entry->dest = cpu_mask_to_apicid(*eligible_cpu); | 3784 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); |
3555 | 3785 | ||
3556 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3786 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3557 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3787 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
@@ -3592,31 +3822,50 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3592 | return reg_01.bits.entries; | 3822 | return reg_01.bits.entries; |
3593 | } | 3823 | } |
3594 | 3824 | ||
3595 | int __init probe_nr_irqs(void) | 3825 | void __init probe_nr_irqs_gsi(void) |
3596 | { | 3826 | { |
3597 | int idx; | ||
3598 | int nr = 0; | 3827 | int nr = 0; |
3599 | #ifndef CONFIG_XEN | ||
3600 | int nr_min = 32; | ||
3601 | #else | ||
3602 | int nr_min = NR_IRQS; | ||
3603 | #endif | ||
3604 | 3828 | ||
3605 | for (idx = 0; idx < nr_ioapics; idx++) | 3829 | nr = acpi_probe_gsi(); |
3606 | nr += io_apic_get_redir_entries(idx) + 1; | 3830 | if (nr > nr_irqs_gsi) { |
3831 | nr_irqs_gsi = nr; | ||
3832 | } else { | ||
3833 | /* for acpi=off, or when ACPI is not compiled in */ | ||
3834 | int idx; | ||
3607 | 3835 | ||
3608 | /* double it for hotplug and msi and nmi */ | 3836 | nr = 0; |
3609 | nr <<= 1; | 3837 | for (idx = 0; idx < nr_ioapics; idx++) |
3838 | nr += io_apic_get_redir_entries(idx) + 1; | ||
3610 | 3839 | ||
3611 | /* something wrong ? */ | 3840 | if (nr > nr_irqs_gsi) |
3612 | if (nr < nr_min) | 3841 | nr_irqs_gsi = nr; |
3613 | nr = nr_min; | 3842 | } |
3614 | if (WARN_ON(nr > NR_IRQS)) | ||
3615 | nr = NR_IRQS; | ||
3616 | 3843 | ||
3617 | return nr; | 3844 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
3618 | } | 3845 | } |
3619 | 3846 | ||
3847 | #ifdef CONFIG_SPARSE_IRQ | ||
3848 | int __init arch_probe_nr_irqs(void) | ||
3849 | { | ||
3850 | int nr; | ||
3851 | |||
3852 | if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) | ||
3853 | nr_irqs = NR_VECTORS * nr_cpu_ids; | ||
3854 | |||
3855 | nr = nr_irqs_gsi + 8 * nr_cpu_ids; | ||
3856 | #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) | ||
3857 | /* | ||
3858 | * for MSI and HT dyn irq | ||
3859 | */ | ||
3860 | nr += nr_irqs_gsi * 16; | ||
3861 | #endif | ||
3862 | if (nr < nr_irqs) | ||
3863 | nr_irqs = nr; | ||
3864 | |||
3865 | return 0; | ||
3866 | } | ||
3867 | #endif | ||
3868 | |||
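The arithmetic in arch_probe_nr_irqs() caps nr_irqs at NR_VECTORS * nr_cpu_ids, then shrinks it to the GSI count plus per-CPU slack plus 16 dynamic (MSI/HT) irqs per GSI, whichever is smaller. Worked through with illustrative figures:

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative box: 8 CPUs, two IO-APICs of 24 pins each. */
        int nr_cpu_ids  = 8;
        int NR_VECTORS  = 256;
        int nr_irqs_gsi = 48;

        int nr_irqs = NR_VECTORS * nr_cpu_ids;   /* hard upper bound: 2048 */
        int nr = nr_irqs_gsi + 8 * nr_cpu_ids;   /* GSIs + per-cpu slack */
        nr += nr_irqs_gsi * 16;                  /* MSI/HT dynamic irqs */
        if (nr < nr_irqs)
            nr_irqs = nr;

        printf("nr_irqs = %d\n", nr_irqs);       /* 48 + 64 + 768 = 880 */
        return 0;
    }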
3620 | /* -------------------------------------------------------------------------- | 3869 | /* -------------------------------------------------------------------------- |
3621 | ACPI-based IOAPIC Configuration | 3870 | ACPI-based IOAPIC Configuration |
3622 | -------------------------------------------------------------------------- */ | 3871 | -------------------------------------------------------------------------- */ |
@@ -3642,7 +3891,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3642 | */ | 3891 | */ |
3643 | 3892 | ||
3644 | if (physids_empty(apic_id_map)) | 3893 | if (physids_empty(apic_id_map)) |
3645 | apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); | 3894 | apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); |
3646 | 3895 | ||
3647 | spin_lock_irqsave(&ioapic_lock, flags); | 3896 | spin_lock_irqsave(&ioapic_lock, flags); |
3648 | reg_00.raw = io_apic_read(ioapic, 0); | 3897 | reg_00.raw = io_apic_read(ioapic, 0); |
@@ -3658,10 +3907,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3658 | * Every APIC in a system must have a unique ID or we get lots of nice | 3907 | * Every APIC in a system must have a unique ID or we get lots of nice |
3659 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 3908 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
3660 | */ | 3909 | */ |
3661 | if (check_apicid_used(apic_id_map, apic_id)) { | 3910 | if (apic->check_apicid_used(apic_id_map, apic_id)) { |
3662 | 3911 | ||
3663 | for (i = 0; i < get_physical_broadcast(); i++) { | 3912 | for (i = 0; i < get_physical_broadcast(); i++) { |
3664 | if (!check_apicid_used(apic_id_map, i)) | 3913 | if (!apic->check_apicid_used(apic_id_map, i)) |
3665 | break; | 3914 | break; |
3666 | } | 3915 | } |
3667 | 3916 | ||
@@ -3674,7 +3923,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3674 | apic_id = i; | 3923 | apic_id = i; |
3675 | } | 3924 | } |
3676 | 3925 | ||
3677 | tmp = apicid_to_cpu_present(apic_id); | 3926 | tmp = apic->apicid_to_cpu_present(apic_id); |
3678 | physids_or(apic_id_map, apic_id_map, tmp); | 3927 | physids_or(apic_id_map, apic_id_map, tmp); |
3679 | 3928 | ||
3680 | if (reg_00.bits.ID != apic_id) { | 3929 | if (reg_00.bits.ID != apic_id) { |
@@ -3713,19 +3962,31 @@ int __init io_apic_get_version(int ioapic) | |||
3713 | 3962 | ||
3714 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) | 3963 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) |
3715 | { | 3964 | { |
3965 | struct irq_desc *desc; | ||
3966 | struct irq_cfg *cfg; | ||
3967 | int cpu = boot_cpu_id; | ||
3968 | |||
3716 | if (!IO_APIC_IRQ(irq)) { | 3969 | if (!IO_APIC_IRQ(irq)) { |
3717 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | 3970 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
3718 | ioapic); | 3971 | ioapic); |
3719 | return -EINVAL; | 3972 | return -EINVAL; |
3720 | } | 3973 | } |
3721 | 3974 | ||
3975 | desc = irq_to_desc_alloc_cpu(irq, cpu); | ||
3976 | if (!desc) { | ||
3977 | printk(KERN_INFO "cannot get irq_desc %d\n", irq); | ||
3978 | return 0; | ||
3979 | } | ||
3980 | |||
3722 | /* | 3981 | /* |
3723 | * IRQs < 16 are already in the irq_2_pin[] map | 3982 | * IRQs < 16 are already in the irq_2_pin[] map |
3724 | */ | 3983 | */ |
3725 | if (irq >= 16) | 3984 | if (irq >= NR_IRQS_LEGACY) { |
3726 | add_pin_to_irq(irq, ioapic, pin); | 3985 | cfg = desc->chip_data; |
3986 | add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); | ||
3987 | } | ||
3727 | 3988 | ||
3728 | setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); | 3989 | setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); |
3729 | 3990 | ||
3730 | return 0; | 3991 | return 0; |
3731 | } | 3992 | } |
@@ -3739,8 +4000,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
3739 | return -1; | 4000 | return -1; |
3740 | 4001 | ||
3741 | for (i = 0; i < mp_irq_entries; i++) | 4002 | for (i = 0; i < mp_irq_entries; i++) |
3742 | if (mp_irqs[i].mp_irqtype == mp_INT && | 4003 | if (mp_irqs[i].irqtype == mp_INT && |
3743 | mp_irqs[i].mp_srcbusirq == bus_irq) | 4004 | mp_irqs[i].srcbusirq == bus_irq) |
3744 | break; | 4005 | break; |
3745 | if (i >= mp_irq_entries) | 4006 | if (i >= mp_irq_entries) |
3746 | return -1; | 4007 | return -1; |
@@ -3755,13 +4016,15 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
3755 | /* | 4016 | /* |
3756 | * This function currently is only a helper for the i386 smp boot process where | 4017 | * This function currently is only a helper for the i386 smp boot process where |
3757 | * we need to reprogram the ioredtbls to cater for the cpus which have come online | 4018 | * we need to reprogram the ioredtbls to cater for the cpus which have come online |
3758 | * so mask in all cases should simply be TARGET_CPUS | 4019 | * so mask in all cases should simply be apic->target_cpus() |
3759 | */ | 4020 | */ |
3760 | #ifdef CONFIG_SMP | 4021 | #ifdef CONFIG_SMP |
3761 | void __init setup_ioapic_dest(void) | 4022 | void __init setup_ioapic_dest(void) |
3762 | { | 4023 | { |
3763 | int pin, ioapic, irq, irq_entry; | 4024 | int pin, ioapic, irq, irq_entry; |
4025 | struct irq_desc *desc; | ||
3764 | struct irq_cfg *cfg; | 4026 | struct irq_cfg *cfg; |
4027 | const struct cpumask *mask; | ||
3765 | 4028 | ||
3766 | if (skip_ioapic_setup == 1) | 4029 | if (skip_ioapic_setup == 1) |
3767 | return; | 4030 | return; |
@@ -3777,17 +4040,31 @@ void __init setup_ioapic_dest(void) | |||
3777 | * when you have too many devices, because at that time only boot | 4040 | * when you have too many devices, because at that time only boot |
3778 | * cpu is online. | 4041 | * cpu is online. |
3779 | */ | 4042 | */ |
3780 | cfg = irq_cfg(irq); | 4043 | desc = irq_to_desc(irq); |
3781 | if (!cfg->vector) | 4044 | cfg = desc->chip_data; |
3782 | setup_IO_APIC_irq(ioapic, pin, irq, | 4045 | if (!cfg->vector) { |
4046 | setup_IO_APIC_irq(ioapic, pin, irq, desc, | ||
3783 | irq_trigger(irq_entry), | 4047 | irq_trigger(irq_entry), |
3784 | irq_polarity(irq_entry)); | 4048 | irq_polarity(irq_entry)); |
4049 | continue; | ||
4050 | |||
4051 | } | ||
4052 | |||
4053 | /* | ||
4054 | * Honour affinities which have been set in early boot | ||
4055 | */ | ||
4056 | if (desc->status & | ||
4057 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | ||
4058 | mask = desc->affinity; | ||
4059 | else | ||
4060 | mask = apic->target_cpus(); | ||
4061 | |||
3785 | #ifdef CONFIG_INTR_REMAP | 4062 | #ifdef CONFIG_INTR_REMAP |
3786 | else if (intr_remapping_enabled) | 4063 | if (intr_remapping_enabled) |
3787 | set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); | 4064 | set_ir_ioapic_affinity_irq_desc(desc, mask); |
3788 | #endif | ||
3789 | else | 4065 | else |
3790 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | 4066 | #endif |
4067 | set_ioapic_affinity_irq_desc(desc, mask); | ||
3791 | } | 4068 | } |
3792 | 4069 | ||
3793 | } | 4070 | } |
@@ -3836,11 +4113,10 @@ void __init ioapic_init_mappings(void) | |||
3836 | struct resource *ioapic_res; | 4113 | struct resource *ioapic_res; |
3837 | int i; | 4114 | int i; |
3838 | 4115 | ||
3839 | irq_2_pin_init(); | ||
3840 | ioapic_res = ioapic_setup_resources(); | 4116 | ioapic_res = ioapic_setup_resources(); |
3841 | for (i = 0; i < nr_ioapics; i++) { | 4117 | for (i = 0; i < nr_ioapics; i++) { |
3842 | if (smp_found_config) { | 4118 | if (smp_found_config) { |
3843 | ioapic_phys = mp_ioapics[i].mp_apicaddr; | 4119 | ioapic_phys = mp_ioapics[i].apicaddr; |
3844 | #ifdef CONFIG_X86_32 | 4120 | #ifdef CONFIG_X86_32 |
3845 | if (!ioapic_phys) { | 4121 | if (!ioapic_phys) { |
3846 | printk(KERN_ERR | 4122 | printk(KERN_ERR |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 191914302744..e41980a373ab 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -35,8 +35,8 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, | |||
35 | */ | 35 | */ |
36 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | 36 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) |
37 | { | 37 | { |
38 | struct thread_struct * t = ¤t->thread; | 38 | struct thread_struct *t = ¤t->thread; |
39 | struct tss_struct * tss; | 39 | struct tss_struct *tss; |
40 | unsigned int i, max_long, bytes, bytes_updated; | 40 | unsigned int i, max_long, bytes, bytes_updated; |
41 | 41 | ||
42 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | 42 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) |
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) | |||
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_X86_32 | 133 | #ifdef CONFIG_X86_32 |
134 | asmlinkage long sys_iopl(unsigned long regsp) | 134 | long sys_iopl(struct pt_regs *regs) |
135 | { | 135 | { |
136 | struct pt_regs *regs = (struct pt_regs *)®sp; | ||
137 | unsigned int level = regs->bx; | 136 | unsigned int level = regs->bx; |
138 | struct thread_struct *t = ¤t->thread; | 137 | struct thread_struct *t = ¤t->thread; |
139 | int rc; | 138 | int rc; |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f35..dbf5445727a9 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -17,135 +17,121 @@ | |||
17 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/proto.h> | 19 | #include <asm/proto.h> |
20 | #include <asm/ipi.h> | ||
20 | 21 | ||
21 | #ifdef CONFIG_X86_32 | 22 | void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) |
22 | #include <mach_apic.h> | ||
23 | #include <mach_ipi.h> | ||
24 | |||
25 | /* | ||
26 | * the following functions deal with sending IPIs between CPUs. | ||
27 | * | ||
28 | * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. | ||
29 | */ | ||
30 | |||
31 | static inline int __prepare_ICR(unsigned int shortcut, int vector) | ||
32 | { | 23 | { |
33 | unsigned int icr = shortcut | APIC_DEST_LOGICAL; | 24 | unsigned long query_cpu; |
34 | 25 | unsigned long flags; | |
35 | switch (vector) { | 26 | |
36 | default: | 27 | /* |
37 | icr |= APIC_DM_FIXED | vector; | 28 | * Hack. The clustered APIC addressing mode doesn't allow us to send |
38 | break; | 29 | * to an arbitrary mask, so I do a unicast to each CPU instead. |
39 | case NMI_VECTOR: | 30 | * - mbligh |
40 | icr |= APIC_DM_NMI; | 31 | */ |
41 | break; | 32 | local_irq_save(flags); |
33 | for_each_cpu(query_cpu, mask) { | ||
34 | __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, | ||
35 | query_cpu), vector, APIC_DEST_PHYSICAL); | ||
42 | } | 36 | } |
43 | return icr; | 37 | local_irq_restore(flags); |
44 | } | 38 | } |
45 | 39 | ||
46 | static inline int __prepare_ICR2(unsigned int mask) | 40 | void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, |
41 | int vector) | ||
47 | { | 42 | { |
48 | return SET_APIC_DEST_FIELD(mask); | 43 | unsigned int this_cpu = smp_processor_id(); |
44 | unsigned int query_cpu; | ||
45 | unsigned long flags; | ||
46 | |||
47 | /* See Hack comment above */ | ||
48 | |||
49 | local_irq_save(flags); | ||
50 | for_each_cpu(query_cpu, mask) { | ||
51 | if (query_cpu == this_cpu) | ||
52 | continue; | ||
53 | __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, | ||
54 | query_cpu), vector, APIC_DEST_PHYSICAL); | ||
55 | } | ||
56 | local_irq_restore(flags); | ||
49 | } | 57 | } |
50 | 58 | ||
51 | void __send_IPI_shortcut(unsigned int shortcut, int vector) | 59 | void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, |
60 | int vector) | ||
52 | { | 61 | { |
53 | /* | 62 | unsigned long flags; |
54 | * Subtle. In the case of the 'never do double writes' workaround | 63 | unsigned int query_cpu; |
55 | * we have to lock out interrupts to be safe. As we don't care | ||
56 | * of the value read we use an atomic rmw access to avoid costly | ||
57 | * cli/sti. Otherwise we use an even cheaper single atomic write | ||
58 | * to the APIC. | ||
59 | */ | ||
60 | unsigned int cfg; | ||
61 | |||
62 | /* | ||
63 | * Wait for idle. | ||
64 | */ | ||
65 | apic_wait_icr_idle(); | ||
66 | 64 | ||
67 | /* | 65 | /* |
68 | * No need to touch the target chip field | 66 | * Hack. The clustered APIC addressing mode doesn't allow us to send |
67 | * to an arbitrary mask, so I do a unicasts to each CPU instead. This | ||
68 | * should be modified to do 1 message per cluster ID - mbligh | ||
69 | */ | 69 | */ |
70 | cfg = __prepare_ICR(shortcut, vector); | ||
71 | 70 | ||
72 | /* | 71 | local_irq_save(flags); |
73 | * Send the IPI. The write to APIC_ICR fires this off. | 72 | for_each_cpu(query_cpu, mask) |
74 | */ | 73 | __default_send_IPI_dest_field( |
75 | apic_write(APIC_ICR, cfg); | 74 | apic->cpu_to_logical_apicid(query_cpu), vector, |
75 | apic->dest_logical); | ||
76 | local_irq_restore(flags); | ||
76 | } | 77 | } |
77 | 78 | ||
78 | void send_IPI_self(int vector) | 79 | void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, |
80 | int vector) | ||
79 | { | 81 | { |
80 | __send_IPI_shortcut(APIC_DEST_SELF, vector); | 82 | unsigned long flags; |
81 | } | 83 | unsigned int query_cpu; |
82 | 84 | unsigned int this_cpu = smp_processor_id(); | |
83 | /* | ||
84 | * This is used to send an IPI with no shorthand notation (the destination is | ||
85 | * specified in bits 56 to 63 of the ICR). | ||
86 | */ | ||
87 | static inline void __send_IPI_dest_field(unsigned long mask, int vector) | ||
88 | { | ||
89 | unsigned long cfg; | ||
90 | |||
91 | /* | ||
92 | * Wait for idle. | ||
93 | */ | ||
94 | if (unlikely(vector == NMI_VECTOR)) | ||
95 | safe_apic_wait_icr_idle(); | ||
96 | else | ||
97 | apic_wait_icr_idle(); | ||
98 | |||
99 | /* | ||
100 | * prepare target chip field | ||
101 | */ | ||
102 | cfg = __prepare_ICR2(mask); | ||
103 | apic_write(APIC_ICR2, cfg); | ||
104 | 85 | ||
105 | /* | 86 | /* See Hack comment above */ |
106 | * program the ICR | ||
107 | */ | ||
108 | cfg = __prepare_ICR(0, vector); | ||
109 | 87 | ||
110 | /* | 88 | local_irq_save(flags); |
111 | * Send the IPI. The write to APIC_ICR fires this off. | 89 | for_each_cpu(query_cpu, mask) { |
112 | */ | 90 | if (query_cpu == this_cpu) |
113 | apic_write(APIC_ICR, cfg); | 91 | continue; |
92 | __default_send_IPI_dest_field( | ||
93 | apic->cpu_to_logical_apicid(query_cpu), vector, | ||
94 | apic->dest_logical); | ||
95 | } | ||
96 | local_irq_restore(flags); | ||
114 | } | 97 | } |
115 | 98 | ||
99 | #ifdef CONFIG_X86_32 | ||
100 | |||
116 | /* | 101 | /* |
117 | * This is only used on smaller machines. | 102 | * This is only used on smaller machines. |
118 | */ | 103 | */ |
119 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | 104 | void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) |
120 | { | 105 | { |
121 | unsigned long mask = cpus_addr(cpumask)[0]; | 106 | unsigned long mask = cpumask_bits(cpumask)[0]; |
122 | unsigned long flags; | 107 | unsigned long flags; |
123 | 108 | ||
124 | local_irq_save(flags); | 109 | local_irq_save(flags); |
125 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | 110 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); |
126 | __send_IPI_dest_field(mask, vector); | 111 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); |
127 | local_irq_restore(flags); | 112 | local_irq_restore(flags); |
128 | } | 113 | } |
129 | 114 | ||
130 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | 115 | void default_send_IPI_allbutself(int vector) |
131 | { | 116 | { |
132 | unsigned long flags; | ||
133 | unsigned int query_cpu; | ||
134 | |||
135 | /* | 117 | /* |
136 | * Hack. The clustered APIC addressing mode doesn't allow us to send | 118 | * if there are no other CPUs in the system then we get an APIC send |
137 | * to an arbitrary mask, so I do a unicasts to each CPU instead. This | 119 | * error if we try to broadcast, thus avoid sending IPIs in this case. |
138 | * should be modified to do 1 message per cluster ID - mbligh | ||
139 | */ | 120 | */ |
121 | if (num_online_cpus() <= 1) | ||
122 | return; | ||
140 | 123 | ||
141 | local_irq_save(flags); | 124 | __default_local_send_IPI_allbutself(vector); |
142 | for_each_possible_cpu(query_cpu) { | 125 | } |
143 | if (cpu_isset(query_cpu, mask)) { | 126 | |
144 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | 127 | void default_send_IPI_all(int vector) |
145 | vector); | 128 | { |
146 | } | 129 | __default_local_send_IPI_all(vector); |
147 | } | 130 | } |
148 | local_irq_restore(flags); | 131 | |
132 | void default_send_IPI_self(int vector) | ||
133 | { | ||
134 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); | ||
149 | } | 135 | } |
150 | 136 | ||
151 | /* must come after the send_IPI functions above for inlining */ | 137 | /* must come after the send_IPI functions above for inlining */ |
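The default_send_IPI_mask_* helpers above all share one shape: because clustered logical addressing cannot target an arbitrary mask in a single ICR write, they walk the mask with interrupts disabled and unicast to each CPU. A user-space model of the allbutself variant, with a bitmask in place of struct cpumask and printf in place of the ICR write:

    #include <stdio.h>

    #define NCPU 8

    /* Stand-in for __default_send_IPI_dest_field(): one ICR write
     * per target CPU. */
    static void send_one(unsigned int apicid, int vector)
    {
        printf("ICR write: apicid=%u vector=0x%x\n", apicid, vector);
    }

    /* Model of default_send_IPI_mask_allbutself_phys(): walk the mask,
     * skip ourselves, unicast to everyone else. */
    static void send_mask_allbutself(unsigned int mask, unsigned int this_cpu,
                                     int vector)
    {
        unsigned int cpu;

        /* the kernel brackets this loop with local_irq_save/restore */
        for (cpu = 0; cpu < NCPU; cpu++) {
            if (!(mask & (1u << cpu)) || cpu == this_cpu)
                continue;
            send_one(cpu /* x86_cpu_to_apicid[cpu] in the kernel */, vector);
        }
    }

    int main(void)
    {
        send_mask_allbutself(0x2d /* cpus 0,2,3,5 */, 2, 0xfd);
        return 0;
    }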
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f649..f13ca1650aaf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -5,10 +5,13 @@ | |||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel_stat.h> | 6 | #include <linux/kernel_stat.h> |
7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
8 | #include <linux/smp.h> | ||
9 | #include <linux/ftrace.h> | ||
8 | 10 | ||
9 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
10 | #include <asm/io_apic.h> | 12 | #include <asm/io_apic.h> |
11 | #include <asm/smp.h> | 13 | #include <asm/irq.h> |
14 | #include <asm/idle.h> | ||
12 | 15 | ||
13 | atomic_t irq_err_count; | 16 | atomic_t irq_err_count; |
14 | 17 | ||
@@ -35,11 +38,7 @@ void ack_bad_irq(unsigned int irq) | |||
35 | #endif | 38 | #endif |
36 | } | 39 | } |
37 | 40 | ||
38 | #ifdef CONFIG_X86_32 | 41 | #define irq_stats(x) (&per_cpu(irq_stat, x)) |
39 | # define irq_stats(x) (&per_cpu(irq_stat, x)) | ||
40 | #else | ||
41 | # define irq_stats(x) cpu_pda(x) | ||
42 | #endif | ||
43 | /* | 42 | /* |
44 | * /proc/interrupts printing: | 43 | * /proc/interrupts printing: |
45 | */ | 44 | */ |
@@ -118,6 +117,9 @@ int show_interrupts(struct seq_file *p, void *v) | |||
118 | } | 117 | } |
119 | 118 | ||
120 | desc = irq_to_desc(i); | 119 | desc = irq_to_desc(i); |
120 | if (!desc) | ||
121 | return 0; | ||
122 | |||
121 | spin_lock_irqsave(&desc->lock, flags); | 123 | spin_lock_irqsave(&desc->lock, flags); |
122 | #ifndef CONFIG_SMP | 124 | #ifndef CONFIG_SMP |
123 | any_count = kstat_irqs(i); | 125 | any_count = kstat_irqs(i); |
@@ -187,3 +189,41 @@ u64 arch_irq_stat(void) | |||
187 | #endif | 189 | #endif |
188 | return sum; | 190 | return sum; |
189 | } | 191 | } |
192 | |||
193 | |||
194 | /* | ||
195 | * do_IRQ handles all normal device IRQ's (the special | ||
196 | * SMP cross-CPU interrupts have their own specific | ||
197 | * handlers). | ||
198 | */ | ||
199 | unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | ||
200 | { | ||
201 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
202 | |||
203 | /* high bit used in ret_from_ code */ | ||
204 | unsigned vector = ~regs->orig_ax; | ||
205 | unsigned irq; | ||
206 | |||
207 | exit_idle(); | ||
208 | irq_enter(); | ||
209 | |||
210 | irq = __get_cpu_var(vector_irq)[vector]; | ||
211 | |||
212 | if (!handle_irq(irq, regs)) { | ||
213 | #ifdef CONFIG_X86_64 | ||
214 | if (!disable_apic) | ||
215 | ack_APIC_irq(); | ||
216 | #endif | ||
217 | |||
218 | if (printk_ratelimit()) | ||
219 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", | ||
220 | __func__, smp_processor_id(), vector, irq); | ||
221 | } | ||
222 | |||
223 | irq_exit(); | ||
224 | |||
225 | set_irq_regs(old_regs); | ||
226 | return 1; | ||
227 | } | ||
228 | |||
229 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | ||
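The unified do_IRQ() recovers the vector from ~regs->orig_ax: the entry code stores the one's complement so the value keeps its high bits set and cannot be confused with a non-negative syscall number, and the per-CPU vector_irq[] table then maps vector to irq. A small model of that path, with a single global table instead of a per-CPU one and an invented handler:

    #include <stdio.h>

    #define NR_VECTORS 256

    static int vector_irq[NR_VECTORS];   /* per-CPU in the kernel */

    static int handle_irq(int irq)       /* 1 if a handler was found */
    {
        return irq >= 0;
    }

    int main(void)
    {
        int i, irq;
        unsigned long orig_ax;
        unsigned int vector;

        for (i = 0; i < NR_VECTORS; i++)
            vector_irq[i] = -1;
        vector_irq[0x41] = 17;           /* vector 0x41 routed to irq 17 */

        /* Entry code stores the complement: the high bit distinguishes
         * an interrupt frame from a syscall's non-negative orig_ax. */
        orig_ax = ~0x41UL;

        vector = (unsigned int)(~orig_ax) & 0xff;
        irq = vector_irq[vector];

        if (!handle_irq(irq))
            printf("No irq handler for vector (irq %d)\n", irq);
        else
            printf("vector 0x%x -> irq %d\n", vector, irq);
        return 0;
    }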
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de0..4beb9a13873d 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -15,9 +15,9 @@ | |||
15 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
18 | #include <linux/uaccess.h> | ||
18 | 19 | ||
19 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
20 | #include <asm/uaccess.h> | ||
21 | 21 | ||
22 | DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | 22 | DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); |
23 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 23 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
93 | return 0; | 93 | return 0; |
94 | 94 | ||
95 | /* build the stack frame on the IRQ stack */ | 95 | /* build the stack frame on the IRQ stack */ |
96 | isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); | 96 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); |
97 | irqctx->tinfo.task = curctx->tinfo.task; | 97 | irqctx->tinfo.task = curctx->tinfo.task; |
98 | irqctx->tinfo.previous_esp = current_stack_pointer; | 98 | irqctx->tinfo.previous_esp = current_stack_pointer; |
99 | 99 | ||
@@ -137,7 +137,7 @@ void __cpuinit irq_ctx_init(int cpu) | |||
137 | 137 | ||
138 | hardirq_ctx[cpu] = irqctx; | 138 | hardirq_ctx[cpu] = irqctx; |
139 | 139 | ||
140 | irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; | 140 | irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; |
141 | irqctx->tinfo.task = NULL; | 141 | irqctx->tinfo.task = NULL; |
142 | irqctx->tinfo.exec_domain = NULL; | 142 | irqctx->tinfo.exec_domain = NULL; |
143 | irqctx->tinfo.cpu = cpu; | 143 | irqctx->tinfo.cpu = cpu; |
@@ -147,7 +147,7 @@ void __cpuinit irq_ctx_init(int cpu) | |||
147 | softirq_ctx[cpu] = irqctx; | 147 | softirq_ctx[cpu] = irqctx; |
148 | 148 | ||
149 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", | 149 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", |
150 | cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); | 150 | cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); |
151 | } | 151 | } |
152 | 152 | ||
153 | void irq_ctx_exit(int cpu) | 153 | void irq_ctx_exit(int cpu) |
@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void) | |||
174 | irqctx->tinfo.previous_esp = current_stack_pointer; | 174 | irqctx->tinfo.previous_esp = current_stack_pointer; |
175 | 175 | ||
176 | /* build the stack frame on the softirq stack */ | 176 | /* build the stack frame on the softirq stack */ |
177 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | 177 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); |
178 | 178 | ||
179 | call_on_stack(__do_softirq, isp); | 179 | call_on_stack(__do_softirq, isp); |
180 | /* | 180 | /* |
@@ -191,33 +191,16 @@ static inline int | |||
191 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } | 191 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } |
192 | #endif | 192 | #endif |
193 | 193 | ||
194 | /* | 194 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
195 | * do_IRQ handles all normal device IRQ's (the special | ||
196 | * SMP cross-CPU interrupts have their own specific | ||
197 | * handlers). | ||
198 | */ | ||
199 | unsigned int do_IRQ(struct pt_regs *regs) | ||
200 | { | 195 | { |
201 | struct pt_regs *old_regs; | ||
202 | /* high bit used in ret_from_ code */ | ||
203 | int overflow; | ||
204 | unsigned vector = ~regs->orig_ax; | ||
205 | struct irq_desc *desc; | 196 | struct irq_desc *desc; |
206 | unsigned irq; | 197 | int overflow; |
207 | |||
208 | |||
209 | old_regs = set_irq_regs(regs); | ||
210 | irq_enter(); | ||
211 | irq = __get_cpu_var(vector_irq)[vector]; | ||
212 | 198 | ||
213 | overflow = check_stack_overflow(); | 199 | overflow = check_stack_overflow(); |
214 | 200 | ||
215 | desc = irq_to_desc(irq); | 201 | desc = irq_to_desc(irq); |
216 | if (unlikely(!desc)) { | 202 | if (unlikely(!desc)) |
217 | printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", | 203 | return false; |
218 | __func__, irq, vector, smp_processor_id()); | ||
219 | BUG(); | ||
220 | } | ||
221 | 204 | ||
222 | if (!execute_on_irq_stack(overflow, desc, irq)) { | 205 | if (!execute_on_irq_stack(overflow, desc, irq)) { |
223 | if (unlikely(overflow)) | 206 | if (unlikely(overflow)) |
@@ -225,33 +208,34 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
225 | desc->handle_irq(irq, desc); | 208 | desc->handle_irq(irq, desc); |
226 | } | 209 | } |
227 | 210 | ||
228 | irq_exit(); | 211 | return true; |
229 | set_irq_regs(old_regs); | ||
230 | return 1; | ||
231 | } | 212 | } |
232 | 213 | ||
233 | #ifdef CONFIG_HOTPLUG_CPU | 214 | #ifdef CONFIG_HOTPLUG_CPU |
234 | #include <mach_apic.h> | 215 | #include <asm/genapic.h> |
235 | 216 | ||
236 | void fixup_irqs(cpumask_t map) | 217 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
218 | void fixup_irqs(void) | ||
237 | { | 219 | { |
238 | unsigned int irq; | 220 | unsigned int irq; |
239 | static int warned; | 221 | static int warned; |
240 | struct irq_desc *desc; | 222 | struct irq_desc *desc; |
241 | 223 | ||
242 | for_each_irq_desc(irq, desc) { | 224 | for_each_irq_desc(irq, desc) { |
243 | cpumask_t mask; | 225 | const struct cpumask *affinity; |
244 | 226 | ||
227 | if (!desc) | ||
228 | continue; | ||
245 | if (irq == 2) | 229 | if (irq == 2) |
246 | continue; | 230 | continue; |
247 | 231 | ||
248 | cpus_and(mask, desc->affinity, map); | 232 | affinity = desc->affinity; |
249 | if (any_online_cpu(mask) == NR_CPUS) { | 233 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
250 | printk("Breaking affinity for irq %i\n", irq); | 234 | printk("Breaking affinity for irq %i\n", irq); |
251 | mask = map; | 235 | affinity = cpu_all_mask; |
252 | } | 236 | } |
253 | if (desc->chip->set_affinity) | 237 | if (desc->chip->set_affinity) |
254 | desc->chip->set_affinity(irq, mask); | 238 | desc->chip->set_affinity(irq, affinity); |
255 | else if (desc->action && !(warned++)) | 239 | else if (desc->action && !(warned++)) |
256 | printk("Cannot set affinity for irq %i\n", irq); | 240 | printk("Cannot set affinity for irq %i\n", irq); |
257 | } | 241 | } |
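fixup_irqs() decides whether an irq's affinity survives a CPU going offline by testing cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids, i.e. whether the intersection is empty; if so it falls back to cpu_all_mask. A bitmask model of that test:

    #include <stdio.h>

    #define NR_CPU_IDS 8

    /* Model of cpumask_any_and(): index of the first set bit in (a & b),
     * or NR_CPU_IDS when the intersection is empty. */
    static int any_and(unsigned int a, unsigned int b)
    {
        unsigned int x = a & b;
        int cpu;

        for (cpu = 0; cpu < NR_CPU_IDS; cpu++)
            if (x & (1u << cpu))
                return cpu;
        return NR_CPU_IDS;
    }

    int main(void)
    {
        unsigned int online   = 0x03;   /* cpus 0,1 still online */
        unsigned int affinity = 0x0c;   /* irq pinned to cpus 2,3 (going away) */

        if (any_and(affinity, online) >= NR_CPU_IDS) {
            printf("Breaking affinity\n");
            affinity = 0xff;            /* fall back to cpu_all_mask */
        }
        printf("new affinity mask: %#x\n", affinity);
        return 0;
    }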
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..977d8b43a0dd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -13,12 +13,19 @@ | |||
13 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
16 | #include <asm/uaccess.h> | 16 | #include <linux/ftrace.h> |
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/smp.h> | ||
17 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
18 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
19 | #include <asm/smp.h> | 21 | #include <asm/apic.h> |
22 | |||
23 | DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | ||
24 | EXPORT_PER_CPU_SYMBOL(irq_stat); | ||
25 | |||
26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | ||
27 | EXPORT_PER_CPU_SYMBOL(irq_regs); | ||
20 | 28 | ||
21 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
22 | /* | 29 | /* |
23 | * Probabilistic stack overflow check: | 30 | * Probabilistic stack overflow check: |
24 | * | 31 | * |
@@ -28,95 +35,71 @@ | |||
28 | */ | 35 | */ |
29 | static inline void stack_overflow_check(struct pt_regs *regs) | 36 | static inline void stack_overflow_check(struct pt_regs *regs) |
30 | { | 37 | { |
38 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
31 | u64 curbase = (u64)task_stack_page(current); | 39 | u64 curbase = (u64)task_stack_page(current); |
32 | static unsigned long warned = -60*HZ; | 40 | |
33 | 41 | WARN_ONCE(regs->sp >= curbase && | |
34 | if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && | 42 | regs->sp <= curbase + THREAD_SIZE && |
35 | regs->sp < curbase + sizeof(struct thread_info) + 128 && | 43 | regs->sp < curbase + sizeof(struct thread_info) + |
36 | time_after(jiffies, warned + 60*HZ)) { | 44 | sizeof(struct pt_regs) + 128, |
37 | printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 45 | |
38 | current->comm, curbase, regs->sp); | 46 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", |
39 | show_stack(NULL,NULL); | 47 | current->comm, curbase, regs->sp); |
40 | warned = jiffies; | ||
41 | } | ||
42 | } | ||
43 | #endif | 48 | #endif |
49 | } | ||
44 | 50 | ||
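The reworked stack_overflow_check() collapses the rate-limited printk into a single WARN_ONCE whose condition fires when the stack pointer has sunk to within 128 bytes of the thread_info and pt_regs reserved at the stack's base. A standalone model of the bounds test (the struct sizes here are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define THREAD_SIZE  8192u
    #define TI_PLUS_REGS (104u + 168u)  /* sizeof(thread_info)+sizeof(pt_regs) */

    /* Model of stack_overflow_check(): the stack grows down toward the
     * thread_info at the base, so warn when sp is inside this stack and
     * within 128 bytes of the reserved area. */
    static int near_overflow(uint64_t curbase, uint64_t sp)
    {
        return sp >= curbase &&
               sp <= curbase + THREAD_SIZE &&
               sp <  curbase + TI_PLUS_REGS + 128;
    }

    int main(void)
    {
        uint64_t base = 0xffff880012340000ull;

        printf("healthy sp: %d\n", near_overflow(base, base + 6000)); /* 0 */
        printf("deep sp:    %d\n", near_overflow(base, base + 300));  /* 1 */
        return 0;
    }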
45 | /* | 51 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
46 | * do_IRQ handles all normal device IRQ's (the special | ||
47 | * SMP cross-CPU interrupts have their own specific | ||
48 | * handlers). | ||
49 | */ | ||
50 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | ||
51 | { | 52 | { |
52 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
53 | struct irq_desc *desc; | 53 | struct irq_desc *desc; |
54 | 54 | ||
55 | /* high bit used in ret_from_ code */ | ||
56 | unsigned vector = ~regs->orig_ax; | ||
57 | unsigned irq; | ||
58 | |||
59 | exit_idle(); | ||
60 | irq_enter(); | ||
61 | irq = __get_cpu_var(vector_irq)[vector]; | ||
62 | |||
63 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
64 | stack_overflow_check(regs); | 55 | stack_overflow_check(regs); |
65 | #endif | ||
66 | 56 | ||
67 | desc = irq_to_desc(irq); | 57 | desc = irq_to_desc(irq); |
68 | if (likely(desc)) | 58 | if (unlikely(!desc)) |
69 | generic_handle_irq_desc(irq, desc); | 59 | return false; |
70 | else { | ||
71 | if (!disable_apic) | ||
72 | ack_APIC_irq(); | ||
73 | |||
74 | if (printk_ratelimit()) | ||
75 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", | ||
76 | __func__, smp_processor_id(), vector); | ||
77 | } | ||
78 | 60 | ||
79 | irq_exit(); | 61 | generic_handle_irq_desc(irq, desc); |
80 | 62 | return true; | |
81 | set_irq_regs(old_regs); | ||
82 | return 1; | ||
83 | } | 63 | } |
84 | 64 | ||
85 | #ifdef CONFIG_HOTPLUG_CPU | 65 | #ifdef CONFIG_HOTPLUG_CPU |
86 | void fixup_irqs(cpumask_t map) | 66 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
67 | void fixup_irqs(void) | ||
87 | { | 68 | { |
88 | unsigned int irq; | 69 | unsigned int irq; |
89 | static int warned; | 70 | static int warned; |
90 | struct irq_desc *desc; | 71 | struct irq_desc *desc; |
91 | 72 | ||
92 | for_each_irq_desc(irq, desc) { | 73 | for_each_irq_desc(irq, desc) { |
93 | cpumask_t mask; | ||
94 | int break_affinity = 0; | 74 | int break_affinity = 0; |
95 | int set_affinity = 1; | 75 | int set_affinity = 1; |
76 | const struct cpumask *affinity; | ||
96 | 77 | ||
78 | if (!desc) | ||
79 | continue; | ||
97 | if (irq == 2) | 80 | if (irq == 2) |
98 | continue; | 81 | continue; |
99 | 82 | ||
100 | /* interrupts are disabled at this point */ | 83 | /* interrupts are disabled at this point */ |
101 | spin_lock(&desc->lock); | 84 | spin_lock(&desc->lock); |
102 | 85 | ||
86 | affinity = desc->affinity; | ||
103 | if (!irq_has_action(irq) || | 87 | if (!irq_has_action(irq) || |
104 | cpus_equal(desc->affinity, map)) { | 88 | cpumask_equal(affinity, cpu_online_mask)) { |
105 | spin_unlock(&desc->lock); | 89 | spin_unlock(&desc->lock); |
106 | continue; | 90 | continue; |
107 | } | 91 | } |
108 | 92 | ||
109 | cpus_and(mask, desc->affinity, map); | 93 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
110 | if (cpus_empty(mask)) { | ||
111 | break_affinity = 1; | 94 | break_affinity = 1; |
112 | mask = map; | 95 | affinity = cpu_all_mask; |
113 | } | 96 | } |
114 | 97 | ||
115 | if (desc->chip->mask) | 98 | if (desc->chip->mask) |
116 | desc->chip->mask(irq); | 99 | desc->chip->mask(irq); |
117 | 100 | ||
118 | if (desc->chip->set_affinity) | 101 | if (desc->chip->set_affinity) |
119 | desc->chip->set_affinity(irq, mask); | 102 | desc->chip->set_affinity(irq, affinity); |
120 | else if (!(warned++)) | 103 | else if (!(warned++)) |
121 | set_affinity = 0; | 104 | set_affinity = 0; |
122 | 105 | ||
@@ -142,18 +125,18 @@ extern void call_softirq(void); | |||
142 | 125 | ||
143 | asmlinkage void do_softirq(void) | 126 | asmlinkage void do_softirq(void) |
144 | { | 127 | { |
145 | __u32 pending; | 128 | __u32 pending; |
146 | unsigned long flags; | 129 | unsigned long flags; |
147 | 130 | ||
148 | if (in_interrupt()) | 131 | if (in_interrupt()) |
149 | return; | 132 | return; |
150 | 133 | ||
151 | local_irq_save(flags); | 134 | local_irq_save(flags); |
152 | pending = local_softirq_pending(); | 135 | pending = local_softirq_pending(); |
153 | /* Switch to interrupt stack */ | 136 | /* Switch to interrupt stack */ |
154 | if (pending) { | 137 | if (pending) { |
155 | call_softirq(); | 138 | call_softirq(); |
156 | WARN_ON_ONCE(softirq_count()); | 139 | WARN_ON_ONCE(softirq_count()); |
157 | } | 140 | } |
158 | local_irq_restore(flags); | 141 | local_irq_restore(flags); |
159 | } | 142 | } |
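
The stack_overflow_check() rewrite above replaces the hand-rolled ratelimit (a static `warned` timestamp plus time_after()) with WARN_ONCE(), and moves the CONFIG_DEBUG_STACKOVERFLOW guard inside the function so the caller no longer needs its own #ifdef; do_IRQ() itself shrinks to handle_irq() because the entry/exit bookkeeping (irq_enter(), set_irq_regs(), vector decoding) presumably moved to the shared do_IRQ elsewhere in this series. A minimal userspace sketch of the warn-once pattern, assuming a simplified macro (the real WARN_ONCE also dumps a backtrace):

#include <stdio.h>

/* fire at most once per call site, roughly what WARN_ONCE() does */
#define WARN_ONCE(cond, fmt, ...)                               \
({                                                              \
        static int __warned;                                    \
        int __c = (cond);                                       \
        if (__c && !__warned) {                                 \
                __warned = 1;                                   \
                fprintf(stderr, fmt, ##__VA_ARGS__);            \
        }                                                       \
        __c;                                                    \
})

int main(void)
{
        int i;

        for (i = 0; i < 5; i++)
                WARN_ONCE(i > 2, "threshold crossed at i=%d\n", i);
        return 0;       /* warning printed once, at i == 3 */
}
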
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..bf629cadec1a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -9,18 +9,18 @@ | |||
9 | #include <linux/kernel_stat.h> | 9 | #include <linux/kernel_stat.h> |
10 | #include <linux/sysdev.h> | 10 | #include <linux/sysdev.h> |
11 | #include <linux/bitops.h> | 11 | #include <linux/bitops.h> |
12 | #include <linux/io.h> | ||
13 | #include <linux/delay.h> | ||
12 | 14 | ||
13 | #include <asm/atomic.h> | 15 | #include <asm/atomic.h> |
14 | #include <asm/system.h> | 16 | #include <asm/system.h> |
15 | #include <asm/io.h> | ||
16 | #include <asm/timer.h> | 17 | #include <asm/timer.h> |
17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
18 | #include <asm/delay.h> | ||
19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
21 | #include <asm/arch_hooks.h> | 21 | #include <asm/arch_hooks.h> |
22 | #include <asm/i8259.h> | 22 | #include <asm/i8259.h> |
23 | 23 | #include <asm/traps.h> | |
24 | 24 | ||
25 | 25 | ||
26 | /* | 26 | /* |
@@ -34,12 +34,10 @@ | |||
34 | * leads to races. IBM designers who came up with it should | 34 | * leads to races. IBM designers who came up with it should |
35 | * be shot. | 35 | * be shot. |
36 | */ | 36 | */ |
37 | |||
38 | 37 | ||
39 | static irqreturn_t math_error_irq(int cpl, void *dev_id) | 38 | static irqreturn_t math_error_irq(int cpl, void *dev_id) |
40 | { | 39 | { |
41 | extern void math_error(void __user *); | 40 | outb(0, 0xF0); |
42 | outb(0,0xF0); | ||
43 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 41 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
44 | return IRQ_NONE; | 42 | return IRQ_NONE; |
45 | math_error((void __user *)get_irq_regs()->ip); | 43 | math_error((void __user *)get_irq_regs()->ip); |
@@ -56,7 +54,7 @@ static struct irqaction fpu_irq = { | |||
56 | .name = "fpu", | 54 | .name = "fpu", |
57 | }; | 55 | }; |
58 | 56 | ||
59 | void __init init_ISA_irqs (void) | 57 | void __init init_ISA_irqs(void) |
60 | { | 58 | { |
61 | int i; | 59 | int i; |
62 | 60 | ||
@@ -68,8 +66,7 @@ void __init init_ISA_irqs (void) | |||
68 | /* | 66 | /* |
69 | * 16 old-style INTA-cycle interrupts: | 67 | * 16 old-style INTA-cycle interrupts: |
70 | */ | 68 | */ |
71 | for (i = 0; i < 16; i++) { | 69 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
72 | /* first time call this irq_desc */ | ||
73 | struct irq_desc *desc = irq_to_desc(i); | 70 | struct irq_desc *desc = irq_to_desc(i); |
74 | 71 | ||
75 | desc->status = IRQ_DISABLED; | 72 | desc->status = IRQ_DISABLED; |
@@ -111,6 +108,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
111 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 108 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
112 | }; | 109 | }; |
113 | 110 | ||
111 | int vector_used_by_percpu_irq(unsigned int vector) | ||
112 | { | ||
113 | int cpu; | ||
114 | |||
115 | for_each_online_cpu(cpu) { | ||
116 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
117 | return 1; | ||
118 | } | ||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | |||
114 | /* Overridden in paravirt.c */ | 123 | /* Overridden in paravirt.c */ |
115 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 124 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
116 | 125 | ||
@@ -129,7 +138,7 @@ void __init native_init_IRQ(void) | |||
129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 138 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
130 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 139 | /* SYSCALL_VECTOR was reserved in trap_init. */ |
131 | if (i != SYSCALL_VECTOR) | 140 | if (i != SYSCALL_VECTOR) |
132 | set_intr_gate(i, interrupt[i]); | 141 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); |
133 | } | 142 | } |
134 | 143 | ||
135 | 144 | ||
@@ -140,17 +149,26 @@ void __init native_init_IRQ(void) | |||
140 | */ | 149 | */ |
141 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 150 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
142 | 151 | ||
143 | /* IPI for invalidation */ | 152 | /* IPIs for invalidation */ |
144 | alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); | 153 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); |
154 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | ||
155 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | ||
156 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | ||
157 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | ||
158 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | ||
159 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | ||
160 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | ||
145 | 161 | ||
146 | /* IPI for generic function call */ | 162 | /* IPI for generic function call */ |
147 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 163 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
148 | 164 | ||
149 | /* IPI for single call function */ | 165 | /* IPI for single call function */ |
150 | set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); | 166 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
167 | call_function_single_interrupt); | ||
151 | 168 | ||
152 | /* Low priority IPI to cleanup after moving an irq */ | 169 | /* Low priority IPI to cleanup after moving an irq */ |
153 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 170 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
171 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
154 | #endif | 172 | #endif |
155 | 173 | ||
156 | #ifdef CONFIG_X86_LOCAL_APIC | 174 | #ifdef CONFIG_X86_LOCAL_APIC |
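
Two details in the irqinit_32.c hunks deserve a note: the gate-install loop now indexes the stub table relative to FIRST_EXTERNAL_VECTOR, so the table no longer carries dead slots for vectors below 0x20, and the single TLB-invalidate IPI becomes eight consecutive vectors. A sketch of the indexing convention, assuming FIRST_EXTERNAL_VECTOR at its usual 0x20 and with the actual IDT write stubbed out:

#define FIRST_EXTERNAL_VECTOR 0x20
#define NR_VECTORS            256

typedef void (*intr_stub_t)(void);

/* entry 0 of the table corresponds to vector FIRST_EXTERNAL_VECTOR */
static intr_stub_t interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR];

static void set_intr_gate(int vector, intr_stub_t stub)
{
        (void)vector; (void)stub;       /* would program the IDT entry here */
}

static void install_external_gates(void)
{
        int i;

        for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++)
                set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
}
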
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..da481a1e3f30 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -11,54 +11,19 @@ | |||
11 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
12 | #include <linux/sysdev.h> | 12 | #include <linux/sysdev.h> |
13 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
14 | #include <linux/acpi.h> | ||
15 | #include <linux/io.h> | ||
16 | #include <linux/delay.h> | ||
14 | 17 | ||
15 | #include <asm/acpi.h> | ||
16 | #include <asm/atomic.h> | 18 | #include <asm/atomic.h> |
17 | #include <asm/system.h> | 19 | #include <asm/system.h> |
18 | #include <asm/io.h> | ||
19 | #include <asm/hw_irq.h> | 20 | #include <asm/hw_irq.h> |
20 | #include <asm/pgtable.h> | 21 | #include <asm/pgtable.h> |
21 | #include <asm/delay.h> | ||
22 | #include <asm/desc.h> | 22 | #include <asm/desc.h> |
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <asm/i8259.h> | 24 | #include <asm/i8259.h> |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Common place to define all x86 IRQ vectors | ||
28 | * | ||
29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
30 | * | ||
31 | * These macros create the low-level assembly IRQ routines that save | ||
32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
34 | * interrupt-controller happy. | ||
35 | */ | ||
36 | |||
37 | #define IRQ_NAME2(nr) nr##_interrupt(void) | ||
38 | #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) | ||
39 | |||
40 | /* | ||
41 | * SMP has a few special interrupts for IPI messages | ||
42 | */ | ||
43 | |||
44 | #define BUILD_IRQ(nr) \ | ||
45 | asmlinkage void IRQ_NAME(nr); \ | ||
46 | asm("\n.text\n.p2align\n" \ | ||
47 | "IRQ" #nr "_interrupt:\n\t" \ | ||
48 | "push $~(" #nr ") ; " \ | ||
49 | "jmp common_interrupt\n" \ | ||
50 | ".previous"); | ||
51 | |||
52 | #define BI(x,y) \ | ||
53 | BUILD_IRQ(x##y) | ||
54 | |||
55 | #define BUILD_16_IRQS(x) \ | ||
56 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
57 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
58 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
59 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
60 | |||
61 | /* | ||
62 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 27 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
63 | * (these are usually mapped to vectors 0x30-0x3f) | 28 | * (these are usually mapped to vectors 0x30-0x3f) |
64 | */ | 29 | */ |
@@ -73,37 +38,6 @@ | |||
73 | * | 38 | * |
74 | * (these are usually mapped into the 0x30-0xff vector range) | 39 | * (these are usually mapped into the 0x30-0xff vector range) |
75 | */ | 40 | */ |
76 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
77 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
78 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
79 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
80 | |||
81 | #undef BUILD_16_IRQS | ||
82 | #undef BI | ||
83 | |||
84 | |||
85 | #define IRQ(x,y) \ | ||
86 | IRQ##x##y##_interrupt | ||
87 | |||
88 | #define IRQLIST_16(x) \ | ||
89 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
90 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
91 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
92 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
93 | |||
94 | /* for the irq vectors */ | ||
95 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
96 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
97 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
98 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
99 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
100 | }; | ||
101 | |||
102 | #undef IRQ | ||
103 | #undef IRQLIST_16 | ||
104 | |||
105 | |||
106 | |||
107 | 41 | ||
108 | /* | 42 | /* |
109 | * IRQ2 is cascade interrupt to second interrupt controller | 43 | * IRQ2 is cascade interrupt to second interrupt controller |
@@ -135,15 +69,26 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
135 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 69 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
136 | }; | 70 | }; |
137 | 71 | ||
138 | void __init init_ISA_irqs(void) | 72 | int vector_used_by_percpu_irq(unsigned int vector) |
73 | { | ||
74 | int cpu; | ||
75 | |||
76 | for_each_online_cpu(cpu) { | ||
77 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
78 | return 1; | ||
79 | } | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static void __init init_ISA_irqs(void) | ||
139 | { | 85 | { |
140 | int i; | 86 | int i; |
141 | 87 | ||
142 | init_bsp_APIC(); | 88 | init_bsp_APIC(); |
143 | init_8259A(0); | 89 | init_8259A(0); |
144 | 90 | ||
145 | for (i = 0; i < 16; i++) { | 91 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
146 | /* first time call this irq_desc */ | ||
147 | struct irq_desc *desc = irq_to_desc(i); | 92 | struct irq_desc *desc = irq_to_desc(i); |
148 | 93 | ||
149 | desc->status = IRQ_DISABLED; | 94 | desc->status = IRQ_DISABLED; |
@@ -188,6 +133,7 @@ static void __init smp_intr_init(void) | |||
188 | 133 | ||
189 | /* Low priority IPI to cleanup after moving an irq */ | 134 | /* Low priority IPI to cleanup after moving an irq */ |
190 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 135 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
136 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
191 | #endif | 137 | #endif |
192 | } | 138 | } |
193 | 139 | ||
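
irqinit_64.c loses the whole BUILD_IRQ/IRQLIST macro maze because the stubs are now emitted centrally in the entry code, and like the 32-bit file it gains vector_used_by_percpu_irq(). That helper is just a column scan across the per-cpu vector_irq tables; a standalone analogue with invented table sizes:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS    8    /* illustrative */
#define NR_VECTORS 256

/* stand-in for the per-cpu vector_irq tables; -1 marks an unused vector */
static int vector_irq[NR_CPUS][NR_VECTORS];

static bool vector_used_by_percpu_irq(unsigned int vector)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (vector_irq[cpu][vector] != -1)
                        return true;
        return false;
}

int main(void)
{
        int cpu, v;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                for (v = 0; v < NR_VECTORS; v++)
                        vector_irq[cpu][v] = -1;

        vector_irq[3][0xfa] = 42;       /* pretend cpu 3 routes vector 0xfa to irq 42 */
        printf("0xfa used: %d, 0xfb used: %d\n",
               vector_used_by_percpu_irq(0xfa), vector_used_by_percpu_irq(0xfb));
        return 0;
}
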
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 10435a120d22..5c4f55483849 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -46,7 +46,7 @@ | |||
46 | #include <asm/apicdef.h> | 46 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 47 | #include <asm/system.h> |
48 | 48 | ||
49 | #include <mach_ipi.h> | 49 | #include <asm/genapic.h> |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Put the error code here just in case the user cares: | 52 | * Put the error code here just in case the user cares: |
@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) | |||
347 | */ | 347 | */ |
348 | void kgdb_roundup_cpus(unsigned long flags) | 348 | void kgdb_roundup_cpus(unsigned long flags) |
349 | { | 349 | { |
350 | send_IPI_allbutself(APIC_DM_NMI); | 350 | apic->send_IPI_allbutself(APIC_DM_NMI); |
351 | } | 351 | } |
352 | #endif | 352 | #endif |
353 | 353 | ||
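
The kgdb hunk is one instance of the genapic conversion running through this series: the compile-time <mach_ipi.h> binding gives way to an `apic` ops pointer selected at boot, so one kernel image can drive different APIC flavors. The shape of that indirection, reduced to essentials (the two backend names here are only illustrative):

#include <stdio.h>

struct apic_ops {
        const char *name;
        void (*send_IPI_allbutself)(int vector);
};

static void flat_send_allbutself(int vector)     { printf("flat IPI %d\n", vector); }
static void physflat_send_allbutself(int vector) { printf("physflat IPI %d\n", vector); }

static struct apic_ops flat_apic     = { "flat",     flat_send_allbutself };
static struct apic_ops physflat_apic = { "physflat", physflat_send_allbutself };

/* chosen once during boot-time probing */
static struct apic_ops *apic = &flat_apic;

int main(void)
{
        apic->send_IPI_allbutself(2);   /* same call site works for any backend */
        apic = &physflat_apic;
        apic->send_IPI_allbutself(2);
        return 0;
}
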
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6aa..e948b28a5a9a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) | |||
376 | 376 | ||
377 | void __kprobes arch_remove_kprobe(struct kprobe *p) | 377 | void __kprobes arch_remove_kprobe(struct kprobe *p) |
378 | { | 378 | { |
379 | mutex_lock(&kprobe_mutex); | 379 | if (p->ainsn.insn) { |
380 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); | 380 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); |
381 | mutex_unlock(&kprobe_mutex); | 381 | p->ainsn.insn = NULL; |
382 | } | ||
382 | } | 383 | } |
383 | 384 | ||
384 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | 385 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) |
@@ -445,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
445 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | 446 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, |
446 | struct kprobe_ctlblk *kcb) | 447 | struct kprobe_ctlblk *kcb) |
447 | { | 448 | { |
448 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) | 449 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) |
449 | if (p->ainsn.boostable == 1 && !p->post_handler) { | 450 | if (p->ainsn.boostable == 1 && !p->post_handler) { |
450 | /* Boost up -- we can execute copied instructions directly */ | 451 | /* Boost up -- we can execute copied instructions directly */ |
451 | reset_current_kprobe(); | 452 | reset_current_kprobe(); |
@@ -694,7 +695,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
694 | /* | 695 | /* |
695 | * It is possible to have multiple instances associated with a given | 696 | * It is possible to have multiple instances associated with a given |
696 | * task either because multiple functions in the call path have | 697 | * task either because multiple functions in the call path have |
697 | * return probes installed on them, and/or more then one | 698 | * return probes installed on them, and/or more than one |
698 | * return probe was registered for a target function. | 699 | * return probe was registered for a target function. |
699 | * | 700 | * |
700 | * We can handle this because: | 701 | * We can handle this because: |
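
arch_remove_kprobe() now tolerates being reached with no slot allocated and NULLs the pointer after freeing, which is also what lets kprobe_mutex move out to the generic kprobes code. The pattern in isolation:

#include <stdlib.h>

struct probe { void *insn_slot; };

/* safe to call twice: the second call sees NULL and does nothing */
static void probe_remove(struct probe *p)
{
        if (p->insn_slot) {
                free(p->insn_slot);
                p->insn_slot = NULL;
        }
}

int main(void)
{
        struct probe p = { .insn_slot = malloc(16) };

        probe_remove(&p);
        probe_remove(&p);       /* idempotent, no double free */
        return 0;
}
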
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 774ac4991568..652fce6d2cce 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) | |||
89 | */ | 89 | */ |
90 | static unsigned long kvm_get_tsc_khz(void) | 90 | static unsigned long kvm_get_tsc_khz(void) |
91 | { | 91 | { |
92 | return preset_lpj; | 92 | struct pvclock_vcpu_time_info *src; |
93 | src = &per_cpu(hv_clock, 0); | ||
94 | return pvclock_tsc_khz(src); | ||
93 | } | 95 | } |
94 | 96 | ||
95 | static void kvm_get_preset_lpj(void) | 97 | static void kvm_get_preset_lpj(void) |
96 | { | 98 | { |
97 | struct pvclock_vcpu_time_info *src; | ||
98 | unsigned long khz; | 99 | unsigned long khz; |
99 | u64 lpj; | 100 | u64 lpj; |
100 | 101 | ||
101 | src = &per_cpu(hv_clock, 0); | 102 | khz = kvm_get_tsc_khz(); |
102 | khz = pvclock_tsc_khz(src); | ||
103 | 103 | ||
104 | lpj = ((u64)khz * 1000); | 104 | lpj = ((u64)khz * 1000); |
105 | do_div(lpj, HZ); | 105 | do_div(lpj, HZ); |
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) | |||
128 | } | 128 | } |
129 | 129 | ||
130 | #ifdef CONFIG_X86_LOCAL_APIC | 130 | #ifdef CONFIG_X86_LOCAL_APIC |
131 | static void kvm_setup_secondary_clock(void) | 131 | static void __cpuinit kvm_setup_secondary_clock(void) |
132 | { | 132 | { |
133 | /* | 133 | /* |
134 | * Now that the first cpu already had this clocksource initialized, | 134 | * Now that the first cpu already had this clocksource initialized, |
@@ -194,5 +194,7 @@ void __init kvmclock_init(void) | |||
194 | #endif | 194 | #endif |
195 | kvm_get_preset_lpj(); | 195 | kvm_get_preset_lpj(); |
196 | clocksource_register(&kvm_clock); | 196 | clocksource_register(&kvm_clock); |
197 | pv_info.paravirt_enabled = 1; | ||
198 | pv_info.name = "KVM"; | ||
197 | } | 199 | } |
198 | } | 200 | } |
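
kvm_get_tsc_khz() now pulls the frequency from CPU 0's pvclock area and kvm_get_preset_lpj() simply calls it, removing the duplicated per-cpu lookup. The loops-per-jiffy value it derives is plain integer math; for example, assuming a 2 GHz TSC and HZ=250:

#include <stdio.h>
#include <stdint.h>

#define HZ 250

int main(void)
{
        unsigned long khz = 2000000;    /* 2 GHz, as reported in kHz */
        uint64_t lpj = (uint64_t)khz * 1000 / HZ;

        printf("preset lpj = %llu\n", (unsigned long long)lpj);  /* 8000000 */
        return 0;
}
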
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index eee32b43fee3..71f1d99a635d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -12,8 +12,8 @@ | |||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/uaccess.h> | ||
15 | 16 | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/system.h> | 17 | #include <asm/system.h> |
18 | #include <asm/ldt.h> | 18 | #include <asm/ldt.h> |
19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | |||
93 | if (err < 0) | 93 | if (err < 0) |
94 | return err; | 94 | return err; |
95 | 95 | ||
96 | for(i = 0; i < old->size; i++) | 96 | for (i = 0; i < old->size; i++) |
97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | 97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); |
98 | return 0; | 98 | return 0; |
99 | } | 99 | } |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3b599518c322..8815f3c7fec7 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | |||
252 | /* | 252 | /* |
253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use | 253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use |
254 | * as clock event sources - not as good as a HPET or APIC, but certainly | 254 | * as clock event sources - not as good as a HPET or APIC, but certainly |
255 | * better then the PIT. This isn't a general purpose MFGPT driver, but | 255 | * better than the PIT. This isn't a general purpose MFGPT driver, but |
256 | * a simplified one designed specifically to act as a clock event source. | 256 | * a simplified one designed specifically to act as a clock event source. |
257 | * For full details about the MFGPT, please consult the CS5536 data sheet. | 257 | * For full details about the MFGPT, please consult the CS5536 data sheet. |
258 | */ | 258 | */ |
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { | |||
287 | .set_mode = mfgpt_set_mode, | 287 | .set_mode = mfgpt_set_mode, |
288 | .set_next_event = mfgpt_next_event, | 288 | .set_next_event = mfgpt_next_event, |
289 | .rating = 250, | 289 | .rating = 250, |
290 | .cpumask = CPU_MASK_ALL, | 290 | .cpumask = cpu_all_mask, |
291 | .shift = 32 | 291 | .shift = 32 |
292 | }; | 292 | }; |
293 | 293 | ||
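
`.cpumask = cpu_all_mask` is part of the cpumask rework seen earlier in fixup_irqs(): masks are handed around as `const struct cpumask *` instead of being copied by value, which matters once NR_CPUS grows large. A reduced illustration, with sizes picked for effect:

/* with NR_CPUS = 4096, a by-value mask is 512 bytes; a pointer is one word */
#define NR_CPUS    4096
#define MASK_LONGS (NR_CPUS / (8 * (int)sizeof(unsigned long)))

struct cpumask { unsigned long bits[MASK_LONGS]; };

static const struct cpumask all_mask = {
        .bits = { [0 ... MASK_LONGS - 1] = ~0UL },
};
#define cpu_all_mask (&all_mask)

struct clock_event_device {
        const struct cpumask *cpumask;  /* was: cpumask_t cpumask, by value */
};

static struct clock_event_device mfgpt_clockevent = {
        .cpumask = cpu_all_mask,
};
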
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 5f8e5d75a254..c25fdb382292 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * This driver allows to upgrade microcode on AMD | 10 | * This driver allows to upgrade microcode on AMD |
11 | * family 0x10 and 0x11 processors. | 11 | * family 0x10 and 0x11 processors. |
12 | * | 12 | * |
13 | * Licensed unter the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | 16 | ||
@@ -32,9 +32,9 @@ | |||
32 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
33 | #include <linux/pci.h> | 33 | #include <linux/pci.h> |
34 | #include <linux/pci_ids.h> | 34 | #include <linux/pci_ids.h> |
35 | #include <linux/uaccess.h> | ||
35 | 36 | ||
36 | #include <asm/msr.h> | 37 | #include <asm/msr.h> |
37 | #include <asm/uaccess.h> | ||
38 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
39 | #include <asm/microcode.h> | 39 | #include <asm/microcode.h> |
40 | 40 | ||
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2"); | |||
47 | #define UCODE_UCODE_TYPE 0x00000001 | 47 | #define UCODE_UCODE_TYPE 0x00000001 |
48 | 48 | ||
49 | struct equiv_cpu_entry { | 49 | struct equiv_cpu_entry { |
50 | unsigned int installed_cpu; | 50 | u32 installed_cpu; |
51 | unsigned int fixed_errata_mask; | 51 | u32 fixed_errata_mask; |
52 | unsigned int fixed_errata_compare; | 52 | u32 fixed_errata_compare; |
53 | unsigned int equiv_cpu; | 53 | u16 equiv_cpu; |
54 | }; | 54 | u16 res; |
55 | } __attribute__((packed)); | ||
55 | 56 | ||
56 | struct microcode_header_amd { | 57 | struct microcode_header_amd { |
57 | unsigned int data_code; | 58 | u32 data_code; |
58 | unsigned int patch_id; | 59 | u32 patch_id; |
59 | unsigned char mc_patch_data_id[2]; | 60 | u16 mc_patch_data_id; |
60 | unsigned char mc_patch_data_len; | 61 | u8 mc_patch_data_len; |
61 | unsigned char init_flag; | 62 | u8 init_flag; |
62 | unsigned int mc_patch_data_checksum; | 63 | u32 mc_patch_data_checksum; |
63 | unsigned int nb_dev_id; | 64 | u32 nb_dev_id; |
64 | unsigned int sb_dev_id; | 65 | u32 sb_dev_id; |
65 | unsigned char processor_rev_id[2]; | 66 | u16 processor_rev_id; |
66 | unsigned char nb_rev_id; | 67 | u8 nb_rev_id; |
67 | unsigned char sb_rev_id; | 68 | u8 sb_rev_id; |
68 | unsigned char bios_api_rev; | 69 | u8 bios_api_rev; |
69 | unsigned char reserved1[3]; | 70 | u8 reserved1[3]; |
70 | unsigned int match_reg[8]; | 71 | u32 match_reg[8]; |
71 | }; | 72 | } __attribute__((packed)); |
72 | 73 | ||
73 | struct microcode_amd { | 74 | struct microcode_amd { |
74 | struct microcode_header_amd hdr; | 75 | struct microcode_header_amd hdr; |
75 | unsigned int mpb[0]; | 76 | unsigned int mpb[0]; |
76 | }; | 77 | }; |
77 | 78 | ||
78 | #define UCODE_MAX_SIZE (2048) | 79 | #define UCODE_MAX_SIZE 2048 |
79 | #define DEFAULT_UCODE_DATASIZE (896) | 80 | #define UCODE_CONTAINER_SECTION_HDR 8 |
80 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) | 81 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
81 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
82 | #define DWSIZE (sizeof(u32)) | ||
83 | /* For now we support a fixed ucode total size only */ | ||
84 | #define get_totalsize(mc) \ | ||
85 | ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ | ||
86 | + MC_HEADER_SIZE) | ||
87 | 82 | ||
88 | /* serialize access to the physical write */ | 83 | /* serialize access to the physical write */ |
89 | static DEFINE_SPINLOCK(microcode_update_lock); | 84 | static DEFINE_SPINLOCK(microcode_update_lock); |
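
Replacing the `unsigned int`/`unsigned char[2]` fields with fixed-width types plus __attribute__((packed)) pins these structs to the on-disk container layout, and turning processor_rev_id into a single u16 is what later lets the patch match become one comparison instead of two byte checks. A compile-time sanity check of the idea, with userspace typedefs standing in for the kernel's:

#include <stdint.h>
#include <stdio.h>

typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;

struct equiv_cpu_entry {
        u32 installed_cpu;
        u32 fixed_errata_mask;
        u32 fixed_errata_compare;
        u16 equiv_cpu;
        u16 res;
} __attribute__((packed));

int main(void)
{
        /* 3*4 + 2*2 = 16 bytes, with no padding allowed to creep in */
        printf("sizeof(equiv_cpu_entry) = %zu\n",
               sizeof(struct equiv_cpu_entry));
        return 0;
}
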
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
93 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 88 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
94 | { | 89 | { |
95 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 90 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
91 | u32 dummy; | ||
96 | 92 | ||
97 | memset(csig, 0, sizeof(*csig)); | 93 | memset(csig, 0, sizeof(*csig)); |
98 | |||
99 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 94 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
100 | printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", | 95 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " |
101 | cpu); | 96 | "supported\n", cpu, c->x86); |
102 | return -1; | 97 | return -1; |
103 | } | 98 | } |
104 | 99 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | |
105 | asm volatile("movl %1, %%ecx; rdmsr" | 100 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
106 | : "=a" (csig->rev) | ||
107 | : "i" (0x0000008B) : "ecx"); | ||
108 | |||
109 | printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", | ||
110 | csig->rev); | ||
111 | |||
112 | return 0; | 101 | return 0; |
113 | } | 102 | } |
114 | 103 | ||
115 | static int get_matching_microcode(int cpu, void *mc, int rev) | 104 | static int get_matching_microcode(int cpu, void *mc, int rev) |
116 | { | 105 | { |
117 | struct microcode_header_amd *mc_header = mc; | 106 | struct microcode_header_amd *mc_header = mc; |
118 | struct pci_dev *nb_pci_dev, *sb_pci_dev; | ||
119 | unsigned int current_cpu_id; | 107 | unsigned int current_cpu_id; |
120 | unsigned int equiv_cpu_id = 0x00; | 108 | u16 equiv_cpu_id = 0; |
121 | unsigned int i = 0; | 109 | unsigned int i = 0; |
122 | 110 | ||
123 | BUG_ON(equiv_cpu_table == NULL); | 111 | BUG_ON(equiv_cpu_table == NULL); |
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
132 | } | 120 | } |
133 | 121 | ||
134 | if (!equiv_cpu_id) { | 122 | if (!equiv_cpu_id) { |
135 | printk(KERN_ERR "microcode: CPU%d cpu_id " | 123 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " |
136 | "not found in equivalent cpu table \n", cpu); | 124 | "not listed in equivalent cpu table\n", cpu); |
137 | return 0; | 125 | return 0; |
138 | } | 126 | } |
139 | 127 | ||
140 | if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { | 128 | if (mc_header->processor_rev_id != equiv_cpu_id) { |
141 | printk(KERN_ERR | 129 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " |
142 | "microcode: CPU%d patch does not match " | 130 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", |
143 | "(patch is %x, cpu extended is %x) \n", | 131 | cpu, mc_header->processor_rev_id, equiv_cpu_id); |
144 | cpu, mc_header->processor_rev_id[0], | ||
145 | (equiv_cpu_id & 0xff)); | ||
146 | return 0; | 132 | return 0; |
147 | } | 133 | } |
148 | 134 | ||
149 | if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { | 135 | /* ucode might be chipset specific -- currently we don't support this */ |
150 | printk(KERN_ERR "microcode: CPU%d patch does not match " | 136 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
151 | "(patch is %x, cpu base id is %x) \n", | 137 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " |
152 | cpu, mc_header->processor_rev_id[1], | 138 | "specific code not yet supported\n", cpu); |
153 | ((equiv_cpu_id >> 16) & 0xff)); | ||
154 | |||
155 | return 0; | 139 | return 0; |
156 | } | 140 | } |
157 | 141 | ||
158 | /* ucode may be northbridge specific */ | ||
159 | if (mc_header->nb_dev_id) { | ||
160 | nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
161 | (mc_header->nb_dev_id & 0xff), | ||
162 | NULL); | ||
163 | if ((!nb_pci_dev) || | ||
164 | (mc_header->nb_rev_id != nb_pci_dev->revision)) { | ||
165 | printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); | ||
166 | pci_dev_put(nb_pci_dev); | ||
167 | return 0; | ||
168 | } | ||
169 | pci_dev_put(nb_pci_dev); | ||
170 | } | ||
171 | |||
172 | /* ucode may be southbridge specific */ | ||
173 | if (mc_header->sb_dev_id) { | ||
174 | sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
175 | (mc_header->sb_dev_id & 0xff), | ||
176 | NULL); | ||
177 | if ((!sb_pci_dev) || | ||
178 | (mc_header->sb_rev_id != sb_pci_dev->revision)) { | ||
179 | printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); | ||
180 | pci_dev_put(sb_pci_dev); | ||
181 | return 0; | ||
182 | } | ||
183 | pci_dev_put(sb_pci_dev); | ||
184 | } | ||
185 | |||
186 | if (mc_header->patch_id <= rev) | 142 | if (mc_header->patch_id <= rev) |
187 | return 0; | 143 | return 0; |
188 | 144 | ||
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
192 | static void apply_microcode_amd(int cpu) | 148 | static void apply_microcode_amd(int cpu) |
193 | { | 149 | { |
194 | unsigned long flags; | 150 | unsigned long flags; |
195 | unsigned int eax, edx; | 151 | u32 rev, dummy; |
196 | unsigned int rev; | ||
197 | int cpu_num = raw_smp_processor_id(); | 152 | int cpu_num = raw_smp_processor_id(); |
198 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 153 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
199 | struct microcode_amd *mc_amd = uci->mc; | 154 | struct microcode_amd *mc_amd = uci->mc; |
200 | unsigned long addr; | ||
201 | 155 | ||
202 | /* We should bind the task to the CPU */ | 156 | /* We should bind the task to the CPU */ |
203 | BUG_ON(cpu_num != cpu); | 157 | BUG_ON(cpu_num != cpu); |
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu) | |||
206 | return; | 160 | return; |
207 | 161 | ||
208 | spin_lock_irqsave(µcode_update_lock, flags); | 162 | spin_lock_irqsave(µcode_update_lock, flags); |
209 | 163 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | |
210 | addr = (unsigned long)&mc_amd->hdr.data_code; | ||
211 | edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); | ||
212 | eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); | ||
213 | |||
214 | asm volatile("movl %0, %%ecx; wrmsr" : | ||
215 | : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); | ||
216 | |||
217 | /* get patch id after patching */ | 164 | /* get patch id after patching */ |
218 | asm volatile("movl %1, %%ecx; rdmsr" | 165 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
219 | : "=a" (rev) | ||
220 | : "i" (0x0000008B) : "ecx"); | ||
221 | |||
222 | spin_unlock_irqrestore(µcode_update_lock, flags); | 166 | spin_unlock_irqrestore(µcode_update_lock, flags); |
223 | 167 | ||
224 | /* check current patch id and patch's id for match */ | 168 | /* check current patch id and patch's id for match */ |
225 | if (rev != mc_amd->hdr.patch_id) { | 169 | if (rev != mc_amd->hdr.patch_id) { |
226 | printk(KERN_ERR "microcode: CPU%d update from revision " | 170 | printk(KERN_ERR "microcode: CPU%d: update failed " |
227 | "0x%x to 0x%x failed\n", cpu_num, | 171 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
228 | mc_amd->hdr.patch_id, rev); | ||
229 | return; | 172 | return; |
230 | } | 173 | } |
231 | 174 | ||
232 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 175 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", |
233 | "0x%x to 0x%x \n", | 176 | cpu, rev); |
234 | cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); | ||
235 | 177 | ||
236 | uci->cpu_sig.rev = rev; | 178 | uci->cpu_sig.rev = rev; |
237 | } | 179 | } |
238 | 180 | ||
239 | static void * get_next_ucode(u8 *buf, unsigned int size, | 181 | static int get_ucode_data(void *to, const u8 *from, size_t n) |
240 | int (*get_ucode_data)(void *, const void *, size_t), | 182 | { |
241 | unsigned int *mc_size) | 183 | memcpy(to, from, n); |
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static void *get_next_ucode(const u8 *buf, unsigned int size, | ||
188 | unsigned int *mc_size) | ||
242 | { | 189 | { |
243 | unsigned int total_size; | 190 | unsigned int total_size; |
244 | #define UCODE_CONTAINER_SECTION_HDR 8 | ||
245 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 191 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
246 | void *mc; | 192 | void *mc; |
247 | 193 | ||
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size, | |||
249 | return NULL; | 195 | return NULL; |
250 | 196 | ||
251 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 197 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
252 | printk(KERN_ERR "microcode: error! " | 198 | printk(KERN_ERR "microcode: error: invalid type field in " |
253 | "Wrong microcode payload type field\n"); | 199 | "container file section header\n"); |
254 | return NULL; | 200 | return NULL; |
255 | } | 201 | } |
256 | 202 | ||
257 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 203 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
258 | 204 | ||
259 | printk(KERN_INFO "microcode: size %u, total_size %u\n", | 205 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", |
260 | size, total_size); | 206 | size, total_size); |
261 | 207 | ||
262 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 208 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
263 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | 209 | printk(KERN_ERR "microcode: error: size mismatch\n"); |
264 | return NULL; | 210 | return NULL; |
265 | } | 211 | } |
266 | 212 | ||
267 | mc = vmalloc(UCODE_MAX_SIZE); | 213 | mc = vmalloc(UCODE_MAX_SIZE); |
268 | if (mc) { | 214 | if (mc) { |
269 | memset(mc, 0, UCODE_MAX_SIZE); | 215 | memset(mc, 0, UCODE_MAX_SIZE); |
270 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { | 216 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, |
217 | total_size)) { | ||
271 | vfree(mc); | 218 | vfree(mc); |
272 | mc = NULL; | 219 | mc = NULL; |
273 | } else | 220 | } else |
274 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | 221 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
275 | } | 222 | } |
276 | #undef UCODE_CONTAINER_SECTION_HDR | ||
277 | return mc; | 223 | return mc; |
278 | } | 224 | } |
279 | 225 | ||
280 | 226 | ||
281 | static int install_equiv_cpu_table(u8 *buf, | 227 | static int install_equiv_cpu_table(const u8 *buf) |
282 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
283 | { | 228 | { |
284 | #define UCODE_CONTAINER_HEADER_SIZE 12 | ||
285 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 229 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; |
286 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 230 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
287 | unsigned long size; | 231 | unsigned long size; |
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf, | |||
292 | size = buf_pos[2]; | 236 | size = buf_pos[2]; |
293 | 237 | ||
294 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 238 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
295 | printk(KERN_ERR "microcode: error! " | 239 | printk(KERN_ERR "microcode: error: invalid type field in " |
296 | "Wrong microcode equivalnet cpu table\n"); | 240 | "container file section header\n"); |
297 | return 0; | 241 | return 0; |
298 | } | 242 | } |
299 | 243 | ||
300 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 244 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
301 | if (!equiv_cpu_table) { | 245 | if (!equiv_cpu_table) { |
302 | printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); | 246 | printk(KERN_ERR "microcode: failed to allocate " |
247 | "equivalent CPU table\n"); | ||
303 | return 0; | 248 | return 0; |
304 | } | 249 | } |
305 | 250 | ||
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf, | |||
310 | } | 255 | } |
311 | 256 | ||
312 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 257 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
313 | #undef UCODE_CONTAINER_HEADER_SIZE | ||
314 | } | 258 | } |
315 | 259 | ||
316 | static void free_equiv_cpu_table(void) | 260 | static void free_equiv_cpu_table(void) |
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void) | |||
321 | } | 265 | } |
322 | } | 266 | } |
323 | 267 | ||
324 | static int generic_load_microcode(int cpu, void *data, size_t size, | 268 | static int generic_load_microcode(int cpu, const u8 *data, size_t size) |
325 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
326 | { | 269 | { |
327 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 270 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
328 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 271 | const u8 *ucode_ptr = data; |
272 | void *new_mc = NULL; | ||
273 | void *mc; | ||
329 | int new_rev = uci->cpu_sig.rev; | 274 | int new_rev = uci->cpu_sig.rev; |
330 | unsigned int leftover; | 275 | unsigned int leftover; |
331 | unsigned long offset; | 276 | unsigned long offset; |
332 | 277 | ||
333 | offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); | 278 | offset = install_equiv_cpu_table(ucode_ptr); |
334 | if (!offset) { | 279 | if (!offset) { |
335 | printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); | 280 | printk(KERN_ERR "microcode: failed to create " |
281 | "equivalent cpu table\n"); | ||
336 | return -EINVAL; | 282 | return -EINVAL; |
337 | } | 283 | } |
338 | 284 | ||
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
343 | unsigned int uninitialized_var(mc_size); | 289 | unsigned int uninitialized_var(mc_size); |
344 | struct microcode_header_amd *mc_header; | 290 | struct microcode_header_amd *mc_header; |
345 | 291 | ||
346 | mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); | 292 | mc = get_next_ucode(ucode_ptr, leftover, &mc_size); |
347 | if (!mc) | 293 | if (!mc) |
348 | break; | 294 | break; |
349 | 295 | ||
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
353 | vfree(new_mc); | 299 | vfree(new_mc); |
354 | new_rev = mc_header->patch_id; | 300 | new_rev = mc_header->patch_id; |
355 | new_mc = mc; | 301 | new_mc = mc; |
356 | } else | 302 | } else |
357 | vfree(mc); | 303 | vfree(mc); |
358 | 304 | ||
359 | ucode_ptr += mc_size; | 305 | ucode_ptr += mc_size; |
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
365 | if (uci->mc) | 311 | if (uci->mc) |
366 | vfree(uci->mc); | 312 | vfree(uci->mc); |
367 | uci->mc = new_mc; | 313 | uci->mc = new_mc; |
368 | pr_debug("microcode: CPU%d found a matching microcode update with" | 314 | pr_debug("microcode: CPU%d found a matching microcode " |
369 | " version 0x%x (current=0x%x)\n", | 315 | "update with version 0x%x (current=0x%x)\n", |
370 | cpu, new_rev, uci->cpu_sig.rev); | 316 | cpu, new_rev, uci->cpu_sig.rev); |
371 | } else | 317 | } else |
372 | vfree(new_mc); | 318 | vfree(new_mc); |
373 | } | 319 | } |
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
377 | return (int)leftover; | 323 | return (int)leftover; |
378 | } | 324 | } |
379 | 325 | ||
380 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
381 | { | ||
382 | memcpy(to, from, n); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | static int request_microcode_fw(int cpu, struct device *device) | 326 | static int request_microcode_fw(int cpu, struct device *device) |
387 | { | 327 | { |
388 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 328 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
394 | 334 | ||
395 | ret = request_firmware(&firmware, fw_name, device); | 335 | ret = request_firmware(&firmware, fw_name, device); |
396 | if (ret) { | 336 | if (ret) { |
397 | printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); | 337 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); |
398 | return ret; | 338 | return ret; |
399 | } | 339 | } |
400 | 340 | ||
401 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 341 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
402 | &get_ucode_fw); | ||
403 | 342 | ||
404 | release_firmware(firmware); | 343 | release_firmware(firmware); |
405 | 344 | ||
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
408 | 347 | ||
409 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 348 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) |
410 | { | 349 | { |
411 | printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" | 350 | printk(KERN_INFO "microcode: AMD microcode update via " |
412 | "is not supported\n"); | 351 | "/dev/cpu/microcode not supported\n"); |
413 | return -1; | 352 | return -1; |
414 | } | 353 | } |
415 | 354 | ||
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
433 | { | 372 | { |
434 | return µcode_amd_ops; | 373 | return µcode_amd_ops; |
435 | } | 374 | } |
375 | |||
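
The same file also swaps the hand-written `asm volatile` MSR sequences for rdmsr()/wrmsrl(), naming the registers (MSR_AMD64_PATCH_LEVEL, MSR_AMD64_PATCH_LOADER) instead of hard-coding 0x8B and 0xc0010020. Roughly what those wrappers expand to (a simplified sketch; rdmsr/wrmsr are privileged instructions, so this only illustrates the operand mapping):

#include <stdint.h>

/* roughly rdmsr(msr, lo, hi): ecx selects the MSR, result in edx:eax */
static inline void rdmsr_sketch(uint32_t msr, uint32_t *lo, uint32_t *hi)
{
        __asm__ volatile("rdmsr" : "=a"(*lo), "=d"(*hi) : "c"(msr));
}

/* roughly wrmsrl(msr, val): 64-bit value split across edx:eax */
static inline void wrmsrl_sketch(uint32_t msr, uint64_t val)
{
        __asm__ volatile("wrmsr" : : "c"(msr),
                         "a"((uint32_t)val), "d"((uint32_t)(val >> 32)));
}
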
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 82fb2809ce32..c9b721ba968c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL"); | |||
99 | 99 | ||
100 | #define MICROCODE_VERSION "2.00" | 100 | #define MICROCODE_VERSION "2.00" |
101 | 101 | ||
102 | struct microcode_ops *microcode_ops; | 102 | static struct microcode_ops *microcode_ops; |
103 | 103 | ||
104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
105 | static DEFINE_MUTEX(microcode_mutex); | 105 | static DEFINE_MUTEX(microcode_mutex); |
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | /* fake device for request_firmware */ | 205 | /* fake device for request_firmware */ |
206 | struct platform_device *microcode_pdev; | 206 | static struct platform_device *microcode_pdev; |
207 | 207 | ||
208 | static ssize_t reload_store(struct sys_device *dev, | 208 | static ssize_t reload_store(struct sys_device *dev, |
209 | struct sysdev_attribute *attr, | 209 | struct sysdev_attribute *attr, |
@@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = { | |||
272 | .name = "microcode", | 272 | .name = "microcode", |
273 | }; | 273 | }; |
274 | 274 | ||
275 | static void microcode_fini_cpu(int cpu) | 275 | static void __microcode_fini_cpu(int cpu) |
276 | { | 276 | { |
277 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 277 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
278 | 278 | ||
279 | mutex_lock(µcode_mutex); | ||
280 | microcode_ops->microcode_fini_cpu(cpu); | 279 | microcode_ops->microcode_fini_cpu(cpu); |
281 | uci->valid = 0; | 280 | uci->valid = 0; |
281 | } | ||
282 | |||
283 | static void microcode_fini_cpu(int cpu) | ||
284 | { | ||
285 | mutex_lock(µcode_mutex); | ||
286 | __microcode_fini_cpu(cpu); | ||
282 | mutex_unlock(µcode_mutex); | 287 | mutex_unlock(µcode_mutex); |
283 | } | 288 | } |
284 | 289 | ||
@@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu) | |||
306 | * to this cpu (a bit of paranoia): | 311 | * to this cpu (a bit of paranoia): |
307 | */ | 312 | */ |
308 | if (microcode_ops->collect_cpu_info(cpu, &nsig)) { | 313 | if (microcode_ops->collect_cpu_info(cpu, &nsig)) { |
309 | microcode_fini_cpu(cpu); | 314 | __microcode_fini_cpu(cpu); |
315 | printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n", | ||
316 | cpu); | ||
310 | return -1; | 317 | return -1; |
311 | } | 318 | } |
312 | 319 | ||
313 | if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { | 320 | if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) { |
314 | microcode_fini_cpu(cpu); | 321 | __microcode_fini_cpu(cpu); |
322 | printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n", | ||
323 | cpu); | ||
315 | /* Should we look for a new ucode here? */ | 324 | /* Should we look for a new ucode here? */ |
316 | return 1; | 325 | return 1; |
317 | } | 326 | } |
@@ -319,7 +328,7 @@ static int microcode_resume_cpu(int cpu) | |||
319 | return 0; | 328 | return 0; |
320 | } | 329 | } |
321 | 330 | ||
322 | void microcode_update_cpu(int cpu) | 331 | static void microcode_update_cpu(int cpu) |
323 | { | 332 | { |
324 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
325 | int err = 0; | 334 | int err = 0; |
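
The __microcode_fini_cpu()/microcode_fini_cpu() split in microcode_core.c is the standard locked/unlocked pairing: paths that already hold microcode_mutex, like microcode_resume_cpu() above, call the double-underscore variant, and everyone else goes through the locking wrapper. In miniature, with a pthread mutex standing in for the kernel mutex:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int valid;

/* caller must hold 'lock' */
static void __fini_cpu(void)
{
        valid = 0;
}

static void fini_cpu(void)
{
        pthread_mutex_lock(&lock);
        __fini_cpu();
        pthread_mutex_unlock(&lock);
}

/* a resume path already inside the lock calls __fini_cpu() directly,
 * avoiding the self-deadlock of re-taking a non-recursive mutex */
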
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 622dc4a21784..5e9f4fc51385 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -87,9 +87,9 @@ | |||
87 | #include <linux/cpu.h> | 87 | #include <linux/cpu.h> |
88 | #include <linux/firmware.h> | 88 | #include <linux/firmware.h> |
89 | #include <linux/platform_device.h> | 89 | #include <linux/platform_device.h> |
90 | #include <linux/uaccess.h> | ||
90 | 91 | ||
91 | #include <asm/msr.h> | 92 | #include <asm/msr.h> |
92 | #include <asm/uaccess.h> | ||
93 | #include <asm/processor.h> | 93 | #include <asm/processor.h> |
94 | #include <asm/microcode.h> | 94 | #include <asm/microcode.h> |
95 | 95 | ||
@@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); | |||
155 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | 155 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) |
156 | { | 156 | { |
157 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | 157 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); |
158 | unsigned long flags; | ||
158 | unsigned int val[2]; | 159 | unsigned int val[2]; |
159 | 160 | ||
160 | memset(csig, 0, sizeof(*csig)); | 161 | memset(csig, 0, sizeof(*csig)); |
@@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
174 | csig->pf = 1 << ((val[1] >> 18) & 7); | 175 | csig->pf = 1 << ((val[1] >> 18) & 7); |
175 | } | 176 | } |
176 | 177 | ||
178 | /* serialize access to the physical write to MSR 0x79 */ | ||
179 | spin_lock_irqsave(µcode_update_lock, flags); | ||
180 | |||
177 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 181 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); |
178 | /* see notes above for revision 1.07. Apparent chip bug */ | 182 | /* see notes above for revision 1.07. Apparent chip bug */ |
179 | sync_core(); | 183 | sync_core(); |
180 | /* get the current revision from MSR 0x8B */ | 184 | /* get the current revision from MSR 0x8B */ |
181 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | 185 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); |
186 | spin_unlock_irqrestore(µcode_update_lock, flags); | ||
187 | |||
182 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", | 188 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", |
183 | csig->sig, csig->pf, csig->rev); | 189 | csig->sig, csig->pf, csig->rev); |
184 | 190 | ||
@@ -190,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) | |||
190 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; | 196 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; |
191 | } | 197 | } |
192 | 198 | ||
193 | static inline int | 199 | static inline int |
194 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | 200 | update_match_revision(struct microcode_header_intel *mc_header, int rev) |
195 | { | 201 | { |
196 | return (mc_header->rev <= rev) ? 0 : 1; | 202 | return (mc_header->rev <= rev) ? 0 : 1; |
@@ -436,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
436 | return ret; | 442 | return ret; |
437 | } | 443 | } |
438 | 444 | ||
439 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 445 | ret = generic_load_microcode(cpu, (void *)firmware->data, |
440 | &get_ucode_fw); | 446 | firmware->size, &get_ucode_fw); |
441 | 447 | ||
442 | release_firmware(firmware); | 448 | release_firmware(firmware); |
443 | 449 | ||
@@ -454,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) | |||
454 | /* We should bind the task to the CPU */ | 460 | /* We should bind the task to the CPU */ |
455 | BUG_ON(cpu != raw_smp_processor_id()); | 461 | BUG_ON(cpu != raw_smp_processor_id()); |
456 | 462 | ||
457 | return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); | 463 | return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); |
458 | } | 464 | } |
459 | 465 | ||
460 | static void microcode_fini_cpu(int cpu) | 466 | static void microcode_fini_cpu(int cpu) |
@@ -465,7 +471,7 @@ static void microcode_fini_cpu(int cpu) | |||
465 | uci->mc = NULL; | 471 | uci->mc = NULL; |
466 | } | 472 | } |
467 | 473 | ||
468 | struct microcode_ops microcode_intel_ops = { | 474 | static struct microcode_ops microcode_intel_ops = { |
469 | .request_microcode_user = request_microcode_user, | 475 | .request_microcode_user = request_microcode_user, |
470 | .request_microcode_fw = request_microcode_fw, | 476 | .request_microcode_fw = request_microcode_fw, |
471 | .collect_cpu_info = collect_cpu_info, | 477 | .collect_cpu_info = collect_cpu_info, |
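
Context for the `csig->pf = 1 << ((val[1] >> 18) & 7)` line kept in the collect_cpu_info() hunk: the Intel platform id lives in bits 52:50 of MSR_IA32_PLATFORM_ID, i.e. bits 20:18 of the high dword, and it is widened into a one-hot mask so a microcode file that lists several supported platforms can be matched with a single AND. A standalone rendition with an invented MSR value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t msr_hi = 0x000c0000;             /* pretend high dword of MSR 0x17 */
        uint32_t pf = 1u << ((msr_hi >> 18) & 7); /* one-hot platform mask: 0x08 */

        uint32_t ucode_pf_mask = 0x0c;            /* file claims platforms 2 and 3 */
        printf("pf=0x%x, matches=%d\n", pf, !!(pf & ucode_pf_mask));
        return 0;
}
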
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe85..666e43df51f9 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -13,8 +13,7 @@ | |||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/acpi.h> | 14 | #include <asm/acpi.h> |
15 | #include <asm/mmconfig.h> | 15 | #include <asm/mmconfig.h> |
16 | 16 | #include <asm/pci_x86.h> | |
17 | #include "../pci/pci.h" | ||
18 | 17 | ||
19 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
20 | u32 bus; | 19 | u32 bus; |
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb1..0edd819050e7 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c | |||
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) | |||
42 | { | 42 | { |
43 | vfree(module_region); | 43 | vfree(module_region); |
44 | /* FIXME: If module_region == mod->init_region, trim exception | 44 | /* FIXME: If module_region == mod->init_region, trim exception |
45 | table entries. */ | 45 | table entries. */ |
46 | } | 46 | } |
47 | 47 | ||
48 | /* We don't need anything special. */ | 48 | /* We don't need anything special. */ |
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
113 | *para = NULL; | 113 | *para = NULL; |
114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
115 | 115 | ||
116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
117 | if (!strcmp(".text", secstrings + s->sh_name)) | 117 | if (!strcmp(".text", secstrings + s->sh_name)) |
118 | text = s; | 118 | text = s; |
119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | 119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) |
120 | alt = s; | 120 | alt = s; |
121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
122 | locks= s; | 122 | locks = s; |
123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | 123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) |
124 | para = s; | 124 | para = s; |
125 | } | 125 | } |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b1..c23880b90b5c 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
@@ -30,14 +30,14 @@ | |||
30 | #include <asm/page.h> | 30 | #include <asm/page.h> |
31 | #include <asm/pgtable.h> | 31 | #include <asm/pgtable.h> |
32 | 32 | ||
33 | #define DEBUGP(fmt...) | 33 | #define DEBUGP(fmt...) |
34 | 34 | ||
35 | #ifndef CONFIG_UML | 35 | #ifndef CONFIG_UML |
36 | void module_free(struct module *mod, void *module_region) | 36 | void module_free(struct module *mod, void *module_region) |
37 | { | 37 | { |
38 | vfree(module_region); | 38 | vfree(module_region); |
39 | /* FIXME: If module_region == mod->init_region, trim exception | 39 | /* FIXME: If module_region == mod->init_region, trim exception |
40 | table entries. */ | 40 | table entries. */ |
41 | } | 41 | } |
42 | 42 | ||
43 | void *module_alloc(unsigned long size) | 43 | void *module_alloc(unsigned long size) |
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
77 | Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; | 77 | Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; |
78 | Elf64_Sym *sym; | 78 | Elf64_Sym *sym; |
79 | void *loc; | 79 | void *loc; |
80 | u64 val; | 80 | u64 val; |
81 | 81 | ||
82 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 82 | DEBUGP("Applying relocate section %u to %u\n", relsec, |
83 | sechdrs[relsec].sh_info); | 83 | sechdrs[relsec].sh_info); |
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
91 | sym = (Elf64_Sym *)sechdrs[symindex].sh_addr | 91 | sym = (Elf64_Sym *)sechdrs[symindex].sh_addr |
92 | + ELF64_R_SYM(rel[i].r_info); | 92 | + ELF64_R_SYM(rel[i].r_info); |
93 | 93 | ||
94 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", | 94 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", |
95 | (int)ELF64_R_TYPE(rel[i].r_info), | 95 | (int)ELF64_R_TYPE(rel[i].r_info), |
96 | sym->st_value, rel[i].r_addend, (u64)loc); | 96 | sym->st_value, rel[i].r_addend, (u64)loc); |
97 | 97 | ||
98 | val = sym->st_value + rel[i].r_addend; | 98 | val = sym->st_value + rel[i].r_addend; |
99 | 99 | ||
100 | switch (ELF64_R_TYPE(rel[i].r_info)) { | 100 | switch (ELF64_R_TYPE(rel[i].r_info)) { |
101 | case R_X86_64_NONE: | 101 | case R_X86_64_NONE: |
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
113 | if ((s64)val != *(s32 *)loc) | 113 | if ((s64)val != *(s32 *)loc) |
114 | goto overflow; | 114 | goto overflow; |
115 | break; | 115 | break; |
116 | case R_X86_64_PC32: | 116 | case R_X86_64_PC32: |
117 | val -= (u64)loc; | 117 | val -= (u64)loc; |
118 | *(u32 *)loc = val; | 118 | *(u32 *)loc = val; |
119 | #if 0 | 119 | #if 0 |
120 | if ((s64)val != *(s32 *)loc) | 120 | if ((s64)val != *(s32 *)loc) |
121 | goto overflow; | 121 | goto overflow; |
122 | #endif | 122 | #endif |
123 | break; | 123 | break; |
124 | default: | 124 | default: |
125 | printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", | 125 | printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", |
126 | me->name, ELF64_R_TYPE(rel[i].r_info)); | 126 | me->name, ELF64_R_TYPE(rel[i].r_info)); |
127 | return -ENOEXEC; | 127 | return -ENOEXEC; |
128 | } | 128 | } |
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
130 | return 0; | 130 | return 0; |
131 | 131 | ||
132 | overflow: | 132 | overflow: |
133 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", | 133 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", |
134 | (int)ELF64_R_TYPE(rel[i].r_info), val); | 134 | (int)ELF64_R_TYPE(rel[i].r_info), val); |
135 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", | 135 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", |
136 | me->name); | 136 | me->name); |
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, | |||
143 | unsigned int relsec, | 143 | unsigned int relsec, |
144 | struct module *me) | 144 | struct module *me) |
145 | { | 145 | { |
146 | printk("non add relocation not supported\n"); | 146 | printk(KERN_ERR "non add relocation not supported\n"); |
147 | return -ENOSYS; | 147 | return -ENOSYS; |
148 | } | 148 | } |
149 | 149 | ||
150 | int module_finalize(const Elf_Ehdr *hdr, | 150 | int module_finalize(const Elf_Ehdr *hdr, |
151 | const Elf_Shdr *sechdrs, | 151 | const Elf_Shdr *sechdrs, |
152 | struct module *me) | 152 | struct module *me) |
153 | { | 153 | { |
154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, | 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, |
155 | *para = NULL; | 155 | *para = NULL; |
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
161 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | 161 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) |
162 | alt = s; | 162 | alt = s; |
163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
164 | locks= s; | 164 | locks = s; |
165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | 165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) |
166 | para = s; | 166 | para = s; |
167 | } | 167 | } |
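module_finalize() locates sections by comparing names against the section-header string table. The same lookup, reduced to a stand-alone sketch over an on-disk ELF image (the find_section() helper is illustrative, not a kernel API):

#include <elf.h>
#include <string.h>

/* Return the first section named 'name', or NULL ('hdr' = on-disk image). */
static const Elf64_Shdr *find_section(const Elf64_Ehdr *hdr,
				      const Elf64_Shdr *sechdrs,
				      const char *name)
{
	/* Section names live in the section-header string table. */
	const char *secstrings =
		(const char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
	unsigned int i;

	for (i = 0; i < hdr->e_shnum; i++)
		if (!strcmp(name, secstrings + sechdrs[i].sh_name))
			return &sechdrs[i];
	return NULL;
}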
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1dba09..200764453195 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -2,8 +2,8 @@ | |||
2 | * Intel Multiprocessor Specification 1.1 and 1.4 | 2 | * Intel Multiprocessor Specification 1.1 and 1.4 |
3 | * compliant MP-table parsing routines. | 3 | * compliant MP-table parsing routines. |
4 | * | 4 | * |
5 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | 5 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
6 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 6 | * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
7 | * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> | 7 | * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> |
8 | */ | 8 | */ |
9 | 9 | ||
@@ -16,25 +16,20 @@ | |||
16 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
17 | #include <linux/acpi.h> | 17 | #include <linux/acpi.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/smp.h> | ||
19 | 20 | ||
20 | #include <asm/smp.h> | ||
21 | #include <asm/mtrr.h> | 21 | #include <asm/mtrr.h> |
22 | #include <asm/mpspec.h> | 22 | #include <asm/mpspec.h> |
23 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
24 | #include <asm/io_apic.h> | 24 | #include <asm/io_apic.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/acpi.h> | ||
27 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | 27 | #include <asm/e820.h> |
29 | #include <asm/trampoline.h> | 28 | #include <asm/trampoline.h> |
30 | #include <asm/setup.h> | 29 | #include <asm/setup.h> |
30 | #include <asm/smp.h> | ||
31 | 31 | ||
32 | #include <mach_apic.h> | 32 | #include <asm/genapic.h> |
33 | #ifdef CONFIG_X86_32 | ||
34 | #include <mach_apicdef.h> | ||
35 | #include <mach_mpparse.h> | ||
36 | #endif | ||
37 | |||
38 | /* | 33 | /* |
39 | * Checksum an MP configuration block. | 34 | * Checksum an MP configuration block. |
40 | */ | 35 | */ |
@@ -49,12 +44,12 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
49 | return sum & 0xFF; | 44 | return sum & 0xFF; |
50 | } | 45 | } |
51 | 46 | ||
52 | static void __init MP_processor_info(struct mpc_config_processor *m) | 47 | static void __init MP_processor_info(struct mpc_cpu *m) |
53 | { | 48 | { |
54 | int apicid; | 49 | int apicid; |
55 | char *bootup_cpu = ""; | 50 | char *bootup_cpu = ""; |
56 | 51 | ||
57 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { | 52 | if (!(m->cpuflag & CPU_ENABLED)) { |
58 | disabled_cpus++; | 53 | disabled_cpus++; |
59 | return; | 54 | return; |
60 | } | 55 | } |
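mpf_checksum() relies on the MP-spec rule that every table's bytes sum to zero modulo 256, so any non-zero result means corruption. The check in isolation (a sketch, not the kernel function):

#include <stdint.h>
#include <stddef.h>

/* Returns 0 for a valid block: all bytes must sum to 0 mod 256. */
static int mp_checksum(const uint8_t *p, size_t len)
{
	uint8_t sum = 0;

	while (len--)
		sum += *p++;
	return sum;
}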
@@ -62,54 +57,54 @@ static void __init MP_processor_info(struct mpc_config_processor *m) | |||
62 | if (x86_quirks->mpc_apic_id) | 57 | if (x86_quirks->mpc_apic_id) |
63 | apicid = x86_quirks->mpc_apic_id(m); | 58 | apicid = x86_quirks->mpc_apic_id(m); |
64 | else | 59 | else |
65 | apicid = m->mpc_apicid; | 60 | apicid = m->apicid; |
66 | 61 | ||
67 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 62 | if (m->cpuflag & CPU_BOOTPROCESSOR) { |
68 | bootup_cpu = " (Bootup-CPU)"; | 63 | bootup_cpu = " (Bootup-CPU)"; |
69 | boot_cpu_physical_apicid = m->mpc_apicid; | 64 | boot_cpu_physical_apicid = m->apicid; |
70 | } | 65 | } |
71 | 66 | ||
72 | printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); | 67 | printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); |
73 | generic_processor_info(apicid, m->mpc_apicver); | 68 | generic_processor_info(apicid, m->apicver); |
74 | } | 69 | } |
75 | 70 | ||
76 | #ifdef CONFIG_X86_IO_APIC | 71 | #ifdef CONFIG_X86_IO_APIC |
77 | static void __init MP_bus_info(struct mpc_config_bus *m) | 72 | static void __init MP_bus_info(struct mpc_bus *m) |
78 | { | 73 | { |
79 | char str[7]; | 74 | char str[7]; |
80 | memcpy(str, m->mpc_bustype, 6); | 75 | memcpy(str, m->bustype, 6); |
81 | str[6] = 0; | 76 | str[6] = 0; |
82 | 77 | ||
83 | if (x86_quirks->mpc_oem_bus_info) | 78 | if (x86_quirks->mpc_oem_bus_info) |
84 | x86_quirks->mpc_oem_bus_info(m, str); | 79 | x86_quirks->mpc_oem_bus_info(m, str); |
85 | else | 80 | else |
86 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); | 81 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); |
87 | 82 | ||
88 | #if MAX_MP_BUSSES < 256 | 83 | #if MAX_MP_BUSSES < 256 |
89 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 84 | if (m->busid >= MAX_MP_BUSSES) { |
90 | printk(KERN_WARNING "MP table busid value (%d) for bustype %s " | 85 | printk(KERN_WARNING "MP table busid value (%d) for bustype %s " |
91 | " is too large, max. supported is %d\n", | 86 | " is too large, max. supported is %d\n", |
92 | m->mpc_busid, str, MAX_MP_BUSSES - 1); | 87 | m->busid, str, MAX_MP_BUSSES - 1); |
93 | return; | 88 | return; |
94 | } | 89 | } |
95 | #endif | 90 | #endif |
96 | 91 | ||
97 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { | 92 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { |
98 | set_bit(m->mpc_busid, mp_bus_not_pci); | 93 | set_bit(m->busid, mp_bus_not_pci); |
99 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 94 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 95 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; |
101 | #endif | 96 | #endif |
102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 97 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
103 | if (x86_quirks->mpc_oem_pci_bus) | 98 | if (x86_quirks->mpc_oem_pci_bus) |
104 | x86_quirks->mpc_oem_pci_bus(m); | 99 | x86_quirks->mpc_oem_pci_bus(m); |
105 | 100 | ||
106 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 101 | clear_bit(m->busid, mp_bus_not_pci); |
107 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 102 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 103 | mp_bus_id_to_type[m->busid] = MP_BUS_PCI; |
109 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | 104 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { |
110 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; | 105 | mp_bus_id_to_type[m->busid] = MP_BUS_EISA; |
111 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { | 106 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { |
112 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; | 107 | mp_bus_id_to_type[m->busid] = MP_BUS_MCA; |
113 | #endif | 108 | #endif |
114 | } else | 109 | } else |
115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 110 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
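MP_bus_info() classifies buses by a fixed-width, space-padded 6-byte type string, which is why the comparisons use strncmp with sizeof(literal) - 1 to ignore the padding. The idiom in isolation (bus_is_pci() is a made-up helper):

#include <string.h>

#define BUSTYPE_PCI "PCI"

/* 'str' is the NUL-terminated copy of the 6-byte bustype field. */
static int bus_is_pci(const char *str)
{
	/* sizeof("PCI") - 1 == 3: compare only the significant prefix. */
	return strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0;
}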
@@ -133,89 +128,88 @@ static int bad_ioapic(unsigned long address) | |||
133 | return 0; | 128 | return 0; |
134 | } | 129 | } |
135 | 130 | ||
136 | static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | 131 | static void __init MP_ioapic_info(struct mpc_ioapic *m) |
137 | { | 132 | { |
138 | if (!(m->mpc_flags & MPC_APIC_USABLE)) | 133 | if (!(m->flags & MPC_APIC_USABLE)) |
139 | return; | 134 | return; |
140 | 135 | ||
141 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | 136 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
142 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); | 137 | m->apicid, m->apicver, m->apicaddr); |
143 | 138 | ||
144 | if (bad_ioapic(m->mpc_apicaddr)) | 139 | if (bad_ioapic(m->apicaddr)) |
145 | return; | 140 | return; |
146 | 141 | ||
147 | mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; | 142 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; |
148 | mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; | 143 | mp_ioapics[nr_ioapics].apicid = m->apicid; |
149 | mp_ioapics[nr_ioapics].mp_type = m->mpc_type; | 144 | mp_ioapics[nr_ioapics].type = m->type; |
150 | mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; | 145 | mp_ioapics[nr_ioapics].apicver = m->apicver; |
151 | mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; | 146 | mp_ioapics[nr_ioapics].flags = m->flags; |
152 | nr_ioapics++; | 147 | nr_ioapics++; |
153 | } | 148 | } |
154 | 149 | ||
155 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | 150 | static void print_MP_intsrc_info(struct mpc_intsrc *m) |
156 | { | 151 | { |
157 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," | 152 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
158 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 153 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
159 | m->mpc_irqtype, m->mpc_irqflag & 3, | 154 | m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, |
160 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 155 | m->srcbusirq, m->dstapic, m->dstirq); |
161 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | ||
162 | } | 156 | } |
163 | 157 | ||
164 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | 158 | static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) |
165 | { | 159 | { |
166 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," | 160 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
167 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 161 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
168 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | 162 | mp_irq->irqtype, mp_irq->irqflag & 3, |
169 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | 163 | (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, |
170 | mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); | 164 | mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); |
171 | } | 165 | } |
172 | 166 | ||
173 | static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, | 167 | static void __init assign_to_mp_irq(struct mpc_intsrc *m, |
174 | struct mp_config_intsrc *mp_irq) | 168 | struct mpc_intsrc *mp_irq) |
175 | { | 169 | { |
176 | mp_irq->mp_dstapic = m->mpc_dstapic; | 170 | mp_irq->dstapic = m->dstapic; |
177 | mp_irq->mp_type = m->mpc_type; | 171 | mp_irq->type = m->type; |
178 | mp_irq->mp_irqtype = m->mpc_irqtype; | 172 | mp_irq->irqtype = m->irqtype; |
179 | mp_irq->mp_irqflag = m->mpc_irqflag; | 173 | mp_irq->irqflag = m->irqflag; |
180 | mp_irq->mp_srcbus = m->mpc_srcbus; | 174 | mp_irq->srcbus = m->srcbus; |
181 | mp_irq->mp_srcbusirq = m->mpc_srcbusirq; | 175 | mp_irq->srcbusirq = m->srcbusirq; |
182 | mp_irq->mp_dstirq = m->mpc_dstirq; | 176 | mp_irq->dstirq = m->dstirq; |
183 | } | 177 | } |
184 | 178 | ||
185 | static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, | 179 | static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, |
186 | struct mpc_config_intsrc *m) | 180 | struct mpc_intsrc *m) |
187 | { | 181 | { |
188 | m->mpc_dstapic = mp_irq->mp_dstapic; | 182 | m->dstapic = mp_irq->dstapic; |
189 | m->mpc_type = mp_irq->mp_type; | 183 | m->type = mp_irq->type; |
190 | m->mpc_irqtype = mp_irq->mp_irqtype; | 184 | m->irqtype = mp_irq->irqtype; |
191 | m->mpc_irqflag = mp_irq->mp_irqflag; | 185 | m->irqflag = mp_irq->irqflag; |
192 | m->mpc_srcbus = mp_irq->mp_srcbus; | 186 | m->srcbus = mp_irq->srcbus; |
193 | m->mpc_srcbusirq = mp_irq->mp_srcbusirq; | 187 | m->srcbusirq = mp_irq->srcbusirq; |
194 | m->mpc_dstirq = mp_irq->mp_dstirq; | 188 | m->dstirq = mp_irq->dstirq; |
195 | } | 189 | } |
196 | 190 | ||
197 | static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, | 191 | static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, |
198 | struct mpc_config_intsrc *m) | 192 | struct mpc_intsrc *m) |
199 | { | 193 | { |
200 | if (mp_irq->mp_dstapic != m->mpc_dstapic) | 194 | if (mp_irq->dstapic != m->dstapic) |
201 | return 1; | 195 | return 1; |
202 | if (mp_irq->mp_type != m->mpc_type) | 196 | if (mp_irq->type != m->type) |
203 | return 2; | 197 | return 2; |
204 | if (mp_irq->mp_irqtype != m->mpc_irqtype) | 198 | if (mp_irq->irqtype != m->irqtype) |
205 | return 3; | 199 | return 3; |
206 | if (mp_irq->mp_irqflag != m->mpc_irqflag) | 200 | if (mp_irq->irqflag != m->irqflag) |
207 | return 4; | 201 | return 4; |
208 | if (mp_irq->mp_srcbus != m->mpc_srcbus) | 202 | if (mp_irq->srcbus != m->srcbus) |
209 | return 5; | 203 | return 5; |
210 | if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) | 204 | if (mp_irq->srcbusirq != m->srcbusirq) |
211 | return 6; | 205 | return 6; |
212 | if (mp_irq->mp_dstirq != m->mpc_dstirq) | 206 | if (mp_irq->dstirq != m->dstirq) |
213 | return 7; | 207 | return 7; |
214 | 208 | ||
215 | return 0; | 209 | return 0; |
216 | } | 210 | } |
217 | 211 | ||
218 | static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | 212 | static void __init MP_intsrc_info(struct mpc_intsrc *m) |
219 | { | 213 | { |
220 | int i; | 214 | int i; |
221 | 215 | ||
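A side effect of the struct rename worth noting: assign_to_mp_irq() and assign_to_mpc_intsrc() now copy between two objects of the same struct mpc_intsrc type, so the seven field-by-field copies could collapse into one struct assignment. A sketch of that follow-up simplification (not part of this patch; it assumes the kernel's struct mpc_intsrc definition):

static void assign_to_mp_irq(struct mpc_intsrc *m, struct mpc_intsrc *mp_irq)
{
	*mp_irq = *m;	/* identical types: a single struct copy suffices */
}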
@@ -233,57 +227,55 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
233 | 227 | ||
234 | #endif | 228 | #endif |
235 | 229 | ||
236 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 230 | static void __init MP_lintsrc_info(struct mpc_lintsrc *m) |
237 | { | 231 | { |
238 | apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," | 232 | apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," |
239 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 233 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
240 | m->mpc_irqtype, m->mpc_irqflag & 3, | 234 | m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, |
241 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 235 | m->srcbusirq, m->destapic, m->destapiclint); |
242 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | ||
243 | } | 236 | } |
244 | 237 | ||
245 | /* | 238 | /* |
246 | * Read/parse the MPC | 239 | * Read/parse the MPC |
247 | */ | 240 | */ |
248 | 241 | ||
249 | static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, | 242 | static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) |
250 | char *str) | ||
251 | { | 243 | { |
252 | 244 | ||
253 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { | 245 | if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { |
254 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", | 246 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", |
255 | mpc->mpc_signature[0], mpc->mpc_signature[1], | 247 | mpc->signature[0], mpc->signature[1], |
256 | mpc->mpc_signature[2], mpc->mpc_signature[3]); | 248 | mpc->signature[2], mpc->signature[3]); |
257 | return 0; | 249 | return 0; |
258 | } | 250 | } |
259 | if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) { | 251 | if (mpf_checksum((unsigned char *)mpc, mpc->length)) { |
260 | printk(KERN_ERR "MPTABLE: checksum error!\n"); | 252 | printk(KERN_ERR "MPTABLE: checksum error!\n"); |
261 | return 0; | 253 | return 0; |
262 | } | 254 | } |
263 | if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) { | 255 | if (mpc->spec != 0x01 && mpc->spec != 0x04) { |
264 | printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", | 256 | printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", |
265 | mpc->mpc_spec); | 257 | mpc->spec); |
266 | return 0; | 258 | return 0; |
267 | } | 259 | } |
268 | if (!mpc->mpc_lapic) { | 260 | if (!mpc->lapic) { |
269 | printk(KERN_ERR "MPTABLE: null local APIC address!\n"); | 261 | printk(KERN_ERR "MPTABLE: null local APIC address!\n"); |
270 | return 0; | 262 | return 0; |
271 | } | 263 | } |
272 | memcpy(oem, mpc->mpc_oem, 8); | 264 | memcpy(oem, mpc->oem, 8); |
273 | oem[8] = 0; | 265 | oem[8] = 0; |
274 | printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); | 266 | printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); |
275 | 267 | ||
276 | memcpy(str, mpc->mpc_productid, 12); | 268 | memcpy(str, mpc->productid, 12); |
277 | str[12] = 0; | 269 | str[12] = 0; |
278 | 270 | ||
279 | printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); | 271 | printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); |
280 | 272 | ||
281 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); | 273 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); |
282 | 274 | ||
283 | return 1; | 275 | return 1; |
284 | } | 276 | } |
285 | 277 | ||
286 | static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | 278 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) |
287 | { | 279 | { |
288 | char str[16]; | 280 | char str[16]; |
289 | char oem[10]; | 281 | char oem[10]; |
@@ -295,27 +287,18 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
295 | return 0; | 287 | return 0; |
296 | 288 | ||
297 | #ifdef CONFIG_X86_32 | 289 | #ifdef CONFIG_X86_32 |
298 | /* | 290 | generic_mps_oem_check(mpc, oem, str); |
299 | * need to make sure summit and es7000's mps_oem_check is safe to be | ||
300 | * called early via genericarch 's mps_oem_check | ||
301 | */ | ||
302 | if (early) { | ||
303 | #ifdef CONFIG_X86_NUMAQ | ||
304 | numaq_mps_oem_check(mpc, oem, str); | ||
305 | #endif | ||
306 | } else | ||
307 | mps_oem_check(mpc, oem, str); | ||
308 | #endif | 291 | #endif |
309 | /* save the local APIC address, it might be non-default */ | 292 | /* save the local APIC address, it might be non-default */ |
310 | if (!acpi_lapic) | 293 | if (!acpi_lapic) |
311 | mp_lapic_addr = mpc->mpc_lapic; | 294 | mp_lapic_addr = mpc->lapic; |
312 | 295 | ||
313 | if (early) | 296 | if (early) |
314 | return 1; | 297 | return 1; |
315 | 298 | ||
316 | if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { | 299 | if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) { |
317 | struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; | 300 | struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr; |
318 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); | 301 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize); |
319 | } | 302 | } |
320 | 303 | ||
321 | /* | 304 | /* |
@@ -324,12 +307,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
324 | if (x86_quirks->mpc_record) | 307 | if (x86_quirks->mpc_record) |
325 | *x86_quirks->mpc_record = 0; | 308 | *x86_quirks->mpc_record = 0; |
326 | 309 | ||
327 | while (count < mpc->mpc_length) { | 310 | while (count < mpc->length) { |
328 | switch (*mpt) { | 311 | switch (*mpt) { |
329 | case MP_PROCESSOR: | 312 | case MP_PROCESSOR: |
330 | { | 313 | { |
331 | struct mpc_config_processor *m = | 314 | struct mpc_cpu *m = (struct mpc_cpu *)mpt; |
332 | (struct mpc_config_processor *)mpt; | ||
333 | /* ACPI may have already provided this data */ | 315 | /* ACPI may have already provided this data */ |
334 | if (!acpi_lapic) | 316 | if (!acpi_lapic) |
335 | MP_processor_info(m); | 317 | MP_processor_info(m); |
@@ -339,8 +321,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
339 | } | 321 | } |
340 | case MP_BUS: | 322 | case MP_BUS: |
341 | { | 323 | { |
342 | struct mpc_config_bus *m = | 324 | struct mpc_bus *m = (struct mpc_bus *)mpt; |
343 | (struct mpc_config_bus *)mpt; | ||
344 | #ifdef CONFIG_X86_IO_APIC | 325 | #ifdef CONFIG_X86_IO_APIC |
345 | MP_bus_info(m); | 326 | MP_bus_info(m); |
346 | #endif | 327 | #endif |
@@ -351,30 +332,28 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
351 | case MP_IOAPIC: | 332 | case MP_IOAPIC: |
352 | { | 333 | { |
353 | #ifdef CONFIG_X86_IO_APIC | 334 | #ifdef CONFIG_X86_IO_APIC |
354 | struct mpc_config_ioapic *m = | 335 | struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; |
355 | (struct mpc_config_ioapic *)mpt; | ||
356 | MP_ioapic_info(m); | 336 | MP_ioapic_info(m); |
357 | #endif | 337 | #endif |
358 | mpt += sizeof(struct mpc_config_ioapic); | 338 | mpt += sizeof(struct mpc_ioapic); |
359 | count += sizeof(struct mpc_config_ioapic); | 339 | count += sizeof(struct mpc_ioapic); |
360 | break; | 340 | break; |
361 | } | 341 | } |
362 | case MP_INTSRC: | 342 | case MP_INTSRC: |
363 | { | 343 | { |
364 | #ifdef CONFIG_X86_IO_APIC | 344 | #ifdef CONFIG_X86_IO_APIC |
365 | struct mpc_config_intsrc *m = | 345 | struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; |
366 | (struct mpc_config_intsrc *)mpt; | ||
367 | 346 | ||
368 | MP_intsrc_info(m); | 347 | MP_intsrc_info(m); |
369 | #endif | 348 | #endif |
370 | mpt += sizeof(struct mpc_config_intsrc); | 349 | mpt += sizeof(struct mpc_intsrc); |
371 | count += sizeof(struct mpc_config_intsrc); | 350 | count += sizeof(struct mpc_intsrc); |
372 | break; | 351 | break; |
373 | } | 352 | } |
374 | case MP_LINTSRC: | 353 | case MP_LINTSRC: |
375 | { | 354 | { |
376 | struct mpc_config_lintsrc *m = | 355 | struct mpc_lintsrc *m = |
377 | (struct mpc_config_lintsrc *)mpt; | 356 | (struct mpc_lintsrc *)mpt; |
378 | MP_lintsrc_info(m); | 357 | MP_lintsrc_info(m); |
379 | mpt += sizeof(*m); | 358 | mpt += sizeof(*m); |
380 | count += sizeof(*m); | 359 | count += sizeof(*m); |
@@ -385,21 +364,21 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
385 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); | 364 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); |
386 | printk(KERN_ERR "type %x\n", *mpt); | 365 | printk(KERN_ERR "type %x\n", *mpt); |
387 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, | 366 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, |
388 | 1, mpc, mpc->mpc_length, 1); | 367 | 1, mpc, mpc->length, 1); |
389 | count = mpc->mpc_length; | 368 | count = mpc->length; |
390 | break; | 369 | break; |
391 | } | 370 | } |
392 | if (x86_quirks->mpc_record) | 371 | if (x86_quirks->mpc_record) |
393 | (*x86_quirks->mpc_record)++; | 372 | (*x86_quirks->mpc_record)++; |
394 | } | 373 | } |
395 | 374 | ||
396 | #ifdef CONFIG_X86_GENERICARCH | 375 | #ifdef CONFIG_X86_BIGSMP |
397 | generic_bigsmp_probe(); | 376 | generic_bigsmp_probe(); |
398 | #endif | 377 | #endif |
399 | 378 | ||
400 | #ifdef CONFIG_X86_32 | 379 | if (apic->setup_apic_routing) |
401 | setup_apic_routing(); | 380 | apic->setup_apic_routing(); |
402 | #endif | 381 | |
403 | if (!num_processors) | 382 | if (!num_processors) |
404 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 383 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
405 | return num_processors; | 384 | return num_processors; |
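smp_read_mpc() walks a packed stream of variable-size records, dispatching on the leading type byte and advancing the cursor by each record's size. The skeleton of that pattern with the MP-specific handlers stripped out (sizes[] is a hypothetical type-to-length table):

/* Walk a packed record stream; each record starts with a 1-byte type tag. */
static int walk_records(const unsigned char *mpt, unsigned int length,
			const unsigned int *sizes, unsigned int ntypes)
{
	unsigned int count = 0;

	while (count < length) {
		unsigned char type = mpt[count];

		if (type >= ntypes || !sizes[type])
			return -1;	/* unknown record: table is corrupt */
		/* ... dispatch on 'type', record body at &mpt[count] ... */
		count += sizes[type];
	}
	return 0;
}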
@@ -417,16 +396,16 @@ static int __init ELCR_trigger(unsigned int irq) | |||
417 | 396 | ||
418 | static void __init construct_default_ioirq_mptable(int mpc_default_type) | 397 | static void __init construct_default_ioirq_mptable(int mpc_default_type) |
419 | { | 398 | { |
420 | struct mpc_config_intsrc intsrc; | 399 | struct mpc_intsrc intsrc; |
421 | int i; | 400 | int i; |
422 | int ELCR_fallback = 0; | 401 | int ELCR_fallback = 0; |
423 | 402 | ||
424 | intsrc.mpc_type = MP_INTSRC; | 403 | intsrc.type = MP_INTSRC; |
425 | intsrc.mpc_irqflag = 0; /* conforming */ | 404 | intsrc.irqflag = 0; /* conforming */ |
426 | intsrc.mpc_srcbus = 0; | 405 | intsrc.srcbus = 0; |
427 | intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; | 406 | intsrc.dstapic = mp_ioapics[0].apicid; |
428 | 407 | ||
429 | intsrc.mpc_irqtype = mp_INT; | 408 | intsrc.irqtype = mp_INT; |
430 | 409 | ||
431 | /* | 410 | /* |
432 | * If true, we have an ISA/PCI system with no IRQ entries | 411 | * If true, we have an ISA/PCI system with no IRQ entries |
@@ -469,30 +448,30 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
469 | * irqflag field (level sensitive, active high polarity). | 448 | * irqflag field (level sensitive, active high polarity). |
470 | */ | 449 | */ |
471 | if (ELCR_trigger(i)) | 450 | if (ELCR_trigger(i)) |
472 | intsrc.mpc_irqflag = 13; | 451 | intsrc.irqflag = 13; |
473 | else | 452 | else |
474 | intsrc.mpc_irqflag = 0; | 453 | intsrc.irqflag = 0; |
475 | } | 454 | } |
476 | 455 | ||
477 | intsrc.mpc_srcbusirq = i; | 456 | intsrc.srcbusirq = i; |
478 | intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ | 457 | intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ |
479 | MP_intsrc_info(&intsrc); | 458 | MP_intsrc_info(&intsrc); |
480 | } | 459 | } |
481 | 460 | ||
482 | intsrc.mpc_irqtype = mp_ExtINT; | 461 | intsrc.irqtype = mp_ExtINT; |
483 | intsrc.mpc_srcbusirq = 0; | 462 | intsrc.srcbusirq = 0; |
484 | intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ | 463 | intsrc.dstirq = 0; /* 8259A to INTIN0 */ |
485 | MP_intsrc_info(&intsrc); | 464 | MP_intsrc_info(&intsrc); |
486 | } | 465 | } |
487 | 466 | ||
488 | 467 | ||
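The magic irqflag value 13 above is the MP-spec encoding packed into one field: bits 0-1 hold polarity, bits 2-3 hold trigger mode, so 13 = 0b1101 means level-triggered (3), active-high (1), matching the decode in print_MP_intsrc_info(). A sketch of the packing (the MP_* names are illustrative):

/* MP-spec interrupt flags: bits 0-1 polarity, bits 2-3 trigger mode. */
#define MP_POL_ACTIVE_HIGH	1
#define MP_TRIG_LEVEL		3

static unsigned short mp_irqflag(unsigned int polarity, unsigned int trigger)
{
	return (polarity & 3) | ((trigger & 3) << 2);
}

/* mp_irqflag(MP_POL_ACTIVE_HIGH, MP_TRIG_LEVEL) == 13, as used above. */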
489 | static void __init construct_ioapic_table(int mpc_default_type) | 468 | static void __init construct_ioapic_table(int mpc_default_type) |
490 | { | 469 | { |
491 | struct mpc_config_ioapic ioapic; | 470 | struct mpc_ioapic ioapic; |
492 | struct mpc_config_bus bus; | 471 | struct mpc_bus bus; |
493 | 472 | ||
494 | bus.mpc_type = MP_BUS; | 473 | bus.type = MP_BUS; |
495 | bus.mpc_busid = 0; | 474 | bus.busid = 0; |
496 | switch (mpc_default_type) { | 475 | switch (mpc_default_type) { |
497 | default: | 476 | default: |
498 | printk(KERN_ERR "???\nUnknown standard configuration %d\n", | 477 | printk(KERN_ERR "???\nUnknown standard configuration %d\n", |
@@ -500,29 +479,29 @@ static void __init construct_ioapic_table(int mpc_default_type) | |||
500 | /* fall through */ | 479 | /* fall through */ |
501 | case 1: | 480 | case 1: |
502 | case 5: | 481 | case 5: |
503 | memcpy(bus.mpc_bustype, "ISA ", 6); | 482 | memcpy(bus.bustype, "ISA ", 6); |
504 | break; | 483 | break; |
505 | case 2: | 484 | case 2: |
506 | case 6: | 485 | case 6: |
507 | case 3: | 486 | case 3: |
508 | memcpy(bus.mpc_bustype, "EISA ", 6); | 487 | memcpy(bus.bustype, "EISA ", 6); |
509 | break; | 488 | break; |
510 | case 4: | 489 | case 4: |
511 | case 7: | 490 | case 7: |
512 | memcpy(bus.mpc_bustype, "MCA ", 6); | 491 | memcpy(bus.bustype, "MCA ", 6); |
513 | } | 492 | } |
514 | MP_bus_info(&bus); | 493 | MP_bus_info(&bus); |
515 | if (mpc_default_type > 4) { | 494 | if (mpc_default_type > 4) { |
516 | bus.mpc_busid = 1; | 495 | bus.busid = 1; |
517 | memcpy(bus.mpc_bustype, "PCI ", 6); | 496 | memcpy(bus.bustype, "PCI ", 6); |
518 | MP_bus_info(&bus); | 497 | MP_bus_info(&bus); |
519 | } | 498 | } |
520 | 499 | ||
521 | ioapic.mpc_type = MP_IOAPIC; | 500 | ioapic.type = MP_IOAPIC; |
522 | ioapic.mpc_apicid = 2; | 501 | ioapic.apicid = 2; |
523 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 502 | ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
524 | ioapic.mpc_flags = MPC_APIC_USABLE; | 503 | ioapic.flags = MPC_APIC_USABLE; |
525 | ioapic.mpc_apicaddr = 0xFEC00000; | 504 | ioapic.apicaddr = 0xFEC00000; |
526 | MP_ioapic_info(&ioapic); | 505 | MP_ioapic_info(&ioapic); |
527 | 506 | ||
528 | /* | 507 | /* |
@@ -536,8 +515,8 @@ static inline void __init construct_ioapic_table(int mpc_default_type) { } | |||
536 | 515 | ||
537 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 516 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) |
538 | { | 517 | { |
539 | struct mpc_config_processor processor; | 518 | struct mpc_cpu processor; |
540 | struct mpc_config_lintsrc lintsrc; | 519 | struct mpc_lintsrc lintsrc; |
541 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | 520 | int linttypes[2] = { mp_ExtINT, mp_NMI }; |
542 | int i; | 521 | int i; |
543 | 522 | ||
@@ -549,65 +528,65 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
549 | /* | 528 | /* |
550 | * 2 CPUs, numbered 0 & 1. | 529 | * 2 CPUs, numbered 0 & 1. |
551 | */ | 530 | */ |
552 | processor.mpc_type = MP_PROCESSOR; | 531 | processor.type = MP_PROCESSOR; |
553 | /* Either an integrated APIC or a discrete 82489DX. */ | 532 | /* Either an integrated APIC or a discrete 82489DX. */ |
554 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 533 | processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
555 | processor.mpc_cpuflag = CPU_ENABLED; | 534 | processor.cpuflag = CPU_ENABLED; |
556 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | 535 | processor.cpufeature = (boot_cpu_data.x86 << 8) | |
557 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | 536 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; |
558 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | 537 | processor.featureflag = boot_cpu_data.x86_capability[0]; |
559 | processor.mpc_reserved[0] = 0; | 538 | processor.reserved[0] = 0; |
560 | processor.mpc_reserved[1] = 0; | 539 | processor.reserved[1] = 0; |
561 | for (i = 0; i < 2; i++) { | 540 | for (i = 0; i < 2; i++) { |
562 | processor.mpc_apicid = i; | 541 | processor.apicid = i; |
563 | MP_processor_info(&processor); | 542 | MP_processor_info(&processor); |
564 | } | 543 | } |
565 | 544 | ||
566 | construct_ioapic_table(mpc_default_type); | 545 | construct_ioapic_table(mpc_default_type); |
567 | 546 | ||
568 | lintsrc.mpc_type = MP_LINTSRC; | 547 | lintsrc.type = MP_LINTSRC; |
569 | lintsrc.mpc_irqflag = 0; /* conforming */ | 548 | lintsrc.irqflag = 0; /* conforming */ |
570 | lintsrc.mpc_srcbusid = 0; | 549 | lintsrc.srcbusid = 0; |
571 | lintsrc.mpc_srcbusirq = 0; | 550 | lintsrc.srcbusirq = 0; |
572 | lintsrc.mpc_destapic = MP_APIC_ALL; | 551 | lintsrc.destapic = MP_APIC_ALL; |
573 | for (i = 0; i < 2; i++) { | 552 | for (i = 0; i < 2; i++) { |
574 | lintsrc.mpc_irqtype = linttypes[i]; | 553 | lintsrc.irqtype = linttypes[i]; |
575 | lintsrc.mpc_destapiclint = i; | 554 | lintsrc.destapiclint = i; |
576 | MP_lintsrc_info(&lintsrc); | 555 | MP_lintsrc_info(&lintsrc); |
577 | } | 556 | } |
578 | } | 557 | } |
579 | 558 | ||
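The cpufeature word built above packs family/model/stepping the way the MP spec expects, echoing the low bits of CPUID leaf 1. The encoding in isolation (a sketch; the field values correspond to boot_cpu_data.x86, .x86_model and .x86_mask):

/* family in bits 8-11, model in bits 4-7, stepping in bits 0-3 */
static unsigned int mp_cpufeature(unsigned int family, unsigned int model,
				  unsigned int stepping)
{
	return (family << 8) | (model << 4) | (stepping & 0xf);
}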
580 | static struct intel_mp_floating *mpf_found; | 559 | static struct mpf_intel *mpf_found; |
581 | 560 | ||
582 | /* | 561 | /* |
583 | * Scan the memory blocks for an SMP configuration block. | 562 | * Scan the memory blocks for an SMP configuration block. |
584 | */ | 563 | */ |
585 | static void __init __get_smp_config(unsigned int early) | 564 | static void __init __get_smp_config(unsigned int early) |
586 | { | 565 | { |
587 | struct intel_mp_floating *mpf = mpf_found; | 566 | struct mpf_intel *mpf = mpf_found; |
567 | |||
568 | if (!mpf) | ||
569 | return; | ||
588 | 570 | ||
589 | if (x86_quirks->mach_get_smp_config) { | ||
590 | if (x86_quirks->mach_get_smp_config(early)) | ||
591 | return; | ||
592 | } | ||
593 | if (acpi_lapic && early) | 571 | if (acpi_lapic && early) |
594 | return; | 572 | return; |
573 | |||
595 | /* | 574 | /* |
596 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | 575 | * MPS doesn't support hyperthreading; only |
597 | * processors, where MPS only supports physical. | 576 | * thread 0's APIC ID appears in the MPS table |
598 | */ | 577 | */ |
599 | if (acpi_lapic && acpi_ioapic) { | 578 | if (acpi_lapic && acpi_ioapic) |
600 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
601 | "information\n"); | ||
602 | return; | 579 | return; |
603 | } else if (acpi_lapic) | 580 | |
604 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | 581 | if (x86_quirks->mach_get_smp_config) { |
605 | "configuration information\n"); | 582 | if (x86_quirks->mach_get_smp_config(early)) |
583 | return; | ||
584 | } | ||
606 | 585 | ||
607 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 586 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
608 | mpf->mpf_specification); | 587 | mpf->specification); |
609 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 588 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
610 | if (mpf->mpf_feature2 & (1 << 7)) { | 589 | if (mpf->feature2 & (1 << 7)) { |
611 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | 590 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); |
612 | pic_mode = 1; | 591 | pic_mode = 1; |
613 | } else { | 592 | } else { |
@@ -618,7 +597,7 @@ static void __init __get_smp_config(unsigned int early) | |||
618 | /* | 597 | /* |
619 | * Now see if we need to read further. | 598 | * Now see if we need to read further. |
620 | */ | 599 | */ |
621 | if (mpf->mpf_feature1 != 0) { | 600 | if (mpf->feature1 != 0) { |
622 | if (early) { | 601 | if (early) { |
623 | /* | 602 | /* |
624 | * local APIC has default address | 603 | * local APIC has default address |
@@ -628,16 +607,16 @@ static void __init __get_smp_config(unsigned int early) | |||
628 | } | 607 | } |
629 | 608 | ||
630 | printk(KERN_INFO "Default MP configuration #%d\n", | 609 | printk(KERN_INFO "Default MP configuration #%d\n", |
631 | mpf->mpf_feature1); | 610 | mpf->feature1); |
632 | construct_default_ISA_mptable(mpf->mpf_feature1); | 611 | construct_default_ISA_mptable(mpf->feature1); |
633 | 612 | ||
634 | } else if (mpf->mpf_physptr) { | 613 | } else if (mpf->physptr) { |
635 | 614 | ||
636 | /* | 615 | /* |
637 | * Read the physical hardware table. Anything here will | 616 | * Read the physical hardware table. Anything here will |
638 | * override the defaults. | 617 | * override the defaults. |
639 | */ | 618 | */ |
640 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { | 619 | if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { |
641 | #ifdef CONFIG_X86_LOCAL_APIC | 620 | #ifdef CONFIG_X86_LOCAL_APIC |
642 | smp_found_config = 0; | 621 | smp_found_config = 0; |
643 | #endif | 622 | #endif |
@@ -657,15 +636,15 @@ static void __init __get_smp_config(unsigned int early) | |||
657 | * ISA defaults and hope it will work. | 636 | * ISA defaults and hope it will work. |
658 | */ | 637 | */ |
659 | if (!mp_irq_entries) { | 638 | if (!mp_irq_entries) { |
660 | struct mpc_config_bus bus; | 639 | struct mpc_bus bus; |
661 | 640 | ||
662 | printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " | 641 | printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " |
663 | "using default mptable. " | 642 | "using default mptable. " |
664 | "(tell your hw vendor)\n"); | 643 | "(tell your hw vendor)\n"); |
665 | 644 | ||
666 | bus.mpc_type = MP_BUS; | 645 | bus.type = MP_BUS; |
667 | bus.mpc_busid = 0; | 646 | bus.busid = 0; |
668 | memcpy(bus.mpc_bustype, "ISA ", 6); | 647 | memcpy(bus.bustype, "ISA ", 6); |
669 | MP_bus_info(&bus); | 648 | MP_bus_info(&bus); |
670 | 649 | ||
671 | construct_default_ioirq_mptable(0); | 650 | construct_default_ioirq_mptable(0); |
@@ -695,32 +674,32 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
695 | unsigned reserve) | 674 | unsigned reserve) |
696 | { | 675 | { |
697 | unsigned int *bp = phys_to_virt(base); | 676 | unsigned int *bp = phys_to_virt(base); |
698 | struct intel_mp_floating *mpf; | 677 | struct mpf_intel *mpf; |
699 | 678 | ||
700 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 679 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
701 | bp, length); | 680 | bp, length); |
702 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 681 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
703 | 682 | ||
704 | while (length > 0) { | 683 | while (length > 0) { |
705 | mpf = (struct intel_mp_floating *)bp; | 684 | mpf = (struct mpf_intel *)bp; |
706 | if ((*bp == SMP_MAGIC_IDENT) && | 685 | if ((*bp == SMP_MAGIC_IDENT) && |
707 | (mpf->mpf_length == 1) && | 686 | (mpf->length == 1) && |
708 | !mpf_checksum((unsigned char *)bp, 16) && | 687 | !mpf_checksum((unsigned char *)bp, 16) && |
709 | ((mpf->mpf_specification == 1) | 688 | ((mpf->specification == 1) |
710 | || (mpf->mpf_specification == 4))) { | 689 | || (mpf->specification == 4))) { |
711 | #ifdef CONFIG_X86_LOCAL_APIC | 690 | #ifdef CONFIG_X86_LOCAL_APIC |
712 | smp_found_config = 1; | 691 | smp_found_config = 1; |
713 | #endif | 692 | #endif |
714 | mpf_found = mpf; | 693 | mpf_found = mpf; |
715 | 694 | ||
716 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | 695 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
717 | mpf, virt_to_phys(mpf)); | 696 | mpf, (u64)virt_to_phys(mpf)); |
718 | 697 | ||
719 | if (!reserve) | 698 | if (!reserve) |
720 | return 1; | 699 | return 1; |
721 | reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, | 700 | reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, |
722 | BOOTMEM_DEFAULT); | 701 | BOOTMEM_DEFAULT); |
723 | if (mpf->mpf_physptr) { | 702 | if (mpf->physptr) { |
724 | unsigned long size = PAGE_SIZE; | 703 | unsigned long size = PAGE_SIZE; |
725 | #ifdef CONFIG_X86_32 | 704 | #ifdef CONFIG_X86_32 |
726 | /* | 705 | /* |
@@ -729,14 +708,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
729 | * the bottom is mapped now. | 708 | * the bottom is mapped now. |
730 | * PC-9800's MPC table is placed at the very end | 709 | * PC-9800's MPC table is placed at the very end |
731 | * of physical memory, so simply reserving | 710 | * of physical memory, so simply reserving |
732 | * PAGE_SIZE from mpg->mpf_physptr yields BUG() | 711 | * PAGE_SIZE from mpf->physptr yields BUG() |
733 | * in reserve_bootmem. | 712 | * in reserve_bootmem. |
734 | */ | 713 | */ |
735 | unsigned long end = max_low_pfn * PAGE_SIZE; | 714 | unsigned long end = max_low_pfn * PAGE_SIZE; |
736 | if (mpf->mpf_physptr + size > end) | 715 | if (mpf->physptr + size > end) |
737 | size = end - mpf->mpf_physptr; | 716 | size = end - mpf->physptr; |
738 | #endif | 717 | #endif |
739 | reserve_bootmem_generic(mpf->mpf_physptr, size, | 718 | reserve_bootmem_generic(mpf->physptr, size, |
740 | BOOTMEM_DEFAULT); | 719 | BOOTMEM_DEFAULT); |
741 | } | 720 | } |
742 | 721 | ||
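smp_scan_config() hunts for the 16-byte MP floating pointer structure: the ASCII signature "_MP_" on a 16-byte boundary plus a zero byte-sum (the kernel additionally checks length == 1 and spec == 1 or 4). A stand-alone sketch of the scan:

#include <stdint.h>
#include <string.h>

/* Scan 'len' bytes at 'base' for a valid MP floating pointer structure. */
static const uint8_t *scan_for_mpf(const uint8_t *base, size_t len)
{
	size_t off;

	for (off = 0; off + 16 <= len; off += 16) {	/* 16-byte aligned */
		const uint8_t *p = base + off;
		uint8_t sum = 0;
		int i;

		if (memcmp(p, "_MP_", 4))
			continue;
		for (i = 0; i < 16; i++)
			sum += p[i];
		if (sum == 0)
			return p;	/* signature and checksum match */
	}
	return NULL;
}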
@@ -803,28 +782,28 @@ void __init find_smp_config(void) | |||
803 | #ifdef CONFIG_X86_IO_APIC | 782 | #ifdef CONFIG_X86_IO_APIC |
804 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; | 783 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; |
805 | 784 | ||
806 | static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) | 785 | static int __init get_MP_intsrc_index(struct mpc_intsrc *m) |
807 | { | 786 | { |
808 | int i; | 787 | int i; |
809 | 788 | ||
810 | if (m->mpc_irqtype != mp_INT) | 789 | if (m->irqtype != mp_INT) |
811 | return 0; | 790 | return 0; |
812 | 791 | ||
813 | if (m->mpc_irqflag != 0x0f) | 792 | if (m->irqflag != 0x0f) |
814 | return 0; | 793 | return 0; |
815 | 794 | ||
816 | /* not legacy */ | 795 | /* not legacy */ |
817 | 796 | ||
818 | for (i = 0; i < mp_irq_entries; i++) { | 797 | for (i = 0; i < mp_irq_entries; i++) { |
819 | if (mp_irqs[i].mp_irqtype != mp_INT) | 798 | if (mp_irqs[i].irqtype != mp_INT) |
820 | continue; | 799 | continue; |
821 | 800 | ||
822 | if (mp_irqs[i].mp_irqflag != 0x0f) | 801 | if (mp_irqs[i].irqflag != 0x0f) |
823 | continue; | 802 | continue; |
824 | 803 | ||
825 | if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) | 804 | if (mp_irqs[i].srcbus != m->srcbus) |
826 | continue; | 805 | continue; |
827 | if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) | 806 | if (mp_irqs[i].srcbusirq != m->srcbusirq) |
828 | continue; | 807 | continue; |
829 | if (irq_used[i]) { | 808 | if (irq_used[i]) { |
830 | /* already claimed */ | 809 | /* already claimed */ |
@@ -840,10 +819,10 @@ static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) | |||
840 | 819 | ||
841 | #define SPARE_SLOT_NUM 20 | 820 | #define SPARE_SLOT_NUM 20 |
842 | 821 | ||
843 | static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; | 822 | static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; |
844 | #endif | 823 | #endif |
845 | 824 | ||
846 | static int __init replace_intsrc_all(struct mp_config_table *mpc, | 825 | static int __init replace_intsrc_all(struct mpc_table *mpc, |
847 | unsigned long mpc_new_phys, | 826 | unsigned long mpc_new_phys, |
848 | unsigned long mpc_new_length) | 827 | unsigned long mpc_new_length) |
849 | { | 828 | { |
@@ -855,36 +834,33 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
855 | int count = sizeof(*mpc); | 834 | int count = sizeof(*mpc); |
856 | unsigned char *mpt = ((unsigned char *)mpc) + count; | 835 | unsigned char *mpt = ((unsigned char *)mpc) + count; |
857 | 836 | ||
858 | printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); | 837 | printk(KERN_INFO "mpc_length %x\n", mpc->length); |
859 | while (count < mpc->mpc_length) { | 838 | while (count < mpc->length) { |
860 | switch (*mpt) { | 839 | switch (*mpt) { |
861 | case MP_PROCESSOR: | 840 | case MP_PROCESSOR: |
862 | { | 841 | { |
863 | struct mpc_config_processor *m = | 842 | struct mpc_cpu *m = (struct mpc_cpu *)mpt; |
864 | (struct mpc_config_processor *)mpt; | ||
865 | mpt += sizeof(*m); | 843 | mpt += sizeof(*m); |
866 | count += sizeof(*m); | 844 | count += sizeof(*m); |
867 | break; | 845 | break; |
868 | } | 846 | } |
869 | case MP_BUS: | 847 | case MP_BUS: |
870 | { | 848 | { |
871 | struct mpc_config_bus *m = | 849 | struct mpc_bus *m = (struct mpc_bus *)mpt; |
872 | (struct mpc_config_bus *)mpt; | ||
873 | mpt += sizeof(*m); | 850 | mpt += sizeof(*m); |
874 | count += sizeof(*m); | 851 | count += sizeof(*m); |
875 | break; | 852 | break; |
876 | } | 853 | } |
877 | case MP_IOAPIC: | 854 | case MP_IOAPIC: |
878 | { | 855 | { |
879 | mpt += sizeof(struct mpc_config_ioapic); | 856 | mpt += sizeof(struct mpc_ioapic); |
880 | count += sizeof(struct mpc_config_ioapic); | 857 | count += sizeof(struct mpc_ioapic); |
881 | break; | 858 | break; |
882 | } | 859 | } |
883 | case MP_INTSRC: | 860 | case MP_INTSRC: |
884 | { | 861 | { |
885 | #ifdef CONFIG_X86_IO_APIC | 862 | #ifdef CONFIG_X86_IO_APIC |
886 | struct mpc_config_intsrc *m = | 863 | struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; |
887 | (struct mpc_config_intsrc *)mpt; | ||
888 | 864 | ||
889 | printk(KERN_INFO "OLD "); | 865 | printk(KERN_INFO "OLD "); |
890 | print_MP_intsrc_info(m); | 866 | print_MP_intsrc_info(m); |
@@ -905,14 +881,14 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
905 | nr_m_spare++; | 881 | nr_m_spare++; |
906 | } | 882 | } |
907 | #endif | 883 | #endif |
908 | mpt += sizeof(struct mpc_config_intsrc); | 884 | mpt += sizeof(struct mpc_intsrc); |
909 | count += sizeof(struct mpc_config_intsrc); | 885 | count += sizeof(struct mpc_intsrc); |
910 | break; | 886 | break; |
911 | } | 887 | } |
912 | case MP_LINTSRC: | 888 | case MP_LINTSRC: |
913 | { | 889 | { |
914 | struct mpc_config_lintsrc *m = | 890 | struct mpc_lintsrc *m = |
915 | (struct mpc_config_lintsrc *)mpt; | 891 | (struct mpc_lintsrc *)mpt; |
916 | mpt += sizeof(*m); | 892 | mpt += sizeof(*m); |
917 | count += sizeof(*m); | 893 | count += sizeof(*m); |
918 | break; | 894 | break; |
@@ -922,7 +898,7 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
922 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); | 898 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); |
923 | printk(KERN_ERR "type %x\n", *mpt); | 899 | printk(KERN_ERR "type %x\n", *mpt); |
924 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, | 900 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, |
925 | 1, mpc, mpc->mpc_length, 1); | 901 | 1, mpc, mpc->length, 1); |
926 | goto out; | 902 | goto out; |
927 | } | 903 | } |
928 | } | 904 | } |
@@ -932,10 +908,10 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
932 | if (irq_used[i]) | 908 | if (irq_used[i]) |
933 | continue; | 909 | continue; |
934 | 910 | ||
935 | if (mp_irqs[i].mp_irqtype != mp_INT) | 911 | if (mp_irqs[i].irqtype != mp_INT) |
936 | continue; | 912 | continue; |
937 | 913 | ||
938 | if (mp_irqs[i].mp_irqflag != 0x0f) | 914 | if (mp_irqs[i].irqflag != 0x0f) |
939 | continue; | 915 | continue; |
940 | 916 | ||
941 | if (nr_m_spare > 0) { | 917 | if (nr_m_spare > 0) { |
@@ -944,9 +920,8 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
944 | assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); | 920 | assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); |
945 | m_spare[nr_m_spare] = NULL; | 921 | m_spare[nr_m_spare] = NULL; |
946 | } else { | 922 | } else { |
947 | struct mpc_config_intsrc *m = | 923 | struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; |
948 | (struct mpc_config_intsrc *)mpt; | 924 | count += sizeof(struct mpc_intsrc); |
949 | count += sizeof(struct mpc_config_intsrc); | ||
950 | if (!mpc_new_phys) { | 925 | if (!mpc_new_phys) { |
951 | printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); | 926 | printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); |
952 | } else { | 927 | } else { |
@@ -958,17 +933,16 @@ static int __init replace_intsrc_all(struct mp_config_table *mpc, | |||
958 | } | 933 | } |
959 | } | 934 | } |
960 | assign_to_mpc_intsrc(&mp_irqs[i], m); | 935 | assign_to_mpc_intsrc(&mp_irqs[i], m); |
961 | mpc->mpc_length = count; | 936 | mpc->length = count; |
962 | mpt += sizeof(struct mpc_config_intsrc); | 937 | mpt += sizeof(struct mpc_intsrc); |
963 | } | 938 | } |
964 | print_mp_irq_info(&mp_irqs[i]); | 939 | print_mp_irq_info(&mp_irqs[i]); |
965 | } | 940 | } |
966 | #endif | 941 | #endif |
967 | out: | 942 | out: |
968 | /* update checksum */ | 943 | /* update checksum */ |
969 | mpc->mpc_checksum = 0; | 944 | mpc->checksum = 0; |
970 | mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, | 945 | mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length); |
971 | mpc->mpc_length); | ||
972 | 946 | ||
973 | return 0; | 947 | return 0; |
974 | } | 948 | } |
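The closing checksum fix-up works because the checksum byte takes part in its own sum: zero it, re-sum the table, then store the two's-complement negation so the whole table sums to 0 mod 256 again. In isolation (a sketch):

#include <stdint.h>
#include <stddef.h>

/* Recompute a checksum byte after editing a 'len'-byte table in place;
 * 'csum' points at the checksum byte inside the table. */
static void fix_checksum(uint8_t *table, size_t len, uint8_t *csum)
{
	uint8_t sum = 0;
	size_t i;

	*csum = 0;			/* exclude the stale value */
	for (i = 0; i < len; i++)
		sum += table[i];
	*csum = (uint8_t)(0 - sum);	/* table now sums to 0 mod 256 */
}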
@@ -1013,9 +987,8 @@ static int __init update_mp_table(void) | |||
1013 | { | 987 | { |
1014 | char str[16]; | 988 | char str[16]; |
1015 | char oem[10]; | 989 | char oem[10]; |
1016 | struct intel_mp_floating *mpf; | 990 | struct mpf_intel *mpf; |
1017 | struct mp_config_table *mpc; | 991 | struct mpc_table *mpc, *mpc_new; |
1018 | struct mp_config_table *mpc_new; | ||
1019 | 992 | ||
1020 | if (!enable_update_mptable) | 993 | if (!enable_update_mptable) |
1021 | return 0; | 994 | return 0; |
@@ -1027,21 +1000,21 @@ static int __init update_mp_table(void) | |||
1027 | /* | 1000 | /* |
1028 | * Now see if we need to go further. | 1001 | * Now see if we need to go further. |
1029 | */ | 1002 | */ |
1030 | if (mpf->mpf_feature1 != 0) | 1003 | if (mpf->feature1 != 0) |
1031 | return 0; | 1004 | return 0; |
1032 | 1005 | ||
1033 | if (!mpf->mpf_physptr) | 1006 | if (!mpf->physptr) |
1034 | return 0; | 1007 | return 0; |
1035 | 1008 | ||
1036 | mpc = phys_to_virt(mpf->mpf_physptr); | 1009 | mpc = phys_to_virt(mpf->physptr); |
1037 | 1010 | ||
1038 | if (!smp_check_mpc(mpc, oem, str)) | 1011 | if (!smp_check_mpc(mpc, oem, str)) |
1039 | return 0; | 1012 | return 0; |
1040 | 1013 | ||
1041 | printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); | 1014 | printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); |
1042 | printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); | 1015 | printk(KERN_INFO "physptr: %x\n", mpf->physptr); |
1043 | 1016 | ||
1044 | if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { | 1017 | if (mpc_new_phys && mpc->length > mpc_new_length) { |
1045 | mpc_new_phys = 0; | 1018 | mpc_new_phys = 0; |
1046 | printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", | 1019 | printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", |
1047 | mpc_new_length); | 1020 | mpc_new_length); |
@@ -1050,33 +1023,33 @@ static int __init update_mp_table(void) | |||
1050 | if (!mpc_new_phys) { | 1023 | if (!mpc_new_phys) { |
1051 | unsigned char old, new; | 1024 | unsigned char old, new; |
1052 | /* check if we can change the position */ | 1025 | /* check if we can change the position */ |
1053 | mpc->mpc_checksum = 0; | 1026 | mpc->checksum = 0; |
1054 | old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); | 1027 | old = mpf_checksum((unsigned char *)mpc, mpc->length); |
1055 | mpc->mpc_checksum = 0xff; | 1028 | mpc->checksum = 0xff; |
1056 | new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); | 1029 | new = mpf_checksum((unsigned char *)mpc, mpc->length); |
1057 | if (old == new) { | 1030 | if (old == new) { |
1058 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | 1031 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); |
1059 | return 0; | 1032 | return 0; |
1060 | } | 1033 | } |
1061 | printk(KERN_INFO "use in-position replacing\n"); | 1034 | printk(KERN_INFO "use in-position replacing\n"); |
1062 | } else { | 1035 | } else { |
1063 | mpf->mpf_physptr = mpc_new_phys; | 1036 | mpf->physptr = mpc_new_phys; |
1064 | mpc_new = phys_to_virt(mpc_new_phys); | 1037 | mpc_new = phys_to_virt(mpc_new_phys); |
1065 | memcpy(mpc_new, mpc, mpc->mpc_length); | 1038 | memcpy(mpc_new, mpc, mpc->length); |
1066 | mpc = mpc_new; | 1039 | mpc = mpc_new; |
1067 | /* check if we can modify that */ | 1040 | /* check if we can modify that */ |
1068 | if (mpc_new_phys - mpf->mpf_physptr) { | 1041 | if (mpc_new_phys - mpf->physptr) { |
1069 | struct intel_mp_floating *mpf_new; | 1042 | struct mpf_intel *mpf_new; |
1070 | /* steal 16 bytes from [0, 1k) */ | 1043 | /* steal 16 bytes from [0, 1k) */ |
1071 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); | 1044 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); |
1072 | mpf_new = phys_to_virt(0x400 - 16); | 1045 | mpf_new = phys_to_virt(0x400 - 16); |
1073 | memcpy(mpf_new, mpf, 16); | 1046 | memcpy(mpf_new, mpf, 16); |
1074 | mpf = mpf_new; | 1047 | mpf = mpf_new; |
1075 | mpf->mpf_physptr = mpc_new_phys; | 1048 | mpf->physptr = mpc_new_phys; |
1076 | } | 1049 | } |
1077 | mpf->mpf_checksum = 0; | 1050 | mpf->checksum = 0; |
1078 | mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); | 1051 | mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); |
1079 | printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); | 1052 | printk(KERN_INFO "physptr new: %x\n", mpf->physptr); |
1080 | } | 1053 | } |
1081 | 1054 | ||
1082 | /* | 1055 | /* |
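The writability probe in this hunk is a neat trick: store two different values into the checksum byte, re-summing after each; if both passes yield the same sum, the stores never landed and the table lives in ROM. As a sketch:

#include <stdint.h>
#include <stddef.h>

static uint8_t sum_bytes(const volatile uint8_t *p, size_t len)
{
	uint8_t sum = 0;

	while (len--)
		sum += *p++;
	return sum;
}

/* Non-zero means the byte at 'csum' cannot be modified (read-only table). */
static int table_is_readonly(volatile uint8_t *table, size_t len,
			     volatile uint8_t *csum)
{
	uint8_t a, b;

	*csum = 0x00;
	a = sum_bytes(table, len);
	*csum = 0xff;
	b = sum_bytes(table, len);
	return a == b;	/* the two writes had no visible effect */
}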
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 82a7c7ed6d45..3cf3413ec626 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -35,10 +35,10 @@ | |||
35 | #include <linux/device.h> | 35 | #include <linux/device.h> |
36 | #include <linux/cpu.h> | 36 | #include <linux/cpu.h> |
37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
38 | #include <linux/uaccess.h> | ||
38 | 39 | ||
39 | #include <asm/processor.h> | 40 | #include <asm/processor.h> |
40 | #include <asm/msr.h> | 41 | #include <asm/msr.h> |
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/system.h> | 42 | #include <asm/system.h> |
43 | 43 | ||
44 | static struct class *msr_class; | 44 | static struct class *msr_class; |
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file) | |||
136 | lock_kernel(); | 136 | lock_kernel(); |
137 | cpu = iminor(file->f_path.dentry->d_inode); | 137 | cpu = iminor(file->f_path.dentry->d_inode); |
138 | 138 | ||
139 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | 139 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { |
140 | ret = -ENXIO; /* No such CPU */ | 140 | ret = -ENXIO; /* No such CPU */ |
141 | goto out; | 141 | goto out; |
142 | } | 142 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2c97f07f1c2c..bdfad80c3cf1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -29,14 +29,12 @@ | |||
29 | 29 | ||
30 | #include <asm/i8259.h> | 30 | #include <asm/i8259.h> |
31 | #include <asm/io_apic.h> | 31 | #include <asm/io_apic.h> |
32 | #include <asm/smp.h> | ||
33 | #include <asm/nmi.h> | ||
34 | #include <asm/proto.h> | 32 | #include <asm/proto.h> |
35 | #include <asm/timer.h> | 33 | #include <asm/timer.h> |
36 | 34 | ||
37 | #include <asm/mce.h> | 35 | #include <asm/mce.h> |
38 | 36 | ||
39 | #include <mach_traps.h> | 37 | #include <asm/mach_traps.h> |
40 | 38 | ||
41 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
42 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
@@ -63,11 +61,7 @@ static int endflag __initdata; | |||
63 | 61 | ||
64 | static inline unsigned int get_nmi_count(int cpu) | 62 | static inline unsigned int get_nmi_count(int cpu) |
65 | { | 63 | { |
66 | #ifdef CONFIG_X86_64 | 64 | return per_cpu(irq_stat, cpu).__nmi_count; |
67 | return cpu_pda(cpu)->__nmi_count; | ||
68 | #else | ||
69 | return nmi_count(cpu); | ||
70 | #endif | ||
71 | } | 65 | } |
72 | 66 | ||
73 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
@@ -84,12 +78,8 @@ static inline int mce_in_progress(void) | |||
84 | */ | 78 | */ |
85 | static inline unsigned int get_timer_irqs(int cpu) | 79 | static inline unsigned int get_timer_irqs(int cpu) |
86 | { | 80 | { |
87 | #ifdef CONFIG_X86_64 | ||
88 | return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); | ||
89 | #else | ||
90 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | 81 | return per_cpu(irq_stat, cpu).apic_timer_irqs + |
91 | per_cpu(irq_stat, cpu).irq0_irqs; | 82 | per_cpu(irq_stat, cpu).irq0_irqs; |
92 | #endif | ||
93 | } | 83 | } |
94 | 84 | ||
95 | #ifdef CONFIG_SMP | 85 | #ifdef CONFIG_SMP |
@@ -131,6 +121,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
131 | atomic_dec(&nmi_active); | 121 | atomic_dec(&nmi_active); |
132 | } | 122 | } |
133 | 123 | ||
124 | static void __acpi_nmi_disable(void *__unused) | ||
125 | { | ||
126 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
127 | } | ||
128 | |||
134 | int __init check_nmi_watchdog(void) | 129 | int __init check_nmi_watchdog(void) |
135 | { | 130 | { |
136 | unsigned int *prev_nmi_count; | 131 | unsigned int *prev_nmi_count; |
@@ -179,8 +174,12 @@ int __init check_nmi_watchdog(void) | |||
179 | kfree(prev_nmi_count); | 174 | kfree(prev_nmi_count); |
180 | return 0; | 175 | return 0; |
181 | error: | 176 | error: |
182 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | 177 | if (nmi_watchdog == NMI_IO_APIC) { |
183 | disable_8259A_irq(0); | 178 | if (!timer_through_8259) |
179 | disable_8259A_irq(0); | ||
180 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
181 | } | ||
182 | |||
184 | #ifdef CONFIG_X86_32 | 183 | #ifdef CONFIG_X86_32 |
185 | timer_ack = 0; | 184 | timer_ack = 0; |
186 | #endif | 185 | #endif |
@@ -199,12 +198,17 @@ static int __init setup_nmi_watchdog(char *str) | |||
199 | ++str; | 198 | ++str; |
200 | } | 199 | } |
201 | 200 | ||
202 | get_option(&str, &nmi); | 201 | if (!strncmp(str, "lapic", 5)) |
203 | 202 | nmi_watchdog = NMI_LOCAL_APIC; | |
204 | if (nmi >= NMI_INVALID) | 203 | else if (!strncmp(str, "ioapic", 6)) |
205 | return 0; | 204 | nmi_watchdog = NMI_IO_APIC; |
205 | else { | ||
206 | get_option(&str, &nmi); | ||
207 | if (nmi >= NMI_INVALID) | ||
208 | return 0; | ||
209 | nmi_watchdog = nmi; | ||
210 | } | ||
206 | 211 | ||
207 | nmi_watchdog = nmi; | ||
208 | return 1; | 212 | return 1; |
209 | } | 213 | } |
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 214 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
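The reworked parser accepts the symbolic forms nmi_watchdog=lapic and nmi_watchdog=ioapic alongside the legacy numeric values. A user-space sketch of the same dispatch (the enum values mirror the kernel's NMI_NONE=0, NMI_IO_APIC=1, NMI_LOCAL_APIC=2, NMI_INVALID=3):

#include <stdlib.h>
#include <string.h>

enum { NMI_NONE, NMI_IO_APIC, NMI_LOCAL_APIC, NMI_INVALID };

static int parse_nmi_watchdog(const char *str)
{
	int nmi;

	if (!strncmp(str, "lapic", 5))
		return NMI_LOCAL_APIC;
	if (!strncmp(str, "ioapic", 6))
		return NMI_IO_APIC;
	nmi = atoi(str);	/* legacy numeric form */
	return (nmi >= 0 && nmi < NMI_INVALID) ? nmi : -1;
}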
@@ -285,11 +289,6 @@ void acpi_nmi_enable(void) | |||
285 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | 289 | on_each_cpu(__acpi_nmi_enable, NULL, 1); |
286 | } | 290 | } |
287 | 291 | ||
288 | static void __acpi_nmi_disable(void *__unused) | ||
289 | { | ||
290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
291 | } | ||
292 | |||
293 | /* | 292 | /* |
294 | * Disable timer based NMIs on all CPUs: | 293 | * Disable timer based NMIs on all CPUs: |
295 | */ | 294 | */ |
@@ -340,6 +339,8 @@ void stop_apic_nmi_watchdog(void *unused) | |||
340 | return; | 339 | return; |
341 | if (nmi_watchdog == NMI_LOCAL_APIC) | 340 | if (nmi_watchdog == NMI_LOCAL_APIC) |
342 | lapic_watchdog_stop(); | 341 | lapic_watchdog_stop(); |
342 | else | ||
343 | __acpi_nmi_disable(NULL); | ||
343 | __get_cpu_var(wd_enabled) = 0; | 344 | __get_cpu_var(wd_enabled) = 0; |
344 | atomic_dec(&nmi_active); | 345 | atomic_dec(&nmi_active); |
345 | } | 346 | } |
@@ -465,6 +466,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
465 | 466 | ||
466 | #ifdef CONFIG_SYSCTL | 467 | #ifdef CONFIG_SYSCTL |
467 | 468 | ||
469 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
470 | { | ||
471 | __get_cpu_var(wd_enabled) = 1; | ||
472 | atomic_inc(&nmi_active); | ||
473 | __acpi_nmi_enable(NULL); | ||
474 | } | ||
475 | |||
476 | static void enable_ioapic_nmi_watchdog(void) | ||
477 | { | ||
478 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
479 | touch_nmi_watchdog(); | ||
480 | } | ||
481 | |||
482 | static void disable_ioapic_nmi_watchdog(void) | ||
483 | { | ||
484 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
485 | } | ||
486 | |||
468 | static int __init setup_unknown_nmi_panic(char *str) | 487 | static int __init setup_unknown_nmi_panic(char *str) |
469 | { | 488 | { |
470 | unknown_nmi_panic = 1; | 489 | unknown_nmi_panic = 1; |
@@ -507,6 +526,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
507 | enable_lapic_nmi_watchdog(); | 526 | enable_lapic_nmi_watchdog(); |
508 | else | 527 | else |
509 | disable_lapic_nmi_watchdog(); | 528 | disable_lapic_nmi_watchdog(); |
529 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
530 | if (nmi_watchdog_enabled) | ||
531 | enable_ioapic_nmi_watchdog(); | ||
532 | else | ||
533 | disable_ioapic_nmi_watchdog(); | ||
510 | } else { | 534 | } else { |
511 | printk(KERN_WARNING | 535 | printk(KERN_WARNING |
512 | "NMI watchdog doesn't know what hardware to touch\n"); | 536 | "NMI watchdog doesn't know what hardware to touch\n"); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 4caff39078e0..0cc41a1d2550 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2002, IBM Corp. | 4 | * Copyright (C) 2002, IBM Corp. |
5 | * | 5 | * |
6 | * All rights reserved. | 6 | * All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by | 9 | * it under the terms of the GNU General Public License as published by |
@@ -23,17 +23,18 @@ | |||
23 | * Send feedback to <gone@us.ibm.com> | 23 | * Send feedback to <gone@us.ibm.com> |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/mm.h> | 26 | #include <linux/nodemask.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/mmzone.h> | 28 | #include <linux/mmzone.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/nodemask.h> | 30 | #include <linux/mm.h> |
31 | #include <asm/numaq.h> | 31 | |
32 | #include <asm/topology.h> | ||
33 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | 33 | #include <asm/topology.h> |
35 | #include <asm/e820.h> | 34 | #include <asm/genapic.h> |
35 | #include <asm/numaq.h> | ||
36 | #include <asm/setup.h> | 36 | #include <asm/setup.h> |
37 | #include <asm/e820.h> | ||
37 | 38 | ||
38 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 39 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
39 | 40 | ||
@@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void) | |||
91 | } | 92 | } |
92 | 93 | ||
93 | int found_numaq; | 94 | int found_numaq; |
95 | |||
94 | /* | 96 | /* |
95 | * Have to match translation table entries to main table entries by counter | 97 | * Have to match translation table entries to main table entries by counter |
96 | * hence the mpc_record variable .... can't see a less disgusting way of | 98 | * hence the mpc_record variable .... can't see a less disgusting way of |
97 | * doing this .... | 99 | * doing this .... |
98 | */ | 100 | */ |
99 | struct mpc_config_translation { | 101 | struct mpc_config_translation { |
100 | unsigned char mpc_type; | 102 | unsigned char mpc_type; |
101 | unsigned char trans_len; | 103 | unsigned char trans_len; |
102 | unsigned char trans_type; | 104 | unsigned char trans_type; |
103 | unsigned char trans_quad; | 105 | unsigned char trans_quad; |
104 | unsigned char trans_global; | 106 | unsigned char trans_global; |
105 | unsigned char trans_local; | 107 | unsigned char trans_local; |
106 | unsigned short trans_reserved; | 108 | unsigned short trans_reserved; |
107 | }; | 109 | }; |
108 | 110 | ||
109 | /* x86_quirks member */ | 111 | /* x86_quirks member */ |
@@ -117,16 +119,15 @@ static inline int generate_logical_apicid(int quad, int phys_apicid) | |||
117 | } | 119 | } |
118 | 120 | ||
119 | /* x86_quirks member */ | 121 | /* x86_quirks member */ |
120 | static int mpc_apic_id(struct mpc_config_processor *m) | 122 | static int mpc_apic_id(struct mpc_cpu *m) |
121 | { | 123 | { |
122 | int quad = translation_table[mpc_record]->trans_quad; | 124 | int quad = translation_table[mpc_record]->trans_quad; |
123 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | 125 | int logical_apicid = generate_logical_apicid(quad, m->apicid); |
124 | 126 | ||
125 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | 127 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", |
126 | m->mpc_apicid, | 128 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, |
127 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | 129 | (m->cpufeature & CPU_MODEL_MASK) >> 4, |
128 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | 130 | m->apicver, quad, logical_apicid); |
129 | m->mpc_apicver, quad, logical_apicid); | ||
130 | return logical_apicid; | 131 | return logical_apicid; |
131 | } | 132 | } |
132 | 133 | ||
@@ -135,26 +136,26 @@ int mp_bus_id_to_node[MAX_MP_BUSSES]; | |||
135 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | 136 | int mp_bus_id_to_local[MAX_MP_BUSSES]; |
136 | 137 | ||
137 | /* x86_quirks member */ | 138 | /* x86_quirks member */ |
138 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) | 139 | static void mpc_oem_bus_info(struct mpc_bus *m, char *name) |
139 | { | 140 | { |
140 | int quad = translation_table[mpc_record]->trans_quad; | 141 | int quad = translation_table[mpc_record]->trans_quad; |
141 | int local = translation_table[mpc_record]->trans_local; | 142 | int local = translation_table[mpc_record]->trans_local; |
142 | 143 | ||
143 | mp_bus_id_to_node[m->mpc_busid] = quad; | 144 | mp_bus_id_to_node[m->busid] = quad; |
144 | mp_bus_id_to_local[m->mpc_busid] = local; | 145 | mp_bus_id_to_local[m->busid] = local; |
145 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | 146 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", |
146 | m->mpc_busid, name, quad); | 147 | m->busid, name, quad); |
147 | } | 148 | } |
148 | 149 | ||
149 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | 150 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; |
150 | 151 | ||
151 | /* x86_quirks member */ | 152 | /* x86_quirks member */ |
152 | static void mpc_oem_pci_bus(struct mpc_config_bus *m) | 153 | static void mpc_oem_pci_bus(struct mpc_bus *m) |
153 | { | 154 | { |
154 | int quad = translation_table[mpc_record]->trans_quad; | 155 | int quad = translation_table[mpc_record]->trans_quad; |
155 | int local = translation_table[mpc_record]->trans_local; | 156 | int local = translation_table[mpc_record]->trans_local; |
156 | 157 | ||
157 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | 158 | quad_local_to_mp_bus_id[quad][local] = m->busid; |
158 | } | 159 | } |
159 | 160 | ||
160 | static void __init MP_translation_info(struct mpc_config_translation *m) | 161 | static void __init MP_translation_info(struct mpc_config_translation *m) |
@@ -186,7 +187,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
186 | * Read/parse the MPC oem tables | 187 | * Read/parse the MPC oem tables |
187 | */ | 188 | */ |
188 | 189 | ||
189 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | 190 | static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, |
190 | unsigned short oemsize) | 191 | unsigned short oemsize) |
191 | { | 192 | { |
192 | int count = sizeof(*oemtable); /* the header size */ | 193 | int count = sizeof(*oemtable); /* the header size */ |
@@ -195,18 +196,18 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | |||
195 | mpc_record = 0; | 196 | mpc_record = 0; |
196 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | 197 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", |
197 | oemtable); | 198 | oemtable); |
198 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | 199 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { |
199 | printk(KERN_WARNING | 200 | printk(KERN_WARNING |
200 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | 201 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", |
201 | oemtable->oem_signature[0], oemtable->oem_signature[1], | 202 | oemtable->signature[0], oemtable->signature[1], |
202 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | 203 | oemtable->signature[2], oemtable->signature[3]); |
203 | return; | 204 | return; |
204 | } | 205 | } |
205 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | 206 | if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { |
206 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | 207 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); |
207 | return; | 208 | return; |
208 | } | 209 | } |
209 | while (count < oemtable->oem_length) { | 210 | while (count < oemtable->length) { |
210 | switch (*oemptr) { | 211 | switch (*oemptr) { |
211 | case MP_TRANSLATION: | 212 | case MP_TRANSLATION: |
212 | { | 213 | { |
@@ -235,6 +236,13 @@ static int __init numaq_setup_ioapic_ids(void) | |||
235 | return 1; | 236 | return 1; |
236 | } | 237 | } |
237 | 238 | ||
239 | static int __init numaq_update_genapic(void) | ||
240 | { | ||
241 | apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; | ||
242 | |||
243 | return 0; | ||
244 | } | ||
245 | |||
238 | static struct x86_quirks numaq_x86_quirks __initdata = { | 246 | static struct x86_quirks numaq_x86_quirks __initdata = { |
239 | .arch_pre_time_init = numaq_pre_time_init, | 247 | .arch_pre_time_init = numaq_pre_time_init, |
240 | .arch_time_init = NULL, | 248 | .arch_time_init = NULL, |
@@ -250,10 +258,10 @@ static struct x86_quirks numaq_x86_quirks __initdata = { | |||
250 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | 258 | .mpc_oem_pci_bus = mpc_oem_pci_bus, |
251 | .smp_read_mpc_oem = smp_read_mpc_oem, | 259 | .smp_read_mpc_oem = smp_read_mpc_oem, |
252 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | 260 | .setup_ioapic_ids = numaq_setup_ioapic_ids, |
261 | .update_genapic = numaq_update_genapic, | ||
253 | }; | 262 | }; |
254 | 263 | ||
255 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | 264 | void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) |
256 | char *productid) | ||
257 | { | 265 | { |
258 | if (strncmp(oem, "IBM NUMA", 8)) | 266 | if (strncmp(oem, "IBM NUMA", 8)) |
259 | printk("Warning! Not a NUMA-Q system!\n"); | 267 | printk("Warning! Not a NUMA-Q system!\n"); |
@@ -285,3 +293,280 @@ int __init get_memcfg_numaq(void) | |||
285 | smp_dump_qct(); | 293 | smp_dump_qct(); |
286 | return 1; | 294 | return 1; |
287 | } | 295 | } |
296 | |||
297 | /* | ||
298 | * APIC driver for the IBM NUMAQ chipset. | ||
299 | */ | ||
300 | #define APIC_DEFINITION 1 | ||
301 | #include <linux/threads.h> | ||
302 | #include <linux/cpumask.h> | ||
303 | #include <asm/mpspec.h> | ||
304 | #include <asm/genapic.h> | ||
305 | #include <asm/fixmap.h> | ||
306 | #include <asm/apicdef.h> | ||
307 | #include <asm/ipi.h> | ||
308 | #include <linux/kernel.h> | ||
309 | #include <linux/string.h> | ||
310 | #include <linux/init.h> | ||
311 | #include <linux/numa.h> | ||
312 | #include <linux/smp.h> | ||
313 | #include <asm/numaq.h> | ||
314 | #include <asm/io.h> | ||
315 | #include <linux/mmzone.h> | ||
316 | #include <linux/nodemask.h> | ||
317 | |||
318 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | ||
319 | |||
320 | static inline unsigned int numaq_get_apic_id(unsigned long x) | ||
321 | { | ||
322 | return (x >> 24) & 0x0F; | ||
323 | } | ||
324 | |||
325 | static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) | ||
326 | { | ||
327 | default_send_IPI_mask_sequence_logical(mask, vector); | ||
328 | } | ||
329 | |||
330 | static inline void numaq_send_IPI_allbutself(int vector) | ||
331 | { | ||
332 | default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); | ||
333 | } | ||
334 | |||
335 | static inline void numaq_send_IPI_all(int vector) | ||
336 | { | ||
337 | numaq_send_IPI_mask(cpu_online_mask, vector); | ||
338 | } | ||
339 | |||
340 | extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); | ||
341 | |||
342 | #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) | ||
343 | #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) | ||
344 | |||
345 | /* | ||
346 | * Because we use NMIs rather than the INIT-STARTUP sequence to | ||
347 | * bootstrap the CPUs, the APIC may be in a weird state. Kick it: | ||
348 | */ | ||
349 | static inline void numaq_smp_callin_clear_local_apic(void) | ||
350 | { | ||
351 | clear_local_APIC(); | ||
352 | } | ||
353 | |||
354 | static inline void | ||
355 | numaq_store_NMI_vector(unsigned short *high, unsigned short *low) | ||
356 | { | ||
357 | printk("Storing NMI vector\n"); | ||
358 | *high = | ||
359 | *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); | ||
360 | *low = | ||
361 | *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); | ||
362 | } | ||
363 | |||
364 | static inline const cpumask_t *numaq_target_cpus(void) | ||
365 | { | ||
366 | return &CPU_MASK_ALL; | ||
367 | } | ||
368 | |||
369 | static inline unsigned long | ||
370 | numaq_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
371 | { | ||
372 | return physid_isset(apicid, bitmap); | ||
373 | } | ||
374 | |||
375 | static inline unsigned long numaq_check_apicid_present(int bit) | ||
376 | { | ||
377 | return physid_isset(bit, phys_cpu_present_map); | ||
378 | } | ||
379 | |||
380 | #define apicid_cluster(apicid) (apicid & 0xF0) | ||
381 | |||
382 | static inline int numaq_apic_id_registered(void) | ||
383 | { | ||
384 | return 1; | ||
385 | } | ||
386 | |||
387 | static inline void numaq_init_apic_ldr(void) | ||
388 | { | ||
389 | /* Already done in NUMA-Q firmware */ | ||
390 | } | ||
391 | |||
392 | static inline void numaq_setup_apic_routing(void) | ||
393 | { | ||
394 | printk("Enabling APIC mode: %s. Using %d I/O APICs\n", | ||
395 | "NUMA-Q", nr_ioapics); | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Skip adding the timer int on secondary nodes, which causes | ||
400 | * a small but painful rift in the time-space continuum. | ||
401 | */ | ||
402 | static inline int numaq_multi_timer_check(int apic, int irq) | ||
403 | { | ||
404 | return apic != 0 && irq == 0; | ||
405 | } | ||
406 | |||
407 | static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) | ||
408 | { | ||
409 | /* We don't have a good way to do this yet - hack */ | ||
410 | return physids_promote(0xFUL); | ||
411 | } | ||
412 | |||
413 | /* Mapping from cpu number to logical apicid */ | ||
414 | extern u8 cpu_2_logical_apicid[]; | ||
415 | |||
416 | static inline int numaq_cpu_to_logical_apicid(int cpu) | ||
417 | { | ||
418 | if (cpu >= nr_cpu_ids) | ||
419 | return BAD_APICID; | ||
420 | return (int)cpu_2_logical_apicid[cpu]; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Supporting over 60 cpus on NUMA-Q requires a locality-dependent | ||
425 | * cpu to APIC ID relation to properly interact with the intelligent | ||
426 | * mode of the cluster controller. | ||
427 | */ | ||
428 | static inline int numaq_cpu_present_to_apicid(int mps_cpu) | ||
429 | { | ||
430 | if (mps_cpu < 60) | ||
431 | return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); | ||
432 | else | ||
433 | return BAD_APICID; | ||
434 | } | ||
435 | |||
436 | static inline int numaq_apicid_to_node(int logical_apicid) | ||
437 | { | ||
438 | return logical_apicid >> 4; | ||
439 | } | ||
440 | |||
441 | static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | ||
442 | { | ||
443 | int node = numaq_apicid_to_node(logical_apicid); | ||
444 | int cpu = __ffs(logical_apicid & 0xf); | ||
445 | |||
446 | return physid_mask_of_physid(cpu + 4*node); | ||
447 | } | ||
448 | |||
449 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | ||
450 | void *xquad_portio; | ||
451 | |||
452 | static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
453 | { | ||
454 | return 1; | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * We use physical apicids here, not logical, so just return the default | ||
459 | * physical broadcast to stop people from breaking us | ||
460 | */ | ||
461 | static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) | ||
462 | { | ||
463 | return 0x0F; | ||
464 | } | ||
465 | |||
466 | static inline unsigned int | ||
467 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
468 | const struct cpumask *andmask) | ||
469 | { | ||
470 | return 0x0F; | ||
471 | } | ||
472 | |||
473 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ | ||
474 | static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) | ||
475 | { | ||
476 | return cpuid_apic >> index_msb; | ||
477 | } | ||
478 | static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
479 | { | ||
480 | numaq_mps_oem_check(mpc, oem, productid); | ||
481 | return found_numaq; | ||
482 | } | ||
483 | |||
484 | static int probe_numaq(void) | ||
485 | { | ||
486 | /* already know from get_memcfg_numaq() */ | ||
487 | return found_numaq; | ||
488 | } | ||
489 | |||
490 | static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) | ||
491 | { | ||
492 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
493 | * specified in the interrupt destination when using lowest | ||
494 | * priority interrupt delivery mode. | ||
495 | * | ||
496 | * In particular there was a hyperthreading cpu observed to | ||
497 | * deliver interrupts to the wrong hyperthread when only one | ||
498 | * hyperthread was specified in the interrupt destination. | ||
499 | */ | ||
500 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; | ||
501 | } | ||
502 | |||
503 | static void numaq_setup_portio_remap(void) | ||
504 | { | ||
505 | int num_quads = num_online_nodes(); | ||
506 | |||
507 | if (num_quads <= 1) | ||
508 | return; | ||
509 | |||
510 | printk("Remapping cross-quad port I/O for %d quads\n", num_quads); | ||
511 | xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); | ||
512 | printk("xquad_portio vaddr 0x%08lx, len %08lx\n", | ||
513 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); | ||
514 | } | ||
515 | |||
516 | struct genapic apic_numaq = { | ||
517 | |||
518 | .name = "NUMAQ", | ||
519 | .probe = probe_numaq, | ||
520 | .acpi_madt_oem_check = NULL, | ||
521 | .apic_id_registered = numaq_apic_id_registered, | ||
522 | |||
523 | .irq_delivery_mode = dest_LowestPrio, | ||
524 | /* physical delivery on LOCAL quad: */ | ||
525 | .irq_dest_mode = 0, | ||
526 | |||
527 | .target_cpus = numaq_target_cpus, | ||
528 | .disable_esr = 1, | ||
529 | .dest_logical = APIC_DEST_LOGICAL, | ||
530 | .check_apicid_used = numaq_check_apicid_used, | ||
531 | .check_apicid_present = numaq_check_apicid_present, | ||
532 | |||
533 | .vector_allocation_domain = numaq_vector_allocation_domain, | ||
534 | .init_apic_ldr = numaq_init_apic_ldr, | ||
535 | |||
536 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | ||
537 | .setup_apic_routing = numaq_setup_apic_routing, | ||
538 | .multi_timer_check = numaq_multi_timer_check, | ||
539 | .apicid_to_node = numaq_apicid_to_node, | ||
540 | .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, | ||
541 | .cpu_present_to_apicid = numaq_cpu_present_to_apicid, | ||
542 | .apicid_to_cpu_present = numaq_apicid_to_cpu_present, | ||
543 | .setup_portio_remap = numaq_setup_portio_remap, | ||
544 | .check_phys_apicid_present = numaq_check_phys_apicid_present, | ||
545 | .enable_apic_mode = NULL, | ||
546 | .phys_pkg_id = numaq_phys_pkg_id, | ||
547 | .mps_oem_check = __numaq_mps_oem_check, | ||
548 | |||
549 | .get_apic_id = numaq_get_apic_id, | ||
550 | .set_apic_id = NULL, | ||
551 | .apic_id_mask = 0x0F << 24, | ||
552 | |||
553 | .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, | ||
554 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, | ||
555 | |||
556 | .send_IPI_mask = numaq_send_IPI_mask, | ||
557 | .send_IPI_mask_allbutself = NULL, | ||
558 | .send_IPI_allbutself = numaq_send_IPI_allbutself, | ||
559 | .send_IPI_all = numaq_send_IPI_all, | ||
560 | .send_IPI_self = default_send_IPI_self, | ||
561 | |||
562 | .wakeup_cpu = NULL, | ||
563 | .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, | ||
564 | .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, | ||
565 | |||
566 | /* We don't do anything here because we use NMIs to boot instead */ | ||
567 | .wait_for_init_deassert = NULL, | ||
568 | |||
569 | .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, | ||
570 | .store_NMI_vector = numaq_store_NMI_vector, | ||
571 | .inquire_remote_apic = NULL, | ||
572 | }; | ||
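
The NUMA-Q logical APIC ID encoding above packs the quad number into the upper nibble and a one-hot per-quad CPU bit into the lower nibble: numaq_cpu_present_to_apicid() computes ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)), and numaq_apicid_to_node() recovers the quad with a single shift. A small user-space sketch that round-trips the arithmetic:

    #include <stdio.h>

    /* same arithmetic as numaq_cpu_present_to_apicid()/numaq_apicid_to_node() */
    static int cpu_to_logical_apicid(int mps_cpu)
    {
            if (mps_cpu >= 60)
                    return -1;              /* BAD_APICID in the kernel */
            return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
    }

    static int apicid_to_node(int logical_apicid)
    {
            return logical_apicid >> 4;     /* upper nibble is the quad */
    }

    int main(void)
    {
            int cpu;

            /* e.g. CPU 5 -> quad 1, cpu bit 1 -> APIC ID 0x12 -> node 1 */
            for (cpu = 0; cpu < 8; cpu++) {
                    int id = cpu_to_logical_apicid(cpu);
                    printf("cpu %d -> apicid 0x%02x -> node %d\n",
                           cpu, id, apicid_to_node(id));
            }
            return 0;
    }
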
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1dd..3a7c5a44082e 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -7,7 +7,8 @@ | |||
7 | 7 | ||
8 | #include <asm/paravirt.h> | 8 | #include <asm/paravirt.h> |
9 | 9 | ||
10 | static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | 10 | static inline void |
11 | default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) | ||
11 | { | 12 | { |
12 | __raw_spin_lock(lock); | 13 | __raw_spin_lock(lock); |
13 | } | 14 | } |
@@ -25,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { | |||
25 | }; | 26 | }; |
26 | EXPORT_SYMBOL(pv_lock_ops); | 27 | EXPORT_SYMBOL(pv_lock_ops); |
27 | 28 | ||
28 | void __init paravirt_use_bytelocks(void) | ||
29 | { | ||
30 | #ifdef CONFIG_SMP | ||
31 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
32 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
33 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
34 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
35 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
36 | #endif | ||
37 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..cea11c8e3049 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -44,6 +44,17 @@ void _paravirt_nop(void) | |||
44 | { | 44 | { |
45 | } | 45 | } |
46 | 46 | ||
47 | /* identity function, which can be inlined */ | ||
48 | u32 _paravirt_ident_32(u32 x) | ||
49 | { | ||
50 | return x; | ||
51 | } | ||
52 | |||
53 | u64 _paravirt_ident_64(u64 x) | ||
54 | { | ||
55 | return x; | ||
56 | } | ||
57 | |||
47 | static void __init default_banner(void) | 58 | static void __init default_banner(void) |
48 | { | 59 | { |
49 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 60 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
@@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | |||
138 | if (opfunc == NULL) | 149 | if (opfunc == NULL) |
139 | /* If there's no function, patch it with a ud2a (BUG) */ | 150 | /* If there's no function, patch it with a ud2a (BUG) */ |
140 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); | 151 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
141 | else if (opfunc == paravirt_nop) | 152 | else if (opfunc == _paravirt_nop) |
142 | /* If the operation is a nop, then nop the callsite */ | 153 | /* If the operation is a nop, then nop the callsite */ |
143 | ret = paravirt_patch_nop(); | 154 | ret = paravirt_patch_nop(); |
155 | |||
156 | /* identity functions just return their single argument */ | ||
157 | else if (opfunc == _paravirt_ident_32) | ||
158 | ret = paravirt_patch_ident_32(insnbuf, len); | ||
159 | else if (opfunc == _paravirt_ident_64) | ||
160 | ret = paravirt_patch_ident_64(insnbuf, len); | ||
161 | |||
144 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || | 162 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
145 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || | 163 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || |
146 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || | 164 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || |
@@ -292,10 +310,10 @@ struct pv_time_ops pv_time_ops = { | |||
292 | 310 | ||
293 | struct pv_irq_ops pv_irq_ops = { | 311 | struct pv_irq_ops pv_irq_ops = { |
294 | .init_IRQ = native_init_IRQ, | 312 | .init_IRQ = native_init_IRQ, |
295 | .save_fl = native_save_fl, | 313 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
296 | .restore_fl = native_restore_fl, | 314 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), |
297 | .irq_disable = native_irq_disable, | 315 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), |
298 | .irq_enable = native_irq_enable, | 316 | .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), |
299 | .safe_halt = native_safe_halt, | 317 | .safe_halt = native_safe_halt, |
300 | .halt = native_halt, | 318 | .halt = native_halt, |
301 | #ifdef CONFIG_X86_64 | 319 | #ifdef CONFIG_X86_64 |
@@ -373,6 +391,14 @@ struct pv_apic_ops pv_apic_ops = { | |||
373 | #endif | 391 | #endif |
374 | }; | 392 | }; |
375 | 393 | ||
394 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) | ||
395 | /* 32-bit pagetable entries */ | ||
396 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) | ||
397 | #else | ||
398 | /* 64-bit pagetable entries */ | ||
399 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) | ||
400 | #endif | ||
401 | |||
376 | struct pv_mmu_ops pv_mmu_ops = { | 402 | struct pv_mmu_ops pv_mmu_ops = { |
377 | #ifndef CONFIG_X86_64 | 403 | #ifndef CONFIG_X86_64 |
378 | .pagetable_setup_start = native_pagetable_setup_start, | 404 | .pagetable_setup_start = native_pagetable_setup_start, |
@@ -424,22 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
424 | .pmd_clear = native_pmd_clear, | 450 | .pmd_clear = native_pmd_clear, |
425 | #endif | 451 | #endif |
426 | .set_pud = native_set_pud, | 452 | .set_pud = native_set_pud, |
427 | .pmd_val = native_pmd_val, | 453 | |
428 | .make_pmd = native_make_pmd, | 454 | .pmd_val = PTE_IDENT, |
455 | .make_pmd = PTE_IDENT, | ||
429 | 456 | ||
430 | #if PAGETABLE_LEVELS == 4 | 457 | #if PAGETABLE_LEVELS == 4 |
431 | .pud_val = native_pud_val, | 458 | .pud_val = PTE_IDENT, |
432 | .make_pud = native_make_pud, | 459 | .make_pud = PTE_IDENT, |
460 | |||
433 | .set_pgd = native_set_pgd, | 461 | .set_pgd = native_set_pgd, |
434 | #endif | 462 | #endif |
435 | #endif /* PAGETABLE_LEVELS >= 3 */ | 463 | #endif /* PAGETABLE_LEVELS >= 3 */ |
436 | 464 | ||
437 | .pte_val = native_pte_val, | 465 | .pte_val = PTE_IDENT, |
438 | .pte_flags = native_pte_flags, | 466 | .pgd_val = PTE_IDENT, |
439 | .pgd_val = native_pgd_val, | ||
440 | 467 | ||
441 | .make_pte = native_make_pte, | 468 | .make_pte = PTE_IDENT, |
442 | .make_pgd = native_make_pgd, | 469 | .make_pgd = PTE_IDENT, |
443 | 470 | ||
444 | .dup_mmap = paravirt_nop, | 471 | .dup_mmap = paravirt_nop, |
445 | .exit_mmap = paravirt_nop, | 472 | .exit_mmap = paravirt_nop, |
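
The point of _paravirt_ident_32/64 above is that on native hardware accessors such as pte_val and make_pte are pure no-op conversions, so the patcher can recognize those two well-known function pointers and replace the indirect call with an inlined mov (the paravirt_patch_ident_* hunks below). A sketch of the dispatch idea using plain function pointers; the names here are illustrative, not the kernel API:

    #include <stdio.h>
    #include <stdint.h>

    /* stand-ins for _paravirt_ident_64 and a hypervisor's real conversion
     * (the masking below is a made-up example transformation) */
    static uint64_t ident_64(uint64_t x)  { return x; }
    static uint64_t hv_pte_val(uint64_t x) { return x & ~0xfffULL; }

    static uint64_t (*pte_val_op)(uint64_t) = ident_64;

    /* the patcher's test: a known identity helper means the call site can
     * be collapsed; here we only report the decision */
    static const char *patch_decision(uint64_t (*op)(uint64_t))
    {
            return op == ident_64 ? "patch call site to an inline mov"
                                  : "keep the indirect call";
    }

    int main(void)
    {
            printf("native:     %s\n", patch_decision(pte_val_op));
            pte_val_op = hv_pte_val;
            printf("hypervisor: %s\n", patch_decision(pte_val_op));
            return 0;
    }
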
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861d..d9f32e6d6ab6 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | |||
12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | 12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); |
13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); | 13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); |
14 | 14 | ||
15 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) | ||
16 | { | ||
17 | /* arg in %eax, return in %eax */ | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) | ||
22 | { | ||
23 | /* arg in %edx:%eax, return in %edx:%eax */ | ||
24 | return 0; | ||
25 | } | ||
26 | |||
15 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 27 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
16 | unsigned long addr, unsigned len) | 28 | unsigned long addr, unsigned len) |
17 | { | 29 | { |
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae6..3f08f34f93eb 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c | |||
@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); | |||
19 | DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); | 19 | DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); |
20 | DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); | 20 | DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); |
21 | 21 | ||
22 | DEF_NATIVE(, mov32, "mov %edi, %eax"); | ||
23 | DEF_NATIVE(, mov64, "mov %rdi, %rax"); | ||
24 | |||
25 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) | ||
26 | { | ||
27 | return paravirt_patch_insns(insnbuf, len, | ||
28 | start__mov32, end__mov32); | ||
29 | } | ||
30 | |||
31 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) | ||
32 | { | ||
33 | return paravirt_patch_insns(insnbuf, len, | ||
34 | start__mov64, end__mov64); | ||
35 | } | ||
36 | |||
22 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 37 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
23 | unsigned long addr, unsigned len) | 38 | unsigned long addr, unsigned len) |
24 | { | 39 | { |
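
On 64-bit the identity ops patch to a single mov %edi,%eax or mov %rdi,%rax; on 32-bit, paravirt_patch_ident_32() can return 0 bytes because the argument already arrives in %eax (a 64-bit value in %edx:%eax), so the call site needs no instructions at all. A simplified model of what paravirt_patch_insns() does with such a template, assuming the template fits in the call site:

    #include <stdio.h>
    #include <string.h>

    /* simplified model of paravirt_patch_insns(): copy a code template
     * into the call site and report how many bytes were written */
    static unsigned patch_insns(void *insnbuf, unsigned len,
                                const void *start, const void *end)
    {
            unsigned insn_len = (unsigned)((const char *)end - (const char *)start);

            if (insn_len > len)
                    return len;     /* the real code keeps the call instead */
            memcpy(insnbuf, start, insn_len);
            return insn_len;
    }

    int main(void)
    {
            /* machine code for "mov %rdi,%rax" (48 89 f8), as mov64 emits */
            static const unsigned char mov64[] = { 0x48, 0x89, 0xf8 };
            unsigned char site[16];
            unsigned n = patch_insns(site, sizeof(site),
                                     mov64, mov64 + sizeof(mov64));

            printf("patched %u byte(s)\n", n);       /* 3 */
            return 0;
    }
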
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d78f38..d28bbdc35e4e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) | |||
1567 | ++p; | 1567 | ++p; |
1568 | if (*p == '\0') | 1568 | if (*p == '\0') |
1569 | break; | 1569 | break; |
1570 | bridge = simple_strtol(p, &endp, 0); | 1570 | bridge = simple_strtoul(p, &endp, 0); |
1571 | if (p == endp) | 1571 | if (p == endp) |
1572 | break; | 1572 | break; |
1573 | 1573 | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 192624820217..b25428533141 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
8 | #include <asm/iommu.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/gart.h> | ||
9 | #include <asm/calgary.h> | 10 | #include <asm/calgary.h> |
10 | #include <asm/amd_iommu.h> | 11 | #include <asm/amd_iommu.h> |
11 | 12 | ||
@@ -30,11 +31,6 @@ int no_iommu __read_mostly; | |||
30 | /* Set this to 1 if there is a HW IOMMU in the system */ | 31 | /* Set this to 1 if there is a HW IOMMU in the system */ |
31 | int iommu_detected __read_mostly = 0; | 32 | int iommu_detected __read_mostly = 0; |
32 | 33 | ||
33 | /* This tells the BIO block layer to assume merging. Default to off | ||
34 | because we cannot guarantee merging later. */ | ||
35 | int iommu_bio_merge __read_mostly = 0; | ||
36 | EXPORT_SYMBOL(iommu_bio_merge); | ||
37 | |||
38 | dma_addr_t bad_dma_address __read_mostly = 0; | 34 | dma_addr_t bad_dma_address __read_mostly = 0; |
39 | EXPORT_SYMBOL(bad_dma_address); | 35 | EXPORT_SYMBOL(bad_dma_address); |
40 | 36 | ||
@@ -42,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address); | |||
42 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 38 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
43 | to older i386. */ | 39 | to older i386. */ |
44 | struct device x86_dma_fallback_dev = { | 40 | struct device x86_dma_fallback_dev = { |
45 | .bus_id = "fallback device", | 41 | .init_name = "fallback device", |
46 | .coherent_dma_mask = DMA_32BIT_MASK, | 42 | .coherent_dma_mask = DMA_32BIT_MASK, |
47 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, | 43 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
48 | }; | 44 | }; |
@@ -105,11 +101,15 @@ static void __init dma32_free_bootmem(void) | |||
105 | dma32_bootmem_ptr = NULL; | 101 | dma32_bootmem_ptr = NULL; |
106 | dma32_bootmem_size = 0; | 102 | dma32_bootmem_size = 0; |
107 | } | 103 | } |
104 | #endif | ||
108 | 105 | ||
109 | void __init pci_iommu_alloc(void) | 106 | void __init pci_iommu_alloc(void) |
110 | { | 107 | { |
108 | #ifdef CONFIG_X86_64 | ||
111 | /* free the range so iommu could get some range less than 4G */ | 109 | /* free the range so iommu could get some range less than 4G */ |
112 | dma32_free_bootmem(); | 110 | dma32_free_bootmem(); |
111 | #endif | ||
112 | |||
113 | /* | 113 | /* |
114 | * The order of these functions is important for | 114 | * The order of these functions is important for |
115 | * fall-back/fail-over reasons | 115 | * fall-back/fail-over reasons |
@@ -125,15 +125,6 @@ void __init pci_iommu_alloc(void) | |||
125 | pci_swiotlb_init(); | 125 | pci_swiotlb_init(); |
126 | } | 126 | } |
127 | 127 | ||
128 | unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) | ||
129 | { | ||
130 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
131 | |||
132 | return size >> PAGE_SHIFT; | ||
133 | } | ||
134 | EXPORT_SYMBOL(iommu_nr_pages); | ||
135 | #endif | ||
136 | |||
137 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 128 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
138 | dma_addr_t *dma_addr, gfp_t flag) | 129 | dma_addr_t *dma_addr, gfp_t flag) |
139 | { | 130 | { |
@@ -188,7 +179,6 @@ static __init int iommu_setup(char *p) | |||
188 | } | 179 | } |
189 | 180 | ||
190 | if (!strncmp(p, "biomerge", 8)) { | 181 | if (!strncmp(p, "biomerge", 8)) { |
191 | iommu_bio_merge = 4096; | ||
192 | iommu_merge = 1; | 182 | iommu_merge = 1; |
193 | force_iommu = 1; | 183 | force_iommu = 1; |
194 | } | 184 | } |
@@ -300,8 +290,8 @@ fs_initcall(pci_iommu_init); | |||
300 | static __devinit void via_no_dac(struct pci_dev *dev) | 290 | static __devinit void via_no_dac(struct pci_dev *dev) |
301 | { | 291 | { |
302 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | 292 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { |
303 | printk(KERN_INFO "PCI: VIA PCI bridge detected." | 293 | printk(KERN_INFO |
304 | "Disabling DAC.\n"); | 294 | "PCI: VIA PCI bridge detected. Disabling DAC.\n"); |
305 | forbid_dac = 1; | 295 | forbid_dac = 1; |
306 | } | 296 | } |
307 | } | 297 | } |
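
The iommu_nr_pages() helper deleted above computed how many pages an arbitrary byte range touches: add the intra-page offset to the length, round up to a page boundary, and shift. A quick user-space check of that arithmetic:

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_MASK       (~(PAGE_SIZE - 1))

    static unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
    {
            /* offset-within-page plus length, rounded up to whole pages */
            unsigned long size = ((addr & ~PAGE_MASK) + len + PAGE_SIZE - 1)
                                 & PAGE_MASK;
            return size >> PAGE_SHIFT;
    }

    int main(void)
    {
            /* 16 bytes starting 4 bytes before a page boundary span 2 pages */
            printf("%lu\n", iommu_nr_pages(0x1ffc, 16));    /* 2 */
            /* a full page starting on a boundary is exactly 1 page */
            printf("%lu\n", iommu_nr_pages(0x2000, 4096));  /* 1 */
            return 0;
    }
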
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a42b02b4df68..d5768b1af080 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * This allows to use PCI devices that only support 32bit addresses on systems | 5 | * This allows to use PCI devices that only support 32bit addresses on systems |
6 | * with more than 4GB. | 6 | * with more than 4GB. |
7 | * | 7 | * |
8 | * See Documentation/DMA-mapping.txt for the interface specification. | 8 | * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification. |
9 | * | 9 | * |
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | 10 | * Copyright 2002 Andi Kleen, SuSE Labs. |
11 | * Subject to the GNU General Public License v2 only. | 11 | * Subject to the GNU General Public License v2 only. |
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ | |||
52 | * to trigger bugs with some popular PCI cards, in particular 3ware (but | 52 | * to trigger bugs with some popular PCI cards, in particular 3ware (but |
53 | * has also been seen with Qlogic at least). | 53 | * has also been seen with Qlogic at least). |
54 | */ | 54 | */ |
55 | int iommu_fullflush = 1; | 55 | static int iommu_fullflush = 1; |
56 | 56 | ||
57 | /* Allocation bitmap for the remapping area: */ | 57 | /* Allocation bitmap for the remapping area: */ |
58 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | 58 | static DEFINE_SPINLOCK(iommu_bitmap_lock); |
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) | |||
123 | 123 | ||
124 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 124 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
125 | iommu_area_free(iommu_gart_bitmap, offset, size); | 125 | iommu_area_free(iommu_gart_bitmap, offset, size); |
126 | if (offset >= next_bit) | ||
127 | next_bit = offset + size; | ||
126 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 128 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
127 | } | 129 | } |
128 | 130 | ||
@@ -743,10 +745,8 @@ void __init gart_iommu_init(void) | |||
743 | unsigned long scratch; | 745 | unsigned long scratch; |
744 | long i; | 746 | long i; |
745 | 747 | ||
746 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { | 748 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) |
747 | printk(KERN_INFO "PCI-GART: No AMD GART found.\n"); | ||
748 | return; | 749 | return; |
749 | } | ||
750 | 750 | ||
751 | #ifndef CONFIG_AGP_AMD64 | 751 | #ifndef CONFIG_AGP_AMD64 |
752 | no_agp = 1; | 752 | no_agp = 1; |
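
The two lines added to free_iommu() advance the next_bit scan hint past a range freed at or beyond it. Freed GART entries may still be cached in the IOMMU TLB, so handing them straight back out would be unsafe under the lazy-flush scheme; by skipping past them, the space is only reused after the allocator wraps around, and wrapping is exactly where a flush is forced. A toy bitmap allocator sketching the hint logic (illustrative and much simplified; the real allocator wraps and flushes instead of failing):

    #include <stdio.h>
    #include <string.h>

    #define SLOTS 16
    static unsigned char used[SLOTS];   /* toy stand-in for the bitmap */
    static unsigned next_bit;           /* scan hint, as in pci-gart_64.c */

    static int alloc_slots(unsigned n)
    {
            unsigned i, run = 0;

            for (i = next_bit; i < SLOTS; i++) {
                    run = used[i] ? 0 : run + 1;
                    if (run == n) {
                            unsigned off = i + 1 - n;

                            memset(used + off, 1, n);
                            next_bit = i + 1;
                            return (int)off;
                    }
            }
            return -1;  /* the real code wraps around and flushes the TLB */
    }

    static void free_slots(unsigned off, unsigned n)
    {
            memset(used + off, 0, n);
            if (off >= next_bit)        /* the added hunk: skip stale space */
                    next_bit = off + n;
    }

    int main(void)
    {
            next_bit = 2;           /* pretend the allocator wrapped recently */
            free_slots(8, 4);       /* 8 >= 2: the hint jumps past slots 8..11 */
            printf("next_bit=%u\n", next_bit);      /* 12 */
            printf("alloc=%d\n", alloc_slots(2));   /* 12: freed range skipped */
            return 0;
    }
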
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3c539d111abb..d59c91747665 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -3,6 +3,8 @@ | |||
3 | #include <linux/pci.h> | 3 | #include <linux/pci.h> |
4 | #include <linux/cache.h> | 4 | #include <linux/cache.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/swiotlb.h> | ||
7 | #include <linux/bootmem.h> | ||
6 | #include <linux/dma-mapping.h> | 8 | #include <linux/dma-mapping.h> |
7 | 9 | ||
8 | #include <asm/iommu.h> | 10 | #include <asm/iommu.h> |
@@ -11,6 +13,31 @@ | |||
11 | 13 | ||
12 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
13 | 15 | ||
16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
14 | static dma_addr_t | 41 | static dma_addr_t |
15 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | 42 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, |
16 | int direction) | 43 | int direction) |
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = { | |||
50 | void __init pci_swiotlb_init(void) | 77 | void __init pci_swiotlb_init(void) |
51 | { | 78 | { |
52 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 79 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
80 | #ifdef CONFIG_X86_64 | ||
53 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) | 81 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) |
54 | swiotlb = 1; | 82 | swiotlb = 1; |
83 | #endif | ||
55 | if (swiotlb_force) | 84 | if (swiotlb_force) |
56 | swiotlb = 1; | 85 | swiotlb = 1; |
57 | if (swiotlb) { | 86 | if (swiotlb) { |
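
The new hooks give the generic swiotlb code per-architecture control points: on x86 the physical and bus address spaces coincide, so swiotlb_phys_to_bus()/swiotlb_bus_to_phys() are identities, and swiotlb_arch_range_needs_mapping() is declared __weak so another user (Xen, for instance) can supply a strong definition at link time. The weak-symbol override pattern in miniature (user-space, GCC/Clang):

    #include <stdio.h>

    /* default, overridable definition - mirrors the __weak hook above */
    __attribute__((weak)) int range_needs_mapping(void *ptr, unsigned long size)
    {
            return 0;       /* identity mapping: nothing special needed */
    }

    int main(void)
    {
            printf("needs mapping: %d\n", range_needs_mapping((void *)0, 4096));
            return 0;
    }

    /* a second translation unit may provide a strong definition:
     *     int range_needs_mapping(void *p, unsigned long s) { return 1; }
     * and the linker will pick it over the weak one - no #ifdefs needed */
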
diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c new file mode 100644 index 000000000000..22337b75de62 --- /dev/null +++ b/arch/x86/kernel/probe_32.c | |||
@@ -0,0 +1,411 @@ | |||
1 | /* | ||
2 | * Default generic APIC driver. This handles up to 8 CPUs. | ||
3 | * | ||
4 | * Copyright 2003 Andi Kleen, SuSE Labs. | ||
5 | * Subject to the GNU Public License, v.2 | ||
6 | * | ||
7 | * Generic x86 APIC driver probe layer. | ||
8 | */ | ||
9 | #include <linux/threads.h> | ||
10 | #include <linux/cpumask.h> | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/ctype.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <asm/fixmap.h> | ||
17 | #include <asm/mpspec.h> | ||
18 | #include <asm/apicdef.h> | ||
19 | #include <asm/genapic.h> | ||
20 | #include <asm/setup.h> | ||
21 | |||
22 | #include <linux/threads.h> | ||
23 | #include <linux/cpumask.h> | ||
24 | #include <asm/mpspec.h> | ||
25 | #include <asm/genapic.h> | ||
26 | #include <asm/fixmap.h> | ||
27 | #include <asm/apicdef.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/smp.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <asm/genapic.h> | ||
33 | #include <asm/ipi.h> | ||
34 | |||
35 | #include <linux/smp.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/interrupt.h> | ||
38 | #include <asm/acpi.h> | ||
39 | #include <asm/arch_hooks.h> | ||
40 | #include <asm/e820.h> | ||
41 | #include <asm/setup.h> | ||
42 | |||
43 | #include <asm/genapic.h> | ||
44 | |||
45 | #ifdef CONFIG_HOTPLUG_CPU | ||
46 | #define DEFAULT_SEND_IPI (1) | ||
47 | #else | ||
48 | #define DEFAULT_SEND_IPI (0) | ||
49 | #endif | ||
50 | |||
51 | int no_broadcast = DEFAULT_SEND_IPI; | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | |||
55 | static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
56 | { | ||
57 | /* | ||
58 | * Careful. Some cpus do not strictly honor the set of cpus | ||
59 | * specified in the interrupt destination when using lowest | ||
60 | * priority interrupt delivery mode. | ||
61 | * | ||
62 | * In particular there was a hyperthreading cpu observed to | ||
63 | * deliver interrupts to the wrong hyperthread when only one | ||
64 | * hyperthread was specified in the interrupt destination. | ||
65 | */ | ||
66 | *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; | ||
67 | } | ||
68 | |||
69 | /* should be called last. */ | ||
70 | static int probe_default(void) | ||
71 | { | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | struct genapic apic_default = { | ||
76 | |||
77 | .name = "default", | ||
78 | .probe = probe_default, | ||
79 | .acpi_madt_oem_check = NULL, | ||
80 | .apic_id_registered = default_apic_id_registered, | ||
81 | |||
82 | .irq_delivery_mode = dest_LowestPrio, | ||
83 | /* logical delivery broadcast to all CPUs: */ | ||
84 | .irq_dest_mode = 1, | ||
85 | |||
86 | .target_cpus = default_target_cpus, | ||
87 | .disable_esr = 0, | ||
88 | .dest_logical = APIC_DEST_LOGICAL, | ||
89 | .check_apicid_used = default_check_apicid_used, | ||
90 | .check_apicid_present = default_check_apicid_present, | ||
91 | |||
92 | .vector_allocation_domain = default_vector_allocation_domain, | ||
93 | .init_apic_ldr = default_init_apic_ldr, | ||
94 | |||
95 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | ||
96 | .setup_apic_routing = default_setup_apic_routing, | ||
97 | .multi_timer_check = NULL, | ||
98 | .apicid_to_node = default_apicid_to_node, | ||
99 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | ||
100 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
101 | .apicid_to_cpu_present = default_apicid_to_cpu_present, | ||
102 | .setup_portio_remap = NULL, | ||
103 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
104 | .enable_apic_mode = NULL, | ||
105 | .phys_pkg_id = default_phys_pkg_id, | ||
106 | .mps_oem_check = NULL, | ||
107 | |||
108 | .get_apic_id = default_get_apic_id, | ||
109 | .set_apic_id = NULL, | ||
110 | .apic_id_mask = 0x0F << 24, | ||
111 | |||
112 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | ||
113 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
114 | |||
115 | .send_IPI_mask = default_send_IPI_mask_logical, | ||
116 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, | ||
117 | .send_IPI_allbutself = default_send_IPI_allbutself, | ||
118 | .send_IPI_all = default_send_IPI_all, | ||
119 | .send_IPI_self = default_send_IPI_self, | ||
120 | |||
121 | .wakeup_cpu = NULL, | ||
122 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
123 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
124 | |||
125 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
126 | |||
127 | .smp_callin_clear_local_apic = NULL, | ||
128 | .store_NMI_vector = NULL, | ||
129 | .inquire_remote_apic = default_inquire_remote_apic, | ||
130 | }; | ||
131 | |||
132 | extern struct genapic apic_numaq; | ||
133 | extern struct genapic apic_summit; | ||
134 | extern struct genapic apic_bigsmp; | ||
135 | extern struct genapic apic_es7000; | ||
136 | extern struct genapic apic_default; | ||
137 | |||
138 | struct genapic *apic = &apic_default; | ||
139 | |||
140 | static struct genapic *apic_probe[] __initdata = { | ||
141 | #ifdef CONFIG_X86_NUMAQ | ||
142 | &apic_numaq, | ||
143 | #endif | ||
144 | #ifdef CONFIG_X86_SUMMIT | ||
145 | &apic_summit, | ||
146 | #endif | ||
147 | #ifdef CONFIG_X86_BIGSMP | ||
148 | &apic_bigsmp, | ||
149 | #endif | ||
150 | #ifdef CONFIG_X86_ES7000 | ||
151 | &apic_es7000, | ||
152 | #endif | ||
153 | &apic_default, /* must be last */ | ||
154 | NULL, | ||
155 | }; | ||
156 | |||
157 | static int cmdline_apic __initdata; | ||
158 | static int __init parse_apic(char *arg) | ||
159 | { | ||
160 | int i; | ||
161 | |||
162 | if (!arg) | ||
163 | return -EINVAL; | ||
164 | |||
165 | for (i = 0; apic_probe[i]; i++) { | ||
166 | if (!strcmp(apic_probe[i]->name, arg)) { | ||
167 | apic = apic_probe[i]; | ||
168 | cmdline_apic = 1; | ||
169 | return 0; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | if (x86_quirks->update_genapic) | ||
174 | x86_quirks->update_genapic(); | ||
175 | |||
176 | /* Parsed again by __setup for debug/verbose */ | ||
177 | return 0; | ||
178 | } | ||
179 | early_param("apic", parse_apic); | ||
180 | |||
181 | void __init generic_bigsmp_probe(void) | ||
182 | { | ||
183 | #ifdef CONFIG_X86_BIGSMP | ||
184 | /* | ||
185 | * This routine is used to switch to bigsmp mode when | ||
186 | * - There is no apic= option specified by the user | ||
187 | * - generic_apic_probe() has chosen apic_default as the sub_arch | ||
188 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | ||
189 | */ | ||
190 | |||
191 | if (!cmdline_apic && apic == &apic_default) { | ||
192 | if (apic_bigsmp.probe()) { | ||
193 | apic = &apic_bigsmp; | ||
194 | if (x86_quirks->update_genapic) | ||
195 | x86_quirks->update_genapic(); | ||
196 | printk(KERN_INFO "Overriding APIC driver with %s\n", | ||
197 | apic->name); | ||
198 | } | ||
199 | } | ||
200 | #endif | ||
201 | } | ||
202 | |||
203 | void __init generic_apic_probe(void) | ||
204 | { | ||
205 | if (!cmdline_apic) { | ||
206 | int i; | ||
207 | for (i = 0; apic_probe[i]; i++) { | ||
208 | if (apic_probe[i]->probe()) { | ||
209 | apic = apic_probe[i]; | ||
210 | break; | ||
211 | } | ||
212 | } | ||
213 | /* Not visible without early console */ | ||
214 | if (!apic_probe[i]) | ||
215 | panic("Didn't find an APIC driver"); | ||
216 | |||
217 | if (x86_quirks->update_genapic) | ||
218 | x86_quirks->update_genapic(); | ||
219 | } | ||
220 | printk(KERN_INFO "Using APIC driver %s\n", apic->name); | ||
221 | } | ||
222 | |||
223 | /* These functions can switch the APIC even after the initial ->probe() */ | ||
224 | |||
225 | int __init | ||
226 | generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
227 | { | ||
228 | int i; | ||
229 | |||
230 | for (i = 0; apic_probe[i]; ++i) { | ||
231 | if (!apic_probe[i]->mps_oem_check) | ||
232 | continue; | ||
233 | if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) | ||
234 | continue; | ||
235 | |||
236 | if (!cmdline_apic) { | ||
237 | apic = apic_probe[i]; | ||
238 | if (x86_quirks->update_genapic) | ||
239 | x86_quirks->update_genapic(); | ||
240 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | ||
241 | apic->name); | ||
242 | } | ||
243 | return 1; | ||
244 | } | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
249 | { | ||
250 | int i; | ||
251 | |||
252 | for (i = 0; apic_probe[i]; ++i) { | ||
253 | if (!apic_probe[i]->acpi_madt_oem_check) | ||
254 | continue; | ||
255 | if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) | ||
256 | continue; | ||
257 | |||
258 | if (!cmdline_apic) { | ||
259 | apic = apic_probe[i]; | ||
260 | if (x86_quirks->update_genapic) | ||
261 | x86_quirks->update_genapic(); | ||
262 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | ||
263 | apic->name); | ||
264 | } | ||
265 | return 1; | ||
266 | } | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
271 | |||
272 | /** | ||
273 | * pre_intr_init_hook - initialisation prior to setting up interrupt vectors | ||
274 | * | ||
275 | * Description: | ||
276 | * Perform any necessary interrupt initialisation prior to setting up | ||
277 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
278 | * interrupts should be initialised here if the machine emulates a PC | ||
279 | * in any way. | ||
280 | **/ | ||
281 | void __init pre_intr_init_hook(void) | ||
282 | { | ||
283 | if (x86_quirks->arch_pre_intr_init) { | ||
284 | if (x86_quirks->arch_pre_intr_init()) | ||
285 | return; | ||
286 | } | ||
287 | init_ISA_irqs(); | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * intr_init_hook - post gate setup interrupt initialisation | ||
292 | * | ||
293 | * Description: | ||
294 | * Fill in any interrupts that may have been left out by the general | ||
295 | * init_IRQ() routine. interrupts having to do with the machine rather | ||
296 | * than the devices on the I/O bus (like APIC interrupts in intel MP | ||
297 | * systems) are started here. | ||
298 | **/ | ||
299 | void __init intr_init_hook(void) | ||
300 | { | ||
301 | if (x86_quirks->arch_intr_init) { | ||
302 | if (x86_quirks->arch_intr_init()) | ||
303 | return; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | /** | ||
308 | * pre_setup_arch_hook - hook called prior to any setup_arch() execution | ||
309 | * | ||
310 | * Description: | ||
311 | * generally used to activate any machine specific identification | ||
312 | * routines that may be needed before setup_arch() runs. On Voyager | ||
313 | * this is used to get the board revision and type. | ||
314 | **/ | ||
315 | void __init pre_setup_arch_hook(void) | ||
316 | { | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * trap_init_hook - initialise system specific traps | ||
321 | * | ||
322 | * Description: | ||
323 | * Called as the final act of trap_init(). Used in VISWS to initialise | ||
324 | * the various board specific APIC traps. | ||
325 | **/ | ||
326 | void __init trap_init_hook(void) | ||
327 | { | ||
328 | if (x86_quirks->arch_trap_init) { | ||
329 | if (x86_quirks->arch_trap_init()) | ||
330 | return; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | static struct irqaction irq0 = { | ||
335 | .handler = timer_interrupt, | ||
336 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, | ||
337 | .mask = CPU_MASK_NONE, | ||
338 | .name = "timer" | ||
339 | }; | ||
340 | |||
341 | /** | ||
342 | * pre_time_init_hook - do any specific initialisations before time init. | ||
343 | * | ||
344 | **/ | ||
345 | void __init pre_time_init_hook(void) | ||
346 | { | ||
347 | if (x86_quirks->arch_pre_time_init) | ||
348 | x86_quirks->arch_pre_time_init(); | ||
349 | } | ||
350 | |||
351 | /** | ||
352 | * time_init_hook - do any specific initialisations for the system timer. | ||
353 | * | ||
354 | * Description: | ||
355 | * Must plug the system timer interrupt source at HZ into the IRQ listed | ||
356 | * in irq_vectors.h:TIMER_IRQ | ||
357 | **/ | ||
358 | void __init time_init_hook(void) | ||
359 | { | ||
360 | if (x86_quirks->arch_time_init) { | ||
361 | /* | ||
362 | * A nonzero return code does not mean failure, it means | ||
363 | * that the architecture quirk does not want any | ||
364 | * generic (timer) setup to be performed after this: | ||
365 | */ | ||
366 | if (x86_quirks->arch_time_init()) | ||
367 | return; | ||
368 | } | ||
369 | |||
370 | irq0.mask = cpumask_of_cpu(0); | ||
371 | setup_irq(0, &irq0); | ||
372 | } | ||
373 | |||
374 | #ifdef CONFIG_MCA | ||
375 | /** | ||
376 | * mca_nmi_hook - hook into MCA specific NMI chain | ||
377 | * | ||
378 | * Description: | ||
379 | * The MCA (Microchannel Architecture) has an NMI chain for NMI sources | ||
380 | * along the MCA bus. Use this to hook into that chain if you will need | ||
381 | * it. | ||
382 | **/ | ||
383 | void mca_nmi_hook(void) | ||
384 | { | ||
385 | /* | ||
386 | * If I recall correctly, there's a whole bunch of other things that | ||
387 | * we can do to check for NMI problems, but that's all I know about | ||
388 | * at the moment. | ||
389 | */ | ||
390 | pr_warning("NMI generated from unknown source!\n"); | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static __init int no_ipi_broadcast(char *str) | ||
395 | { | ||
396 | get_option(&str, &no_broadcast); | ||
397 | pr_info("Using %s mode\n", | ||
398 | no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); | ||
399 | return 1; | ||
400 | } | ||
401 | __setup("no_ipi_broadcast=", no_ipi_broadcast); | ||
402 | |||
403 | static int __init print_ipi_mode(void) | ||
404 | { | ||
405 | pr_info("Using IPI %s mode\n", | ||
406 | no_broadcast ? "No-Shortcut" : "Shortcut"); | ||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | late_initcall(print_ipi_mode); | ||
411 | |||
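
generic_apic_probe() above implements a classic first-match driver table: walk a NULL-terminated array of candidates, keep the first whose ->probe() returns nonzero, and rely on a catch-all entry (apic_default) being ordered last. The same pattern in miniature, with made-up driver names:

    #include <stdio.h>

    struct driver {
            const char *name;
            int (*probe)(void);
    };

    static int probe_never(void)  { return 0; }
    static int probe_always(void) { return 1; }     /* like apic_default */

    static struct driver quirky   = { "quirky",  probe_never  };
    static struct driver fallback = { "default", probe_always };

    /* NULL-terminated, most specific first, catch-all last */
    static struct driver *probe_table[] = { &quirky, &fallback, NULL };

    int main(void)
    {
            struct driver *chosen = NULL;
            int i;

            for (i = 0; probe_table[i]; i++) {
                    if (probe_table[i]->probe()) {
                            chosen = probe_table[i];
                            break;
                    }
            }
            if (!chosen)
                    return 1;       /* the kernel panics here instead */
            printf("Using APIC driver %s\n", chosen->name);
            return 0;
    }
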
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c index 675a48c404a5..071e7fea42e5 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms_32.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
19 | #include <asm/sections.h> | 19 | #include <asm/sections.h> |
20 | #include <asm/io.h> | 20 | #include <asm/io.h> |
21 | #include <setup_arch.h> | 21 | #include <asm/setup_arch.h> |
22 | 22 | ||
23 | static struct resource system_rom_resource = { | 23 | static struct resource system_rom_resource = { |
24 | .name = "System ROM", | 24 | .name = "System ROM", |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..87b69d4fac16 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -1,13 +1,16 @@ | |||
1 | #include <linux/errno.h> | 1 | #include <linux/errno.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | #include <asm/idle.h> | ||
4 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
5 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/module.h> | 8 | #include <linux/module.h> |
8 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/ftrace.h> | ||
10 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/apic.h> | ||
11 | 14 | ||
12 | unsigned long idle_halt; | 15 | unsigned long idle_halt; |
13 | EXPORT_SYMBOL(idle_halt); | 16 | EXPORT_SYMBOL(idle_halt); |
@@ -100,6 +103,9 @@ static inline int hlt_use_halt(void) | |||
100 | void default_idle(void) | 103 | void default_idle(void) |
101 | { | 104 | { |
102 | if (hlt_use_halt()) { | 105 | if (hlt_use_halt()) { |
106 | struct power_trace it; | ||
107 | |||
108 | trace_power_start(&it, POWER_CSTATE, 1); | ||
103 | current_thread_info()->status &= ~TS_POLLING; | 109 | current_thread_info()->status &= ~TS_POLLING; |
104 | /* | 110 | /* |
105 | * TS_POLLING-cleared state must be visible before we | 111 | * TS_POLLING-cleared state must be visible before we |
@@ -112,6 +118,7 @@ void default_idle(void) | |||
112 | else | 118 | else |
113 | local_irq_enable(); | 119 | local_irq_enable(); |
114 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
121 | trace_power_end(&it); | ||
115 | } else { | 122 | } else { |
116 | local_irq_enable(); | 123 | local_irq_enable(); |
117 | /* loop is done by the caller */ | 124 | /* loop is done by the caller */ |
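The two hunks above bracket the hlt-based idle path with the power tracer: a power_trace instance on the stack marks the entry into C1 and the subsequent wakeup. The pattern reduced to its skeleton, assuming struct power_trace and the trace_power_*() calls from this series' linux/ftrace.h:

        static void traced_hlt_idle(void)
        {
                struct power_trace it;

                trace_power_start(&it, POWER_CSTATE, 1);        /* entering C1 */
                safe_halt();                                    /* sti; hlt */
                trace_power_end(&it);                           /* woke up */
        }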
@@ -122,6 +129,21 @@ void default_idle(void) | |||
122 | EXPORT_SYMBOL(default_idle); | 129 | EXPORT_SYMBOL(default_idle); |
123 | #endif | 130 | #endif |
124 | 131 | ||
132 | void stop_this_cpu(void *dummy) | ||
133 | { | ||
134 | local_irq_disable(); | ||
135 | /* | ||
136 | * Remove this CPU: | ||
137 | */ | ||
138 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
139 | disable_local_APIC(); | ||
140 | |||
141 | for (;;) { | ||
142 | if (hlt_works(smp_processor_id())) | ||
143 | halt(); | ||
144 | } | ||
145 | } | ||
146 | |||
125 | static void do_nothing(void *unused) | 147 | static void do_nothing(void *unused) |
126 | { | 148 | { |
127 | } | 149 | } |
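stop_this_cpu() above gives the halt and reboot paths a common "park this CPU" callback: interrupts off, drop out of cpu_online_map, disable the local APIC, then hlt forever. A hedged sketch of how such a callback is typically driven; the actual caller in this series lives in the SMP/reboot code and may differ:

        /* Illustrative only: halt every other CPU via the generic
         * cross-call API, without waiting (the targets never return). */
        static void halt_other_cpus(void)
        {
                smp_call_function(stop_this_cpu, NULL, 0);
                local_irq_disable();
        }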
@@ -154,24 +176,37 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
154 | */ | 176 | */ |
155 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 177 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
156 | { | 178 | { |
179 | struct power_trace it; | ||
180 | |||
181 | trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); | ||
157 | if (!need_resched()) { | 182 | if (!need_resched()) { |
183 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | ||
184 | clflush((void *)¤t_thread_info()->flags); | ||
185 | |||
158 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | 186 | __monitor((void *)¤t_thread_info()->flags, 0, 0); |
159 | smp_mb(); | 187 | smp_mb(); |
160 | if (!need_resched()) | 188 | if (!need_resched()) |
161 | __mwait(ax, cx); | 189 | __mwait(ax, cx); |
162 | } | 190 | } |
191 | trace_power_end(&it); | ||
163 | } | 192 | } |
164 | 193 | ||
165 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 194 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
166 | static void mwait_idle(void) | 195 | static void mwait_idle(void) |
167 | { | 196 | { |
197 | struct power_trace it; | ||
168 | if (!need_resched()) { | 198 | if (!need_resched()) { |
199 | trace_power_start(&it, POWER_CSTATE, 1); | ||
200 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | ||
201 | clflush((void *)¤t_thread_info()->flags); | ||
202 | |||
169 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | 203 | __monitor((void *)¤t_thread_info()->flags, 0, 0); |
170 | smp_mb(); | 204 | smp_mb(); |
171 | if (!need_resched()) | 205 | if (!need_resched()) |
172 | __sti_mwait(0, 0); | 206 | __sti_mwait(0, 0); |
173 | else | 207 | else |
174 | local_irq_enable(); | 208 | local_irq_enable(); |
209 | trace_power_end(&it); | ||
175 | } else | 210 | } else |
176 | local_irq_enable(); | 211 | local_irq_enable(); |
177 | } | 212 | } |
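Both MWAIT paths above gain a clflush of the monitored cache line before __monitor(). On parts flagged X86_FEATURE_CLFLUSH_MONITOR, an erratum can otherwise leave the monitor disarmed, so a remote store to the thread flags would fail to break the CPU out of __mwait(); the guarded flush is cheap and compiles away on unaffected configurations. The arming sequence in isolation:

        /* Sketch of the guarded-monitor sequence introduced above. */
        static void armed_monitor(void)
        {
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();       /* order the flag re-check against MONITOR */
        }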
@@ -183,9 +218,13 @@ static void mwait_idle(void) | |||
183 | */ | 218 | */ |
184 | static void poll_idle(void) | 219 | static void poll_idle(void) |
185 | { | 220 | { |
221 | struct power_trace it; | ||
222 | |||
223 | trace_power_start(&it, POWER_CSTATE, 0); | ||
186 | local_irq_enable(); | 224 | local_irq_enable(); |
187 | while (!need_resched()) | 225 | while (!need_resched()) |
188 | cpu_relax(); | 226 | cpu_relax(); |
227 | trace_power_end(&it); | ||
189 | } | 228 | } |
190 | 229 | ||
191 | /* | 230 | /* |
@@ -270,7 +309,7 @@ static void c1e_idle(void) | |||
270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 309 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 310 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
272 | c1e_detected = 1; | 311 | c1e_detected = 1; |
273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 312 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
274 | mark_tsc_unstable("TSC halt in AMD C1E"); | 313 | mark_tsc_unstable("TSC halt in AMD C1E"); |
275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 314 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | 315 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); |
@@ -311,7 +350,7 @@ static void c1e_idle(void) | |||
311 | 350 | ||
312 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 351 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) |
313 | { | 352 | { |
314 | #ifdef CONFIG_X86_SMP | 353 | #ifdef CONFIG_SMP |
315 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 354 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
316 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | 355 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," |
317 | " performance may degrade.\n"); | 356 | " performance may degrade.\n"); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..fec79ad85dc6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <stdarg.h> | 12 | #include <stdarg.h> |
13 | 13 | ||
14 | #include <linux/stackprotector.h> | ||
14 | #include <linux/cpu.h> | 15 | #include <linux/cpu.h> |
15 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
16 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
@@ -38,11 +39,13 @@ | |||
38 | #include <linux/percpu.h> | 39 | #include <linux/percpu.h> |
39 | #include <linux/prctl.h> | 40 | #include <linux/prctl.h> |
40 | #include <linux/dmi.h> | 41 | #include <linux/dmi.h> |
42 | #include <linux/ftrace.h> | ||
43 | #include <linux/uaccess.h> | ||
44 | #include <linux/io.h> | ||
45 | #include <linux/kdebug.h> | ||
41 | 46 | ||
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/pgtable.h> | 47 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 48 | #include <asm/system.h> |
45 | #include <asm/io.h> | ||
46 | #include <asm/ldt.h> | 49 | #include <asm/ldt.h> |
47 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
48 | #include <asm/i387.h> | 51 | #include <asm/i387.h> |
@@ -55,19 +58,15 @@ | |||
55 | 58 | ||
56 | #include <asm/tlbflush.h> | 59 | #include <asm/tlbflush.h> |
57 | #include <asm/cpu.h> | 60 | #include <asm/cpu.h> |
58 | #include <asm/kdebug.h> | ||
59 | #include <asm/idle.h> | 61 | #include <asm/idle.h> |
60 | #include <asm/syscalls.h> | 62 | #include <asm/syscalls.h> |
61 | #include <asm/smp.h> | 63 | #include <asm/ds.h> |
62 | 64 | ||
63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 65 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
64 | 66 | ||
65 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 67 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
66 | EXPORT_PER_CPU_SYMBOL(current_task); | 68 | EXPORT_PER_CPU_SYMBOL(current_task); |
67 | 69 | ||
68 | DEFINE_PER_CPU(int, cpu_number); | ||
69 | EXPORT_PER_CPU_SYMBOL(cpu_number); | ||
70 | |||
71 | /* | 70 | /* |
72 | * Return saved PC of a blocked thread. | 71 | * Return saved PC of a blocked thread. |
73 | */ | 72 | */ |
@@ -93,6 +92,15 @@ void cpu_idle(void) | |||
93 | { | 92 | { |
94 | int cpu = smp_processor_id(); | 93 | int cpu = smp_processor_id(); |
95 | 94 | ||
95 | /* | ||
96 | * If we're the non-boot CPU, nothing set the stack canary up | ||
97 | * for us. CPU0 already has it initialized but no harm in | ||
98 | * doing it again. This is a good place for updating it, as | ||
99 | * we won't ever return from this function (so the invalid | ||
100 | * canaries already on the stack won't ever trigger). | ||
101 | */ | ||
102 | boot_init_stack_canary(); | ||
103 | |||
96 | current_thread_info()->status |= TS_POLLING; | 104 | current_thread_info()->status |= TS_POLLING; |
97 | 105 | ||
98 | /* endless idle loop with no priority at all */ | 106 | /* endless idle loop with no priority at all */ |
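boot_init_stack_canary() is called here precisely because cpu_idle() never returns: any canary values already on the stack below this frame can safely go stale. A hedged sketch of what the helper amounts to in this series; the real definition lives in asm/stackprotector.h, and the percpu variable name below is an assumption:

        static __always_inline void sketch_init_stack_canary(void)
        {
                u64 canary, tsc;

                get_random_bytes(&canary, sizeof(canary));
                rdtscll(tsc);
                canary += tsc + (tsc << 32UL);  /* mix in some entropy */

                current->stack_canary = canary;
                /* publish where -fstack-protector looks: %gs-based on x86 */
                percpu_write(stack_canary, canary);     /* name is an assumption */
        }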
@@ -110,7 +118,6 @@ void cpu_idle(void) | |||
110 | play_dead(); | 118 | play_dead(); |
111 | 119 | ||
112 | local_irq_disable(); | 120 | local_irq_disable(); |
113 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | ||
114 | /* Don't trace irqs off for idle */ | 121 | /* Don't trace irqs off for idle */ |
115 | stop_critical_timings(); | 122 | stop_critical_timings(); |
116 | pm_idle(); | 123 | pm_idle(); |
@@ -134,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
134 | if (user_mode_vm(regs)) { | 141 | if (user_mode_vm(regs)) { |
135 | sp = regs->sp; | 142 | sp = regs->sp; |
136 | ss = regs->ss & 0xffff; | 143 | ss = regs->ss & 0xffff; |
137 | savesegment(gs, gs); | 144 | gs = get_user_gs(regs); |
138 | } else { | 145 | } else { |
139 | sp = (unsigned long) (®s->sp); | 146 | sp = (unsigned long) (®s->sp); |
140 | savesegment(ss, ss); | 147 | savesegment(ss, ss); |
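__show_regs() now reads the user %gs through get_user_gs() instead of savesegment(). The helper family (get_user_gs/set_user_gs/task_user_gs) hides whether 32-bit saves user %gs eagerly in pt_regs or lazily in thread_struct. A hedged sketch of the two shapes the accessors can take; the real definitions are in this series' asm headers:

        #ifdef CONFIG_X86_32_LAZY_GS            /* assumption: lazy mode */
        # define get_user_gs(regs)      \
                (u16)({ unsigned long v; savesegment(gs, v); v; })
        # define task_user_gs(tsk)      ((tsk)->thread.gs)
        #else                                   /* %gs saved in pt_regs */
        # define get_user_gs(regs)      (u16)((regs)->gs)
        # define task_user_gs(tsk)      (task_pt_regs(tsk)->gs)
        #endif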
@@ -203,7 +210,7 @@ extern void kernel_thread_helper(void); | |||
203 | /* | 210 | /* |
204 | * Create a kernel thread | 211 | * Create a kernel thread |
205 | */ | 212 | */ |
206 | int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | 213 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) |
207 | { | 214 | { |
208 | struct pt_regs regs; | 215 | struct pt_regs regs; |
209 | 216 | ||
@@ -215,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
215 | regs.ds = __USER_DS; | 222 | regs.ds = __USER_DS; |
216 | regs.es = __USER_DS; | 223 | regs.es = __USER_DS; |
217 | regs.fs = __KERNEL_PERCPU; | 224 | regs.fs = __KERNEL_PERCPU; |
225 | regs.gs = __KERNEL_STACK_CANARY; | ||
218 | regs.orig_ax = -1; | 226 | regs.orig_ax = -1; |
219 | regs.ip = (unsigned long) kernel_thread_helper; | 227 | regs.ip = (unsigned long) kernel_thread_helper; |
220 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 228 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
@@ -250,14 +258,8 @@ void exit_thread(void) | |||
250 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 258 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
251 | put_cpu(); | 259 | put_cpu(); |
252 | } | 260 | } |
253 | #ifdef CONFIG_X86_DS | 261 | |
254 | /* Free any DS contexts that have not been properly released. */ | 262 | ds_exit_thread(current); |
255 | if (unlikely(current->thread.ds_ctx)) { | ||
256 | /* we clear debugctl to make sure DS is not used. */ | ||
257 | update_debugctlmsr(0); | ||
258 | ds_free(current->thread.ds_ctx); | ||
259 | } | ||
260 | #endif /* CONFIG_X86_DS */ | ||
261 | } | 263 | } |
262 | 264 | ||
263 | void flush_thread(void) | 265 | void flush_thread(void) |
@@ -270,7 +272,7 @@ void flush_thread(void) | |||
270 | tsk->thread.debugreg3 = 0; | 272 | tsk->thread.debugreg3 = 0; |
271 | tsk->thread.debugreg6 = 0; | 273 | tsk->thread.debugreg6 = 0; |
272 | tsk->thread.debugreg7 = 0; | 274 | tsk->thread.debugreg7 = 0; |
273 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 275 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
274 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 276 | clear_tsk_thread_flag(tsk, TIF_DEBUG); |
275 | /* | 277 | /* |
276 | * Forget coprocessor state.. | 278 | * Forget coprocessor state.. |
@@ -297,9 +299,9 @@ void prepare_to_copy(struct task_struct *tsk) | |||
297 | 299 | ||
298 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | 300 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
299 | unsigned long unused, | 301 | unsigned long unused, |
300 | struct task_struct * p, struct pt_regs * regs) | 302 | struct task_struct *p, struct pt_regs *regs) |
301 | { | 303 | { |
302 | struct pt_regs * childregs; | 304 | struct pt_regs *childregs; |
303 | struct task_struct *tsk; | 305 | struct task_struct *tsk; |
304 | int err; | 306 | int err; |
305 | 307 | ||
@@ -313,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
313 | 315 | ||
314 | p->thread.ip = (unsigned long) ret_from_fork; | 316 | p->thread.ip = (unsigned long) ret_from_fork; |
315 | 317 | ||
316 | savesegment(gs, p->thread.gs); | 318 | task_user_gs(p) = get_user_gs(regs); |
317 | 319 | ||
318 | tsk = current; | 320 | tsk = current; |
319 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 321 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -339,13 +341,19 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
339 | kfree(p->thread.io_bitmap_ptr); | 341 | kfree(p->thread.io_bitmap_ptr); |
340 | p->thread.io_bitmap_max = 0; | 342 | p->thread.io_bitmap_max = 0; |
341 | } | 343 | } |
344 | |||
345 | ds_copy_thread(p, current); | ||
346 | |||
347 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
348 | p->thread.debugctlmsr = 0; | ||
349 | |||
342 | return err; | 350 | return err; |
343 | } | 351 | } |
344 | 352 | ||
345 | void | 353 | void |
346 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 354 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
347 | { | 355 | { |
348 | __asm__("movl %0, %%gs" :: "r"(0)); | 356 | set_user_gs(regs, 0); |
349 | regs->fs = 0; | 357 | regs->fs = 0; |
350 | set_fs(USER_DS); | 358 | set_fs(USER_DS); |
351 | regs->ds = __USER_DS; | 359 | regs->ds = __USER_DS; |
@@ -419,48 +427,19 @@ int set_tsc_mode(unsigned int val) | |||
419 | return 0; | 427 | return 0; |
420 | } | 428 | } |
421 | 429 | ||
422 | #ifdef CONFIG_X86_DS | ||
423 | static int update_debugctl(struct thread_struct *prev, | ||
424 | struct thread_struct *next, unsigned long debugctl) | ||
425 | { | ||
426 | unsigned long ds_prev = 0; | ||
427 | unsigned long ds_next = 0; | ||
428 | |||
429 | if (prev->ds_ctx) | ||
430 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
431 | if (next->ds_ctx) | ||
432 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
433 | |||
434 | if (ds_next != ds_prev) { | ||
435 | /* we clear debugctl to make sure DS | ||
436 | * is not in use when we change it */ | ||
437 | debugctl = 0; | ||
438 | update_debugctlmsr(0); | ||
439 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
440 | } | ||
441 | return debugctl; | ||
442 | } | ||
443 | #else | ||
444 | static int update_debugctl(struct thread_struct *prev, | ||
445 | struct thread_struct *next, unsigned long debugctl) | ||
446 | { | ||
447 | return debugctl; | ||
448 | } | ||
449 | #endif /* CONFIG_X86_DS */ | ||
450 | |||
451 | static noinline void | 430 | static noinline void |
452 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 431 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
453 | struct tss_struct *tss) | 432 | struct tss_struct *tss) |
454 | { | 433 | { |
455 | struct thread_struct *prev, *next; | 434 | struct thread_struct *prev, *next; |
456 | unsigned long debugctl; | ||
457 | 435 | ||
458 | prev = &prev_p->thread; | 436 | prev = &prev_p->thread; |
459 | next = &next_p->thread; | 437 | next = &next_p->thread; |
460 | 438 | ||
461 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); | 439 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
462 | 440 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
463 | if (next->debugctlmsr != debugctl) | 441 | ds_switch_to(prev_p, next_p); |
442 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
464 | update_debugctlmsr(next->debugctlmsr); | 443 | update_debugctlmsr(next->debugctlmsr); |
465 | 444 | ||
466 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 445 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -482,15 +461,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
482 | hard_enable_TSC(); | 461 | hard_enable_TSC(); |
483 | } | 462 | } |
484 | 463 | ||
485 | #ifdef CONFIG_X86_PTRACE_BTS | ||
486 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
487 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
488 | |||
489 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
490 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
491 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
492 | |||
493 | |||
494 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 464 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
495 | /* | 465 | /* |
496 | * Disable the bitmap via an invalid offset. We still cache | 466 | * Disable the bitmap via an invalid offset. We still cache |
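The __switch_to_xtra() rework above relies on a division of labour: whenever either task has TIF_DS_AREA_MSR set, ds_switch_to() owns both MSR_IA32_DS_AREA and DEBUGCTL across the switch, so the fallback branch only needs to propagate a changed debugctlmsr. A hedged sketch of the contract assumed of ds_switch_to(); the real implementation is in arch/x86/kernel/ds.c and the helper below is hypothetical:

        void sketch_ds_switch_to(struct task_struct *prev, struct task_struct *next)
        {
                update_debugctlmsr(0);                          /* quiesce DS first */
                wrmsrl(MSR_IA32_DS_AREA, ds_area_of(next));     /* hypothetical helper */
                update_debugctlmsr(next->thread.debugctlmsr);   /* re-arm tracing */
        }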
@@ -548,7 +518,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
548 | * the task-switch, and shows up in ret_from_fork in entry.S, | 518 | * the task-switch, and shows up in ret_from_fork in entry.S, |
549 | * for example. | 519 | * for example. |
550 | */ | 520 | */ |
551 | struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 521 | __notrace_funcgraph struct task_struct * |
522 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
552 | { | 523 | { |
553 | struct thread_struct *prev = &prev_p->thread, | 524 | struct thread_struct *prev = &prev_p->thread, |
554 | *next = &next_p->thread; | 525 | *next = &next_p->thread; |
@@ -579,7 +550,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct | |||
579 | * used %fs or %gs (it does not today), or if the kernel is | 550 | * used %fs or %gs (it does not today), or if the kernel is |
580 | * running inside of a hypervisor layer. | 551 | * running inside of a hypervisor layer. |
581 | */ | 552 | */ |
582 | savesegment(gs, prev->gs); | 553 | lazy_save_gs(prev->gs); |
583 | 554 | ||
584 | /* | 555 | /* |
585 | * Load the per-thread Thread-Local Storage descriptor. | 556 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -625,31 +596,31 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct | |||
625 | * Restore %gs if needed (which is common) | 596 | * Restore %gs if needed (which is common) |
626 | */ | 597 | */ |
627 | if (prev->gs | next->gs) | 598 | if (prev->gs | next->gs) |
628 | loadsegment(gs, next->gs); | 599 | lazy_load_gs(next->gs); |
629 | 600 | ||
630 | x86_write_percpu(current_task, next_p); | 601 | percpu_write(current_task, next_p); |
631 | 602 | ||
632 | return prev_p; | 603 | return prev_p; |
633 | } | 604 | } |
634 | 605 | ||
635 | asmlinkage int sys_fork(struct pt_regs regs) | 606 | int sys_fork(struct pt_regs *regs) |
636 | { | 607 | { |
637 | return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | 608 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); |
638 | } | 609 | } |
639 | 610 | ||
640 | asmlinkage int sys_clone(struct pt_regs regs) | 611 | int sys_clone(struct pt_regs *regs) |
641 | { | 612 | { |
642 | unsigned long clone_flags; | 613 | unsigned long clone_flags; |
643 | unsigned long newsp; | 614 | unsigned long newsp; |
644 | int __user *parent_tidptr, *child_tidptr; | 615 | int __user *parent_tidptr, *child_tidptr; |
645 | 616 | ||
646 | clone_flags = regs.bx; | 617 | clone_flags = regs->bx; |
647 | newsp = regs.cx; | 618 | newsp = regs->cx; |
648 | parent_tidptr = (int __user *)regs.dx; | 619 | parent_tidptr = (int __user *)regs->dx; |
649 | child_tidptr = (int __user *)regs.di; | 620 | child_tidptr = (int __user *)regs->di; |
650 | if (!newsp) | 621 | if (!newsp) |
651 | newsp = regs.sp; | 622 | newsp = regs->sp; |
652 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); | 623 | return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); |
653 | } | 624 | } |
654 | 625 | ||
655 | /* | 626 | /* |
@@ -662,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs) | |||
662 | * do not have enough call-clobbered registers to hold all | 633 | * do not have enough call-clobbered registers to hold all |
663 | * the information you need. | 634 | * the information you need. |
664 | */ | 635 | */ |
665 | asmlinkage int sys_vfork(struct pt_regs regs) | 636 | int sys_vfork(struct pt_regs *regs) |
666 | { | 637 | { |
667 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | 638 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); |
668 | } | 639 | } |
669 | 640 | ||
670 | /* | 641 | /* |
671 | * sys_execve() executes a new program. | 642 | * sys_execve() executes a new program. |
672 | */ | 643 | */ |
673 | asmlinkage int sys_execve(struct pt_regs regs) | 644 | int sys_execve(struct pt_regs *regs) |
674 | { | 645 | { |
675 | int error; | 646 | int error; |
676 | char * filename; | 647 | char *filename; |
677 | 648 | ||
678 | filename = getname((char __user *) regs.bx); | 649 | filename = getname((char __user *) regs->bx); |
679 | error = PTR_ERR(filename); | 650 | error = PTR_ERR(filename); |
680 | if (IS_ERR(filename)) | 651 | if (IS_ERR(filename)) |
681 | goto out; | 652 | goto out; |
682 | error = do_execve(filename, | 653 | error = do_execve(filename, |
683 | (char __user * __user *) regs.cx, | 654 | (char __user * __user *) regs->cx, |
684 | (char __user * __user *) regs.dx, | 655 | (char __user * __user *) regs->dx, |
685 | ®s); | 656 | regs); |
686 | if (error == 0) { | 657 | if (error == 0) { |
687 | /* Make sure we don't return using sysenter.. */ | 658 | /* Make sure we don't return using sysenter.. */ |
688 | set_thread_flag(TIF_IRET); | 659 | set_thread_flag(TIF_IRET); |
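The fork/clone/vfork/execve stubs above all move from taking struct pt_regs by value, which only worked because of the 32-bit entry stack layout, to an explicit pointer supplied by the entry code. The resulting shape of such a syscall, with a hypothetical handler for illustration:

        int sys_hypothetical(struct pt_regs *regs)
        {
                unsigned long arg0 = regs->bx;  /* 32-bit syscall args live in */
                unsigned long arg1 = regs->cx;  /* bx, cx, dx, si, di, bp      */

                return do_hypothetical(arg0, arg1);     /* hypothetical helper */
        }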
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..836ef6575f01 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <stdarg.h> | 17 | #include <stdarg.h> |
18 | 18 | ||
19 | #include <linux/stackprotector.h> | ||
19 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
20 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
@@ -39,22 +40,30 @@ | |||
39 | #include <linux/prctl.h> | 40 | #include <linux/prctl.h> |
40 | #include <linux/uaccess.h> | 41 | #include <linux/uaccess.h> |
41 | #include <linux/io.h> | 42 | #include <linux/io.h> |
43 | #include <linux/ftrace.h> | ||
44 | #include <linux/dmi.h> | ||
42 | 45 | ||
43 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 47 | #include <asm/system.h> |
45 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
46 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
47 | #include <asm/mmu_context.h> | 50 | #include <asm/mmu_context.h> |
48 | #include <asm/pda.h> | ||
49 | #include <asm/prctl.h> | 51 | #include <asm/prctl.h> |
50 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
51 | #include <asm/proto.h> | 53 | #include <asm/proto.h> |
52 | #include <asm/ia32.h> | 54 | #include <asm/ia32.h> |
53 | #include <asm/idle.h> | 55 | #include <asm/idle.h> |
54 | #include <asm/syscalls.h> | 56 | #include <asm/syscalls.h> |
57 | #include <asm/ds.h> | ||
55 | 58 | ||
56 | asmlinkage extern void ret_from_fork(void); | 59 | asmlinkage extern void ret_from_fork(void); |
57 | 60 | ||
61 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
62 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
63 | |||
64 | DEFINE_PER_CPU(unsigned long, old_rsp); | ||
65 | static DEFINE_PER_CPU(unsigned char, is_idle); | ||
66 | |||
58 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | 67 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; |
59 | 68 | ||
60 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 69 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
@@ -73,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); | |||
73 | 82 | ||
74 | void enter_idle(void) | 83 | void enter_idle(void) |
75 | { | 84 | { |
76 | write_pda(isidle, 1); | 85 | percpu_write(is_idle, 1); |
77 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | 86 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); |
78 | } | 87 | } |
79 | 88 | ||
80 | static void __exit_idle(void) | 89 | static void __exit_idle(void) |
81 | { | 90 | { |
82 | if (test_and_clear_bit_pda(0, isidle) == 0) | 91 | if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) |
83 | return; | 92 | return; |
84 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | 93 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); |
85 | } | 94 | } |
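enter_idle()/__exit_idle() show the recurring conversion in this series: a field formerly carved out of struct x8664_pda becomes an ordinary DEFINE_PER_CPU variable, and the pda accessors become %gs-relative percpu operations. The pattern in isolation, with a hypothetical flag:

        static DEFINE_PER_CPU(unsigned char, some_flag);        /* hypothetical */

        static void flag_set(void)
        {
                percpu_write(some_flag, 1);     /* was: write_pda(some_flag, 1) */
        }

        static int flag_test_and_clear(void)
        {
                /* was: test_and_clear_bit_pda(0, some_flag) */
                return x86_test_and_clear_bit_percpu(0, some_flag);
        }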
@@ -109,6 +118,16 @@ static inline void play_dead(void) | |||
109 | void cpu_idle(void) | 118 | void cpu_idle(void) |
110 | { | 119 | { |
111 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
121 | |||
122 | /* | ||
123 | * If we're the non-boot CPU, nothing set the stack canary up | ||
124 | * for us. CPU0 already has it initialized but no harm in | ||
125 | * doing it again. This is a good place for updating it, as | ||
126 | * we won't ever return from this function (so the invalid | ||
127 | * canaries already on the stack won't ever trigger). | ||
128 | */ | ||
129 | boot_init_stack_canary(); | ||
130 | |||
112 | /* endless idle loop with no priority at all */ | 131 | /* endless idle loop with no priority at all */ |
113 | while (1) { | 132 | while (1) { |
114 | tick_nohz_stop_sched_tick(1); | 133 | tick_nohz_stop_sched_tick(1); |
@@ -149,14 +168,18 @@ void __show_regs(struct pt_regs *regs, int all) | |||
149 | unsigned long d0, d1, d2, d3, d6, d7; | 168 | unsigned long d0, d1, d2, d3, d6, d7; |
150 | unsigned int fsindex, gsindex; | 169 | unsigned int fsindex, gsindex; |
151 | unsigned int ds, cs, es; | 170 | unsigned int ds, cs, es; |
171 | const char *board; | ||
152 | 172 | ||
153 | printk("\n"); | 173 | printk("\n"); |
154 | print_modules(); | 174 | print_modules(); |
155 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", | 175 | board = dmi_get_system_info(DMI_PRODUCT_NAME); |
176 | if (!board) | ||
177 | board = ""; | ||
178 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", | ||
156 | current->pid, current->comm, print_tainted(), | 179 | current->pid, current->comm, print_tainted(), |
157 | init_utsname()->release, | 180 | init_utsname()->release, |
158 | (int)strcspn(init_utsname()->version, " "), | 181 | (int)strcspn(init_utsname()->version, " "), |
159 | init_utsname()->version); | 182 | init_utsname()->version, board); |
160 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | 183 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
161 | printk_address(regs->ip, 1); | 184 | printk_address(regs->ip, 1); |
162 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, | 185 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
@@ -235,14 +258,8 @@ void exit_thread(void) | |||
235 | t->io_bitmap_max = 0; | 258 | t->io_bitmap_max = 0; |
236 | put_cpu(); | 259 | put_cpu(); |
237 | } | 260 | } |
238 | #ifdef CONFIG_X86_DS | 261 | |
239 | /* Free any DS contexts that have not been properly released. */ | 262 | ds_exit_thread(current); |
240 | if (unlikely(t->ds_ctx)) { | ||
241 | /* we clear debugctl to make sure DS is not used. */ | ||
242 | update_debugctlmsr(0); | ||
243 | ds_free(t->ds_ctx); | ||
244 | } | ||
245 | #endif /* CONFIG_X86_DS */ | ||
246 | } | 263 | } |
247 | 264 | ||
248 | void flush_thread(void) | 265 | void flush_thread(void) |
@@ -372,6 +389,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
372 | if (err) | 389 | if (err) |
373 | goto out; | 390 | goto out; |
374 | } | 391 | } |
392 | |||
393 | ds_copy_thread(p, me); | ||
394 | |||
395 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
396 | p->thread.debugctlmsr = 0; | ||
397 | |||
375 | err = 0; | 398 | err = 0; |
376 | out: | 399 | out: |
377 | if (err && p->thread.io_bitmap_ptr) { | 400 | if (err && p->thread.io_bitmap_ptr) { |
@@ -390,7 +413,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
390 | load_gs_index(0); | 413 | load_gs_index(0); |
391 | regs->ip = new_ip; | 414 | regs->ip = new_ip; |
392 | regs->sp = new_sp; | 415 | regs->sp = new_sp; |
393 | write_pda(oldrsp, new_sp); | 416 | percpu_write(old_rsp, new_sp); |
394 | regs->cs = __USER_CS; | 417 | regs->cs = __USER_CS; |
395 | regs->ss = __USER_DS; | 418 | regs->ss = __USER_DS; |
396 | regs->flags = 0x200; | 419 | regs->flags = 0x200; |
@@ -470,35 +493,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
470 | struct tss_struct *tss) | 493 | struct tss_struct *tss) |
471 | { | 494 | { |
472 | struct thread_struct *prev, *next; | 495 | struct thread_struct *prev, *next; |
473 | unsigned long debugctl; | ||
474 | 496 | ||
475 | prev = &prev_p->thread, | 497 | prev = &prev_p->thread, |
476 | next = &next_p->thread; | 498 | next = &next_p->thread; |
477 | 499 | ||
478 | debugctl = prev->debugctlmsr; | 500 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
479 | 501 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
480 | #ifdef CONFIG_X86_DS | 502 | ds_switch_to(prev_p, next_p); |
481 | { | 503 | else if (next->debugctlmsr != prev->debugctlmsr) |
482 | unsigned long ds_prev = 0, ds_next = 0; | ||
483 | |||
484 | if (prev->ds_ctx) | ||
485 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
486 | if (next->ds_ctx) | ||
487 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
488 | |||
489 | if (ds_next != ds_prev) { | ||
490 | /* | ||
491 | * We clear debugctl to make sure DS | ||
492 | * is not in use when we change it: | ||
493 | */ | ||
494 | debugctl = 0; | ||
495 | update_debugctlmsr(0); | ||
496 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
497 | } | ||
498 | } | ||
499 | #endif /* CONFIG_X86_DS */ | ||
500 | |||
501 | if (next->debugctlmsr != debugctl) | ||
502 | update_debugctlmsr(next->debugctlmsr); | 504 | update_debugctlmsr(next->debugctlmsr); |
503 | 505 | ||
504 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 506 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -533,14 +535,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
533 | */ | 535 | */ |
534 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 536 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
535 | } | 537 | } |
536 | |||
537 | #ifdef CONFIG_X86_PTRACE_BTS | ||
538 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
539 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
540 | |||
541 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
542 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
543 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
544 | } | 538 | } |
545 | 539 | ||
546 | /* | 540 | /* |
@@ -551,8 +545,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
551 | * - could test fs/gs bitsliced | 545 | * - could test fs/gs bitsliced |
552 | * | 546 | * |
553 | * Kprobes not supported here. Set the probe on schedule instead. | 547 | * Kprobes not supported here. Set the probe on schedule instead. |
548 | * Function graph tracer is not supported either. | ||
554 | */ | 549 | */ |
555 | struct task_struct * | 550 | __notrace_funcgraph struct task_struct * |
556 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 551 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
557 | { | 552 | { |
558 | struct thread_struct *prev = &prev_p->thread; | 553 | struct thread_struct *prev = &prev_p->thread; |
@@ -639,21 +634,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
639 | /* | 634 | /* |
640 | * Switch the PDA and FPU contexts. | 635 | * Switch the PDA and FPU contexts. |
641 | */ | 636 | */ |
642 | prev->usersp = read_pda(oldrsp); | 637 | prev->usersp = percpu_read(old_rsp); |
643 | write_pda(oldrsp, next->usersp); | 638 | percpu_write(old_rsp, next->usersp); |
644 | write_pda(pcurrent, next_p); | 639 | percpu_write(current_task, next_p); |
645 | 640 | ||
646 | write_pda(kernelstack, | 641 | percpu_write(kernel_stack, |
647 | (unsigned long)task_stack_page(next_p) + | 642 | (unsigned long)task_stack_page(next_p) + |
648 | THREAD_SIZE - PDA_STACKOFFSET); | 643 | THREAD_SIZE - KERNEL_STACK_OFFSET); |
649 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
650 | write_pda(stack_canary, next_p->stack_canary); | ||
651 | /* | ||
652 | * Build time only check to make sure the stack_canary is at | ||
653 | * offset 40 in the pda; this is a gcc ABI requirement | ||
654 | */ | ||
655 | BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); | ||
656 | #endif | ||
657 | 644 | ||
658 | /* | 645 | /* |
659 | * Now maybe reload the debug registers and handle I/O bitmaps | 646 | * Now maybe reload the debug registers and handle I/O bitmaps |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a6d8c12e10d..7ec39ab37a2d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) | |||
75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
76 | { | 76 | { |
77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
78 | regno >>= 2; | 78 | return ®s->bx + (regno >> 2); |
79 | if (regno > FS) | ||
80 | --regno; | ||
81 | return ®s->bx + regno; | ||
82 | } | 79 | } |
83 | 80 | ||
84 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | 81 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) |
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | |||
90 | if (offset != offsetof(struct user_regs_struct, gs)) | 87 | if (offset != offsetof(struct user_regs_struct, gs)) |
91 | retval = *pt_regs_access(task_pt_regs(task), offset); | 88 | retval = *pt_regs_access(task_pt_regs(task), offset); |
92 | else { | 89 | else { |
93 | retval = task->thread.gs; | ||
94 | if (task == current) | 90 | if (task == current) |
95 | savesegment(gs, retval); | 91 | retval = get_user_gs(task_pt_regs(task)); |
92 | else | ||
93 | retval = task_user_gs(task); | ||
96 | } | 94 | } |
97 | return retval; | 95 | return retval; |
98 | } | 96 | } |
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task, | |||
126 | break; | 124 | break; |
127 | 125 | ||
128 | case offsetof(struct user_regs_struct, gs): | 126 | case offsetof(struct user_regs_struct, gs): |
129 | task->thread.gs = value; | ||
130 | if (task == current) | 127 | if (task == current) |
131 | /* | 128 | set_user_gs(task_pt_regs(task), value); |
132 | * The user-mode %gs is not affected by | 129 | else |
133 | * kernel entry, so we must update the CPU. | 130 | task_user_gs(task) = value; |
134 | */ | ||
135 | loadsegment(gs, value); | ||
136 | } | 131 | } |
137 | 132 | ||
138 | return 0; | 133 | return 0; |
@@ -581,158 +576,91 @@ static int ioperm_get(struct task_struct *target, | |||
581 | } | 576 | } |
582 | 577 | ||
583 | #ifdef CONFIG_X86_PTRACE_BTS | 578 | #ifdef CONFIG_X86_PTRACE_BTS |
584 | /* | ||
585 | * The configuration for a particular BTS hardware implementation. | ||
586 | */ | ||
587 | struct bts_configuration { | ||
588 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
589 | unsigned char sizeof_bts; | ||
590 | /* the size of a field in the BTS record in bytes */ | ||
591 | unsigned char sizeof_field; | ||
592 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
593 | unsigned long debugctl_mask; | ||
594 | }; | ||
595 | static struct bts_configuration bts_cfg; | ||
596 | |||
597 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
598 | |||
599 | |||
600 | /* | ||
601 | * Branch Trace Store (BTS) uses the following format. Different | ||
602 | * architectures vary in the size of those fields. | ||
603 | * - source linear address | ||
604 | * - destination linear address | ||
605 | * - flags | ||
606 | * | ||
607 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
608 | * architectures use 32bit pointers in 32bit mode. | ||
609 | * | ||
610 | * We compute the base address for the first 8 fields based on: | ||
611 | * - the field size stored in the DS configuration | ||
612 | * - the relative field position | ||
613 | * | ||
614 | * In order to store additional information in the BTS buffer, we use | ||
615 | * a special source address to indicate that the record requires | ||
616 | * special interpretation. | ||
617 | * | ||
618 | * Netburst indicated via a bit in the flags field whether the branch | ||
619 | * was predicted; this is ignored. | ||
620 | */ | ||
621 | |||
622 | enum bts_field { | ||
623 | bts_from = 0, | ||
624 | bts_to, | ||
625 | bts_flags, | ||
626 | |||
627 | bts_escape = (unsigned long)-1, | ||
628 | bts_qual = bts_to, | ||
629 | bts_jiffies = bts_flags | ||
630 | }; | ||
631 | |||
632 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
633 | { | ||
634 | base += (bts_cfg.sizeof_field * field); | ||
635 | return *(unsigned long *)base; | ||
636 | } | ||
637 | |||
638 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | ||
639 | { | ||
640 | base += (bts_cfg.sizeof_field * field); | ||
641 | (*(unsigned long *)base) = val; | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Translate a BTS record from the raw format into the bts_struct format | ||
646 | * | ||
647 | * out (out): bts_struct interpretation | ||
648 | * raw: raw BTS record | ||
649 | */ | ||
650 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
651 | { | ||
652 | memset(out, 0, sizeof(*out)); | ||
653 | if (bts_get(raw, bts_from) == bts_escape) { | ||
654 | out->qualifier = bts_get(raw, bts_qual); | ||
655 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
656 | } else { | ||
657 | out->qualifier = BTS_BRANCH; | ||
658 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
659 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
660 | } | ||
661 | } | ||
662 | |||
663 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 579 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
664 | struct bts_struct __user *out) | 580 | struct bts_struct __user *out) |
665 | { | 581 | { |
666 | struct bts_struct ret; | 582 | const struct bts_trace *trace; |
667 | const void *bts_record; | 583 | struct bts_struct bts; |
668 | size_t bts_index, bts_end; | 584 | const unsigned char *at; |
669 | int error; | 585 | int error; |
670 | 586 | ||
671 | error = ds_get_bts_end(child, &bts_end); | 587 | trace = ds_read_bts(child->bts); |
672 | if (error < 0) | 588 | if (!trace) |
673 | return error; | 589 | return -EPERM; |
674 | |||
675 | if (bts_end <= index) | ||
676 | return -EINVAL; | ||
677 | 590 | ||
678 | error = ds_get_bts_index(child, &bts_index); | 591 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
679 | if (error < 0) | 592 | if ((void *)at < trace->ds.begin) |
680 | return error; | 593 | at += (trace->ds.n * trace->ds.size); |
681 | 594 | ||
682 | /* translate the ptrace bts index into the ds bts index */ | 595 | if (!trace->read) |
683 | bts_index += bts_end - (index + 1); | 596 | return -EOPNOTSUPP; |
684 | if (bts_end <= bts_index) | ||
685 | bts_index -= bts_end; | ||
686 | 597 | ||
687 | error = ds_access_bts(child, bts_index, &bts_record); | 598 | error = trace->read(child->bts, at, &bts); |
688 | if (error < 0) | 599 | if (error < 0) |
689 | return error; | 600 | return error; |
690 | 601 | ||
691 | ptrace_bts_translate_record(&ret, bts_record); | 602 | if (copy_to_user(out, &bts, sizeof(bts))) |
692 | |||
693 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
694 | return -EFAULT; | 603 | return -EFAULT; |
695 | 604 | ||
696 | return sizeof(ret); | 605 | return sizeof(bts); |
697 | } | 606 | } |
698 | 607 | ||
699 | static int ptrace_bts_drain(struct task_struct *child, | 608 | static int ptrace_bts_drain(struct task_struct *child, |
700 | long size, | 609 | long size, |
701 | struct bts_struct __user *out) | 610 | struct bts_struct __user *out) |
702 | { | 611 | { |
703 | struct bts_struct ret; | 612 | const struct bts_trace *trace; |
704 | const unsigned char *raw; | 613 | const unsigned char *at; |
705 | size_t end, i; | 614 | int error, drained = 0; |
706 | int error; | ||
707 | 615 | ||
708 | error = ds_get_bts_index(child, &end); | 616 | trace = ds_read_bts(child->bts); |
709 | if (error < 0) | 617 | if (!trace) |
710 | return error; | 618 | return -EPERM; |
619 | |||
620 | if (!trace->read) | ||
621 | return -EOPNOTSUPP; | ||
711 | 622 | ||
712 | if (size < (end * sizeof(struct bts_struct))) | 623 | if (size < (trace->ds.top - trace->ds.begin)) |
713 | return -EIO; | 624 | return -EIO; |
714 | 625 | ||
715 | error = ds_access_bts(child, 0, (const void **)&raw); | 626 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
716 | if (error < 0) | 627 | out++, drained++, at += trace->ds.size) { |
717 | return error; | 628 | struct bts_struct bts; |
629 | int error; | ||
718 | 630 | ||
719 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { | 631 | error = trace->read(child->bts, at, &bts); |
720 | ptrace_bts_translate_record(&ret, raw); | 632 | if (error < 0) |
633 | return error; | ||
721 | 634 | ||
722 | if (copy_to_user(out, &ret, sizeof(ret))) | 635 | if (copy_to_user(out, &bts, sizeof(bts))) |
723 | return -EFAULT; | 636 | return -EFAULT; |
724 | } | 637 | } |
725 | 638 | ||
726 | error = ds_clear_bts(child); | 639 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
640 | |||
641 | error = ds_reset_bts(child->bts); | ||
727 | if (error < 0) | 642 | if (error < 0) |
728 | return error; | 643 | return error; |
729 | 644 | ||
730 | return end; | 645 | return drained; |
646 | } | ||
647 | |||
648 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) | ||
649 | { | ||
650 | child->bts_buffer = alloc_locked_buffer(size); | ||
651 | if (!child->bts_buffer) | ||
652 | return -ENOMEM; | ||
653 | |||
654 | child->bts_size = size; | ||
655 | |||
656 | return 0; | ||
731 | } | 657 | } |
732 | 658 | ||
733 | static void ptrace_bts_ovfl(struct task_struct *child) | 659 | static void ptrace_bts_free_buffer(struct task_struct *child) |
734 | { | 660 | { |
735 | send_sig(child->thread.bts_ovfl_signal, child, 0); | 661 | free_locked_buffer(child->bts_buffer, child->bts_size); |
662 | child->bts_buffer = NULL; | ||
663 | child->bts_size = 0; | ||
736 | } | 664 | } |
737 | 665 | ||
738 | static int ptrace_bts_config(struct task_struct *child, | 666 | static int ptrace_bts_config(struct task_struct *child, |
@@ -740,114 +668,86 @@ static int ptrace_bts_config(struct task_struct *child, | |||
740 | const struct ptrace_bts_config __user *ucfg) | 668 | const struct ptrace_bts_config __user *ucfg) |
741 | { | 669 | { |
742 | struct ptrace_bts_config cfg; | 670 | struct ptrace_bts_config cfg; |
743 | int error = 0; | 671 | unsigned int flags = 0; |
744 | 672 | ||
745 | error = -EOPNOTSUPP; | ||
746 | if (!bts_cfg.sizeof_bts) | ||
747 | goto errout; | ||
748 | |||
749 | error = -EIO; | ||
750 | if (cfg_size < sizeof(cfg)) | 673 | if (cfg_size < sizeof(cfg)) |
751 | goto errout; | 674 | return -EIO; |
752 | 675 | ||
753 | error = -EFAULT; | ||
754 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 676 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
755 | goto errout; | 677 | return -EFAULT; |
756 | 678 | ||
757 | error = -EINVAL; | 679 | if (child->bts) { |
758 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && | 680 | ds_release_bts(child->bts); |
759 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | 681 | child->bts = NULL; |
760 | goto errout; | 682 | } |
761 | 683 | ||
762 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { | 684 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
763 | ds_ovfl_callback_t ovfl = NULL; | 685 | if (!cfg.signal) |
764 | unsigned int sig = 0; | 686 | return -EINVAL; |
765 | 687 | ||
766 | /* we ignore the error in case we were not tracing child */ | 688 | return -EOPNOTSUPP; |
767 | (void)ds_release_bts(child); | ||
768 | 689 | ||
769 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 690 | child->thread.bts_ovfl_signal = cfg.signal; |
770 | if (!cfg.signal) | 691 | } |
771 | goto errout; | ||
772 | 692 | ||
773 | sig = cfg.signal; | 693 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && |
774 | ovfl = ptrace_bts_ovfl; | 694 | (cfg.size != child->bts_size)) { |
775 | } | 695 | int error; |
776 | 696 | ||
777 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | 697 | ptrace_bts_free_buffer(child); |
778 | if (error < 0) | ||
779 | goto errout; | ||
780 | 698 | ||
781 | child->thread.bts_ovfl_signal = sig; | 699 | error = ptrace_bts_allocate_buffer(child, cfg.size); |
700 | if (error < 0) | ||
701 | return error; | ||
782 | } | 702 | } |
783 | 703 | ||
784 | error = -EINVAL; | ||
785 | if (!child->thread.ds_ctx && cfg.flags) | ||
786 | goto errout; | ||
787 | |||
788 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 704 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
789 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; | 705 | flags |= BTS_USER; |
790 | else | ||
791 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
792 | 706 | ||
793 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 707 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
794 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 708 | flags |= BTS_TIMESTAMPS; |
795 | else | ||
796 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
797 | 709 | ||
798 | error = sizeof(cfg); | 710 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, |
711 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | ||
712 | flags); | ||
713 | if (IS_ERR(child->bts)) { | ||
714 | int error = PTR_ERR(child->bts); | ||
799 | 715 | ||
800 | out: | 716 | ptrace_bts_free_buffer(child); |
801 | if (child->thread.debugctlmsr) | 717 | child->bts = NULL; |
802 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
803 | else | ||
804 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
805 | 718 | ||
806 | return error; | 719 | return error; |
720 | } | ||
807 | 721 | ||
808 | errout: | 722 | return sizeof(cfg); |
809 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
810 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
811 | goto out; | ||
812 | } | 723 | } |
813 | 724 | ||
814 | static int ptrace_bts_status(struct task_struct *child, | 725 | static int ptrace_bts_status(struct task_struct *child, |
815 | long cfg_size, | 726 | long cfg_size, |
816 | struct ptrace_bts_config __user *ucfg) | 727 | struct ptrace_bts_config __user *ucfg) |
817 | { | 728 | { |
729 | const struct bts_trace *trace; | ||
818 | struct ptrace_bts_config cfg; | 730 | struct ptrace_bts_config cfg; |
819 | size_t end; | ||
820 | const void *base, *max; | ||
821 | int error; | ||
822 | 731 | ||
823 | if (cfg_size < sizeof(cfg)) | 732 | if (cfg_size < sizeof(cfg)) |
824 | return -EIO; | 733 | return -EIO; |
825 | 734 | ||
826 | error = ds_get_bts_end(child, &end); | 735 | trace = ds_read_bts(child->bts); |
827 | if (error < 0) | 736 | if (!trace) |
828 | return error; | 737 | return -EPERM; |
829 | |||
830 | error = ds_access_bts(child, /* index = */ 0, &base); | ||
831 | if (error < 0) | ||
832 | return error; | ||
833 | |||
834 | error = ds_access_bts(child, /* index = */ end, &max); | ||
835 | if (error < 0) | ||
836 | return error; | ||
837 | 738 | ||
838 | memset(&cfg, 0, sizeof(cfg)); | 739 | memset(&cfg, 0, sizeof(cfg)); |
839 | cfg.size = (max - base); | 740 | cfg.size = trace->ds.end - trace->ds.begin; |
840 | cfg.signal = child->thread.bts_ovfl_signal; | 741 | cfg.signal = child->thread.bts_ovfl_signal; |
841 | cfg.bts_size = sizeof(struct bts_struct); | 742 | cfg.bts_size = sizeof(struct bts_struct); |
842 | 743 | ||
843 | if (cfg.signal) | 744 | if (cfg.signal) |
844 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 745 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
845 | 746 | ||
846 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 747 | if (trace->ds.flags & BTS_USER) |
847 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
848 | cfg.flags |= PTRACE_BTS_O_TRACE; | 748 | cfg.flags |= PTRACE_BTS_O_TRACE; |
849 | 749 | ||
850 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 750 | if (trace->ds.flags & BTS_TIMESTAMPS) |
851 | cfg.flags |= PTRACE_BTS_O_SCHED; | 751 | cfg.flags |= PTRACE_BTS_O_SCHED; |
852 | 752 | ||
853 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 753 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
@@ -856,110 +756,77 @@ static int ptrace_bts_status(struct task_struct *child, | |||
856 | return sizeof(cfg); | 756 | return sizeof(cfg); |
857 | } | 757 | } |
858 | 758 | ||
859 | static int ptrace_bts_write_record(struct task_struct *child, | 759 | static int ptrace_bts_clear(struct task_struct *child) |
860 | const struct bts_struct *in) | ||
861 | { | 760 | { |
862 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; | 761 | const struct bts_trace *trace; |
863 | 762 | ||
864 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); | 763 | trace = ds_read_bts(child->bts); |
764 | if (!trace) | ||
765 | return -EPERM; | ||
865 | 766 | ||
866 | memset(bts_record, 0, bts_cfg.sizeof_bts); | 767 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
867 | switch (in->qualifier) { | ||
868 | case BTS_INVALID: | ||
869 | break; | ||
870 | 768 | ||
871 | case BTS_BRANCH: | 769 | return ds_reset_bts(child->bts); |
872 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); | 770 | } |
873 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
874 | break; | ||
875 | 771 | ||
876 | case BTS_TASK_ARRIVES: | 772 | static int ptrace_bts_size(struct task_struct *child) |
877 | case BTS_TASK_DEPARTS: | 773 | { |
878 | bts_set(bts_record, bts_from, bts_escape); | 774 | const struct bts_trace *trace; |
879 | bts_set(bts_record, bts_qual, in->qualifier); | ||
880 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); | ||
881 | break; | ||
882 | 775 | ||
883 | default: | 776 | trace = ds_read_bts(child->bts); |
884 | return -EINVAL; | 777 | if (!trace) |
885 | } | 778 | return -EPERM; |
886 | 779 | ||
887 | /* The writing task will be the switched-to task on a context | 780 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
888 | * switch. It needs to write into the switched-from task's BTS | ||
889 | * buffer. */ | ||
890 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); | ||
891 | } | 781 | } |
892 | 782 | ||
893 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 783 | static void ptrace_bts_fork(struct task_struct *tsk) |
894 | enum bts_qualifier qualifier) | ||
895 | { | 784 | { |
896 | struct bts_struct rec = { | 785 | tsk->bts = NULL; |
897 | .qualifier = qualifier, | 786 | tsk->bts_buffer = NULL; |
898 | .variant.jiffies = jiffies_64 | 787 | tsk->bts_size = 0; |
899 | }; | 788 | tsk->thread.bts_ovfl_signal = 0; |
900 | |||
901 | ptrace_bts_write_record(tsk, &rec); | ||
902 | } | 789 | } |
903 | 790 | ||
904 | static const struct bts_configuration bts_cfg_netburst = { | 791 | static void ptrace_bts_untrace(struct task_struct *child) |
905 | .sizeof_bts = sizeof(long) * 3, | 792 | { |
906 | .sizeof_field = sizeof(long), | 793 | if (unlikely(child->bts)) { |
907 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | 794 | ds_release_bts(child->bts); |
908 | }; | 795 | child->bts = NULL; |
796 | |||
797 | /* We cannot update total_vm and locked_vm since | ||
798 | child's mm is already gone. But we can reclaim the | ||
799 | memory. */ | ||
800 | kfree(child->bts_buffer); | ||
801 | child->bts_buffer = NULL; | ||
802 | child->bts_size = 0; | ||
803 | } | ||
804 | } | ||
909 | 805 | ||
910 | static const struct bts_configuration bts_cfg_pentium_m = { | 806 | static void ptrace_bts_detach(struct task_struct *child) |
911 | .sizeof_bts = sizeof(long) * 3, | 807 | { |
912 | .sizeof_field = sizeof(long), | 808 | if (unlikely(child->bts)) { |
913 | .debugctl_mask = (1<<6)|(1<<7) | 809 | ds_release_bts(child->bts); |
914 | }; | 810 | child->bts = NULL; |
915 | 811 | ||
916 | static const struct bts_configuration bts_cfg_core2 = { | 812 | ptrace_bts_free_buffer(child); |
917 | .sizeof_bts = 8 * 3, | 813 | } |
918 | .sizeof_field = 8, | 814 | } |
919 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | 815 | #else |
920 | }; | 816 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} |
817 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
818 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
819 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
921 | 820 | ||
922 | static inline void bts_configure(const struct bts_configuration *cfg) | 821 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) |
923 | { | 822 | { |
924 | bts_cfg = *cfg; | 823 | ptrace_bts_fork(child); |
925 | } | 824 | } |
926 | 825 | ||
927 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | 826 | void x86_ptrace_untrace(struct task_struct *child) |
928 | { | 827 | { |
929 | switch (c->x86) { | 828 | ptrace_bts_untrace(child); |
930 | case 0x6: | ||
931 | switch (c->x86_model) { | ||
932 | case 0xD: | ||
933 | case 0xE: /* Pentium M */ | ||
934 | bts_configure(&bts_cfg_pentium_m); | ||
935 | break; | ||
936 | case 0xF: /* Core2 */ | ||
937 | case 0x1C: /* Atom */ | ||
938 | bts_configure(&bts_cfg_core2); | ||
939 | break; | ||
940 | default: | ||
941 | /* sorry, don't know about them */ | ||
942 | break; | ||
943 | } | ||
944 | break; | ||
945 | case 0xF: | ||
946 | switch (c->x86_model) { | ||
947 | case 0x0: | ||
948 | case 0x1: | ||
949 | case 0x2: /* Netburst */ | ||
950 | bts_configure(&bts_cfg_netburst); | ||
951 | break; | ||
952 | default: | ||
953 | /* sorry, don't know about them */ | ||
954 | break; | ||
955 | } | ||
956 | break; | ||
957 | default: | ||
958 | /* sorry, don't know about them */ | ||
959 | break; | ||
960 | } | ||
961 | } | 829 | } |
962 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
963 | 830 | ||
964 | /* | 831 | /* |
965 | * Called by kernel/ptrace.c when detaching.. | 832 | * Called by kernel/ptrace.c when detaching.. |
@@ -972,15 +839,7 @@ void ptrace_disable(struct task_struct *child) | |||
972 | #ifdef TIF_SYSCALL_EMU | 839 | #ifdef TIF_SYSCALL_EMU |
973 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 840 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
974 | #endif | 841 | #endif |
975 | #ifdef CONFIG_X86_PTRACE_BTS | 842 | ptrace_bts_detach(child); |
976 | (void)ds_release_bts(child); | ||
977 | |||
978 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
979 | if (!child->thread.debugctlmsr) | ||
980 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
981 | |||
982 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
983 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
984 | } | 843 | } |
985 | 844 | ||
986 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 845 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -1112,7 +971,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1112 | break; | 971 | break; |
1113 | 972 | ||
1114 | case PTRACE_BTS_SIZE: | 973 | case PTRACE_BTS_SIZE: |
1115 | ret = ds_get_bts_index(child, /* pos = */ NULL); | 974 | ret = ptrace_bts_size(child); |
1116 | break; | 975 | break; |
1117 | 976 | ||
1118 | case PTRACE_BTS_GET: | 977 | case PTRACE_BTS_GET: |
@@ -1121,7 +980,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1121 | break; | 980 | break; |
1122 | 981 | ||
1123 | case PTRACE_BTS_CLEAR: | 982 | case PTRACE_BTS_CLEAR: |
1124 | ret = ds_clear_bts(child); | 983 | ret = ptrace_bts_clear(child); |
1125 | break; | 984 | break; |
1126 | 985 | ||
1127 | case PTRACE_BTS_DRAIN: | 986 | case PTRACE_BTS_DRAIN: |
@@ -1384,6 +1243,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1384 | 1243 | ||
1385 | case PTRACE_GET_THREAD_AREA: | 1244 | case PTRACE_GET_THREAD_AREA: |
1386 | case PTRACE_SET_THREAD_AREA: | 1245 | case PTRACE_SET_THREAD_AREA: |
1246 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1247 | case PTRACE_BTS_CONFIG: | ||
1248 | case PTRACE_BTS_STATUS: | ||
1249 | case PTRACE_BTS_SIZE: | ||
1250 | case PTRACE_BTS_GET: | ||
1251 | case PTRACE_BTS_CLEAR: | ||
1252 | case PTRACE_BTS_DRAIN: | ||
1253 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1387 | return arch_ptrace(child, request, addr, data); | 1254 | return arch_ptrace(child, request, addr, data); |
1388 | 1255 | ||
1389 | default: | 1256 | default: |
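The ptrace side now drives the reworked ds.c interface end to end: ds_request_bts() hands back a tracer object, ds_read_bts() exposes a read-only bts_trace view of the buffer, individual records are decoded through trace->read(), and ds_reset_bts()/ds_release_bts() rewind and tear down. A hedged sketch of that life cycle with error handling trimmed; prototypes are as used above, and the record layout is whatever this series' ds.h defines:

        static int sketch_bts_session(struct task_struct *child,
                                      void *buf, size_t size)
        {
                const struct bts_trace *trace;
                struct bts_tracer *tracer;
                const unsigned char *at;

                tracer = ds_request_bts(child, buf, size, /* ovfl = */ NULL,
                                        /* th = */ (size_t)-1,
                                        BTS_USER | BTS_TIMESTAMPS);
                if (IS_ERR(tracer))
                        return PTR_ERR(tracer);

                /* ... let the child execute for a while ... */

                trace = ds_read_bts(tracer);
                for (at = trace->ds.begin; (void *)at < trace->ds.top;
                     at += trace->ds.size) {
                        struct bts_struct bts;

                        if (trace->read(tracer, at, &bts) < 0)
                                break;
                        /* decode bts.qualifier / bts.variant per ds.h */
                }

                ds_reset_bts(tracer);
                ds_release_bts(tracer);
                return 0;
        }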
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 67465ed89310..309949e9e1c1 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, | |||
168 | ich_force_enable_hpet); | 168 | ich_force_enable_hpet); |
169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, | 169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, |
170 | ich_force_enable_hpet); | 170 | ich_force_enable_hpet); |
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, | ||
172 | ich_force_enable_hpet); | ||
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | 173 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, |
172 | ich_force_enable_hpet); | 174 | ich_force_enable_hpet); |
173 | 175 | ||
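For context: each DECLARE_PCI_FIXUP_HEADER() line above registers a quirk hook that runs while the matching device's config header is parsed, which is how this ICH8_4 HPET force-enable gets wired in. Adding yet another ICH variant would look like the sketch below; PCI_DEVICE_ID_INTEL_EXAMPLE is a made-up identifier for illustration only.

	/* Hypothetical further quirk entry; the device ID macro is invented. */
	DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EXAMPLE,
				 ich_force_enable_hpet);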
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c3cd512484e5..32e8f0af292c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -12,6 +12,9 @@ | |||
12 | #include <asm/proto.h> | 12 | #include <asm/proto.h> |
13 | #include <asm/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | #include <asm/reboot.h> | 14 | #include <asm/reboot.h> |
15 | #include <asm/pci_x86.h> | ||
16 | #include <asm/virtext.h> | ||
17 | #include <asm/cpu.h> | ||
15 | 18 | ||
16 | #ifdef CONFIG_X86_32 | 19 | #ifdef CONFIG_X86_32 |
17 | # include <linux/dmi.h> | 20 | # include <linux/dmi.h> |
@@ -21,8 +24,7 @@ | |||
21 | # include <asm/iommu.h> | 24 | # include <asm/iommu.h> |
22 | #endif | 25 | #endif |
23 | 26 | ||
24 | #include <mach_ipi.h> | 27 | #include <asm/genapic.h> |
25 | |||
26 | 28 | ||
27 | /* | 29 | /* |
28 | * Power off function, if any | 30 | * Power off function, if any |
@@ -39,7 +41,16 @@ int reboot_force; | |||
39 | static int reboot_cpu = -1; | 41 | static int reboot_cpu = -1; |
40 | #endif | 42 | #endif |
41 | 43 | ||
42 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | 44 | /* This is set if we need to go through the 'emergency' path. |
45 | * When machine_emergency_restart() is called, we may be in | ||
46 | * an inconsistent state and won't be able to do a clean shutdown. | ||
47 | */ | ||
48 | static int reboot_emergency; | ||
49 | |||
50 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ | ||
51 | bool port_cf9_safe = false; | ||
52 | |||
53 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | ||
43 | warm Don't set the cold reboot flag | 54 | warm Don't set the cold reboot flag |
44 | cold Set the cold reboot flag | 55 | cold Set the cold reboot flag |
45 | bios Reboot by jumping through the BIOS (only for X86_32) | 56 | bios Reboot by jumping through the BIOS (only for X86_32) |
@@ -48,6 +59,7 @@ static int reboot_cpu = -1; | |||
48 | kbd Use the keyboard controller. cold reset (default) | 59 | kbd Use the keyboard controller. cold reset (default) |
49 | acpi Use the RESET_REG in the FADT | 60 | acpi Use the RESET_REG in the FADT |
50 | efi Use efi reset_system runtime service | 61 | efi Use efi reset_system runtime service |
62 | pci Use the so-called "PCI reset register", CF9 | ||
51 | force Avoid anything that could hang. | 63 | force Avoid anything that could hang. |
52 | */ | 64 | */ |
53 | static int __init reboot_setup(char *str) | 65 | static int __init reboot_setup(char *str) |
@@ -82,6 +94,7 @@ static int __init reboot_setup(char *str) | |||
82 | case 'k': | 94 | case 'k': |
83 | case 't': | 95 | case 't': |
84 | case 'e': | 96 | case 'e': |
97 | case 'p': | ||
85 | reboot_type = *str; | 98 | reboot_type = *str; |
86 | break; | 99 | break; |
87 | 100 | ||
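A usage note, not part of the patch itself: with the new 'p' case accepted above, booting with reboot=p selects the CF9 method documented in the option comment, and it composes with the mode flags as before, e.g. reboot=p,cold.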
@@ -172,6 +185,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
172 | DMI_MATCH(DMI_BOARD_NAME, "0KW626"), | 185 | DMI_MATCH(DMI_BOARD_NAME, "0KW626"), |
173 | }, | 186 | }, |
174 | }, | 187 | }, |
188 | { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ | ||
189 | .callback = set_bios_reboot, | ||
190 | .ident = "Dell OptiPlex 330", | ||
191 | .matches = { | ||
192 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
193 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"), | ||
194 | DMI_MATCH(DMI_BOARD_NAME, "0KP561"), | ||
195 | }, | ||
196 | }, | ||
175 | { /* Handle problems with rebooting on Dell 2400's */ | 197 | { /* Handle problems with rebooting on Dell 2400's */ |
176 | .callback = set_bios_reboot, | 198 | .callback = set_bios_reboot, |
177 | .ident = "Dell PowerEdge 2400", | 199 | .ident = "Dell PowerEdge 2400", |
@@ -354,6 +376,48 @@ static inline void kb_wait(void) | |||
354 | } | 376 | } |
355 | } | 377 | } |
356 | 378 | ||
379 | static void vmxoff_nmi(int cpu, struct die_args *args) | ||
380 | { | ||
381 | cpu_emergency_vmxoff(); | ||
382 | } | ||
383 | |||
384 | /* Use NMIs as IPIs to tell all CPUs to disable virtualization | ||
385 | */ | ||
386 | static void emergency_vmx_disable_all(void) | ||
387 | { | ||
388 | /* Just make sure we won't change CPUs while doing this */ | ||
389 | local_irq_disable(); | ||
390 | |||
391 | /* We need to disable VMX on all CPUs before rebooting, otherwise | ||
392 | * we risk hanging up the machine, because CPUs ignore INIT | ||
393 | * signals when VMX is enabled. | ||
394 | * | ||
395 | * We can't take any locks and we may be in an inconsistent | ||
396 | * state, so we use NMIs as IPIs to tell the other CPUs to disable | ||
397 | * VMX and halt. | ||
398 | * | ||
399 | * For safety, we will avoid running the nmi_shootdown_cpus() | ||
400 | * path unnecessarily, but we don't have a way to check | ||
401 | * whether other CPUs have VMX enabled. So we will call it only if the | ||
402 | * CPU we are running on has VMX enabled. | ||
403 | * | ||
404 | * We will miss cases where VMX is not enabled on all CPUs. This | ||
405 | * shouldn't do much harm because KVM always enables VMX on all | ||
406 | * CPUs anyway. But we can miss it during the small window where KVM | ||
407 | * is still enabling VMX. | ||
408 | */ | ||
409 | if (cpu_has_vmx() && cpu_vmx_enabled()) { | ||
410 | /* Disable VMX on this CPU. | ||
411 | */ | ||
412 | cpu_vmxoff(); | ||
413 | |||
414 | /* Halt and disable VMX on the other CPUs */ | ||
415 | nmi_shootdown_cpus(vmxoff_nmi); | ||
416 | |||
417 | } | ||
418 | } | ||
419 | |||
420 | |||
357 | void __attribute__((weak)) mach_reboot_fixups(void) | 421 | void __attribute__((weak)) mach_reboot_fixups(void) |
358 | { | 422 | { |
359 | } | 423 | } |
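For reference, the cpu_vmxoff() used by emergency_vmx_disable_all() lives in asm/virtext.h; its essential effect is sketched below (illustrative, not a verbatim copy of that header). Clearing CR4.VMXE after VMXOFF is what lets the CPU respond to INIT again.

	#include <asm/system.h>          /* read_cr4()/write_cr4() */
	#include <asm/processor-flags.h> /* X86_CR4_VMXE */

	/* Sketch of leaving VMX root operation; see asm/virtext.h for the
	 * real cpu_vmxoff(). */
	static inline void vmxoff_sketch(void)
	{
		asm volatile("vmxoff" : : : "cc");      /* exit VMX root mode */
		write_cr4(read_cr4() & ~X86_CR4_VMXE);  /* INIT delivered again */
	}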
@@ -362,6 +426,9 @@ static void native_machine_emergency_restart(void) | |||
362 | { | 426 | { |
363 | int i; | 427 | int i; |
364 | 428 | ||
429 | if (reboot_emergency) | ||
430 | emergency_vmx_disable_all(); | ||
431 | |||
365 | /* Tell the BIOS if we want cold or warm reboot */ | 432 | /* Tell the BIOS if we want cold or warm reboot */ |
366 | *((unsigned short *)__va(0x472)) = reboot_mode; | 433 | *((unsigned short *)__va(0x472)) = reboot_mode; |
367 | 434 | ||
@@ -398,12 +465,27 @@ static void native_machine_emergency_restart(void) | |||
398 | reboot_type = BOOT_KBD; | 465 | reboot_type = BOOT_KBD; |
399 | break; | 466 | break; |
400 | 467 | ||
401 | |||
402 | case BOOT_EFI: | 468 | case BOOT_EFI: |
403 | if (efi_enabled) | 469 | if (efi_enabled) |
404 | efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, | 470 | efi.reset_system(reboot_mode ? |
471 | EFI_RESET_WARM : | ||
472 | EFI_RESET_COLD, | ||
405 | EFI_SUCCESS, 0, NULL); | 473 | EFI_SUCCESS, 0, NULL); |
474 | reboot_type = BOOT_KBD; | ||
475 | break; | ||
476 | |||
477 | case BOOT_CF9: | ||
478 | port_cf9_safe = true; | ||
479 | /* fall through */ | ||
406 | 480 | ||
481 | case BOOT_CF9_COND: | ||
482 | if (port_cf9_safe) { | ||
483 | u8 cf9 = inb(0xcf9) & ~6; | ||
484 | outb(cf9|2, 0xcf9); /* Request hard reset */ | ||
485 | udelay(50); | ||
486 | outb(cf9|6, 0xcf9); /* Actually do the reset */ | ||
487 | udelay(50); | ||
488 | } | ||
407 | reboot_type = BOOT_KBD; | 489 | reboot_type = BOOT_KBD; |
408 | break; | 490 | break; |
409 | } | 491 | } |
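The CF9 sequence above matches the reset-control register layout on typical Intel chipsets: bit 1 (SYS_RST) selects a hard rather than soft reset, and bit 2 (RST_CPU) triggers the reset on its 0->1 edge, hence both bits are cleared first and then written in two steps. A rough userspace illustration of the same accesses follows (hypothetical demo code: it needs root, uses glibc's outb(value, port) argument order, and actually running it would reset the machine).

	#include <sys/io.h>   /* ioperm(), inb(), outb() - glibc, x86 Linux */
	#include <unistd.h>

	int main(void)
	{
		if (ioperm(0xcf9, 1, 1))             /* needs CAP_SYS_RAWIO */
			return 1;
		unsigned char cf9 = inb(0xcf9) & ~6; /* clear SYS_RST, RST_CPU */
		outb(cf9 | 2, 0xcf9);                /* select a hard reset */
		usleep(50);
		outb(cf9 | 6, 0xcf9);                /* raise RST_CPU: reset now */
		return 0;
	}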
@@ -420,7 +502,7 @@ void native_machine_shutdown(void) | |||
420 | 502 | ||
421 | #ifdef CONFIG_X86_32 | 503 | #ifdef CONFIG_X86_32 |
422 | /* See if a command line override has been given */ | 504 | /* See if a command line override has been given */ |
423 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && | 505 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && |
424 | cpu_online(reboot_cpu)) | 506 | cpu_online(reboot_cpu)) |
425 | reboot_cpu_id = reboot_cpu; | 507 | reboot_cpu_id = reboot_cpu; |
426 | #endif | 508 | #endif |
@@ -430,7 +512,7 @@ void native_machine_shutdown(void) | |||
430 | reboot_cpu_id = smp_processor_id(); | 512 | reboot_cpu_id = smp_processor_id(); |
431 | 513 | ||
432 | /* Make certain I only run on the appropriate processor */ | 514 | /* Make certain I only run on the appropriate processor */ |
433 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); | 515 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); |
434 | 516 | ||
435 | /* OK, now that I'm on the appropriate processor, | 517 | /* OK, now that I'm on the appropriate processor, |
436 | * stop all of the others. | 518 | * stop all of the others. |
@@ -453,17 +535,28 @@ void native_machine_shutdown(void) | |||
453 | #endif | 535 | #endif |
454 | } | 536 | } |
455 | 537 | ||
538 | static void __machine_emergency_restart(int emergency) | ||
539 | { | ||
540 | reboot_emergency = emergency; | ||
541 | machine_ops.emergency_restart(); | ||
542 | } | ||
543 | |||
456 | static void native_machine_restart(char *__unused) | 544 | static void native_machine_restart(char *__unused) |
457 | { | 545 | { |
458 | printk("machine restart\n"); | 546 | printk("machine restart\n"); |
459 | 547 | ||
460 | if (!reboot_force) | 548 | if (!reboot_force) |
461 | machine_shutdown(); | 549 | machine_shutdown(); |
462 | machine_emergency_restart(); | 550 | __machine_emergency_restart(0); |
463 | } | 551 | } |
464 | 552 | ||
465 | static void native_machine_halt(void) | 553 | static void native_machine_halt(void) |
466 | { | 554 | { |
555 | /* stop other cpus and apics */ | ||
556 | machine_shutdown(); | ||
557 | |||
558 | /* stop this cpu */ | ||
559 | stop_this_cpu(NULL); | ||
467 | } | 560 | } |
468 | 561 | ||
469 | static void native_machine_power_off(void) | 562 | static void native_machine_power_off(void) |
@@ -498,7 +591,7 @@ void machine_shutdown(void) | |||
498 | 591 | ||
499 | void machine_emergency_restart(void) | 592 | void machine_emergency_restart(void) |
500 | { | 593 | { |
501 | machine_ops.emergency_restart(); | 594 | __machine_emergency_restart(1); |
502 | } | 595 | } |
503 | 596 | ||
504 | void machine_restart(char *cmd) | 597 | void machine_restart(char *cmd) |
@@ -558,10 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
558 | 651 | ||
559 | static void smp_send_nmi_allbutself(void) | 652 | static void smp_send_nmi_allbutself(void) |
560 | { | 653 | { |
561 | cpumask_t mask = cpu_online_map; | 654 | apic->send_IPI_allbutself(NMI_VECTOR); |
562 | cpu_clear(safe_smp_processor_id(), mask); | ||
563 | if (!cpus_empty(mask)) | ||
564 | send_IPI_mask(mask, NMI_VECTOR); | ||
565 | } | 655 | } |
566 | 656 | ||
567 | static struct notifier_block crash_nmi_nb = { | 657 | static struct notifier_block crash_nmi_nb = { |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa6790c1dd3..8fce6c714514 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -81,7 +81,7 @@ | |||
81 | #include <asm/io_apic.h> | 81 | #include <asm/io_apic.h> |
82 | #include <asm/ist.h> | 82 | #include <asm/ist.h> |
83 | #include <asm/vmi.h> | 83 | #include <asm/vmi.h> |
84 | #include <setup_arch.h> | 84 | #include <asm/setup_arch.h> |
85 | #include <asm/bios_ebda.h> | 85 | #include <asm/bios_ebda.h> |
86 | #include <asm/cacheflush.h> | 86 | #include <asm/cacheflush.h> |
87 | #include <asm/processor.h> | 87 | #include <asm/processor.h> |
@@ -89,15 +89,17 @@ | |||
89 | 89 | ||
90 | #include <asm/system.h> | 90 | #include <asm/system.h> |
91 | #include <asm/vsyscall.h> | 91 | #include <asm/vsyscall.h> |
92 | #include <asm/smp.h> | 92 | #include <asm/cpu.h> |
93 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
94 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
95 | #include <asm/iommu.h> | 95 | #include <asm/iommu.h> |
96 | #include <asm/gart.h> | ||
96 | #include <asm/mmu_context.h> | 97 | #include <asm/mmu_context.h> |
97 | #include <asm/proto.h> | 98 | #include <asm/proto.h> |
98 | 99 | ||
99 | #include <mach_apic.h> | 100 | #include <asm/genapic.h> |
100 | #include <asm/paravirt.h> | 101 | #include <asm/paravirt.h> |
102 | #include <asm/hypervisor.h> | ||
101 | 103 | ||
102 | #include <asm/percpu.h> | 104 | #include <asm/percpu.h> |
103 | #include <asm/topology.h> | 105 | #include <asm/topology.h> |
@@ -110,6 +112,20 @@ | |||
110 | #define ARCH_SETUP | 112 | #define ARCH_SETUP |
111 | #endif | 113 | #endif |
112 | 114 | ||
115 | unsigned int boot_cpu_id __read_mostly; | ||
116 | |||
117 | #ifdef CONFIG_X86_64 | ||
118 | int default_cpu_present_to_apicid(int mps_cpu) | ||
119 | { | ||
120 | return __default_cpu_present_to_apicid(mps_cpu); | ||
121 | } | ||
122 | |||
123 | int default_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
124 | { | ||
125 | return __default_check_phys_apicid_present(boot_cpu_physical_apicid); | ||
126 | } | ||
127 | #endif | ||
128 | |||
113 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | 129 | #ifndef CONFIG_DEBUG_BOOT_PARAMS |
114 | struct boot_params __initdata boot_params; | 130 | struct boot_params __initdata boot_params; |
115 | #else | 131 | #else |
@@ -448,6 +464,7 @@ static void __init reserve_early_setup_data(void) | |||
448 | * @size: Size of the crashkernel memory to reserve. | 464 | * @size: Size of the crashkernel memory to reserve. |
449 | * Returns the base address on success, and -1ULL on failure. | 465 | * Returns the base address on success, and -1ULL on failure. |
450 | */ | 466 | */ |
467 | static | ||
451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | 468 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
452 | { | 469 | { |
453 | const unsigned long long alignment = 16<<20; /* 16M */ | 470 | const unsigned long long alignment = 16<<20; /* 16M */ |
@@ -583,165 +600,27 @@ static int __init setup_elfcorehdr(char *arg) | |||
583 | early_param("elfcorehdr", setup_elfcorehdr); | 600 | early_param("elfcorehdr", setup_elfcorehdr); |
584 | #endif | 601 | #endif |
585 | 602 | ||
586 | static struct x86_quirks default_x86_quirks __initdata; | 603 | static int __init default_update_genapic(void) |
587 | |||
588 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
589 | |||
590 | /* | ||
591 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
592 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
593 | * remaining free memory in that area and fill it with a distinct | ||
594 | * pattern. | ||
595 | */ | ||
596 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
597 | #define MAX_SCAN_AREAS 8 | ||
598 | |||
599 | static int __read_mostly memory_corruption_check = -1; | ||
600 | |||
601 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
602 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
603 | |||
604 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
605 | static int num_scan_areas; | ||
606 | |||
607 | |||
608 | static int set_corruption_check(char *arg) | ||
609 | { | 604 | { |
610 | char *end; | 605 | #ifdef CONFIG_SMP |
611 | 606 | if (!apic->wakeup_cpu) | |
612 | memory_corruption_check = simple_strtol(arg, &end, 10); | 607 | apic->wakeup_cpu = wakeup_secondary_cpu_via_init; |
613 | |||
614 | return (*end == 0) ? 0 : -EINVAL; | ||
615 | } | ||
616 | early_param("memory_corruption_check", set_corruption_check); | ||
617 | |||
618 | static int set_corruption_check_period(char *arg) | ||
619 | { | ||
620 | char *end; | ||
621 | |||
622 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
623 | |||
624 | return (*end == 0) ? 0 : -EINVAL; | ||
625 | } | ||
626 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
627 | |||
628 | static int set_corruption_check_size(char *arg) | ||
629 | { | ||
630 | char *end; | ||
631 | unsigned size; | ||
632 | |||
633 | size = memparse(arg, &end); | ||
634 | |||
635 | if (*end == '\0') | ||
636 | corruption_check_size = size; | ||
637 | |||
638 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
639 | } | ||
640 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
641 | |||
642 | |||
643 | static void __init setup_bios_corruption_check(void) | ||
644 | { | ||
645 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
646 | |||
647 | if (memory_corruption_check == -1) { | ||
648 | memory_corruption_check = | ||
649 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
650 | 1 | ||
651 | #else | ||
652 | 0 | ||
653 | #endif | 608 | #endif |
654 | ; | ||
655 | } | ||
656 | |||
657 | if (corruption_check_size == 0) | ||
658 | memory_corruption_check = 0; | ||
659 | |||
660 | if (!memory_corruption_check) | ||
661 | return; | ||
662 | |||
663 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
664 | 609 | ||
665 | while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 610 | return 0; |
666 | u64 size; | ||
667 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
668 | |||
669 | if (addr == 0) | ||
670 | break; | ||
671 | |||
672 | if ((addr + size) > corruption_check_size) | ||
673 | size = corruption_check_size - addr; | ||
674 | |||
675 | if (size == 0) | ||
676 | break; | ||
677 | |||
678 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
679 | scan_areas[num_scan_areas].addr = addr; | ||
680 | scan_areas[num_scan_areas].size = size; | ||
681 | num_scan_areas++; | ||
682 | |||
683 | /* Assume we've already mapped this early memory */ | ||
684 | memset(__va(addr), 0, size); | ||
685 | |||
686 | addr += size; | ||
687 | } | ||
688 | |||
689 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
690 | num_scan_areas); | ||
691 | update_e820(); | ||
692 | } | ||
693 | |||
694 | static struct timer_list periodic_check_timer; | ||
695 | |||
696 | void check_for_bios_corruption(void) | ||
697 | { | ||
698 | int i; | ||
699 | int corruption = 0; | ||
700 | |||
701 | if (!memory_corruption_check) | ||
702 | return; | ||
703 | |||
704 | for(i = 0; i < num_scan_areas; i++) { | ||
705 | unsigned long *addr = __va(scan_areas[i].addr); | ||
706 | unsigned long size = scan_areas[i].size; | ||
707 | |||
708 | for(; size; addr++, size -= sizeof(unsigned long)) { | ||
709 | if (!*addr) | ||
710 | continue; | ||
711 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
712 | addr, __pa(addr), *addr); | ||
713 | corruption = 1; | ||
714 | *addr = 0; | ||
715 | } | ||
716 | } | ||
717 | |||
718 | WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
719 | } | ||
720 | |||
721 | static void periodic_check_for_corruption(unsigned long data) | ||
722 | { | ||
723 | check_for_bios_corruption(); | ||
724 | mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); | ||
725 | } | 611 | } |
726 | 612 | ||
727 | void start_periodic_check_for_corruption(void) | 613 | static struct x86_quirks default_x86_quirks __initdata = { |
728 | { | 614 | .update_genapic = default_update_genapic, |
729 | if (!memory_corruption_check || corruption_check_period == 0) | 615 | }; |
730 | return; | ||
731 | |||
732 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
733 | corruption_check_period); | ||
734 | 616 | ||
735 | init_timer(&periodic_check_timer); | 617 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; |
736 | periodic_check_timer.function = &periodic_check_for_corruption; | ||
737 | periodic_check_for_corruption(0); | ||
738 | } | ||
739 | #endif | ||
740 | 618 | ||
619 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
741 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 620 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
742 | { | 621 | { |
743 | printk(KERN_NOTICE | 622 | printk(KERN_NOTICE |
744 | "%s detected: BIOS may corrupt low RAM, working it around.\n", | 623 | "%s detected: BIOS may corrupt low RAM, working around it.\n", |
745 | d->ident); | 624 | d->ident); |
746 | 625 | ||
747 | e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); | 626 | e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); |
@@ -749,6 +628,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | |||
749 | 628 | ||
750 | return 0; | 629 | return 0; |
751 | } | 630 | } |
631 | #endif | ||
752 | 632 | ||
753 | /* List of systems that have known low memory corruption BIOS problems */ | 633 | /* List of systems that have known low memory corruption BIOS problems */ |
754 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | 634 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { |
@@ -764,7 +644,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
764 | .callback = dmi_low_memory_corruption, | 644 | .callback = dmi_low_memory_corruption, |
765 | .ident = "Phoenix BIOS", | 645 | .ident = "Phoenix BIOS", |
766 | .matches = { | 646 | .matches = { |
767 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), | 647 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), |
768 | }, | 648 | }, |
769 | }, | 649 | }, |
770 | #endif | 650 | #endif |
@@ -794,6 +674,9 @@ void __init setup_arch(char **cmdline_p) | |||
794 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 674 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
795 | #endif | 675 | #endif |
796 | 676 | ||
677 | /* VMI may relocate the fixmap; do this before touching the ioremap area */ | ||
678 | vmi_init(); | ||
679 | |||
797 | early_cpu_init(); | 680 | early_cpu_init(); |
798 | early_ioremap_init(); | 681 | early_ioremap_init(); |
799 | 682 | ||
@@ -880,13 +763,8 @@ void __init setup_arch(char **cmdline_p) | |||
880 | check_efer(); | 763 | check_efer(); |
881 | #endif | 764 | #endif |
882 | 765 | ||
883 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | 766 | /* Must be before kernel pagetables are set up */ |
884 | /* | 767 | vmi_activate(); |
885 | * Must be before kernel pagetables are setup | ||
886 | * or fixmap area is touched. | ||
887 | */ | ||
888 | vmi_init(); | ||
889 | #endif | ||
890 | 768 | ||
891 | /* after early param, so could get panic from serial */ | 769 | /* after early param, so could get panic from serial */ |
892 | reserve_early_setup_data(); | 770 | reserve_early_setup_data(); |
@@ -909,6 +787,12 @@ void __init setup_arch(char **cmdline_p) | |||
909 | 787 | ||
910 | dmi_check_system(bad_bios_dmi_table); | 788 | dmi_check_system(bad_bios_dmi_table); |
911 | 789 | ||
790 | /* | ||
791 | * VMware detection requires dmi to be available, so this | ||
792 | * needs to be done after dmi_scan_machine, for the BP. | ||
793 | */ | ||
794 | init_hypervisor(&boot_cpu_data); | ||
795 | |||
912 | #ifdef CONFIG_X86_32 | 796 | #ifdef CONFIG_X86_32 |
913 | probe_roms(); | 797 | probe_roms(); |
914 | #endif | 798 | #endif |
@@ -1021,12 +905,11 @@ void __init setup_arch(char **cmdline_p) | |||
1021 | */ | 905 | */ |
1022 | acpi_reserve_bootmem(); | 906 | acpi_reserve_bootmem(); |
1023 | #endif | 907 | #endif |
1024 | #ifdef CONFIG_X86_FIND_SMP_CONFIG | ||
1025 | /* | 908 | /* |
1026 | * Find and reserve possible boot-time SMP configuration: | 909 | * Find and reserve possible boot-time SMP configuration: |
1027 | */ | 910 | */ |
1028 | find_smp_config(); | 911 | find_smp_config(); |
1029 | #endif | 912 | |
1030 | reserve_crashkernel(); | 913 | reserve_crashkernel(); |
1031 | 914 | ||
1032 | #ifdef CONFIG_X86_64 | 915 | #ifdef CONFIG_X86_64 |
@@ -1053,9 +936,7 @@ void __init setup_arch(char **cmdline_p) | |||
1053 | map_vsyscall(); | 936 | map_vsyscall(); |
1054 | #endif | 937 | #endif |
1055 | 938 | ||
1056 | #ifdef CONFIG_X86_GENERICARCH | ||
1057 | generic_apic_probe(); | 939 | generic_apic_probe(); |
1058 | #endif | ||
1059 | 940 | ||
1060 | early_quirks(); | 941 | early_quirks(); |
1061 | 942 | ||
@@ -1082,7 +963,7 @@ void __init setup_arch(char **cmdline_p) | |||
1082 | ioapic_init_mappings(); | 963 | ioapic_init_mappings(); |
1083 | 964 | ||
1084 | /* need to wait until io_apic is mapped */ | 965 | /* need to wait until io_apic is mapped */ |
1085 | nr_irqs = probe_nr_irqs(); | 966 | probe_nr_irqs_gsi(); |
1086 | 967 | ||
1087 | kvm_guest_init(); | 968 | kvm_guest_init(); |
1088 | 969 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb770..d992e6cff730 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -5,133 +5,54 @@ | |||
5 | #include <linux/percpu.h> | 5 | #include <linux/percpu.h> |
6 | #include <linux/kexec.h> | 6 | #include <linux/kexec.h> |
7 | #include <linux/crash_dump.h> | 7 | #include <linux/crash_dump.h> |
8 | #include <asm/smp.h> | 8 | #include <linux/smp.h> |
9 | #include <asm/percpu.h> | 9 | #include <linux/topology.h> |
10 | #include <asm/sections.h> | 10 | #include <asm/sections.h> |
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/setup.h> | 12 | #include <asm/setup.h> |
13 | #include <asm/topology.h> | ||
14 | #include <asm/mpspec.h> | 13 | #include <asm/mpspec.h> |
15 | #include <asm/apicdef.h> | 14 | #include <asm/apicdef.h> |
16 | #include <asm/highmem.h> | 15 | #include <asm/highmem.h> |
16 | #include <asm/proto.h> | ||
17 | #include <asm/cpumask.h> | ||
18 | #include <asm/cpu.h> | ||
19 | #include <asm/stackprotector.h> | ||
17 | 20 | ||
18 | #ifdef CONFIG_X86_LOCAL_APIC | 21 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
19 | unsigned int num_processors; | 22 | # define DBG(x...) printk(KERN_DEBUG x) |
20 | unsigned disabled_cpus __cpuinitdata; | ||
21 | /* Processor that is doing the boot up */ | ||
22 | unsigned int boot_cpu_physical_apicid = -1U; | ||
23 | unsigned int max_physical_apicid; | ||
24 | EXPORT_SYMBOL(boot_cpu_physical_apicid); | ||
25 | |||
26 | /* Bitmask of physically existing CPUs */ | ||
27 | physid_mask_t phys_cpu_present_map; | ||
28 | #endif | ||
29 | |||
30 | /* map cpu index to physical APIC ID */ | ||
31 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | ||
32 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | ||
33 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | ||
34 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | ||
35 | |||
36 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
37 | #define X86_64_NUMA 1 | ||
38 | |||
39 | /* map cpu index to node index */ | ||
40 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | ||
41 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | ||
42 | |||
43 | /* which logical CPUs are on which nodes */ | ||
44 | cpumask_t *node_to_cpumask_map; | ||
45 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
46 | |||
47 | /* setup node_to_cpumask_map */ | ||
48 | static void __init setup_node_to_cpumask_map(void); | ||
49 | |||
50 | #else | 23 | #else |
51 | static inline void setup_node_to_cpumask_map(void) { } | 24 | # define DBG(x...) |
52 | #endif | 25 | #endif |
53 | 26 | ||
54 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) | 27 | DEFINE_PER_CPU(int, cpu_number); |
55 | /* | 28 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
56 | * Copy data used in early init routines from the initial arrays to the | ||
57 | * per cpu data areas. These arrays then become expendable and the | ||
58 | * *_early_ptr's are zeroed indicating that the static arrays are gone. | ||
59 | */ | ||
60 | static void __init setup_per_cpu_maps(void) | ||
61 | { | ||
62 | int cpu; | ||
63 | 29 | ||
64 | for_each_possible_cpu(cpu) { | 30 | #ifdef CONFIG_X86_64 |
65 | per_cpu(x86_cpu_to_apicid, cpu) = | 31 | #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) |
66 | early_per_cpu_map(x86_cpu_to_apicid, cpu); | 32 | #else |
67 | per_cpu(x86_bios_cpu_apicid, cpu) = | 33 | #define BOOT_PERCPU_OFFSET 0 |
68 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | ||
69 | #ifdef X86_64_NUMA | ||
70 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
71 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | ||
72 | #endif | 34 | #endif |
73 | } | ||
74 | 35 | ||
75 | /* indicate the early static arrays will soon be gone */ | 36 | DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; |
76 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | 37 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
77 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | ||
78 | #ifdef X86_64_NUMA | ||
79 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | ||
80 | #endif | ||
81 | } | ||
82 | 38 | ||
83 | #ifdef CONFIG_X86_32 | 39 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { |
84 | /* | 40 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, |
85 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 41 | }; |
86 | * the same way | ||
87 | */ | ||
88 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | ||
89 | EXPORT_SYMBOL(__per_cpu_offset); | 42 | EXPORT_SYMBOL(__per_cpu_offset); |
90 | static inline void setup_cpu_pda_map(void) { } | ||
91 | |||
92 | #elif !defined(CONFIG_SMP) | ||
93 | static inline void setup_cpu_pda_map(void) { } | ||
94 | |||
95 | #else /* CONFIG_SMP && CONFIG_X86_64 */ | ||
96 | 43 | ||
97 | /* | 44 | static inline void setup_percpu_segment(int cpu) |
98 | * Allocate cpu_pda pointer table and array via alloc_bootmem. | ||
99 | */ | ||
100 | static void __init setup_cpu_pda_map(void) | ||
101 | { | 45 | { |
102 | char *pda; | 46 | #ifdef CONFIG_X86_32 |
103 | struct x8664_pda **new_cpu_pda; | 47 | struct desc_struct gdt; |
104 | unsigned long size; | ||
105 | int cpu; | ||
106 | |||
107 | size = roundup(sizeof(struct x8664_pda), cache_line_size()); | ||
108 | |||
109 | /* allocate cpu_pda array and pointer table */ | ||
110 | { | ||
111 | unsigned long tsize = nr_cpu_ids * sizeof(void *); | ||
112 | unsigned long asize = size * (nr_cpu_ids - 1); | ||
113 | |||
114 | tsize = roundup(tsize, cache_line_size()); | ||
115 | new_cpu_pda = alloc_bootmem(tsize + asize); | ||
116 | pda = (char *)new_cpu_pda + tsize; | ||
117 | } | ||
118 | |||
119 | /* initialize pointer table to static pda's */ | ||
120 | for_each_possible_cpu(cpu) { | ||
121 | if (cpu == 0) { | ||
122 | /* leave boot cpu pda in place */ | ||
123 | new_cpu_pda[0] = cpu_pda(0); | ||
124 | continue; | ||
125 | } | ||
126 | new_cpu_pda[cpu] = (struct x8664_pda *)pda; | ||
127 | new_cpu_pda[cpu]->in_bootmem = 1; | ||
128 | pda += size; | ||
129 | } | ||
130 | 48 | ||
131 | /* point to new pointer table */ | 49 | pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, |
132 | _cpu_pda = new_cpu_pda; | 50 | 0x2 | DESCTYPE_S, 0x8); |
133 | } | 51 | gdt.s = 1; |
52 | write_gdt_entry(get_cpu_gdt_table(cpu), | ||
53 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
134 | #endif | 54 | #endif |
55 | } | ||
135 | 56 | ||
136 | /* | 57 | /* |
137 | * Great future plan: | 58 | * Great future plan: |
@@ -140,251 +61,86 @@ static void __init setup_cpu_pda_map(void) | |||
140 | */ | 61 | */ |
141 | void __init setup_per_cpu_areas(void) | 62 | void __init setup_per_cpu_areas(void) |
142 | { | 63 | { |
143 | ssize_t size, old_size; | 64 | ssize_t size; |
144 | char *ptr; | 65 | char *ptr; |
145 | int cpu; | 66 | int cpu; |
146 | unsigned long align = 1; | ||
147 | |||
148 | /* Setup cpu_pda map */ | ||
149 | setup_cpu_pda_map(); | ||
150 | 67 | ||
151 | /* Copy section for each CPU (we discard the original) */ | 68 | /* Copy section for each CPU (we discard the original) */ |
152 | old_size = PERCPU_ENOUGH_ROOM; | 69 | size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); |
153 | align = max_t(unsigned long, PAGE_SIZE, align); | 70 | |
154 | size = roundup(old_size, align); | 71 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
155 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | 72 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); |
156 | size); | 73 | |
74 | pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); | ||
157 | 75 | ||
158 | for_each_possible_cpu(cpu) { | 76 | for_each_possible_cpu(cpu) { |
159 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 77 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
160 | ptr = __alloc_bootmem(size, align, | 78 | ptr = alloc_bootmem_pages(size); |
161 | __pa(MAX_DMA_ADDRESS)); | ||
162 | #else | 79 | #else |
163 | int node = early_cpu_to_node(cpu); | 80 | int node = early_cpu_to_node(cpu); |
164 | if (!node_online(node) || !NODE_DATA(node)) { | 81 | if (!node_online(node) || !NODE_DATA(node)) { |
165 | ptr = __alloc_bootmem(size, align, | 82 | ptr = alloc_bootmem_pages(size); |
166 | __pa(MAX_DMA_ADDRESS)); | 83 | pr_info("cpu %d has no node %d or node-local memory\n", |
167 | printk(KERN_INFO | ||
168 | "cpu %d has no node %d or node-local memory\n", | ||
169 | cpu, node); | 84 | cpu, node); |
170 | if (ptr) | 85 | pr_debug("per cpu data for cpu%d at %016lx\n", |
171 | printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", | 86 | cpu, __pa(ptr)); |
172 | cpu, __pa(ptr)); | 87 | } else { |
173 | } | 88 | ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); |
174 | else { | 89 | pr_debug("per cpu data for cpu%d on node%d at %016lx\n", |
175 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, | 90 | cpu, node, __pa(ptr)); |
176 | __pa(MAX_DMA_ADDRESS)); | ||
177 | if (ptr) | ||
178 | printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", | ||
179 | cpu, node, __pa(ptr)); | ||
180 | } | 91 | } |
181 | #endif | 92 | #endif |
93 | |||
94 | memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); | ||
182 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 95 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
183 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 96 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
97 | per_cpu(cpu_number, cpu) = cpu; | ||
98 | setup_percpu_segment(cpu); | ||
99 | setup_stack_canary_segment(cpu); | ||
100 | /* | ||
101 | * Copy data used in early init routines from the | ||
102 | * initial arrays to the per cpu data areas. These | ||
103 | * arrays then become expendable and the *_early_ptr's | ||
104 | * are zeroed indicating that the static arrays are | ||
105 | * gone. | ||
106 | */ | ||
107 | #ifdef CONFIG_X86_LOCAL_APIC | ||
108 | per_cpu(x86_cpu_to_apicid, cpu) = | ||
109 | early_per_cpu_map(x86_cpu_to_apicid, cpu); | ||
110 | per_cpu(x86_bios_cpu_apicid, cpu) = | ||
111 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | ||
112 | #endif | ||
113 | #ifdef CONFIG_X86_64 | ||
114 | per_cpu(irq_stack_ptr, cpu) = | ||
115 | per_cpu(irq_stack_union.irq_stack, cpu) + | ||
116 | IRQ_STACK_SIZE - 64; | ||
117 | #ifdef CONFIG_NUMA | ||
118 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
119 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | ||
120 | #endif | ||
121 | #endif | ||
122 | /* | ||
123 | * Up to this point, the boot CPU has been using .data.init | ||
124 | * area. Reload any changed state for the boot CPU. | ||
125 | */ | ||
126 | if (cpu == boot_cpu_id) | ||
127 | switch_to_new_gdt(cpu); | ||
128 | |||
129 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); | ||
184 | } | 130 | } |
185 | 131 | ||
186 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | 132 | /* indicate the early static arrays will soon be gone */ |
187 | NR_CPUS, nr_cpu_ids, nr_node_ids); | 133 | #ifdef CONFIG_X86_LOCAL_APIC |
188 | 134 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | |
189 | /* Setup percpu data maps */ | 135 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; |
190 | setup_per_cpu_maps(); | 136 | #endif |
137 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | ||
138 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | ||
139 | #endif | ||
191 | 140 | ||
192 | /* Setup node to cpumask map */ | 141 | /* Setup node to cpumask map */ |
193 | setup_node_to_cpumask_map(); | 142 | setup_node_to_cpumask_map(); |
194 | } | ||
195 | |||
196 | #endif | ||
197 | 143 | ||
198 | #ifdef X86_64_NUMA | 144 | /* Setup cpu initialized, callin, callout masks */ |
199 | 145 | setup_cpu_local_masks(); | |
200 | /* | ||
201 | * Allocate node_to_cpumask_map based on number of available nodes | ||
202 | * Requires node_possible_map to be valid. | ||
203 | * | ||
204 | * Note: node_to_cpumask() is not valid until after this is done. | ||
205 | */ | ||
206 | static void __init setup_node_to_cpumask_map(void) | ||
207 | { | ||
208 | unsigned int node, num = 0; | ||
209 | cpumask_t *map; | ||
210 | |||
211 | /* setup nr_node_ids if not done yet */ | ||
212 | if (nr_node_ids == MAX_NUMNODES) { | ||
213 | for_each_node_mask(node, node_possible_map) | ||
214 | num = node; | ||
215 | nr_node_ids = num + 1; | ||
216 | } | ||
217 | |||
218 | /* allocate the map */ | ||
219 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | ||
220 | |||
221 | pr_debug("Node to cpumask map at %p for %d nodes\n", | ||
222 | map, nr_node_ids); | ||
223 | |||
224 | /* node_to_cpumask() will now work */ | ||
225 | node_to_cpumask_map = map; | ||
226 | } | ||
227 | |||
228 | void __cpuinit numa_set_node(int cpu, int node) | ||
229 | { | ||
230 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | ||
231 | |||
232 | if (cpu_pda(cpu) && node != NUMA_NO_NODE) | ||
233 | cpu_pda(cpu)->nodenumber = node; | ||
234 | |||
235 | if (cpu_to_node_map) | ||
236 | cpu_to_node_map[cpu] = node; | ||
237 | |||
238 | else if (per_cpu_offset(cpu)) | ||
239 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
240 | |||
241 | else | ||
242 | pr_debug("Setting node for non-present cpu %d\n", cpu); | ||
243 | } | ||
244 | |||
245 | void __cpuinit numa_clear_node(int cpu) | ||
246 | { | ||
247 | numa_set_node(cpu, NUMA_NO_NODE); | ||
248 | } | ||
249 | |||
250 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | ||
251 | |||
252 | void __cpuinit numa_add_cpu(int cpu) | ||
253 | { | ||
254 | cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
255 | } | ||
256 | |||
257 | void __cpuinit numa_remove_cpu(int cpu) | ||
258 | { | ||
259 | cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); | ||
260 | } | ||
261 | |||
262 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ | ||
263 | |||
264 | /* | ||
265 | * --------- debug versions of the numa functions --------- | ||
266 | */ | ||
267 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
268 | { | ||
269 | int node = cpu_to_node(cpu); | ||
270 | cpumask_t *mask; | ||
271 | char buf[64]; | ||
272 | |||
273 | if (node_to_cpumask_map == NULL) { | ||
274 | printk(KERN_ERR "node_to_cpumask_map NULL\n"); | ||
275 | dump_stack(); | ||
276 | return; | ||
277 | } | ||
278 | |||
279 | mask = &node_to_cpumask_map[node]; | ||
280 | if (enable) | ||
281 | cpu_set(cpu, *mask); | ||
282 | else | ||
283 | cpu_clear(cpu, *mask); | ||
284 | |||
285 | cpulist_scnprintf(buf, sizeof(buf), *mask); | ||
286 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | ||
287 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); | ||
288 | } | ||
289 | |||
290 | void __cpuinit numa_add_cpu(int cpu) | ||
291 | { | ||
292 | numa_set_cpumask(cpu, 1); | ||
293 | } | ||
294 | |||
295 | void __cpuinit numa_remove_cpu(int cpu) | ||
296 | { | ||
297 | numa_set_cpumask(cpu, 0); | ||
298 | } | ||
299 | |||
300 | int cpu_to_node(int cpu) | ||
301 | { | ||
302 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | ||
303 | printk(KERN_WARNING | ||
304 | "cpu_to_node(%d): usage too early!\n", cpu); | ||
305 | dump_stack(); | ||
306 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
307 | } | ||
308 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
309 | } | ||
310 | EXPORT_SYMBOL(cpu_to_node); | ||
311 | |||
312 | /* | ||
313 | * Same function as cpu_to_node() but used if called before the | ||
314 | * per_cpu areas are setup. | ||
315 | */ | ||
316 | int early_cpu_to_node(int cpu) | ||
317 | { | ||
318 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | ||
319 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
320 | |||
321 | if (!per_cpu_offset(cpu)) { | ||
322 | printk(KERN_WARNING | ||
323 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
324 | dump_stack(); | ||
325 | return NUMA_NO_NODE; | ||
326 | } | ||
327 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
328 | } | 146 | } |
329 | |||
330 | |||
331 | /* empty cpumask */ | ||
332 | static const cpumask_t cpu_mask_none; | ||
333 | |||
334 | /* | ||
335 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | ||
336 | */ | ||
337 | const cpumask_t *_node_to_cpumask_ptr(int node) | ||
338 | { | ||
339 | if (node_to_cpumask_map == NULL) { | ||
340 | printk(KERN_WARNING | ||
341 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", | ||
342 | node); | ||
343 | dump_stack(); | ||
344 | return (const cpumask_t *)&cpu_online_map; | ||
345 | } | ||
346 | if (node >= nr_node_ids) { | ||
347 | printk(KERN_WARNING | ||
348 | "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", | ||
349 | node, nr_node_ids); | ||
350 | dump_stack(); | ||
351 | return &cpu_mask_none; | ||
352 | } | ||
353 | return &node_to_cpumask_map[node]; | ||
354 | } | ||
355 | EXPORT_SYMBOL(_node_to_cpumask_ptr); | ||
356 | |||
357 | /* | ||
358 | * Returns a bitmask of CPUs on Node 'node'. | ||
359 | * | ||
360 | * Side note: this function creates the returned cpumask on the stack | ||
361 | * so with a high NR_CPUS count, excessive stack space is used. The | ||
362 | * node_to_cpumask_ptr function should be used whenever possible. | ||
363 | */ | ||
364 | cpumask_t node_to_cpumask(int node) | ||
365 | { | ||
366 | if (node_to_cpumask_map == NULL) { | ||
367 | printk(KERN_WARNING | ||
368 | "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); | ||
369 | dump_stack(); | ||
370 | return cpu_online_map; | ||
371 | } | ||
372 | if (node >= nr_node_ids) { | ||
373 | printk(KERN_WARNING | ||
374 | "node_to_cpumask(%d): node > nr_node_ids(%d)\n", | ||
375 | node, nr_node_ids); | ||
376 | dump_stack(); | ||
377 | return cpu_mask_none; | ||
378 | } | ||
379 | return node_to_cpumask_map[node]; | ||
380 | } | ||
381 | EXPORT_SYMBOL(node_to_cpumask); | ||
382 | |||
383 | /* | ||
384 | * --------- end of debug versions of the numa functions --------- | ||
385 | */ | ||
386 | |||
387 | #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ | ||
388 | |||
389 | #endif /* X86_64_NUMA */ | ||
390 | |||
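To make the new layout concrete: after setup_per_cpu_areas(), CPU n's copy of any per-CPU variable sits at the variable's link-time address plus __per_cpu_offset[n] (seeded with BOOT_PERCPU_OFFSET above). A conceptual sketch follows; the kernel's real per_cpu() uses segment-based addressing and RELOC_HIDE rather than this plain pointer arithmetic.

	/* Conceptual model only, not the kernel's per_cpu() macro. */
	extern unsigned long __per_cpu_offset[];

	#define per_cpu_sketch(var, cpu) \
		(*(__typeof__(var) *)((char *)&(var) + __per_cpu_offset[(cpu)]))

	/* e.g. per_cpu_sketch(cpu_number, 3) reads CPU 3's cpu_number. */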
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h deleted file mode 100644 index cc673aa55ce4..000000000000 --- a/arch/x86/kernel/sigframe.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | #ifdef CONFIG_X86_32 | ||
2 | struct sigframe { | ||
3 | char __user *pretcode; | ||
4 | int sig; | ||
5 | struct sigcontext sc; | ||
6 | /* | ||
7 | * fpstate is unused. fpstate is moved/allocated after | ||
8 | * retcode[] below. This movement allows the FP state and the | ||
9 | * future state extensions (xsave) to stay together. | ||
10 | * At the same time, retaining the unused fpstate prevents changing | ||
11 | * the offset of extramask[] in the sigframe and thus prevents any | ||
12 | * legacy application from accessing/modifying it. | ||
13 | */ | ||
14 | struct _fpstate fpstate_unused; | ||
15 | unsigned long extramask[_NSIG_WORDS-1]; | ||
16 | char retcode[8]; | ||
17 | /* fp state follows here */ | ||
18 | }; | ||
19 | |||
20 | struct rt_sigframe { | ||
21 | char __user *pretcode; | ||
22 | int sig; | ||
23 | struct siginfo __user *pinfo; | ||
24 | void __user *puc; | ||
25 | struct siginfo info; | ||
26 | struct ucontext uc; | ||
27 | char retcode[8]; | ||
28 | /* fp state follows here */ | ||
29 | }; | ||
30 | #else | ||
31 | struct rt_sigframe { | ||
32 | char __user *pretcode; | ||
33 | struct ucontext uc; | ||
34 | struct siginfo info; | ||
35 | /* fp state follows here */ | ||
36 | }; | ||
37 | |||
38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
39 | sigset_t *set, struct pt_regs *regs); | ||
40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
41 | sigset_t *set, struct pt_regs *regs); | ||
42 | #endif | ||
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c index d6dd057d0f22..7cdcd16885ed 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal.c | |||
@@ -1,36 +1,41 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
3 | * | 4 | * |
4 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | 5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
5 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
6 | */ | 8 | */ |
7 | #include <linux/list.h> | ||
8 | 9 | ||
9 | #include <linux/personality.h> | 10 | #include <linux/sched.h> |
10 | #include <linux/binfmts.h> | 11 | #include <linux/mm.h> |
11 | #include <linux/suspend.h> | 12 | #include <linux/smp.h> |
12 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
13 | #include <linux/ptrace.h> | ||
14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
15 | #include <linux/stddef.h> | ||
16 | #include <linux/unistd.h> | ||
17 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
18 | #include <linux/sched.h> | ||
19 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/ptrace.h> | ||
20 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
21 | #include <linux/elf.h> | 19 | #include <linux/unistd.h> |
22 | #include <linux/smp.h> | 20 | #include <linux/stddef.h> |
23 | #include <linux/mm.h> | 21 | #include <linux/personality.h> |
22 | #include <linux/uaccess.h> | ||
24 | 23 | ||
25 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
26 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
29 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/ia32_unistd.h> | ||
32 | #include <asm/mce.h> | ||
33 | #endif /* CONFIG_X86_64 */ | ||
34 | |||
30 | #include <asm/syscall.h> | 35 | #include <asm/syscall.h> |
31 | #include <asm/syscalls.h> | 36 | #include <asm/syscalls.h> |
32 | 37 | ||
33 | #include "sigframe.h" | 38 | #include <asm/sigframe.h> |
34 | 39 | ||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 40 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
36 | 41 | ||
@@ -45,99 +50,24 @@ | |||
45 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
46 | #endif | 51 | #endif |
47 | 52 | ||
48 | /* | 53 | #define COPY(x) do { \ |
49 | * Atomically swap in the new signal mask, and wait for a signal. | 54 | get_user_ex(regs->x, &sc->x); \ |
50 | */ | 55 | } while (0) |
51 | asmlinkage int | ||
52 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
53 | { | ||
54 | mask &= _BLOCKABLE; | ||
55 | spin_lock_irq(¤t->sighand->siglock); | ||
56 | current->saved_sigmask = current->blocked; | ||
57 | siginitset(¤t->blocked, mask); | ||
58 | recalc_sigpending(); | ||
59 | spin_unlock_irq(¤t->sighand->siglock); | ||
60 | |||
61 | current->state = TASK_INTERRUPTIBLE; | ||
62 | schedule(); | ||
63 | set_restore_sigmask(); | ||
64 | 56 | ||
65 | return -ERESTARTNOHAND; | 57 | #define GET_SEG(seg) ({ \ |
66 | } | 58 | unsigned short tmp; \ |
59 | get_user_ex(tmp, &sc->seg); \ | ||
60 | tmp; \ | ||
61 | }) | ||
67 | 62 | ||
68 | asmlinkage int | 63 | #define COPY_SEG(seg) do { \ |
69 | sys_sigaction(int sig, const struct old_sigaction __user *act, | 64 | regs->seg = GET_SEG(seg); \ |
70 | struct old_sigaction __user *oact) | 65 | } while (0) |
71 | { | ||
72 | struct k_sigaction new_ka, old_ka; | ||
73 | int ret; | ||
74 | 66 | ||
75 | if (act) { | 67 | #define COPY_SEG_CPL3(seg) do { \ |
76 | old_sigset_t mask; | 68 | regs->seg = GET_SEG(seg) | 3; \ |
69 | } while (0) | ||
77 | 70 | ||
78 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
79 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
80 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
81 | return -EFAULT; | ||
82 | |||
83 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
84 | __get_user(mask, &act->sa_mask); | ||
85 | siginitset(&new_ka.sa.sa_mask, mask); | ||
86 | } | ||
87 | |||
88 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
89 | |||
90 | if (!ret && oact) { | ||
91 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
92 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
93 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
94 | return -EFAULT; | ||
95 | |||
96 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
97 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
98 | } | ||
99 | |||
100 | return ret; | ||
101 | } | ||
102 | |||
103 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
104 | { | ||
105 | /* | ||
106 | * This is needed to make gcc realize it doesn't own the | ||
107 | * "struct pt_regs" | ||
108 | */ | ||
109 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
110 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
111 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
112 | |||
113 | return do_sigaltstack(uss, uoss, regs->sp); | ||
114 | } | ||
115 | |||
116 | #define COPY(x) { \ | ||
117 | err |= __get_user(regs->x, &sc->x); \ | ||
118 | } | ||
119 | |||
120 | #define COPY_SEG(seg) { \ | ||
121 | unsigned short tmp; \ | ||
122 | err |= __get_user(tmp, &sc->seg); \ | ||
123 | regs->seg = tmp; \ | ||
124 | } | ||
125 | |||
126 | #define COPY_SEG_STRICT(seg) { \ | ||
127 | unsigned short tmp; \ | ||
128 | err |= __get_user(tmp, &sc->seg); \ | ||
129 | regs->seg = tmp | 3; \ | ||
130 | } | ||
131 | |||
132 | #define GET_SEG(seg) { \ | ||
133 | unsigned short tmp; \ | ||
134 | err |= __get_user(tmp, &sc->seg); \ | ||
135 | loadsegment(seg, tmp); \ | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Do a signal return; undo the signal stack. | ||
140 | */ | ||
141 | static int | 71 | static int |
142 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 72 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
143 | unsigned long *pax) | 73 | unsigned long *pax) |
@@ -149,150 +79,136 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
149 | /* Always make any pending restarted system calls return -EINTR */ | 79 | /* Always make any pending restarted system calls return -EINTR */ |
150 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 80 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
151 | 81 | ||
152 | GET_SEG(gs); | 82 | get_user_try { |
153 | COPY_SEG(fs); | ||
154 | COPY_SEG(es); | ||
155 | COPY_SEG(ds); | ||
156 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | ||
157 | COPY(dx); COPY(cx); COPY(ip); | ||
158 | COPY_SEG_STRICT(cs); | ||
159 | COPY_SEG_STRICT(ss); | ||
160 | |||
161 | err |= __get_user(tmpflags, &sc->flags); | ||
162 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
163 | regs->orig_ax = -1; /* disable syscall checks */ | ||
164 | |||
165 | err |= __get_user(buf, &sc->fpstate); | ||
166 | err |= restore_i387_xstate(buf); | ||
167 | |||
168 | err |= __get_user(*pax, &sc->ax); | ||
169 | return err; | ||
170 | } | ||
171 | 83 | ||
172 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | 84 | #ifdef CONFIG_X86_32 |
173 | { | 85 | set_user_gs(regs, GET_SEG(gs)); |
174 | struct sigframe __user *frame; | 86 | COPY_SEG(fs); |
175 | struct pt_regs *regs; | 87 | COPY_SEG(es); |
176 | unsigned long ax; | 88 | COPY_SEG(ds); |
177 | sigset_t set; | 89 | #endif /* CONFIG_X86_32 */ |
178 | 90 | ||
179 | regs = (struct pt_regs *) &__unused; | 91 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
180 | frame = (struct sigframe __user *)(regs->sp - 8); | 92 | COPY(dx); COPY(cx); COPY(ip); |
181 | 93 | ||
182 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 94 | #ifdef CONFIG_X86_64 |
183 | goto badframe; | 95 | COPY(r8); |
184 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | 96 | COPY(r9); |
185 | && __copy_from_user(&set.sig[1], &frame->extramask, | 97 | COPY(r10); |
186 | sizeof(frame->extramask)))) | 98 | COPY(r11); |
187 | goto badframe; | 99 | COPY(r12); |
100 | COPY(r13); | ||
101 | COPY(r14); | ||
102 | COPY(r15); | ||
103 | #endif /* CONFIG_X86_64 */ | ||
188 | 104 | ||
189 | sigdelsetmask(&set, ~_BLOCKABLE); | 105 | #ifdef CONFIG_X86_32 |
190 | spin_lock_irq(¤t->sighand->siglock); | 106 | COPY_SEG_CPL3(cs); |
191 | current->blocked = set; | 107 | COPY_SEG_CPL3(ss); |
192 | recalc_sigpending(); | 108 | #else /* !CONFIG_X86_32 */ |
193 | spin_unlock_irq(¤t->sighand->siglock); | 109 | /* Kernel saves and restores only the CS segment register on signals, |
110 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
111 | * App's signal handler can save/restore other segments if needed. */ | ||
112 | COPY_SEG_CPL3(cs); | ||
113 | #endif /* CONFIG_X86_32 */ | ||
194 | 114 | ||
195 | if (restore_sigcontext(regs, &frame->sc, &ax)) | 115 | get_user_ex(tmpflags, &sc->flags); |
196 | goto badframe; | 116 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
197 | return ax; | 117 | regs->orig_ax = -1; /* disable syscall checks */ |
198 | 118 | ||
199 | badframe: | 119 | get_user_ex(buf, &sc->fpstate); |
200 | if (show_unhandled_signals && printk_ratelimit()) { | 120 | err |= restore_i387_xstate(buf); |
201 | printk("%s%s[%d] bad frame in sigreturn frame:" | ||
202 | "%p ip:%lx sp:%lx oeax:%lx", | ||
203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
204 | current->comm, task_pid_nr(current), frame, regs->ip, | ||
205 | regs->sp, regs->orig_ax); | ||
206 | print_vma_addr(" in ", regs->ip); | ||
207 | printk(KERN_CONT "\n"); | ||
208 | } | ||
209 | 121 | ||
210 | force_sig(SIGSEGV, current); | 122 | get_user_ex(*pax, &sc->ax); |
123 | } get_user_catch(err); | ||
211 | 124 | ||
212 | return 0; | 125 | return err; |
213 | } | 126 | } |
214 | 127 | ||
215 | static long do_rt_sigreturn(struct pt_regs *regs) | 128 | static int |
129 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | ||
130 | struct pt_regs *regs, unsigned long mask) | ||
216 | { | 131 | { |
217 | struct rt_sigframe __user *frame; | 132 | int err = 0; |
218 | unsigned long ax; | ||
219 | sigset_t set; | ||
220 | |||
221 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
222 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
223 | goto badframe; | ||
224 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
225 | goto badframe; | ||
226 | |||
227 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
228 | spin_lock_irq(¤t->sighand->siglock); | ||
229 | current->blocked = set; | ||
230 | recalc_sigpending(); | ||
231 | spin_unlock_irq(¤t->sighand->siglock); | ||
232 | 133 | ||
233 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 134 | put_user_try { |
234 | goto badframe; | ||
235 | 135 | ||
236 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | 136 | #ifdef CONFIG_X86_32 |
237 | goto badframe; | 137 | put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); |
138 | put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); | ||
139 | put_user_ex(regs->es, (unsigned int __user *)&sc->es); | ||
140 | put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); | ||
141 | #endif /* CONFIG_X86_32 */ | ||
238 | 142 | ||
239 | return ax; | 143 | put_user_ex(regs->di, &sc->di); |
144 | put_user_ex(regs->si, &sc->si); | ||
145 | put_user_ex(regs->bp, &sc->bp); | ||
146 | put_user_ex(regs->sp, &sc->sp); | ||
147 | put_user_ex(regs->bx, &sc->bx); | ||
148 | put_user_ex(regs->dx, &sc->dx); | ||
149 | put_user_ex(regs->cx, &sc->cx); | ||
150 | put_user_ex(regs->ax, &sc->ax); | ||
151 | #ifdef CONFIG_X86_64 | ||
152 | put_user_ex(regs->r8, &sc->r8); | ||
153 | put_user_ex(regs->r9, &sc->r9); | ||
154 | put_user_ex(regs->r10, &sc->r10); | ||
155 | put_user_ex(regs->r11, &sc->r11); | ||
156 | put_user_ex(regs->r12, &sc->r12); | ||
157 | put_user_ex(regs->r13, &sc->r13); | ||
158 | put_user_ex(regs->r14, &sc->r14); | ||
159 | put_user_ex(regs->r15, &sc->r15); | ||
160 | #endif /* CONFIG_X86_64 */ | ||
161 | |||
162 | put_user_ex(current->thread.trap_no, &sc->trapno); | ||
163 | put_user_ex(current->thread.error_code, &sc->err); | ||
164 | put_user_ex(regs->ip, &sc->ip); | ||
165 | #ifdef CONFIG_X86_32 | ||
166 | put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); | ||
167 | put_user_ex(regs->flags, &sc->flags); | ||
168 | put_user_ex(regs->sp, &sc->sp_at_signal); | ||
169 | put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); | ||
170 | #else /* !CONFIG_X86_32 */ | ||
171 | put_user_ex(regs->flags, &sc->flags); | ||
172 | put_user_ex(regs->cs, &sc->cs); | ||
173 | put_user_ex(0, &sc->gs); | ||
174 | put_user_ex(0, &sc->fs); | ||
175 | #endif /* CONFIG_X86_32 */ | ||
240 | 176 | ||
241 | badframe: | 177 | put_user_ex(fpstate, &sc->fpstate); |
242 | signal_fault(regs, frame, "rt_sigreturn"); | ||
243 | return 0; | ||
244 | } | ||
245 | 178 | ||
246 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | 179 | /* non-iBCS2 extensions.. */ |
247 | { | 180 | put_user_ex(mask, &sc->oldmask); |
248 | struct pt_regs *regs = (struct pt_regs *)&__unused; | 181 | put_user_ex(current->thread.cr2, &sc->cr2); |
182 | } put_user_catch(err); | ||
249 | 183 | ||
250 | return do_rt_sigreturn(regs); | 184 | return err; |
251 | } | 185 | } |
252 | 186 | ||
253 | /* | 187 | /* |
254 | * Set up a signal frame. | 188 | * Set up a signal frame. |
255 | */ | 189 | */ |
256 | static int | 190 | #ifdef CONFIG_X86_32 |
257 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 191 | static const struct { |
258 | struct pt_regs *regs, unsigned long mask) | 192 | u16 poplmovl; |
259 | { | 193 | u32 val; |
260 | int tmp, err = 0; | 194 | u16 int80; |
261 | 195 | } __attribute__((packed)) retcode = { | |
262 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | 196 | 0xb858, /* popl %eax; movl $..., %eax */ |
263 | savesegment(gs, tmp); | 197 | __NR_sigreturn, |
264 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 198 | 0x80cd, /* int $0x80 */ |
265 | 199 | }; | |
266 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); | 200 | |
267 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); | 201 | static const struct { |
268 | err |= __put_user(regs->di, &sc->di); | 202 | u8 movl; |
269 | err |= __put_user(regs->si, &sc->si); | 203 | u32 val; |
270 | err |= __put_user(regs->bp, &sc->bp); | 204 | u16 int80; |
271 | err |= __put_user(regs->sp, &sc->sp); | 205 | u8 pad; |
272 | err |= __put_user(regs->bx, &sc->bx); | 206 | } __attribute__((packed)) rt_retcode = { |
273 | err |= __put_user(regs->dx, &sc->dx); | 207 | 0xb8, /* movl $..., %eax */ |
274 | err |= __put_user(regs->cx, &sc->cx); | 208 | __NR_rt_sigreturn, |
275 | err |= __put_user(regs->ax, &sc->ax); | 209 | 0x80cd, /* int $0x80 */ |
276 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 210 | 0 |
277 | err |= __put_user(current->thread.error_code, &sc->err); | 211 | }; |
278 | err |= __put_user(regs->ip, &sc->ip); | ||
279 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | ||
280 | err |= __put_user(regs->flags, &sc->flags); | ||
281 | err |= __put_user(regs->sp, &sc->sp_at_signal); | ||
282 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | ||
283 | |||
284 | tmp = save_i387_xstate(fpstate); | ||
285 | if (tmp < 0) | ||
286 | err = 1; | ||
287 | else | ||
288 | err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); | ||
289 | |||
290 | /* non-iBCS2 extensions.. */ | ||
291 | err |= __put_user(mask, &sc->oldmask); | ||
292 | err |= __put_user(current->thread.cr2, &sc->cr2); | ||
293 | |||
294 | return err; | ||
295 | } | ||
296 | 212 | ||
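The two packed structs above encode, byte for byte, the trampolines the 32-bit code used to assemble with three separate `__put_user()` calls. The u16/u32 split leans on x86 being little-endian: the u16 0xb858 lands in memory as 58 b8, i.e. `popl %eax` (0x58) followed by the opcode of `movl $imm32, %eax` (0xb8), and 0x80cd lands as cd 80, `int $0x80`. A standalone sketch that prints the image (119 is i386's `__NR_sigreturn`):

#include <stdint.h>
#include <stdio.h>

struct retcode {
	uint16_t poplmovl;	/* 58 b8: popl %eax; movl $..., %eax */
	uint32_t val;		/* the imm32: a syscall number */
	uint16_t int80;		/* cd 80: int $0x80 */
} __attribute__((packed));

int main(void)
{
	struct retcode r = { 0xb858, 119, 0x80cd };
	const unsigned char *p = (const unsigned char *)&r;

	for (size_t i = 0; i < sizeof(r); i++)
		printf("%02x ", p[i]);	/* 58 b8 77 00 00 00 cd 80 */
	printf("\n");
	return 0;
}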
297 | /* | 213 | /* |
298 | * Determine which stack to use.. | 214 | * Determine which stack to use.. |
@@ -328,6 +244,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
328 | if (used_math()) { | 244 | if (used_math()) { |
329 | sp = sp - sig_xstate_size; | 245 | sp = sp - sig_xstate_size; |
330 | *fpstate = (struct _fpstate *) sp; | 246 | *fpstate = (struct _fpstate *) sp; |
247 | if (save_i387_xstate(*fpstate) < 0) | ||
248 | return (void __user *)-1L; | ||
331 | } | 249 | } |
332 | 250 | ||
333 | sp -= frame_size; | 251 | sp -= frame_size; |
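The change above moves the FPU/extended-state save into get_sigframe(), at the point where the stack is being carved up, so a failing `save_i387_xstate()` aborts frame setup before anything else has been written to userspace; the `(void __user *)-1L` sentinel then fails the caller's `access_ok()` check. The carving itself is plain pointer arithmetic -- a sketch with hypothetical sizes:

#include <stdio.h>

int main(void)
{
	unsigned long sp = 0xbfffe000UL;	/* hypothetical user %esp */
	unsigned long xstate_size = 832;	/* hypothetical sig_xstate_size */
	unsigned long frame_size = 0x2e0;	/* hypothetical sizeof(frame) */

	unsigned long fpstate = sp - xstate_size; /* xstate image on top */
	sp = fpstate - frame_size;		  /* signal frame below it */

	printf("fpstate=%#lx frame=%#lx\n", fpstate, sp);
	return 0;
}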
@@ -383,9 +301,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
383 | * reasons and because gdb uses it as a signature to notice | 301 | * reasons and because gdb uses it as a signature to notice |
384 | * signal handler stack frames. | 302 | * signal handler stack frames. |
385 | */ | 303 | */ |
386 | err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); | 304 | err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); |
387 | err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); | ||
388 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | ||
389 | 305 | ||
390 | if (err) | 306 | if (err) |
391 | return -EFAULT; | 307 | return -EFAULT; |
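Writing the trampoline with one `__put_user()` of a u64 replaces three partial stores (and three inline fault checks) with a single store of the pre-built image from the `retcode` struct above; the bytes reaching userspace are identical. A small standalone check of that equivalence, again using 119 as i386's `__NR_sigreturn`:

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct retcode {
	uint16_t poplmovl;
	uint32_t val;
	uint16_t int80;
} __attribute__((packed));

int main(void)
{
	unsigned char old_way[8], new_way[8];
	uint16_t op1 = 0xb858, op2 = 0x80cd;
	uint32_t nr = 119;
	struct retcode r = { 0xb858, 119, 0x80cd };

	/* old style: three separate stores */
	memcpy(old_way + 0, &op1, 2);
	memcpy(old_way + 2, &nr, 4);
	memcpy(old_way + 6, &op2, 2);

	/* new style: one 8-byte copy of the packed struct image */
	memcpy(new_way, &r, sizeof(r));

	assert(memcmp(old_way, new_way, 8) == 0);
	return 0;
}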
@@ -418,45 +334,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
418 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 334 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
419 | return -EFAULT; | 335 | return -EFAULT; |
420 | 336 | ||
421 | err |= __put_user(sig, &frame->sig); | 337 | put_user_try { |
422 | err |= __put_user(&frame->info, &frame->pinfo); | 338 | put_user_ex(sig, &frame->sig); |
423 | err |= __put_user(&frame->uc, &frame->puc); | 339 | put_user_ex(&frame->info, &frame->pinfo); |
424 | err |= copy_siginfo_to_user(&frame->info, info); | 340 | put_user_ex(&frame->uc, &frame->puc); |
425 | if (err) | 341 | err |= copy_siginfo_to_user(&frame->info, info); |
426 | return -EFAULT; | ||
427 | |||
428 | /* Create the ucontext. */ | ||
429 | if (cpu_has_xsave) | ||
430 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
431 | else | ||
432 | err |= __put_user(0, &frame->uc.uc_flags); | ||
433 | err |= __put_user(0, &frame->uc.uc_link); | ||
434 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
435 | err |= __put_user(sas_ss_flags(regs->sp), | ||
436 | &frame->uc.uc_stack.ss_flags); | ||
437 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
438 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
439 | regs, set->sig[0]); | ||
440 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
441 | if (err) | ||
442 | return -EFAULT; | ||
443 | 342 | ||
444 | /* Set up to return from userspace. */ | 343 | /* Create the ucontext. */ |
445 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 344 | if (cpu_has_xsave) |
446 | if (ka->sa.sa_flags & SA_RESTORER) | 345 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); |
447 | restorer = ka->sa.sa_restorer; | 346 | else |
448 | err |= __put_user(restorer, &frame->pretcode); | 347 | put_user_ex(0, &frame->uc.uc_flags); |
348 | put_user_ex(0, &frame->uc.uc_link); | ||
349 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
350 | put_user_ex(sas_ss_flags(regs->sp), | ||
351 | &frame->uc.uc_stack.ss_flags); | ||
352 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
353 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
354 | regs, set->sig[0]); | ||
355 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
356 | |||
357 | /* Set up to return from userspace. */ | ||
358 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | ||
359 | if (ka->sa.sa_flags & SA_RESTORER) | ||
360 | restorer = ka->sa.sa_restorer; | ||
361 | put_user_ex(restorer, &frame->pretcode); | ||
449 | 362 | ||
450 | /* | 363 | /* |
451 | * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 | 364 | * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 |
452 | * | 365 | * |
453 | * WE DO NOT USE IT ANY MORE! It's only left here for historical | 366 | * WE DO NOT USE IT ANY MORE! It's only left here for historical |
454 | * reasons and because gdb uses it as a signature to notice | 367 | * reasons and because gdb uses it as a signature to notice |
455 | * signal handler stack frames. | 368 | * signal handler stack frames. |
456 | */ | 369 | */ |
457 | err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); | 370 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
458 | err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); | 371 | } put_user_catch(err); |
459 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | ||
460 | 372 | ||
461 | if (err) | 373 | if (err) |
462 | return -EFAULT; | 374 | return -EFAULT; |
@@ -475,23 +387,286 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
475 | 387 | ||
476 | return 0; | 388 | return 0; |
477 | } | 389 | } |
390 | #else /* !CONFIG_X86_32 */ | ||
391 | /* | ||
392 | * Determine which stack to use.. | ||
393 | */ | ||
394 | static void __user * | ||
395 | get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) | ||
396 | { | ||
397 | /* Default to using normal stack - redzone */ | ||
398 | sp -= 128; | ||
399 | |||
400 | /* This is the X/Open sanctioned signal stack switching. */ | ||
401 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
402 | if (sas_ss_flags(sp) == 0) | ||
403 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
404 | } | ||
405 | |||
406 | return (void __user *)round_down(sp - size, 64); | ||
407 | } | ||
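Two constants in get_stack() above carry ABI weight: the 128 bytes skipped below sp are the x86-64 red zone, which leaf functions may use without adjusting %rsp and which a signal frame therefore must not clobber, and the round_down to 64 gives the 64-byte alignment that fxsave/xsave require of their save area. A sketch of the arithmetic with hypothetical values:

#include <stdio.h>

/* round_down as the kernel defines it, for power-of-two alignment */
static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

int main(void)
{
	unsigned long sp = 0x7fffffffe123UL;	/* hypothetical user %rsp */
	unsigned long size = 832;		/* hypothetical xstate size */

	sp -= 128;				/* step over the red zone */
	sp = round_down_ul(sp - size, 64);	/* 64-byte aligned for xsave */

	printf("%#lx (mod 64 = %lu)\n", sp, sp % 64);
	return 0;
}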
408 | |||
409 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
410 | sigset_t *set, struct pt_regs *regs) | ||
411 | { | ||
412 | struct rt_sigframe __user *frame; | ||
413 | void __user *fp = NULL; | ||
414 | int err = 0; | ||
415 | struct task_struct *me = current; | ||
416 | |||
417 | if (used_math()) { | ||
418 | fp = get_stack(ka, regs->sp, sig_xstate_size); | ||
419 | frame = (void __user *)round_down( | ||
420 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
421 | |||
422 | if (save_i387_xstate(fp) < 0) | ||
423 | return -EFAULT; | ||
424 | } else | ||
425 | frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; | ||
426 | |||
427 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
428 | return -EFAULT; | ||
429 | |||
430 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
431 | if (copy_siginfo_to_user(&frame->info, info)) | ||
432 | return -EFAULT; | ||
433 | } | ||
434 | |||
435 | put_user_try { | ||
436 | /* Create the ucontext. */ | ||
437 | if (cpu_has_xsave) | ||
438 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
439 | else | ||
440 | put_user_ex(0, &frame->uc.uc_flags); | ||
441 | put_user_ex(0, &frame->uc.uc_link); | ||
442 | put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
443 | put_user_ex(sas_ss_flags(regs->sp), | ||
444 | &frame->uc.uc_stack.ss_flags); | ||
445 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
446 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
447 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
448 | |||
449 | /* Set up to return from userspace. If provided, use a stub | ||
450 | already in userspace. */ | ||
451 | /* x86-64 should always use SA_RESTORER. */ | ||
452 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
453 | put_user_ex(ka->sa.sa_restorer, &frame->pretcode); | ||
454 | } else { | ||
455 | /* could use a vstub here */ | ||
456 | err |= -EFAULT; | ||
457 | } | ||
458 | } put_user_catch(err); | ||
459 | |||
460 | if (err) | ||
461 | return -EFAULT; | ||
462 | |||
463 | /* Set up registers for signal handler */ | ||
464 | regs->di = sig; | ||
465 | /* In case the signal handler was declared without prototypes */ | ||
466 | regs->ax = 0; | ||
467 | |||
468 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
469 | next argument after the signal number on the stack. */ | ||
470 | regs->si = (unsigned long)&frame->info; | ||
471 | regs->dx = (unsigned long)&frame->uc; | ||
472 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
473 | |||
474 | regs->sp = (unsigned long)frame; | ||
475 | |||
476 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
477 | even if the handler happens to be interrupting 32-bit code. */ | ||
478 | regs->cs = __USER_CS; | ||
479 | |||
480 | return 0; | ||
481 | } | ||
482 | #endif /* CONFIG_X86_32 */ | ||
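The `round_down(..., 16) - 8` in the 64-bit __setup_rt_frame() above reproduces what a normal `call` leaves behind: the psABI requires %rsp + 8 to be 16-byte aligned at function entry (the 8 being the pushed return address), and here `frame->pretcode` plays the return-address role. A quick standalone check of the resulting alignment, with hypothetical values:

#include <stdio.h>

int main(void)
{
	unsigned long fp = 0x7fffffffe000UL; /* hypothetical xstate address */
	unsigned long sz = 0x4c0;	     /* hypothetical sizeof(rt_sigframe) */
	unsigned long frame = ((fp - sz) & ~15UL) - 8;

	/* handler entry: %rsp == frame, so %rsp + 8 is 16-byte aligned */
	printf("frame %% 16 = %lu\n", frame % 16);	/* prints 8 */
	return 0;
}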
483 | |||
484 | #ifdef CONFIG_X86_32 | ||
485 | /* | ||
486 | * Atomically swap in the new signal mask, and wait for a signal. | ||
487 | */ | ||
488 | asmlinkage int | ||
489 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
490 | { | ||
491 | mask &= _BLOCKABLE; | ||
492 | spin_lock_irq(¤t->sighand->siglock); | ||
493 | current->saved_sigmask = current->blocked; | ||
494 | siginitset(¤t->blocked, mask); | ||
495 | recalc_sigpending(); | ||
496 | spin_unlock_irq(¤t->sighand->siglock); | ||
497 | |||
498 | current->state = TASK_INTERRUPTIBLE; | ||
499 | schedule(); | ||
500 | set_restore_sigmask(); | ||
501 | |||
502 | return -ERESTARTNOHAND; | ||
503 | } | ||
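sys_sigsuspend() above is the kernel half of the classic race-free wait: the mask swap and the sleep happen as one step, and the old mask, parked in `saved_sigmask`, is only restored after the handler's frame has captured the temporary one (which is what `set_restore_sigmask()` defers). The userspace contract it implements, in miniature:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got;
static void handler(int sig) { got = sig; }

int main(void)
{
	sigset_t block, none;

	signal(SIGUSR1, handler);
	sigemptyset(&block);
	sigaddset(&block, SIGUSR1);
	sigprocmask(SIG_BLOCK, &block, NULL);	/* hold SIGUSR1 pending */

	kill(getpid(), SIGUSR1);		/* queued, not delivered */

	sigemptyset(&none);
	sigsuspend(&none);	/* atomically unblock and wait: no lost wakeup */
	printf("delivered signal %d\n", got);
	return 0;
}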
504 | |||
505 | asmlinkage int | ||
506 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
507 | struct old_sigaction __user *oact) | ||
508 | { | ||
509 | struct k_sigaction new_ka, old_ka; | ||
510 | int ret = 0; | ||
511 | |||
512 | if (act) { | ||
513 | old_sigset_t mask; | ||
514 | |||
515 | if (!access_ok(VERIFY_READ, act, sizeof(*act))) | ||
516 | return -EFAULT; | ||
517 | |||
518 | get_user_try { | ||
519 | get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); | ||
520 | get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); | ||
521 | get_user_ex(mask, &act->sa_mask); | ||
522 | get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); | ||
523 | } get_user_catch(ret); | ||
524 | |||
525 | if (ret) | ||
526 | return -EFAULT; | ||
527 | siginitset(&new_ka.sa.sa_mask, mask); | ||
528 | } | ||
529 | |||
530 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
531 | |||
532 | if (!ret && oact) { | ||
533 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) | ||
534 | return -EFAULT; | ||
535 | |||
536 | put_user_try { | ||
537 | put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); | ||
538 | put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); | ||
539 | put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
540 | put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); | ||
541 | } put_user_catch(ret); | ||
542 | |||
543 | if (ret) | ||
544 | return -EFAULT; | ||
545 | } | ||
546 | |||
547 | return ret; | ||
548 | } | ||
549 | #endif /* CONFIG_X86_32 */ | ||
550 | |||
551 | #ifdef CONFIG_X86_32 | ||
552 | int sys_sigaltstack(struct pt_regs *regs) | ||
553 | { | ||
554 | const stack_t __user *uss = (const stack_t __user *)regs->bx; | ||
555 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
556 | |||
557 | return do_sigaltstack(uss, uoss, regs->sp); | ||
558 | } | ||
559 | #else /* !CONFIG_X86_32 */ | ||
560 | asmlinkage long | ||
561 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
562 | struct pt_regs *regs) | ||
563 | { | ||
564 | return do_sigaltstack(uss, uoss, regs->sp); | ||
565 | } | ||
566 | #endif /* CONFIG_X86_32 */ | ||
567 | |||
568 | /* | ||
569 | * Do a signal return; undo the signal stack. | ||
570 | */ | ||
571 | #ifdef CONFIG_X86_32 | ||
572 | unsigned long sys_sigreturn(struct pt_regs *regs) | ||
573 | { | ||
574 | struct sigframe __user *frame; | ||
575 | unsigned long ax; | ||
576 | sigset_t set; | ||
577 | |||
578 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
579 | |||
580 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
581 | goto badframe; | ||
582 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
583 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
584 | sizeof(frame->extramask)))) | ||
585 | goto badframe; | ||
586 | |||
587 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
588 | spin_lock_irq(¤t->sighand->siglock); | ||
589 | current->blocked = set; | ||
590 | recalc_sigpending(); | ||
591 | spin_unlock_irq(¤t->sighand->siglock); | ||
592 | |||
593 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
594 | goto badframe; | ||
595 | return ax; | ||
596 | |||
597 | badframe: | ||
598 | signal_fault(regs, frame, "sigreturn"); | ||
599 | |||
600 | return 0; | ||
601 | } | ||
602 | #endif /* CONFIG_X86_32 */ | ||
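The `regs->sp - 8` in sys_sigreturn() above undoes the two pops that happened between handler entry and the `int $0x80`: the handler's `ret` consumed `frame->pretcode` (4 bytes) and the trampoline's `popl %eax` consumed `frame->sig` (4 more), so the frame base sits 8 bytes below the syscall-time stack pointer. A sketch of the bookkeeping with a hypothetical frame address:

#include <stdio.h>

int main(void)
{
	unsigned long frame = 0xbfffd000UL;	/* hypothetical frame base */
	unsigned long sp = frame;

	sp += 4;	/* handler's ret pops frame->pretcode */
	sp += 4;	/* trampoline's popl %eax pops frame->sig */

	printf("frame == sp - 8: %d\n", frame == sp - 8);	/* 1 */
	return 0;
}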
603 | |||
604 | long sys_rt_sigreturn(struct pt_regs *regs) | ||
605 | { | ||
606 | struct rt_sigframe __user *frame; | ||
607 | unsigned long ax; | ||
608 | sigset_t set; | ||
609 | |||
610 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
611 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
612 | goto badframe; | ||
613 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
614 | goto badframe; | ||
615 | |||
616 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
617 | spin_lock_irq(¤t->sighand->siglock); | ||
618 | current->blocked = set; | ||
619 | recalc_sigpending(); | ||
620 | spin_unlock_irq(¤t->sighand->siglock); | ||
621 | |||
622 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
623 | goto badframe; | ||
624 | |||
625 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
626 | goto badframe; | ||
627 | |||
628 | return ax; | ||
629 | |||
630 | badframe: | ||
631 | signal_fault(regs, frame, "rt_sigreturn"); | ||
632 | return 0; | ||
633 | } | ||
478 | 634 | ||
479 | /* | 635 | /* |
480 | * OK, we're invoking a handler: | 636 | * OK, we're invoking a handler: |
481 | */ | 637 | */ |
482 | static int signr_convert(int sig) | 638 | static int signr_convert(int sig) |
483 | { | 639 | { |
640 | #ifdef CONFIG_X86_32 | ||
484 | struct thread_info *info = current_thread_info(); | 641 | struct thread_info *info = current_thread_info(); |
485 | 642 | ||
486 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | 643 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) |
487 | return info->exec_domain->signal_invmap[sig]; | 644 | return info->exec_domain->signal_invmap[sig]; |
645 | #endif /* CONFIG_X86_32 */ | ||
488 | return sig; | 646 | return sig; |
489 | } | 647 | } |
490 | 648 | ||
649 | #ifdef CONFIG_X86_32 | ||
650 | |||
491 | #define is_ia32 1 | 651 | #define is_ia32 1 |
492 | #define ia32_setup_frame __setup_frame | 652 | #define ia32_setup_frame __setup_frame |
493 | #define ia32_setup_rt_frame __setup_rt_frame | 653 | #define ia32_setup_rt_frame __setup_rt_frame |
494 | 654 | ||
655 | #else /* !CONFIG_X86_32 */ | ||
656 | |||
657 | #ifdef CONFIG_IA32_EMULATION | ||
658 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
659 | #else /* !CONFIG_IA32_EMULATION */ | ||
660 | #define is_ia32 0 | ||
661 | #endif /* CONFIG_IA32_EMULATION */ | ||
662 | |||
663 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
664 | sigset_t *set, struct pt_regs *regs); | ||
665 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
666 | sigset_t *set, struct pt_regs *regs); | ||
667 | |||
668 | #endif /* CONFIG_X86_32 */ | ||
669 | |||
495 | static int | 670 | static int |
496 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 671 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
497 | sigset_t *set, struct pt_regs *regs) | 672 | sigset_t *set, struct pt_regs *regs) |
@@ -592,7 +767,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
592 | return 0; | 767 | return 0; |
593 | } | 768 | } |
594 | 769 | ||
770 | #ifdef CONFIG_X86_32 | ||
595 | #define NR_restart_syscall __NR_restart_syscall | 771 | #define NR_restart_syscall __NR_restart_syscall |
772 | #else /* !CONFIG_X86_32 */ | ||
773 | #define NR_restart_syscall \ | ||
774 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
775 | #endif /* CONFIG_X86_32 */ | ||
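The 64-bit NR_restart_syscall above picks the restart number by personality, since a 32-bit task must restart through the ia32 syscall table. The restart itself, visible in handle_signal() further down, is just `ax = NR_restart_syscall; ip -= 2`: both `int $0x80` (cd 80) and `syscall` (0f 05) are two-byte opcodes, so backing the instruction pointer up by two re-executes the trap. A miniature of that fixup (219 is x86-64's `__NR_restart_syscall`, 0 is i386's; 516 is ERESTART_RESTARTBLOCK):

#include <stdio.h>

struct regs { unsigned long ax, ip; };

static void restart_block(struct regs *r, int is_ia32)
{
	r->ax = is_ia32 ? 0 : 219;	/* __NR_restart_syscall, per table */
	r->ip -= 2;			/* re-execute the 2-byte trap insn */
}

int main(void)
{
	struct regs r = { .ax = -516UL /* -ERESTART_RESTARTBLOCK */,
			  .ip = 0x400102 };

	restart_block(&r, 0);
	printf("ax=%lu ip=%#lx\n", r.ax, r.ip);
	return 0;
}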
776 | |||
596 | /* | 777 | /* |
597 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 778 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
598 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 779 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
@@ -704,8 +885,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
704 | struct task_struct *me = current; | 885 | struct task_struct *me = current; |
705 | 886 | ||
706 | if (show_unhandled_signals && printk_ratelimit()) { | 887 | if (show_unhandled_signals && printk_ratelimit()) { |
707 | printk(KERN_INFO | 888 | printk("%s" |
708 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 889 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
890 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
709 | me->comm, me->pid, where, frame, | 891 | me->comm, me->pid, where, frame, |
710 | regs->ip, regs->sp, regs->orig_ax); | 892 | regs->ip, regs->sp, regs->orig_ax); |
711 | print_vma_addr(" in ", regs->ip); | 893 | print_vma_addr(" in ", regs->ip); |
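The printk change above routes the severity through a leading "%s" so a bad frame in init (pid 1) is escalated from KERN_INFO to KERN_EMERG at run time. A userspace miniature of the trick (the KERN_* strings of this era really are just "<level>" prefixes):

#include <stdio.h>

#define KERN_EMERG "<0>"
#define KERN_INFO  "<6>"

static void log_bad_frame(const char *comm, int pid)
{
	printf("%s%s[%d] bad frame\n",
	       pid > 1 ? KERN_INFO : KERN_EMERG, comm, pid);
}

int main(void)
{
	log_bad_frame("init", 1);	/* <0>init[1] bad frame */
	log_bad_frame("bash", 42);	/* <6>bash[42] bad frame */
	return 0;
}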
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c deleted file mode 100644 index a5c9627f4db9..000000000000 --- a/arch/x86/kernel/signal_64.c +++ /dev/null | |||
@@ -1,516 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
4 | * | ||
5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | ||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | ||
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/signal.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/wait.h> | ||
17 | #include <linux/ptrace.h> | ||
18 | #include <linux/tracehook.h> | ||
19 | #include <linux/unistd.h> | ||
20 | #include <linux/stddef.h> | ||
21 | #include <linux/personality.h> | ||
22 | #include <linux/compiler.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | |||
25 | #include <asm/processor.h> | ||
26 | #include <asm/ucontext.h> | ||
27 | #include <asm/i387.h> | ||
28 | #include <asm/proto.h> | ||
29 | #include <asm/ia32_unistd.h> | ||
30 | #include <asm/mce.h> | ||
31 | #include <asm/syscall.h> | ||
32 | #include <asm/syscalls.h> | ||
33 | #include "sigframe.h" | ||
34 | |||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
36 | |||
37 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | ||
38 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | ||
39 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | ||
40 | X86_EFLAGS_CF) | ||
41 | |||
42 | #ifdef CONFIG_X86_32 | ||
43 | # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) | ||
44 | #else | ||
45 | # define FIX_EFLAGS __FIX_EFLAGS | ||
46 | #endif | ||
47 | |||
48 | asmlinkage long | ||
49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
50 | struct pt_regs *regs) | ||
51 | { | ||
52 | return do_sigaltstack(uss, uoss, regs->sp); | ||
53 | } | ||
54 | |||
55 | #define COPY(x) { \ | ||
56 | err |= __get_user(regs->x, &sc->x); \ | ||
57 | } | ||
58 | |||
59 | #define COPY_SEG_STRICT(seg) { \ | ||
60 | unsigned short tmp; \ | ||
61 | err |= __get_user(tmp, &sc->seg); \ | ||
62 | regs->seg = tmp | 3; \ | ||
63 | } | ||
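COPY_SEG_STRICT's `tmp | 3` is a small but important hardening: the low two bits of a selector are its RPL, and forcing them to 3 means a forged sigcontext cannot hand the task a ring-0 %cs on the way out of sigreturn. In isolation, with a hypothetical selector value:

#include <stdio.h>

int main(void)
{
	unsigned short forged = 0x10;		/* hypothetical kernel selector */
	unsigned short safe = forged | 3;	/* RPL forced to user (3) */

	printf("%#x -> %#x (RPL %d)\n", forged, safe, safe & 3);
	return 0;
}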
64 | |||
65 | /* | ||
66 | * Do a signal return; undo the signal stack. | ||
67 | */ | ||
68 | static int | ||
69 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
70 | unsigned long *pax) | ||
71 | { | ||
72 | void __user *buf; | ||
73 | unsigned int tmpflags; | ||
74 | unsigned int err = 0; | ||
75 | |||
76 | /* Always make any pending restarted system calls return -EINTR */ | ||
77 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
78 | |||
79 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | ||
80 | COPY(dx); COPY(cx); COPY(ip); | ||
81 | COPY(r8); | ||
82 | COPY(r9); | ||
83 | COPY(r10); | ||
84 | COPY(r11); | ||
85 | COPY(r12); | ||
86 | COPY(r13); | ||
87 | COPY(r14); | ||
88 | COPY(r15); | ||
89 | |||
90 | /* Kernel saves and restores only the CS segment register on signals, | ||
91 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
92 | * App's signal handler can save/restore other segments if needed. */ | ||
93 | COPY_SEG_STRICT(cs); | ||
94 | |||
95 | err |= __get_user(tmpflags, &sc->flags); | ||
96 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
97 | regs->orig_ax = -1; /* disable syscall checks */ | ||
98 | |||
99 | err |= __get_user(buf, &sc->fpstate); | ||
100 | err |= restore_i387_xstate(buf); | ||
101 | |||
102 | err |= __get_user(*pax, &sc->ax); | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
107 | { | ||
108 | struct rt_sigframe __user *frame; | ||
109 | unsigned long ax; | ||
110 | sigset_t set; | ||
111 | |||
112 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
113 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
114 | goto badframe; | ||
115 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
116 | goto badframe; | ||
117 | |||
118 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
119 | spin_lock_irq(¤t->sighand->siglock); | ||
120 | current->blocked = set; | ||
121 | recalc_sigpending(); | ||
122 | spin_unlock_irq(¤t->sighand->siglock); | ||
123 | |||
124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
125 | goto badframe; | ||
126 | |||
127 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
128 | goto badframe; | ||
129 | |||
130 | return ax; | ||
131 | |||
132 | badframe: | ||
133 | signal_fault(regs, frame, "rt_sigreturn"); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
138 | { | ||
139 | return do_rt_sigreturn(regs); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Set up a signal frame. | ||
144 | */ | ||
145 | |||
146 | static inline int | ||
147 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, | ||
148 | unsigned long mask, struct task_struct *me) | ||
149 | { | ||
150 | int err = 0; | ||
151 | |||
152 | err |= __put_user(regs->cs, &sc->cs); | ||
153 | err |= __put_user(0, &sc->gs); | ||
154 | err |= __put_user(0, &sc->fs); | ||
155 | |||
156 | err |= __put_user(regs->di, &sc->di); | ||
157 | err |= __put_user(regs->si, &sc->si); | ||
158 | err |= __put_user(regs->bp, &sc->bp); | ||
159 | err |= __put_user(regs->sp, &sc->sp); | ||
160 | err |= __put_user(regs->bx, &sc->bx); | ||
161 | err |= __put_user(regs->dx, &sc->dx); | ||
162 | err |= __put_user(regs->cx, &sc->cx); | ||
163 | err |= __put_user(regs->ax, &sc->ax); | ||
164 | err |= __put_user(regs->r8, &sc->r8); | ||
165 | err |= __put_user(regs->r9, &sc->r9); | ||
166 | err |= __put_user(regs->r10, &sc->r10); | ||
167 | err |= __put_user(regs->r11, &sc->r11); | ||
168 | err |= __put_user(regs->r12, &sc->r12); | ||
169 | err |= __put_user(regs->r13, &sc->r13); | ||
170 | err |= __put_user(regs->r14, &sc->r14); | ||
171 | err |= __put_user(regs->r15, &sc->r15); | ||
172 | err |= __put_user(me->thread.trap_no, &sc->trapno); | ||
173 | err |= __put_user(me->thread.error_code, &sc->err); | ||
174 | err |= __put_user(regs->ip, &sc->ip); | ||
175 | err |= __put_user(regs->flags, &sc->flags); | ||
176 | err |= __put_user(mask, &sc->oldmask); | ||
177 | err |= __put_user(me->thread.cr2, &sc->cr2); | ||
178 | |||
179 | return err; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Determine which stack to use.. | ||
184 | */ | ||
185 | |||
186 | static void __user * | ||
187 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | ||
188 | { | ||
189 | unsigned long sp; | ||
190 | |||
191 | /* Default to using normal stack - redzone */ | ||
192 | sp = regs->sp - 128; | ||
193 | |||
194 | /* This is the X/Open sanctioned signal stack switching. */ | ||
195 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
196 | if (sas_ss_flags(sp) == 0) | ||
197 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
198 | } | ||
199 | |||
200 | return (void __user *)round_down(sp - size, 64); | ||
201 | } | ||
202 | |||
203 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
204 | sigset_t *set, struct pt_regs *regs) | ||
205 | { | ||
206 | struct rt_sigframe __user *frame; | ||
207 | void __user *fp = NULL; | ||
208 | int err = 0; | ||
209 | struct task_struct *me = current; | ||
210 | |||
211 | if (used_math()) { | ||
212 | fp = get_stack(ka, regs, sig_xstate_size); | ||
213 | frame = (void __user *)round_down( | ||
214 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
215 | |||
216 | if (save_i387_xstate(fp) < 0) | ||
217 | return -EFAULT; | ||
218 | } else | ||
219 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | ||
220 | |||
221 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
222 | return -EFAULT; | ||
223 | |||
224 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
225 | if (copy_siginfo_to_user(&frame->info, info)) | ||
226 | return -EFAULT; | ||
227 | } | ||
228 | |||
229 | /* Create the ucontext. */ | ||
230 | if (cpu_has_xsave) | ||
231 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
232 | else | ||
233 | err |= __put_user(0, &frame->uc.uc_flags); | ||
234 | err |= __put_user(0, &frame->uc.uc_link); | ||
235 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
236 | err |= __put_user(sas_ss_flags(regs->sp), | ||
237 | &frame->uc.uc_stack.ss_flags); | ||
238 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
239 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | ||
240 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | ||
241 | if (sizeof(*set) == 16) { | ||
242 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | ||
243 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | ||
244 | } else | ||
245 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
246 | |||
247 | /* Set up to return from userspace. If provided, use a stub | ||
248 | already in userspace. */ | ||
249 | /* x86-64 should always use SA_RESTORER. */ | ||
250 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
251 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
252 | } else { | ||
253 | /* could use a vstub here */ | ||
254 | return -EFAULT; | ||
255 | } | ||
256 | |||
257 | if (err) | ||
258 | return -EFAULT; | ||
259 | |||
260 | /* Set up registers for signal handler */ | ||
261 | regs->di = sig; | ||
262 | /* In case the signal handler was declared without prototypes */ | ||
263 | regs->ax = 0; | ||
264 | |||
265 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
266 | next argument after the signal number on the stack. */ | ||
267 | regs->si = (unsigned long)&frame->info; | ||
268 | regs->dx = (unsigned long)&frame->uc; | ||
269 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
270 | |||
271 | regs->sp = (unsigned long)frame; | ||
272 | |||
273 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
274 | even if the handler happens to be interrupting 32-bit code. */ | ||
275 | regs->cs = __USER_CS; | ||
276 | |||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * OK, we're invoking a handler | ||
282 | */ | ||
283 | static int signr_convert(int sig) | ||
284 | { | ||
285 | return sig; | ||
286 | } | ||
287 | |||
288 | #ifdef CONFIG_IA32_EMULATION | ||
289 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
290 | #else | ||
291 | #define is_ia32 0 | ||
292 | #endif | ||
293 | |||
294 | static int | ||
295 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
296 | sigset_t *set, struct pt_regs *regs) | ||
297 | { | ||
298 | int usig = signr_convert(sig); | ||
299 | int ret; | ||
300 | |||
301 | /* Set up the stack frame */ | ||
302 | if (is_ia32) { | ||
303 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
304 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
305 | else | ||
306 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
307 | } else | ||
308 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
309 | |||
310 | if (ret) { | ||
311 | force_sigsegv(sig, current); | ||
312 | return -EFAULT; | ||
313 | } | ||
314 | |||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | ||
320 | sigset_t *oldset, struct pt_regs *regs) | ||
321 | { | ||
322 | int ret; | ||
323 | |||
324 | /* Are we from a system call? */ | ||
325 | if (syscall_get_nr(current, regs) >= 0) { | ||
326 | /* If so, check system call restarting.. */ | ||
327 | switch (syscall_get_error(current, regs)) { | ||
328 | case -ERESTART_RESTARTBLOCK: | ||
329 | case -ERESTARTNOHAND: | ||
330 | regs->ax = -EINTR; | ||
331 | break; | ||
332 | |||
333 | case -ERESTARTSYS: | ||
334 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
335 | regs->ax = -EINTR; | ||
336 | break; | ||
337 | } | ||
338 | /* fallthrough */ | ||
339 | case -ERESTARTNOINTR: | ||
340 | regs->ax = regs->orig_ax; | ||
341 | regs->ip -= 2; | ||
342 | break; | ||
343 | } | ||
344 | } | ||
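The switch above is the signal-vs-restart policy when a handler is about to run: ERESTARTNOHAND and ERESTART_RESTARTBLOCK always become -EINTR, ERESTARTSYS restarts only if the handler asked for SA_RESTART, and ERESTARTNOINTR always restarts (ax reloaded from orig_ax, ip backed up two bytes). A compact restatement using the kernel's internal errno values:

#include <stdio.h>

#define EINTR			4
#define ERESTARTSYS		512
#define ERESTARTNOINTR		513
#define ERESTARTNOHAND		514
#define ERESTART_RESTARTBLOCK	516

/* returns -EINTR, or 1 meaning "rewind ip and restart" */
static long fixup(long ax, int sa_restart)
{
	switch (ax) {
	case -ERESTART_RESTARTBLOCK:
	case -ERESTARTNOHAND:
		return -EINTR;
	case -ERESTARTSYS:
		return sa_restart ? 1 : -EINTR;
	case -ERESTARTNOINTR:
		return 1;
	}
	return ax;
}

int main(void)
{
	printf("%ld %ld\n", fixup(-ERESTARTSYS, 0), fixup(-ERESTARTSYS, 1));
	return 0;
}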
345 | |||
346 | /* | ||
347 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF | ||
348 | * flag so that register information in the sigcontext is correct. | ||
349 | */ | ||
350 | if (unlikely(regs->flags & X86_EFLAGS_TF) && | ||
351 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | ||
352 | regs->flags &= ~X86_EFLAGS_TF; | ||
353 | |||
354 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | ||
355 | |||
356 | if (ret) | ||
357 | return ret; | ||
358 | |||
359 | #ifdef CONFIG_X86_64 | ||
360 | /* | ||
361 | * This has nothing to do with segment registers, | ||
362 | * despite the name. This magic affects uaccess.h | ||
363 | * macros' behavior. Reset it to the normal setting. | ||
364 | */ | ||
365 | set_fs(USER_DS); | ||
366 | #endif | ||
367 | |||
368 | /* | ||
369 | * Clear the direction flag as per the ABI for function entry. | ||
370 | */ | ||
371 | regs->flags &= ~X86_EFLAGS_DF; | ||
372 | |||
373 | /* | ||
374 | * Clear TF when entering the signal handler, but | ||
375 | * notify any tracer that was single-stepping it. | ||
376 | * The tracer may want to single-step inside the | ||
377 | * handler too. | ||
378 | */ | ||
379 | regs->flags &= ~X86_EFLAGS_TF; | ||
380 | |||
381 | spin_lock_irq(¤t->sighand->siglock); | ||
382 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | ||
383 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
384 | sigaddset(¤t->blocked, sig); | ||
385 | recalc_sigpending(); | ||
386 | spin_unlock_irq(¤t->sighand->siglock); | ||
387 | |||
388 | tracehook_signal_handler(sig, info, ka, regs, | ||
389 | test_thread_flag(TIF_SINGLESTEP)); | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | #define NR_restart_syscall \ | ||
395 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
396 | /* | ||
397 | * Note that 'init' is a special process: it doesn't get signals it doesn't | ||
398 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | ||
399 | * mistake. | ||
400 | */ | ||
401 | static void do_signal(struct pt_regs *regs) | ||
402 | { | ||
403 | struct k_sigaction ka; | ||
404 | siginfo_t info; | ||
405 | int signr; | ||
406 | sigset_t *oldset; | ||
407 | |||
408 | /* | ||
409 | * We want the common case to go fast, which is why we may in certain | ||
410 | * cases get here from kernel mode. Just return without doing anything | ||
411 | * if so. | ||
412 | * X86_32: vm86 regs switched out by assembly code before reaching | ||
413 | * here, so testing against kernel CS suffices. | ||
414 | */ | ||
415 | if (!user_mode(regs)) | ||
416 | return; | ||
417 | |||
418 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
419 | oldset = ¤t->saved_sigmask; | ||
420 | else | ||
421 | oldset = ¤t->blocked; | ||
422 | |||
423 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
424 | if (signr > 0) { | ||
425 | /* | ||
426 | * Re-enable any watchpoints before delivering the | ||
427 | * signal to user space. The processor register will | ||
428 | * have been cleared if the watchpoint triggered | ||
429 | * inside the kernel. | ||
430 | */ | ||
431 | if (current->thread.debugreg7) | ||
432 | set_debugreg(current->thread.debugreg7, 7); | ||
433 | |||
434 | /* Whee! Actually deliver the signal. */ | ||
435 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | ||
436 | /* | ||
437 | * A signal was successfully delivered; the saved | ||
438 | * sigmask will have been stored in the signal frame, | ||
439 | * and will be restored by sigreturn, so we can simply | ||
440 | * clear the TS_RESTORE_SIGMASK flag. | ||
441 | */ | ||
442 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
443 | } | ||
444 | return; | ||
445 | } | ||
446 | |||
447 | /* Did we come from a system call? */ | ||
448 | if (syscall_get_nr(current, regs) >= 0) { | ||
449 | /* Restart the system call - no handlers present */ | ||
450 | switch (syscall_get_error(current, regs)) { | ||
451 | case -ERESTARTNOHAND: | ||
452 | case -ERESTARTSYS: | ||
453 | case -ERESTARTNOINTR: | ||
454 | regs->ax = regs->orig_ax; | ||
455 | regs->ip -= 2; | ||
456 | break; | ||
457 | |||
458 | case -ERESTART_RESTARTBLOCK: | ||
459 | regs->ax = NR_restart_syscall; | ||
460 | regs->ip -= 2; | ||
461 | break; | ||
462 | } | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * If there's no signal to deliver, we just put the saved sigmask | ||
467 | * back. | ||
468 | */ | ||
469 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | ||
470 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
471 | sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); | ||
472 | } | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * notification of userspace execution resumption | ||
477 | * - triggered by the TIF_WORK_MASK flags | ||
478 | */ | ||
479 | void | ||
480 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
481 | { | ||
482 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | ||
483 | /* notify userspace of pending MCEs */ | ||
484 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
485 | mce_notify_user(); | ||
486 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
487 | |||
488 | /* deal with pending signal delivery */ | ||
489 | if (thread_info_flags & _TIF_SIGPENDING) | ||
490 | do_signal(regs); | ||
491 | |||
492 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
493 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
494 | tracehook_notify_resume(regs); | ||
495 | } | ||
496 | |||
497 | #ifdef CONFIG_X86_32 | ||
498 | clear_thread_flag(TIF_IRET); | ||
499 | #endif /* CONFIG_X86_32 */ | ||
500 | } | ||
501 | |||
502 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | ||
503 | { | ||
504 | struct task_struct *me = current; | ||
505 | |||
506 | if (show_unhandled_signals && printk_ratelimit()) { | ||
507 | printk(KERN_INFO | ||
508 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | ||
509 | me->comm, me->pid, where, frame, | ||
510 | regs->ip, regs->sp, regs->orig_ax); | ||
511 | print_vma_addr(" in ", regs->ip); | ||
512 | printk(KERN_CONT "\n"); | ||
513 | } | ||
514 | |||
515 | force_sig(SIGSEGV, me); | ||
516 | } | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8f..eaaffae31cc0 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -1,8 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * Intel SMP support routines. | 2 | * Intel SMP support routines. |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
5 | * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. | 6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. |
7 | * | 7 | * |
8 | * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> | 8 | * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> |
@@ -26,8 +26,7 @@ | |||
26 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
27 | #include <asm/mmu_context.h> | 27 | #include <asm/mmu_context.h> |
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <mach_ipi.h> | 29 | #include <asm/genapic.h> |
30 | #include <mach_apic.h> | ||
31 | /* | 30 | /* |
32 | * Some notes on x86 processor bugs affecting SMP operation: | 31 | * Some notes on x86 processor bugs affecting SMP operation: |
33 | * | 32 | * |
@@ -118,39 +117,33 @@ static void native_smp_send_reschedule(int cpu) | |||
118 | WARN_ON(1); | 117 | WARN_ON(1); |
119 | return; | 118 | return; |
120 | } | 119 | } |
121 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 120 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
122 | } | 121 | } |
123 | 122 | ||
124 | void native_send_call_func_single_ipi(int cpu) | 123 | void native_send_call_func_single_ipi(int cpu) |
125 | { | 124 | { |
126 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); | 125 | apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); |
127 | } | 126 | } |
128 | 127 | ||
129 | void native_send_call_func_ipi(cpumask_t mask) | 128 | void native_send_call_func_ipi(const struct cpumask *mask) |
130 | { | 129 | { |
131 | cpumask_t allbutself; | 130 | cpumask_var_t allbutself; |
132 | 131 | ||
133 | allbutself = cpu_online_map; | 132 | if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { |
134 | cpu_clear(smp_processor_id(), allbutself); | 133 | apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
134 | return; | ||
135 | } | ||
136 | |||
137 | cpumask_copy(allbutself, cpu_online_mask); | ||
138 | cpumask_clear_cpu(smp_processor_id(), allbutself); | ||
135 | 139 | ||
136 | if (cpus_equal(mask, allbutself) && | 140 | if (cpumask_equal(mask, allbutself) && |
137 | cpus_equal(cpu_online_map, cpu_callout_map)) | 141 | cpumask_equal(cpu_online_mask, cpu_callout_mask)) |
138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 142 | apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
139 | else | 143 | else |
140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 144 | apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
141 | } | ||
142 | 145 | ||
143 | static void stop_this_cpu(void *dummy) | 146 | free_cpumask_var(allbutself); |
144 | { | ||
145 | local_irq_disable(); | ||
146 | /* | ||
147 | * Remove this CPU: | ||
148 | */ | ||
149 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
150 | disable_local_APIC(); | ||
151 | if (hlt_works(smp_processor_id())) | ||
152 | for (;;) halt(); | ||
153 | for (;;); | ||
154 | } | 147 | } |
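native_send_call_func_ipi() now takes `const struct cpumask *` and builds its scratch mask with `alloc_cpumask_var()`: with CONFIG_CPUMASK_OFFSTACK the scratch space comes from the heap, keeping huge NR_CPUS masks off the kernel stack, and the GFP_ATOMIC failure path simply falls back to the plain per-mask IPI. A userspace miniature of the off-stack idea (names mirror the kernel's, the gfp argument is omitted):

#include <stdbool.h>
#include <stdlib.h>

#define NR_CPUS 4096	/* hypothetical; 512 bytes of mask per variable */

struct cpumask { unsigned long bits[NR_CPUS / (8 * sizeof(unsigned long))]; };
typedef struct cpumask *cpumask_var_t;

static bool alloc_cpumask_var(cpumask_var_t *m)
{
	*m = calloc(1, sizeof(struct cpumask));	/* off the stack */
	return *m != NULL;
}

int main(void)
{
	cpumask_var_t scratch;

	if (!alloc_cpumask_var(&scratch))
		return 1;	/* caller falls back to the simple path */
	free(scratch);
	return 0;
}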
155 | 148 | ||
156 | /* | 149 | /* |
@@ -178,11 +171,7 @@ static void native_smp_send_stop(void) | |||
178 | void smp_reschedule_interrupt(struct pt_regs *regs) | 171 | void smp_reschedule_interrupt(struct pt_regs *regs) |
179 | { | 172 | { |
180 | ack_APIC_irq(); | 173 | ack_APIC_irq(); |
181 | #ifdef CONFIG_X86_32 | 174 | inc_irq_stat(irq_resched_count); |
182 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
183 | #else | ||
184 | add_pda(irq_resched_count, 1); | ||
185 | #endif | ||
186 | } | 175 | } |
187 | 176 | ||
188 | void smp_call_function_interrupt(struct pt_regs *regs) | 177 | void smp_call_function_interrupt(struct pt_regs *regs) |
@@ -190,11 +179,7 @@ void smp_call_function_interrupt(struct pt_regs *regs) | |||
190 | ack_APIC_irq(); | 179 | ack_APIC_irq(); |
191 | irq_enter(); | 180 | irq_enter(); |
192 | generic_smp_call_function_interrupt(); | 181 | generic_smp_call_function_interrupt(); |
193 | #ifdef CONFIG_X86_32 | 182 | inc_irq_stat(irq_call_count); |
194 | __get_cpu_var(irq_stat).irq_call_count++; | ||
195 | #else | ||
196 | add_pda(irq_call_count, 1); | ||
197 | #endif | ||
198 | irq_exit(); | 183 | irq_exit(); |
199 | } | 184 | } |
200 | 185 | ||
@@ -203,11 +188,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
203 | ack_APIC_irq(); | 188 | ack_APIC_irq(); |
204 | irq_enter(); | 189 | irq_enter(); |
205 | generic_smp_call_function_single_interrupt(); | 190 | generic_smp_call_function_single_interrupt(); |
206 | #ifdef CONFIG_X86_32 | 191 | inc_irq_stat(irq_call_count); |
207 | __get_cpu_var(irq_stat).irq_call_count++; | ||
208 | #else | ||
209 | add_pda(irq_call_count, 1); | ||
210 | #endif | ||
211 | irq_exit(); | 192 | irq_exit(); |
212 | } | 193 | } |
213 | 194 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7b1093397319..af57f88186e7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1,8 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
6 | * Copyright 2001 Andi Kleen, SuSE Labs. | 6 | * Copyright 2001 Andi Kleen, SuSE Labs. |
7 | * | 7 | * |
8 | * Much of the core SMP work is based on previous work by Thomas Radke, to | 8 | * Much of the core SMP work is based on previous work by Thomas Radke, to |
@@ -53,7 +53,6 @@ | |||
53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
55 | #include <asm/idle.h> | 55 | #include <asm/idle.h> |
56 | #include <asm/smp.h> | ||
57 | #include <asm/trampoline.h> | 56 | #include <asm/trampoline.h> |
58 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
59 | #include <asm/numa.h> | 58 | #include <asm/numa.h> |
@@ -62,11 +61,12 @@ | |||
62 | #include <asm/mtrr.h> | 61 | #include <asm/mtrr.h> |
63 | #include <asm/vmi.h> | 62 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 63 | #include <asm/genapic.h> |
64 | #include <asm/setup.h> | ||
65 | #include <asm/uv/uv.h> | ||
65 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
66 | 67 | ||
67 | #include <mach_apic.h> | 68 | #include <asm/genapic.h> |
68 | #include <mach_wakecpu.h> | 69 | #include <asm/smpboot_hooks.h> |
69 | #include <smpboot_hooks.h> | ||
70 | 70 | ||
71 | #ifdef CONFIG_X86_32 | 71 | #ifdef CONFIG_X86_32 |
72 | u8 apicid_2_node[MAX_APICID]; | 72 | u8 apicid_2_node[MAX_APICID]; |
@@ -101,15 +101,6 @@ EXPORT_SYMBOL(smp_num_siblings); | |||
101 | /* Last level cache ID of each logical CPU */ | 101 | /* Last level cache ID of each logical CPU */ |
102 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 102 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
103 | 103 | ||
104 | /* bitmap of online cpus */ | ||
105 | cpumask_t cpu_online_map __read_mostly; | ||
106 | EXPORT_SYMBOL(cpu_online_map); | ||
107 | |||
108 | cpumask_t cpu_callin_map; | ||
109 | cpumask_t cpu_callout_map; | ||
110 | cpumask_t cpu_possible_map; | ||
111 | EXPORT_SYMBOL(cpu_possible_map); | ||
112 | |||
113 | /* representing HT siblings of each logical CPU */ | 104 | /* representing HT siblings of each logical CPU */ |
114 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); | 105 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); |
115 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 106 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
@@ -125,9 +116,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
125 | static atomic_t init_deasserted; | 116 | static atomic_t init_deasserted; |
126 | 117 | ||
127 | 118 | ||
128 | /* representing cpus for which sibling maps can be computed */ | ||
129 | static cpumask_t cpu_sibling_setup_map; | ||
130 | |||
131 | /* Set if we find a B stepping CPU */ | 119 | /* Set if we find a B stepping CPU */ |
132 | static int __cpuinitdata smp_b_stepping; | 120 | static int __cpuinitdata smp_b_stepping; |
133 | 121 | ||
@@ -145,7 +133,7 @@ EXPORT_SYMBOL(cpu_to_node_map); | |||
145 | static void map_cpu_to_node(int cpu, int node) | 133 | static void map_cpu_to_node(int cpu, int node) |
146 | { | 134 | { |
147 | printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); | 135 | printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); |
148 | cpu_set(cpu, node_to_cpumask_map[node]); | 136 | cpumask_set_cpu(cpu, &node_to_cpumask_map[node]); |
149 | cpu_to_node_map[cpu] = node; | 137 | cpu_to_node_map[cpu] = node; |
150 | } | 138 | } |
151 | 139 | ||
@@ -156,7 +144,7 @@ static void unmap_cpu_to_node(int cpu) | |||
156 | 144 | ||
157 | printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); | 145 | printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); |
158 | for (node = 0; node < MAX_NUMNODES; node++) | 146 | for (node = 0; node < MAX_NUMNODES; node++) |
159 | cpu_clear(cpu, node_to_cpumask_map[node]); | 147 | cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]); |
160 | cpu_to_node_map[cpu] = 0; | 148 | cpu_to_node_map[cpu] = 0; |
161 | } | 149 | } |
162 | #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ | 150 | #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ |
@@ -174,7 +162,7 @@ static void map_cpu_to_logical_apicid(void) | |||
174 | { | 162 | { |
175 | int cpu = smp_processor_id(); | 163 | int cpu = smp_processor_id(); |
176 | int apicid = logical_smp_processor_id(); | 164 | int apicid = logical_smp_processor_id(); |
177 | int node = apicid_to_node(apicid); | 165 | int node = apic->apicid_to_node(apicid); |
178 | 166 | ||
179 | if (!node_online(node)) | 167 | if (!node_online(node)) |
180 | node = first_online_node; | 168 | node = first_online_node; |
@@ -207,14 +195,15 @@ static void __cpuinit smp_callin(void) | |||
207 | * our local APIC. We have to wait for the IPI or we'll | 195 | * our local APIC. We have to wait for the IPI or we'll |
208 | * lock up on an APIC access. | 196 | * lock up on an APIC access. |
209 | */ | 197 | */ |
210 | wait_for_init_deassert(&init_deasserted); | 198 | if (apic->wait_for_init_deassert) |
199 | apic->wait_for_init_deassert(&init_deasserted); | ||
211 | 200 | ||
212 | /* | 201 | /* |
213 | * (This works even if the APIC is not enabled.) | 202 | * (This works even if the APIC is not enabled.) |
214 | */ | 203 | */ |
215 | phys_id = read_apic_id(); | 204 | phys_id = read_apic_id(); |
216 | cpuid = smp_processor_id(); | 205 | cpuid = smp_processor_id(); |
217 | if (cpu_isset(cpuid, cpu_callin_map)) { | 206 | if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { |
218 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 207 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
219 | phys_id, cpuid); | 208 | phys_id, cpuid); |
220 | } | 209 | } |
@@ -236,7 +225,7 @@ static void __cpuinit smp_callin(void) | |||
236 | /* | 225 | /* |
237 | * Has the boot CPU finished its STARTUP sequence? | 226 | * Has the boot CPU finished its STARTUP sequence? |
238 | */ | 227 | */ |
239 | if (cpu_isset(cpuid, cpu_callout_map)) | 228 | if (cpumask_test_cpu(cpuid, cpu_callout_mask)) |
240 | break; | 229 | break; |
241 | cpu_relax(); | 230 | cpu_relax(); |
242 | } | 231 | } |
@@ -254,7 +243,8 @@ static void __cpuinit smp_callin(void) | |||
254 | */ | 243 | */ |
255 | 244 | ||
256 | pr_debug("CALLIN, before setup_local_APIC().\n"); | 245 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
257 | smp_callin_clear_local_apic(); | 246 | if (apic->smp_callin_clear_local_apic) |
247 | apic->smp_callin_clear_local_apic(); | ||
258 | setup_local_APIC(); | 248 | setup_local_APIC(); |
259 | end_local_APIC_setup(); | 249 | end_local_APIC_setup(); |
260 | map_cpu_to_logical_apicid(); | 250 | map_cpu_to_logical_apicid(); |
@@ -279,7 +269,7 @@ static void __cpuinit smp_callin(void) | |||
279 | /* | 269 | /* |
280 | * Allow the master to continue. | 270 | * Allow the master to continue. |
281 | */ | 271 | */ |
282 | cpu_set(cpuid, cpu_callin_map); | 272 | cpumask_set_cpu(cpuid, cpu_callin_mask); |
283 | } | 273 | } |
284 | 274 | ||
285 | static int __cpuinitdata unsafe_smp; | 275 | static int __cpuinitdata unsafe_smp; |
@@ -287,16 +277,14 @@ static int __cpuinitdata unsafe_smp; | |||
287 | /* | 277 | /* |
288 | * Activate a secondary processor. | 278 | * Activate a secondary processor. |
289 | */ | 279 | */ |
290 | static void __cpuinit start_secondary(void *unused) | 280 | notrace static void __cpuinit start_secondary(void *unused) |
291 | { | 281 | { |
292 | /* | 282 | /* |
293 | * Don't put *anything* before cpu_init(), SMP booting is too | 283 | * Don't put *anything* before cpu_init(), SMP booting is too |
294 | * fragile that we want to limit the things done here to the | 284 | * fragile that we want to limit the things done here to the |
295 | * most necessary things. | 285 | * most necessary things. |
296 | */ | 286 | */ |
297 | #ifdef CONFIG_VMI | ||
298 | vmi_bringup(); | 287 | vmi_bringup(); |
299 | #endif | ||
300 | cpu_init(); | 288 | cpu_init(); |
301 | preempt_disable(); | 289 | preempt_disable(); |
302 | smp_callin(); | 290 | smp_callin(); |
@@ -339,7 +327,7 @@ static void __cpuinit start_secondary(void *unused) | |||
339 | ipi_call_lock(); | 327 | ipi_call_lock(); |
340 | lock_vector_lock(); | 328 | lock_vector_lock(); |
341 | __setup_vector_irq(smp_processor_id()); | 329 | __setup_vector_irq(smp_processor_id()); |
342 | cpu_set(smp_processor_id(), cpu_online_map); | 330 | set_cpu_online(smp_processor_id(), true); |
343 | unlock_vector_lock(); | 331 | unlock_vector_lock(); |
344 | ipi_call_unlock(); | 332 | ipi_call_unlock(); |
345 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 333 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
@@ -445,50 +433,52 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
445 | int i; | 433 | int i; |
446 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 434 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
447 | 435 | ||
448 | cpu_set(cpu, cpu_sibling_setup_map); | 436 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); |
449 | 437 | ||
450 | if (smp_num_siblings > 1) { | 438 | if (smp_num_siblings > 1) { |
451 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { | 439 | for_each_cpu(i, cpu_sibling_setup_mask) { |
452 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && | 440 | struct cpuinfo_x86 *o = &cpu_data(i); |
453 | c->cpu_core_id == cpu_data(i).cpu_core_id) { | 441 | |
454 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); | 442 | if (c->phys_proc_id == o->phys_proc_id && |
455 | cpu_set(cpu, per_cpu(cpu_sibling_map, i)); | 443 | c->cpu_core_id == o->cpu_core_id) { |
456 | cpu_set(i, per_cpu(cpu_core_map, cpu)); | 444 | cpumask_set_cpu(i, cpu_sibling_mask(cpu)); |
457 | cpu_set(cpu, per_cpu(cpu_core_map, i)); | 445 | cpumask_set_cpu(cpu, cpu_sibling_mask(i)); |
458 | cpu_set(i, c->llc_shared_map); | 446 | cpumask_set_cpu(i, cpu_core_mask(cpu)); |
459 | cpu_set(cpu, cpu_data(i).llc_shared_map); | 447 | cpumask_set_cpu(cpu, cpu_core_mask(i)); |
448 | cpumask_set_cpu(i, &c->llc_shared_map); | ||
449 | cpumask_set_cpu(cpu, &o->llc_shared_map); | ||
460 | } | 450 | } |
461 | } | 451 | } |
462 | } else { | 452 | } else { |
463 | cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); | 453 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); |
464 | } | 454 | } |
465 | 455 | ||
466 | cpu_set(cpu, c->llc_shared_map); | 456 | cpumask_set_cpu(cpu, &c->llc_shared_map); |
467 | 457 | ||
468 | if (current_cpu_data.x86_max_cores == 1) { | 458 | if (current_cpu_data.x86_max_cores == 1) { |
469 | per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); | 459 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); |
470 | c->booted_cores = 1; | 460 | c->booted_cores = 1; |
471 | return; | 461 | return; |
472 | } | 462 | } |
473 | 463 | ||
474 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { | 464 | for_each_cpu(i, cpu_sibling_setup_mask) { |
475 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
476 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
477 | cpu_set(i, c->llc_shared_map); | 467 | cpumask_set_cpu(i, &c->llc_shared_map); |
478 | cpu_set(cpu, cpu_data(i).llc_shared_map); | 468 | cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); |
479 | } | 469 | } |
480 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { | 470 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { |
481 | cpu_set(i, per_cpu(cpu_core_map, cpu)); | 471 | cpumask_set_cpu(i, cpu_core_mask(cpu)); |
482 | cpu_set(cpu, per_cpu(cpu_core_map, i)); | 472 | cpumask_set_cpu(cpu, cpu_core_mask(i)); |
483 | /* | 473 | /* |
484 | * Does this new cpu bringup a new core? | 474 | * Does this new cpu bringup a new core? |
485 | */ | 475 | */ |
486 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { | 476 | if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) { |
487 | /* | 477 | /* |
488 | * for each core in package, increment | 478 | * for each core in package, increment |
489 | * the booted_cores for this new cpu | 479 | * the booted_cores for this new cpu |
490 | */ | 480 | */ |
491 | if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) | 481 | if (cpumask_first(cpu_sibling_mask(i)) == i) |
492 | c->booted_cores++; | 482 | c->booted_cores++; |
493 | /* | 483 | /* |
494 | * increment the core count for all | 484 | * increment the core count for all |
@@ -503,7 +493,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
503 | } | 493 | } |
504 | 494 | ||
505 | /* maps the cpu to the sched domain representing multi-core */ | 495 | /* maps the cpu to the sched domain representing multi-core */ |
506 | cpumask_t cpu_coregroup_map(int cpu) | 496 | const struct cpumask *cpu_coregroup_mask(int cpu) |
507 | { | 497 | { |
508 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 498 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
509 | /* | 499 | /* |
@@ -511,9 +501,14 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
511 | * And for power savings, we return cpu_core_map | 501 | * And for power savings, we return cpu_core_map |
512 | */ | 502 | */ |
513 | if (sched_mc_power_savings || sched_smt_power_savings) | 503 | if (sched_mc_power_savings || sched_smt_power_savings) |
514 | return per_cpu(cpu_core_map, cpu); | 504 | return cpu_core_mask(cpu); |
515 | else | 505 | else |
516 | return c->llc_shared_map; | 506 | return &c->llc_shared_map; |
507 | } | ||
508 | |||
509 | cpumask_t cpu_coregroup_map(int cpu) | ||
510 | { | ||
511 | return *cpu_coregroup_mask(cpu); | ||
517 | } | 512 | } |
518 | 513 | ||
519 | static void impress_friends(void) | 514 | static void impress_friends(void) |
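The hunks through this file follow one mechanical pattern: the value-based cpumask_t helpers (cpu_set(), cpu_clear(), cpu_isset(), cpus_weight(), first_cpu()) become the pointer-based struct cpumask API, so per-cpu masks can be handed around without copying NR_CPUS-sized objects. A minimal sketch of the correspondence, using a hypothetical mask rather than one from the patch:

        #include <linux/kernel.h>
        #include <linux/cpumask.h>

        static struct cpumask demo_mask;                /* hypothetical, for illustration */

        static void demo_mask_ops(int cpu)
        {
                cpumask_set_cpu(cpu, &demo_mask);       /* was: cpu_set(cpu, demo_mask) */
                if (cpumask_test_cpu(cpu, &demo_mask))  /* was: cpu_isset(cpu, demo_mask) */
                        pr_debug("weight %u, first %u\n",
                                 cpumask_weight(&demo_mask),   /* was: cpus_weight() */
                                 cpumask_first(&demo_mask));   /* was: first_cpu() */
                cpumask_clear_cpu(cpu, &demo_mask);     /* was: cpu_clear(cpu, demo_mask) */
        }

This is what lets cpu_sibling_mask(cpu) and cpu_core_mask(cpu) return pointers to the per-cpu masks in the hunks above instead of copying whole cpumask_t values.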
@@ -525,7 +520,7 @@ static void impress_friends(void) | |||
525 | */ | 520 | */ |
526 | pr_debug("Before bogomips.\n"); | 521 | pr_debug("Before bogomips.\n"); |
527 | for_each_possible_cpu(cpu) | 522 | for_each_possible_cpu(cpu) |
528 | if (cpu_isset(cpu, cpu_callout_map)) | 523 | if (cpumask_test_cpu(cpu, cpu_callout_mask)) |
529 | bogosum += cpu_data(cpu).loops_per_jiffy; | 524 | bogosum += cpu_data(cpu).loops_per_jiffy; |
530 | printk(KERN_INFO | 525 | printk(KERN_INFO |
531 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", | 526 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", |
@@ -536,7 +531,7 @@ static void impress_friends(void) | |||
536 | pr_debug("Before bogocount - setting activated=1.\n"); | 531 | pr_debug("Before bogocount - setting activated=1.\n"); |
537 | } | 532 | } |
538 | 533 | ||
539 | static inline void __inquire_remote_apic(int apicid) | 534 | void __inquire_remote_apic(int apicid) |
540 | { | 535 | { |
541 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 536 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
542 | char *names[] = { "ID", "VERSION", "SPIV" }; | 537 | char *names[] = { "ID", "VERSION", "SPIV" }; |
@@ -575,14 +570,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
575 | } | 570 | } |
576 | } | 571 | } |
577 | 572 | ||
578 | #ifdef WAKE_SECONDARY_VIA_NMI | ||
579 | /* | 573 | /* |
580 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal | 574 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal |
581 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this | 575 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this |
582 | * won't ... remember to clear down the APIC, etc., later. | 576 | * won't ... remember to clear down the APIC, etc., later. |
583 | */ | 577 | */ |
584 | static int __devinit | 578 | int __devinit |
585 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 579 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) |
586 | { | 580 | { |
587 | unsigned long send_status, accept_status = 0; | 581 | unsigned long send_status, accept_status = 0; |
588 | int maxlvt; | 582 | int maxlvt; |
@@ -590,7 +584,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
590 | /* Target chip */ | 584 | /* Target chip */ |
591 | /* Boot on the stack */ | 585 | /* Boot on the stack */ |
592 | /* Kick the second */ | 586 | /* Kick the second */ |
593 | apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); | 587 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); |
594 | 588 | ||
595 | pr_debug("Waiting for send to finish...\n"); | 589 | pr_debug("Waiting for send to finish...\n"); |
596 | send_status = safe_apic_wait_icr_idle(); | 590 | send_status = safe_apic_wait_icr_idle(); |
@@ -599,7 +593,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
599 | * Give the other CPU some time to accept the IPI. | 593 | * Give the other CPU some time to accept the IPI. |
600 | */ | 594 | */ |
601 | udelay(200); | 595 | udelay(200); |
602 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 596 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
603 | maxlvt = lapic_get_maxlvt(); | 597 | maxlvt = lapic_get_maxlvt(); |
604 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 598 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
605 | apic_write(APIC_ESR, 0); | 599 | apic_write(APIC_ESR, 0); |
@@ -614,11 +608,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
614 | 608 | ||
615 | return (send_status | accept_status); | 609 | return (send_status | accept_status); |
616 | } | 610 | } |
617 | #endif /* WAKE_SECONDARY_VIA_NMI */ | ||
618 | 611 | ||
619 | #ifdef WAKE_SECONDARY_VIA_INIT | 612 | int __devinit |
620 | static int __devinit | 613 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) |
621 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
622 | { | 614 | { |
623 | unsigned long send_status, accept_status = 0; | 615 | unsigned long send_status, accept_status = 0; |
624 | int maxlvt, num_starts, j; | 616 | int maxlvt, num_starts, j; |
@@ -737,7 +729,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
737 | 729 | ||
738 | return (send_status | accept_status); | 730 | return (send_status | accept_status); |
739 | } | 731 | } |
740 | #endif /* WAKE_SECONDARY_VIA_INIT */ | ||
741 | 732 | ||
742 | struct create_idle { | 733 | struct create_idle { |
743 | struct work_struct work; | 734 | struct work_struct work; |
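With the WAKE_SECONDARY_VIA_NMI/WAKE_SECONDARY_VIA_INIT #ifdef selection gone, both wakeup routines are always built and do_boot_cpu() picks one at runtime through the genapic operations table. A sketch of that indirection, with field names taken from this patch (the real struct genapic carries many more callbacks; see the apic_summit initializer later in this diff):

        struct genapic {
                int     (*wakeup_cpu)(int apicid, unsigned long start_eip);
                void    (*inquire_remote_apic)(int apicid);
                void    (*store_NMI_vector)(unsigned short *high, unsigned short *low);
        };

        extern struct genapic *apic;

        static unsigned long demo_wakeup(int apicid, unsigned long start_ip)
        {
                unsigned short nmi_high, nmi_low;

                /* optional hooks may be NULL, hence the guards the patch adds */
                if (apic->store_NMI_vector)
                        apic->store_NMI_vector(&nmi_high, &nmi_low);

                return apic->wakeup_cpu(apicid, start_ip);
        }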
@@ -755,57 +746,11 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
755 | complete(&c_idle->done); | 746 | complete(&c_idle->done); |
756 | } | 747 | } |
757 | 748 | ||
758 | #ifdef CONFIG_X86_64 | ||
759 | |||
760 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
761 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
762 | { | ||
763 | if (!after_bootmem) | ||
764 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
765 | } | ||
766 | |||
767 | /* | ||
768 | * Allocate node local memory for the AP pda. | ||
769 | * | ||
770 | * Must be called after the _cpu_pda pointer table is initialized. | ||
771 | */ | ||
772 | int __cpuinit get_local_pda(int cpu) | ||
773 | { | ||
774 | struct x8664_pda *oldpda, *newpda; | ||
775 | unsigned long size = sizeof(struct x8664_pda); | ||
776 | int node = cpu_to_node(cpu); | ||
777 | |||
778 | if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) | ||
779 | return 0; | ||
780 | |||
781 | oldpda = cpu_pda(cpu); | ||
782 | newpda = kmalloc_node(size, GFP_ATOMIC, node); | ||
783 | if (!newpda) { | ||
784 | printk(KERN_ERR "Could not allocate node local PDA " | ||
785 | "for CPU %d on node %d\n", cpu, node); | ||
786 | |||
787 | if (oldpda) | ||
788 | return 0; /* have a usable pda */ | ||
789 | else | ||
790 | return -1; | ||
791 | } | ||
792 | |||
793 | if (oldpda) { | ||
794 | memcpy(newpda, oldpda, size); | ||
795 | free_bootmem_pda(oldpda); | ||
796 | } | ||
797 | |||
798 | newpda->in_bootmem = 0; | ||
799 | cpu_pda(cpu) = newpda; | ||
800 | return 0; | ||
801 | } | ||
802 | #endif /* CONFIG_X86_64 */ | ||
803 | |||
804 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | 749 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
805 | /* | 750 | /* |
806 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 751 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
807 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 752 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
808 | * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. | 753 | * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. |
809 | */ | 754 | */ |
810 | { | 755 | { |
811 | unsigned long boot_error = 0; | 756 | unsigned long boot_error = 0; |
@@ -818,16 +763,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
818 | }; | 763 | }; |
819 | INIT_WORK(&c_idle.work, do_fork_idle); | 764 | INIT_WORK(&c_idle.work, do_fork_idle); |
820 | 765 | ||
821 | #ifdef CONFIG_X86_64 | ||
822 | /* Allocate node local memory for AP pdas */ | ||
823 | if (cpu > 0) { | ||
824 | boot_error = get_local_pda(cpu); | ||
825 | if (boot_error) | ||
826 | goto restore_state; | ||
827 | /* if can't get pda memory, can't start cpu */ | ||
828 | } | ||
829 | #endif | ||
830 | |||
831 | alternatives_smp_switch(1); | 766 | alternatives_smp_switch(1); |
832 | 767 | ||
833 | c_idle.idle = get_idle_for_cpu(cpu); | 768 | c_idle.idle = get_idle_for_cpu(cpu); |
@@ -857,14 +792,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
857 | 792 | ||
858 | set_idle_for_cpu(cpu, c_idle.idle); | 793 | set_idle_for_cpu(cpu, c_idle.idle); |
859 | do_rest: | 794 | do_rest: |
860 | #ifdef CONFIG_X86_32 | ||
861 | per_cpu(current_task, cpu) = c_idle.idle; | 795 | per_cpu(current_task, cpu) = c_idle.idle; |
862 | init_gdt(cpu); | 796 | #ifdef CONFIG_X86_32 |
863 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 797 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
864 | irq_ctx_init(cpu); | 798 | irq_ctx_init(cpu); |
865 | #else | 799 | #else |
866 | cpu_pda(cpu)->pcurrent = c_idle.idle; | ||
867 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 800 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
801 | initial_gs = per_cpu_offset(cpu); | ||
802 | per_cpu(kernel_stack, cpu) = | ||
803 | (unsigned long)task_stack_page(c_idle.idle) - | ||
804 | KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
868 | #endif | 805 | #endif |
869 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 806 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
870 | initial_code = (unsigned long)start_secondary; | 807 | initial_code = (unsigned long)start_secondary; |
@@ -888,7 +825,8 @@ do_rest: | |||
888 | 825 | ||
889 | pr_debug("Setting warm reset code and vector.\n"); | 826 | pr_debug("Setting warm reset code and vector.\n"); |
890 | 827 | ||
891 | store_NMI_vector(&nmi_high, &nmi_low); | 828 | if (apic->store_NMI_vector) |
829 | apic->store_NMI_vector(&nmi_high, &nmi_low); | ||
892 | 830 | ||
893 | smpboot_setup_warm_reset_vector(start_ip); | 831 | smpboot_setup_warm_reset_vector(start_ip); |
894 | /* | 832 | /* |
@@ -903,26 +841,26 @@ do_rest: | |||
903 | /* | 841 | /* |
904 | * Starting actual IPI sequence... | 842 | * Starting actual IPI sequence... |
905 | */ | 843 | */ |
906 | boot_error = wakeup_secondary_cpu(apicid, start_ip); | 844 | boot_error = apic->wakeup_cpu(apicid, start_ip); |
907 | 845 | ||
908 | if (!boot_error) { | 846 | if (!boot_error) { |
909 | /* | 847 | /* |
910 | * allow APs to start initializing. | 848 | * allow APs to start initializing. |
911 | */ | 849 | */ |
912 | pr_debug("Before Callout %d.\n", cpu); | 850 | pr_debug("Before Callout %d.\n", cpu); |
913 | cpu_set(cpu, cpu_callout_map); | 851 | cpumask_set_cpu(cpu, cpu_callout_mask); |
914 | pr_debug("After Callout %d.\n", cpu); | 852 | pr_debug("After Callout %d.\n", cpu); |
915 | 853 | ||
916 | /* | 854 | /* |
917 | * Wait 5s total for a response | 855 | * Wait 5s total for a response |
918 | */ | 856 | */ |
919 | for (timeout = 0; timeout < 50000; timeout++) { | 857 | for (timeout = 0; timeout < 50000; timeout++) { |
920 | if (cpu_isset(cpu, cpu_callin_map)) | 858 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
921 | break; /* It has booted */ | 859 | break; /* It has booted */ |
922 | udelay(100); | 860 | udelay(100); |
923 | } | 861 | } |
924 | 862 | ||
925 | if (cpu_isset(cpu, cpu_callin_map)) { | 863 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) { |
926 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 864 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
927 | pr_debug("OK.\n"); | 865 | pr_debug("OK.\n"); |
928 | printk(KERN_INFO "CPU%d: ", cpu); | 866 | printk(KERN_INFO "CPU%d: ", cpu); |
@@ -937,19 +875,22 @@ do_rest: | |||
937 | else | 875 | else |
938 | /* trampoline code not run */ | 876 | /* trampoline code not run */ |
939 | printk(KERN_ERR "Not responding.\n"); | 877 | printk(KERN_ERR "Not responding.\n"); |
940 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) | 878 | if (apic->inquire_remote_apic) |
941 | inquire_remote_apic(apicid); | 879 | apic->inquire_remote_apic(apicid); |
942 | } | 880 | } |
943 | } | 881 | } |
944 | #ifdef CONFIG_X86_64 | 882 | |
945 | restore_state: | ||
946 | #endif | ||
947 | if (boot_error) { | 883 | if (boot_error) { |
948 | /* Try to put things back the way they were before ... */ | 884 | /* Try to put things back the way they were before ... */ |
949 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | 885 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ |
950 | cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ | 886 | |
951 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ | 887 | /* was set by do_boot_cpu() */ |
952 | cpu_clear(cpu, cpu_present_map); | 888 | cpumask_clear_cpu(cpu, cpu_callout_mask); |
889 | |||
890 | /* was set by cpu_init() */ | ||
891 | cpumask_clear_cpu(cpu, cpu_initialized_mask); | ||
892 | |||
893 | set_cpu_present(cpu, false); | ||
953 | per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; | 894 | per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; |
954 | } | 895 | } |
955 | 896 | ||
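The failure path stops writing cpu_present_map and friends directly: the authoritative maps are now mutated through setters, with cpumask_clear_cpu() reserved for the masks this file owns. A sketch of the teardown pattern the patch converges on (hypothetical helper, mask names as in the hunk above):

        static void demo_boot_failed(int cpu)
        {
                set_cpu_present(cpu, false);    /* was: cpu_clear(cpu, cpu_present_map) */
                cpumask_clear_cpu(cpu, cpu_callout_mask);
                cpumask_clear_cpu(cpu, cpu_initialized_mask);
                per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
        }

Funneling updates through set_cpu_present()/set_cpu_online()/set_cpu_possible() confines writers of the global maps to a few well-known places.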
@@ -966,7 +907,7 @@ restore_state: | |||
966 | 907 | ||
967 | int __cpuinit native_cpu_up(unsigned int cpu) | 908 | int __cpuinit native_cpu_up(unsigned int cpu) |
968 | { | 909 | { |
969 | int apicid = cpu_present_to_apicid(cpu); | 910 | int apicid = apic->cpu_present_to_apicid(cpu); |
970 | unsigned long flags; | 911 | unsigned long flags; |
971 | int err; | 912 | int err; |
972 | 913 | ||
@@ -983,7 +924,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
983 | /* | 924 | /* |
984 | * Already booted CPU? | 925 | * Already booted CPU? |
985 | */ | 926 | */ |
986 | if (cpu_isset(cpu, cpu_callin_map)) { | 927 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) { |
987 | pr_debug("do_boot_cpu %d Already started\n", cpu); | 928 | pr_debug("do_boot_cpu %d Already started\n", cpu); |
988 | return -ENOSYS; | 929 | return -ENOSYS; |
989 | } | 930 | } |
@@ -1038,8 +979,9 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
1038 | */ | 979 | */ |
1039 | static __init void disable_smp(void) | 980 | static __init void disable_smp(void) |
1040 | { | 981 | { |
1041 | cpu_present_map = cpumask_of_cpu(0); | 982 | /* use the read/write pointers to the present and possible maps */ |
1042 | cpu_possible_map = cpumask_of_cpu(0); | 983 | cpumask_copy(&cpu_present_map, cpumask_of(0)); |
984 | cpumask_copy(&cpu_possible_map, cpumask_of(0)); | ||
1043 | smpboot_clear_io_apic_irqs(); | 985 | smpboot_clear_io_apic_irqs(); |
1044 | 986 | ||
1045 | if (smp_found_config) | 987 | if (smp_found_config) |
@@ -1047,8 +989,8 @@ static __init void disable_smp(void) | |||
1047 | else | 989 | else |
1048 | physid_set_mask_of_physid(0, &phys_cpu_present_map); | 990 | physid_set_mask_of_physid(0, &phys_cpu_present_map); |
1049 | map_cpu_to_logical_apicid(); | 991 | map_cpu_to_logical_apicid(); |
1050 | cpu_set(0, per_cpu(cpu_sibling_map, 0)); | 992 | cpumask_set_cpu(0, cpu_sibling_mask(0)); |
1051 | cpu_set(0, per_cpu(cpu_core_map, 0)); | 993 | cpumask_set_cpu(0, cpu_core_mask(0)); |
1052 | } | 994 | } |
1053 | 995 | ||
1054 | /* | 996 | /* |
@@ -1058,26 +1000,26 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1058 | { | 1000 | { |
1059 | preempt_disable(); | 1001 | preempt_disable(); |
1060 | 1002 | ||
1061 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | 1003 | #if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) |
1062 | if (def_to_bigsmp && nr_cpu_ids > 8) { | 1004 | if (def_to_bigsmp && nr_cpu_ids > 8) { |
1063 | unsigned int cpu; | 1005 | unsigned int cpu; |
1064 | unsigned nr; | 1006 | unsigned nr; |
1065 | 1007 | ||
1066 | printk(KERN_WARNING | 1008 | printk(KERN_WARNING |
1067 | "More than 8 CPUs detected - skipping them.\n" | 1009 | "More than 8 CPUs detected - skipping them.\n" |
1068 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | 1010 | "Use CONFIG_X86_BIGSMP.\n"); |
1069 | 1011 | ||
1070 | nr = 0; | 1012 | nr = 0; |
1071 | for_each_present_cpu(cpu) { | 1013 | for_each_present_cpu(cpu) { |
1072 | if (nr >= 8) | 1014 | if (nr >= 8) |
1073 | cpu_clear(cpu, cpu_present_map); | 1015 | set_cpu_present(cpu, false); |
1074 | nr++; | 1016 | nr++; |
1075 | } | 1017 | } |
1076 | 1018 | ||
1077 | nr = 0; | 1019 | nr = 0; |
1078 | for_each_possible_cpu(cpu) { | 1020 | for_each_possible_cpu(cpu) { |
1079 | if (nr >= 8) | 1021 | if (nr >= 8) |
1080 | cpu_clear(cpu, cpu_possible_map); | 1022 | set_cpu_possible(cpu, false); |
1081 | nr++; | 1023 | nr++; |
1082 | } | 1024 | } |
1083 | 1025 | ||
@@ -1086,8 +1028,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1086 | #endif | 1028 | #endif |
1087 | 1029 | ||
1088 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1030 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
1089 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1031 | printk(KERN_WARNING |
1090 | "by the BIOS.\n", hard_smp_processor_id()); | 1032 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1033 | hard_smp_processor_id()); | ||
1034 | |||
1091 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 1035 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
1092 | } | 1036 | } |
1093 | 1037 | ||
@@ -1109,7 +1053,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1109 | * Should not be necessary because the MP table should list the boot | 1053 | * Should not be necessary because the MP table should list the boot |
1110 | * CPU too, but we do it for the sake of robustness anyway. | 1054 | * CPU too, but we do it for the sake of robustness anyway. |
1111 | */ | 1055 | */ |
1112 | if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { | 1056 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { |
1113 | printk(KERN_NOTICE | 1057 | printk(KERN_NOTICE |
1114 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | 1058 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1115 | boot_cpu_physical_apicid); | 1059 | boot_cpu_physical_apicid); |
@@ -1127,6 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1127 | printk(KERN_ERR "... forcing use of dummy APIC emulation." | 1071 | printk(KERN_ERR "... forcing use of dummy APIC emulation." |
1128 | "(tell your hw vendor)\n"); | 1072 | "(tell your hw vendor)\n"); |
1129 | smpboot_clear_io_apic(); | 1073 | smpboot_clear_io_apic(); |
1074 | arch_disable_smp_support(); | ||
1130 | return -1; | 1075 | return -1; |
1131 | } | 1076 | } |
1132 | 1077 | ||
@@ -1158,7 +1103,7 @@ static void __init smp_cpu_index_default(void) | |||
1158 | for_each_possible_cpu(i) { | 1103 | for_each_possible_cpu(i) { |
1159 | c = &cpu_data(i); | 1104 | c = &cpu_data(i); |
1160 | /* mark all to hotplug */ | 1105 | /* mark all to hotplug */ |
1161 | c->cpu_index = NR_CPUS; | 1106 | c->cpu_index = nr_cpu_ids; |
1162 | } | 1107 | } |
1163 | } | 1108 | } |
1164 | 1109 | ||
@@ -1171,7 +1116,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1171 | preempt_disable(); | 1116 | preempt_disable(); |
1172 | smp_cpu_index_default(); | 1117 | smp_cpu_index_default(); |
1173 | current_cpu_data = boot_cpu_data; | 1118 | current_cpu_data = boot_cpu_data; |
1174 | cpu_callin_map = cpumask_of_cpu(0); | 1119 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); |
1175 | mb(); | 1120 | mb(); |
1176 | /* | 1121 | /* |
1177 | * Setup boot CPU information | 1122 | * Setup boot CPU information |
@@ -1185,7 +1130,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1185 | 1130 | ||
1186 | #ifdef CONFIG_X86_64 | 1131 | #ifdef CONFIG_X86_64 |
1187 | enable_IR_x2apic(); | 1132 | enable_IR_x2apic(); |
1188 | setup_apic_routing(); | 1133 | default_setup_apic_routing(); |
1189 | #endif | 1134 | #endif |
1190 | 1135 | ||
1191 | if (smp_sanity_check(max_cpus) < 0) { | 1136 | if (smp_sanity_check(max_cpus) < 0) { |
@@ -1220,7 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1220 | 1165 | ||
1221 | map_cpu_to_logical_apicid(); | 1166 | map_cpu_to_logical_apicid(); |
1222 | 1167 | ||
1223 | setup_portio_remap(); | 1168 | if (apic->setup_portio_remap) |
1169 | apic->setup_portio_remap(); | ||
1224 | 1170 | ||
1225 | smpboot_setup_io_apic(); | 1171 | smpboot_setup_io_apic(); |
1226 | /* | 1172 | /* |
@@ -1242,12 +1188,9 @@ out: | |||
1242 | void __init native_smp_prepare_boot_cpu(void) | 1188 | void __init native_smp_prepare_boot_cpu(void) |
1243 | { | 1189 | { |
1244 | int me = smp_processor_id(); | 1190 | int me = smp_processor_id(); |
1245 | #ifdef CONFIG_X86_32 | 1191 | switch_to_new_gdt(me); |
1246 | init_gdt(me); | 1192 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
1247 | #endif | 1193 | cpumask_set_cpu(me, cpu_callout_mask); |
1248 | switch_to_new_gdt(); | ||
1249 | /* already set me in cpu_online_map in boot_cpu_init() */ | ||
1250 | cpu_set(me, cpu_callout_map); | ||
1251 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1194 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1252 | } | 1195 | } |
1253 | 1196 | ||
@@ -1263,6 +1206,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1263 | check_nmi_watchdog(); | 1206 | check_nmi_watchdog(); |
1264 | } | 1207 | } |
1265 | 1208 | ||
1209 | static int __initdata setup_possible_cpus = -1; | ||
1210 | static int __init _setup_possible_cpus(char *str) | ||
1211 | { | ||
1212 | get_option(&str, &setup_possible_cpus); | ||
1213 | return 0; | ||
1214 | } | ||
1215 | early_param("possible_cpus", _setup_possible_cpus); | ||
1216 | |||
1217 | |||
1266 | /* | 1218 | /* |
1267 | * cpu_possible_map should be static, it cannot change as CPUs | 1219 | * cpu_possible_map should be static, it cannot change as CPUs |
1268 | * are onlined or offlined. The reason is per-cpu data-structures | 1220 | * are onlined or offlined. The reason is per-cpu data-structures |
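The new possible_cpus= early parameter pins the size of the possible map at boot instead of trusting the BIOS count. A hypothetical kernel command line reserving room for four CPUs in total, including not-yet-present hotplug ones:

        linux /vmlinuz root=/dev/sda1 possible_cpus=4

get_option() parses the integer into setup_possible_cpus, and prefill_possible_map() below prefers it over the num_processors + disabled_cpus estimate.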
@@ -1275,7 +1227,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1275 | * | 1227 | * |
1276 | * Three ways to find out the number of additional hotplug CPUs: | 1228 | * Three ways to find out the number of additional hotplug CPUs: |
1277 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | 1229 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. |
1278 | * - The user can overwrite it with additional_cpus=NUM | 1230 | * - The user can overwrite it with possible_cpus=NUM |
1279 | * - Otherwise don't reserve additional CPUs. | 1231 | * - Otherwise don't reserve additional CPUs. |
1280 | * We do this because additional CPUs waste a lot of memory. | 1232 | * We do this because additional CPUs waste a lot of memory. |
1281 | * -AK | 1233 | * -AK |
@@ -1288,15 +1240,25 @@ __init void prefill_possible_map(void) | |||
1288 | if (!num_processors) | 1240 | if (!num_processors) |
1289 | num_processors = 1; | 1241 | num_processors = 1; |
1290 | 1242 | ||
1291 | possible = num_processors + disabled_cpus; | 1243 | if (setup_possible_cpus == -1) |
1292 | if (possible > NR_CPUS) | 1244 | possible = num_processors + disabled_cpus; |
1293 | possible = NR_CPUS; | 1245 | else |
1246 | possible = setup_possible_cpus; | ||
1247 | |||
1248 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | ||
1249 | |||
1250 | if (possible > CONFIG_NR_CPUS) { | ||
1251 | printk(KERN_WARNING | ||
1252 | "%d Processors exceeds NR_CPUS limit of %d\n", | ||
1253 | possible, CONFIG_NR_CPUS); | ||
1254 | possible = CONFIG_NR_CPUS; | ||
1255 | } | ||
1294 | 1256 | ||
1295 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1257 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
1296 | possible, max_t(int, possible - num_processors, 0)); | 1258 | possible, max_t(int, possible - num_processors, 0)); |
1297 | 1259 | ||
1298 | for (i = 0; i < possible; i++) | 1260 | for (i = 0; i < possible; i++) |
1299 | cpu_set(i, cpu_possible_map); | 1261 | set_cpu_possible(i, true); |
1300 | 1262 | ||
1301 | nr_cpu_ids = possible; | 1263 | nr_cpu_ids = possible; |
1302 | } | 1264 | } |
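A worked example of the new sizing logic: with num_processors = 2 and disabled_cpus = 2 from the BIOS and setup_possible_cpus left at -1, possible = 4 and total_cpus = 4. Booting the same box with possible_cpus=8 gives possible = 8 and total_cpus = max(8, 4) = 8; if the kernel were built with CONFIG_NR_CPUS=4, the new warning fires and possible is clamped back to 4.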
@@ -1308,31 +1270,31 @@ static void remove_siblinginfo(int cpu) | |||
1308 | int sibling; | 1270 | int sibling; |
1309 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 1271 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
1310 | 1272 | ||
1311 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { | 1273 | for_each_cpu(sibling, cpu_core_mask(cpu)) { |
1312 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | 1274 | cpumask_clear_cpu(cpu, cpu_core_mask(sibling)); |
1313 | /* | 1275 | /* |
1314 | * last thread sibling in this cpu core going down | 1276 | * last thread sibling in this cpu core going down |
1315 | */ | 1277 | */ |
1316 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) | 1278 | if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) |
1317 | cpu_data(sibling).booted_cores--; | 1279 | cpu_data(sibling).booted_cores--; |
1318 | } | 1280 | } |
1319 | 1281 | ||
1320 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) | 1282 | for_each_cpu(sibling, cpu_sibling_mask(cpu)) |
1321 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | 1283 | cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); |
1322 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | 1284 | cpumask_clear(cpu_sibling_mask(cpu)); |
1323 | cpus_clear(per_cpu(cpu_core_map, cpu)); | 1285 | cpumask_clear(cpu_core_mask(cpu)); |
1324 | c->phys_proc_id = 0; | 1286 | c->phys_proc_id = 0; |
1325 | c->cpu_core_id = 0; | 1287 | c->cpu_core_id = 0; |
1326 | cpu_clear(cpu, cpu_sibling_setup_map); | 1288 | cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); |
1327 | } | 1289 | } |
1328 | 1290 | ||
1329 | static void __ref remove_cpu_from_maps(int cpu) | 1291 | static void __ref remove_cpu_from_maps(int cpu) |
1330 | { | 1292 | { |
1331 | cpu_clear(cpu, cpu_online_map); | 1293 | set_cpu_online(cpu, false); |
1332 | cpu_clear(cpu, cpu_callout_map); | 1294 | cpumask_clear_cpu(cpu, cpu_callout_mask); |
1333 | cpu_clear(cpu, cpu_callin_map); | 1295 | cpumask_clear_cpu(cpu, cpu_callin_mask); |
1334 | /* was set by cpu_init() */ | 1296 | /* was set by cpu_init() */ |
1335 | cpu_clear(cpu, cpu_initialized); | 1297 | cpumask_clear_cpu(cpu, cpu_initialized_mask); |
1336 | numa_remove_cpu(cpu); | 1298 | numa_remove_cpu(cpu); |
1337 | } | 1299 | } |
1338 | 1300 | ||
@@ -1355,7 +1317,7 @@ void cpu_disable_common(void) | |||
1355 | lock_vector_lock(); | 1317 | lock_vector_lock(); |
1356 | remove_cpu_from_maps(cpu); | 1318 | remove_cpu_from_maps(cpu); |
1357 | unlock_vector_lock(); | 1319 | unlock_vector_lock(); |
1358 | fixup_irqs(cpu_online_map); | 1320 | fixup_irqs(); |
1359 | } | 1321 | } |
1360 | 1322 | ||
1361 | int native_cpu_disable(void) | 1323 | int native_cpu_disable(void) |
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 397e309839dd..000000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | /* | ||
2 | * SMP stuff which is common to all sub-architectures. | ||
3 | */ | ||
4 | #include <linux/module.h> | ||
5 | #include <asm/smp.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_32 | ||
8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | ||
9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | ||
10 | |||
11 | /* | ||
12 | * Initialize the CPU's GDT. This is either the boot CPU doing itself | ||
13 | * (still using the master per-cpu area), or a CPU doing it for a | ||
14 | * secondary which will soon come up. | ||
15 | */ | ||
16 | __cpuinit void init_gdt(int cpu) | ||
17 | { | ||
18 | struct desc_struct gdt; | ||
19 | |||
20 | pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, | ||
21 | 0x2 | DESCTYPE_S, 0x8); | ||
22 | gdt.s = 1; | ||
23 | |||
24 | write_gdt_entry(get_cpu_gdt_table(cpu), | ||
25 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
26 | |||
27 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | ||
28 | per_cpu(cpu_number, cpu) = cpu; | ||
29 | } | ||
30 | #endif | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..f7bddc2e37d1 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -1,11 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * Stack trace management functions | 2 | * Stack trace management functions |
3 | * | 3 | * |
4 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 4 | * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/stacktrace.h> | 7 | #include <linux/stacktrace.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/uaccess.h> | ||
9 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
10 | 11 | ||
11 | static void save_stack_warning(void *data, char *msg) | 12 | static void save_stack_warning(void *data, char *msg) |
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | |||
83 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 84 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
84 | } | 85 | } |
85 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
87 | |||
88 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | ||
89 | |||
90 | struct stack_frame { | ||
91 | const void __user *next_fp; | ||
92 | unsigned long ret_addr; | ||
93 | }; | ||
94 | |||
95 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
96 | { | ||
97 | int ret; | ||
98 | |||
99 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
100 | return 0; | ||
101 | |||
102 | ret = 1; | ||
103 | pagefault_disable(); | ||
104 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
105 | ret = 0; | ||
106 | pagefault_enable(); | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | static inline void __save_stack_trace_user(struct stack_trace *trace) | ||
112 | { | ||
113 | const struct pt_regs *regs = task_pt_regs(current); | ||
114 | const void __user *fp = (const void __user *)regs->bp; | ||
115 | |||
116 | if (trace->nr_entries < trace->max_entries) | ||
117 | trace->entries[trace->nr_entries++] = regs->ip; | ||
118 | |||
119 | while (trace->nr_entries < trace->max_entries) { | ||
120 | struct stack_frame frame; | ||
121 | |||
122 | frame.next_fp = NULL; | ||
123 | frame.ret_addr = 0; | ||
124 | if (!copy_stack_frame(fp, &frame)) | ||
125 | break; | ||
126 | if ((unsigned long)fp < regs->sp) | ||
127 | break; | ||
128 | if (frame.ret_addr) { | ||
129 | trace->entries[trace->nr_entries++] = | ||
130 | frame.ret_addr; | ||
131 | } | ||
132 | if (fp == frame.next_fp) | ||
133 | break; | ||
134 | fp = frame.next_fp; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | void save_stack_trace_user(struct stack_trace *trace) | ||
139 | { | ||
140 | /* | ||
141 | * Trace user stack if we are not a kernel thread | ||
142 | */ | ||
143 | if (current->mm) { | ||
144 | __save_stack_trace_user(trace); | ||
145 | } | ||
146 | if (trace->nr_entries < trace->max_entries) | ||
147 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
148 | } | ||
149 | |||
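The new user-space walker chases saved frame pointers starting at regs->bp, using pagefault-safe copies so it can run from interrupt context. A minimal sketch of a caller, assuming an on-stack entry buffer (helper name hypothetical):

        #include <linux/stacktrace.h>

        #define DEMO_DEPTH 32

        static void demo_user_trace(void)
        {
                unsigned long entries[DEMO_DEPTH];
                struct stack_trace trace = {
                        .entries        = entries,
                        .max_entries    = DEMO_DEPTH,
                };

                /* fills entries[] with user ips, terminated by ULONG_MAX */
                save_stack_trace_user(&trace);
        }

Because the loop bails on a non-advancing or below-%sp frame pointer, a binary built with -fomit-frame-pointer typically yields only the interrupted ip.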
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 7b987852e876..1e733eff9b33 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c | |||
@@ -30,8 +30,364 @@ | |||
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
32 | #include <asm/bios_ebda.h> | 32 | #include <asm/bios_ebda.h> |
33 | #include <asm/summit/mpparse.h> | ||
34 | 33 | ||
34 | /* | ||
35 | * APIC driver for the IBM "Summit" chipset. | ||
36 | */ | ||
37 | #define APIC_DEFINITION 1 | ||
38 | #include <linux/threads.h> | ||
39 | #include <linux/cpumask.h> | ||
40 | #include <asm/mpspec.h> | ||
41 | #include <asm/apic.h> | ||
42 | #include <asm/smp.h> | ||
43 | #include <asm/genapic.h> | ||
44 | #include <asm/fixmap.h> | ||
45 | #include <asm/apicdef.h> | ||
46 | #include <asm/ipi.h> | ||
47 | #include <linux/kernel.h> | ||
48 | #include <linux/string.h> | ||
49 | #include <linux/init.h> | ||
50 | #include <linux/gfp.h> | ||
51 | #include <linux/smp.h> | ||
52 | |||
53 | static inline unsigned summit_get_apic_id(unsigned long x) | ||
54 | { | ||
55 | return (x >> 24) & 0xFF; | ||
56 | } | ||
57 | |||
58 | static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) | ||
59 | { | ||
60 | default_send_IPI_mask_sequence_logical(mask, vector); | ||
61 | } | ||
62 | |||
63 | static inline void summit_send_IPI_allbutself(int vector) | ||
64 | { | ||
65 | cpumask_t mask = cpu_online_map; | ||
66 | cpu_clear(smp_processor_id(), mask); | ||
67 | |||
68 | if (!cpus_empty(mask)) | ||
69 | summit_send_IPI_mask(&mask, vector); | ||
70 | } | ||
71 | |||
72 | static inline void summit_send_IPI_all(int vector) | ||
73 | { | ||
74 | summit_send_IPI_mask(&cpu_online_map, vector); | ||
75 | } | ||
76 | |||
77 | #include <asm/tsc.h> | ||
78 | |||
79 | extern int use_cyclone; | ||
80 | |||
81 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
82 | extern void setup_summit(void); | ||
83 | #else | ||
84 | #define setup_summit() {} | ||
85 | #endif | ||
86 | |||
87 | static inline int | ||
88 | summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
89 | { | ||
90 | if (!strncmp(oem, "IBM ENSW", 8) && | ||
91 | (!strncmp(productid, "VIGIL SMP", 9) | ||
92 | || !strncmp(productid, "EXA", 3) | ||
93 | || !strncmp(productid, "RUTHLESS SMP", 12))){ | ||
94 | mark_tsc_unstable("Summit based system"); | ||
95 | use_cyclone = 1; /*enable cyclone-timer*/ | ||
96 | setup_summit(); | ||
97 | return 1; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* Hook from generic ACPI tables.c */ | ||
103 | static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
104 | { | ||
105 | if (!strncmp(oem_id, "IBM", 3) && | ||
106 | (!strncmp(oem_table_id, "SERVIGIL", 8) | ||
107 | || !strncmp(oem_table_id, "EXA", 3))) { | ||
108 | mark_tsc_unstable("Summit based system"); | ||
109 | use_cyclone = 1; /* enable cyclone-timer */ | ||
110 | setup_summit(); | ||
111 | return 1; | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | struct rio_table_hdr { | ||
117 | unsigned char version; /* Version number of this data structure */ | ||
118 | /* Version 3 adds chassis_num & WP_index */ | ||
119 | unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ | ||
120 | unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ | ||
121 | } __attribute__((packed)); | ||
122 | |||
123 | struct scal_detail { | ||
124 | unsigned char node_id; /* Scalability Node ID */ | ||
125 | unsigned long CBAR; /* Address of 1MB register space */ | ||
126 | unsigned char port0node; /* Node ID port connected to: 0xFF=None */ | ||
127 | unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
128 | unsigned char port1node; /* Node ID port connected to: 0xFF = None */ | ||
129 | unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
130 | unsigned char port2node; /* Node ID port connected to: 0xFF = None */ | ||
131 | unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
132 | unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ | ||
133 | } __attribute__((packed)); | ||
134 | |||
135 | struct rio_detail { | ||
136 | unsigned char node_id; /* RIO Node ID */ | ||
137 | unsigned long BBAR; /* Address of 1MB register space */ | ||
138 | unsigned char type; /* Type of device */ | ||
139 | unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ | ||
140 | /* For CYC: Node ID of Twister that owns this CYC */ | ||
141 | unsigned char port0node; /* Node ID port connected to: 0xFF=None */ | ||
142 | unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
143 | unsigned char port1node; /* Node ID port connected to: 0xFF=None */ | ||
144 | unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
145 | unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ | ||
146 | /* For CYC: 0 */ | ||
147 | unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ | ||
148 | /* = 0 : the XAPIC is not used, ie:*/ | ||
149 | /* ints fwded to another XAPIC */ | ||
150 | /* Bits1:7 Reserved */ | ||
151 | /* For CYC: Bits0:7 Reserved */ | ||
152 | unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ | ||
153 | /* lower slot numbers/PCI bus numbers */ | ||
154 | /* For CYC: No meaning */ | ||
155 | unsigned char chassis_num; /* 1 based Chassis number */ | ||
156 | /* For LookOut WPEGs this field indicates the */ | ||
157 | /* Expansion Chassis #, enumerated from Boot */ | ||
158 | /* Node WPEG external port, then Boot Node CYC */ | ||
159 | /* external port, then Next Vigil chassis WPEG */ | ||
160 | /* external port, etc. */ | ||
161 | /* Shared Lookouts have only 1 chassis number (the */ | ||
162 | /* first one assigned) */ | ||
163 | } __attribute__((packed)); | ||
164 | |||
165 | |||
166 | typedef enum { | ||
167 | CompatTwister = 0, /* Compatibility Twister */ | ||
168 | AltTwister = 1, /* Alternate Twister of internal 8-way */ | ||
169 | CompatCyclone = 2, /* Compatibility Cyclone */ | ||
170 | AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ | ||
171 | CompatWPEG = 4, /* Compatibility WPEG */ | ||
172 | AltWPEG = 5, /* Second Planar WPEG */ | ||
173 | LookOutAWPEG = 6, /* LookOut WPEG */ | ||
174 | LookOutBWPEG = 7, /* LookOut WPEG */ | ||
175 | } node_type; | ||
176 | |||
177 | static inline int is_WPEG(struct rio_detail *rio) { | ||
178 | return (rio->type == CompatWPEG || rio->type == AltWPEG || | ||
179 | rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); | ||
180 | } | ||
181 | |||
182 | |||
183 | /* In clustered mode, the high nibble of APIC ID is a cluster number. | ||
184 | * The low nibble is a 4-bit bitmap. */ | ||
185 | #define XAPIC_DEST_CPUS_SHIFT 4 | ||
186 | #define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) | ||
187 | #define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) | ||
188 | |||
189 | #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | ||
190 | |||
191 | static inline const cpumask_t *summit_target_cpus(void) | ||
192 | { | ||
193 | /* CPU_MASK_ALL (0xff) has undefined behaviour with | ||
194 | * dest_LowestPrio mode logical clustered apic interrupt routing | ||
195 | * Just start on cpu 0. IRQ balancing will spread load | ||
196 | */ | ||
197 | return &cpumask_of_cpu(0); | ||
198 | } | ||
199 | |||
200 | static inline unsigned long | ||
201 | summit_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
202 | { | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* we don't use the phys_cpu_present_map to indicate apicid presence */ | ||
207 | static inline unsigned long summit_check_apicid_present(int bit) | ||
208 | { | ||
209 | return 1; | ||
210 | } | ||
211 | |||
212 | #define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) | ||
213 | |||
214 | extern u8 cpu_2_logical_apicid[]; | ||
215 | |||
216 | static inline void summit_init_apic_ldr(void) | ||
217 | { | ||
218 | unsigned long val, id; | ||
219 | int count = 0; | ||
220 | u8 my_id = (u8)hard_smp_processor_id(); | ||
221 | u8 my_cluster = (u8)apicid_cluster(my_id); | ||
222 | #ifdef CONFIG_SMP | ||
223 | u8 lid; | ||
224 | int i; | ||
225 | |||
226 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | ||
227 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { | ||
228 | lid = cpu_2_logical_apicid[i]; | ||
229 | if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) | ||
230 | ++count; | ||
231 | } | ||
232 | #endif | ||
233 | /* We only have a 4 wide bitmap in cluster mode. If a deranged | ||
234 | * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ | ||
235 | BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); | ||
236 | id = my_cluster | (1UL << count); | ||
237 | apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); | ||
238 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
239 | val |= SET_APIC_LOGICAL_ID(id); | ||
240 | apic_write(APIC_LDR, val); | ||
241 | } | ||
242 | |||
243 | static inline int summit_apic_id_registered(void) | ||
244 | { | ||
245 | return 1; | ||
246 | } | ||
247 | |||
248 | static inline void summit_setup_apic_routing(void) | ||
249 | { | ||
250 | printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", | ||
251 | nr_ioapics); | ||
252 | } | ||
253 | |||
254 | static inline int summit_apicid_to_node(int logical_apicid) | ||
255 | { | ||
256 | #ifdef CONFIG_SMP | ||
257 | return apicid_2_node[hard_smp_processor_id()]; | ||
258 | #else | ||
259 | return 0; | ||
260 | #endif | ||
261 | } | ||
262 | |||
263 | /* Mapping from cpu number to logical apicid */ | ||
264 | static inline int summit_cpu_to_logical_apicid(int cpu) | ||
265 | { | ||
266 | #ifdef CONFIG_SMP | ||
267 | if (cpu >= nr_cpu_ids) | ||
268 | return BAD_APICID; | ||
269 | return (int)cpu_2_logical_apicid[cpu]; | ||
270 | #else | ||
271 | return logical_smp_processor_id(); | ||
272 | #endif | ||
273 | } | ||
274 | |||
275 | static inline int summit_cpu_present_to_apicid(int mps_cpu) | ||
276 | { | ||
277 | if (mps_cpu < nr_cpu_ids) | ||
278 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
279 | else | ||
280 | return BAD_APICID; | ||
281 | } | ||
282 | |||
283 | static inline physid_mask_t | ||
284 | summit_ioapic_phys_id_map(physid_mask_t phys_id_map) | ||
285 | { | ||
286 | /* For clustered we don't have a good way to do this yet - hack */ | ||
287 | return physids_promote(0x0F); | ||
288 | } | ||
289 | |||
290 | static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) | ||
291 | { | ||
292 | return physid_mask_of_physid(0); | ||
293 | } | ||
294 | |||
295 | static inline void summit_setup_portio_remap(void) | ||
296 | { | ||
297 | } | ||
298 | |||
299 | static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
300 | { | ||
301 | return 1; | ||
302 | } | ||
303 | |||
304 | static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) | ||
305 | { | ||
306 | int cpus_found = 0; | ||
307 | int num_bits_set; | ||
308 | int apicid; | ||
309 | int cpu; | ||
310 | |||
311 | num_bits_set = cpus_weight(*cpumask); | ||
312 | /* Return id to all */ | ||
313 | if (num_bits_set >= nr_cpu_ids) | ||
314 | return 0xFF; | ||
315 | /* | ||
316 | * The CPUs in the mask must all be in the same APIC cluster. If they | ||
317 | * are not, return the default value of target_cpus(): | ||
318 | */ | ||
319 | cpu = first_cpu(*cpumask); | ||
320 | apicid = summit_cpu_to_logical_apicid(cpu); | ||
321 | |||
322 | while (cpus_found < num_bits_set) { | ||
323 | if (cpu_isset(cpu, *cpumask)) { | ||
324 | int new_apicid = summit_cpu_to_logical_apicid(cpu); | ||
325 | |||
326 | if (apicid_cluster(apicid) != | ||
327 | apicid_cluster(new_apicid)) { | ||
328 | printk ("%s: Not a valid mask!\n", __func__); | ||
329 | |||
330 | return 0xFF; | ||
331 | } | ||
332 | apicid = apicid | new_apicid; | ||
333 | cpus_found++; | ||
334 | } | ||
335 | cpu++; | ||
336 | } | ||
337 | return apicid; | ||
338 | } | ||
339 | |||
340 | static inline unsigned int | ||
341 | summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
342 | const struct cpumask *andmask) | ||
343 | { | ||
344 | int apicid = summit_cpu_to_logical_apicid(0); | ||
345 | cpumask_var_t cpumask; | ||
346 | |||
347 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
348 | return apicid; | ||
349 | |||
350 | cpumask_and(cpumask, inmask, andmask); | ||
351 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
352 | apicid = summit_cpu_mask_to_apicid(cpumask); | ||
353 | |||
354 | free_cpumask_var(cpumask); | ||
355 | |||
356 | return apicid; | ||
357 | } | ||
358 | |||
359 | /* | ||
360 | * cpuid returns the value latched in the HW at reset, not the APIC ID | ||
361 | * register's value. For any box whose BIOS changes APIC IDs, like | ||
362 | * clustered APIC systems, we must use hard_smp_processor_id. | ||
363 | * | ||
364 | * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. | ||
365 | */ | ||
366 | static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) | ||
367 | { | ||
368 | return hard_smp_processor_id() >> index_msb; | ||
369 | } | ||
370 | |||
371 | static int probe_summit(void) | ||
372 | { | ||
373 | /* probed later in mptable/ACPI hooks */ | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) | ||
378 | { | ||
379 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
380 | * specified in the interrupt destination when using lowest | ||
381 | * priority interrupt delivery mode. | ||
382 | * | ||
383 | * In particular there was a hyperthreading cpu observed to | ||
384 | * deliver interrupts to the wrong hyperthread when only one | ||
385 | * hyperthread was specified in the interrupt destination. | ||
386 | */ | ||
387 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; | ||
388 | } | ||
389 | |||
390 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
35 | static struct rio_table_hdr *rio_table_hdr __initdata; | 391 | static struct rio_table_hdr *rio_table_hdr __initdata; |
36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | 392 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; |
37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; | 393 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; |
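A worked example for summit_init_apic_ldr() above: a CPU with hardware APIC ID 0x23 sits in cluster 0x20 (high nibble). If cpu_2_logical_apicid[] already records two CPUs in that cluster, count = 2, so the CPU programs APIC_LDR with id = 0x20 | (1 << 2) = 0x24 — the cluster number in the high nibble and a one-hot slot in the 4-bit low-nibble bitmap. The BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT) trips exactly when a fifth CPU would overflow that bitmap.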
@@ -186,3 +542,61 @@ void __init setup_summit(void) | |||
186 | next_wpeg = 0; | 542 | next_wpeg = 0; |
187 | } while (next_wpeg != 0); | 543 | } while (next_wpeg != 0); |
188 | } | 544 | } |
545 | #endif | ||
546 | |||
547 | struct genapic apic_summit = { | ||
548 | |||
549 | .name = "summit", | ||
550 | .probe = probe_summit, | ||
551 | .acpi_madt_oem_check = summit_acpi_madt_oem_check, | ||
552 | .apic_id_registered = summit_apic_id_registered, | ||
553 | |||
554 | .irq_delivery_mode = dest_LowestPrio, | ||
555 | /* logical delivery broadcast to all CPUs: */ | ||
556 | .irq_dest_mode = 1, | ||
557 | |||
558 | .target_cpus = summit_target_cpus, | ||
559 | .disable_esr = 1, | ||
560 | .dest_logical = APIC_DEST_LOGICAL, | ||
561 | .check_apicid_used = summit_check_apicid_used, | ||
562 | .check_apicid_present = summit_check_apicid_present, | ||
563 | |||
564 | .vector_allocation_domain = summit_vector_allocation_domain, | ||
565 | .init_apic_ldr = summit_init_apic_ldr, | ||
566 | |||
567 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | ||
568 | .setup_apic_routing = summit_setup_apic_routing, | ||
569 | .multi_timer_check = NULL, | ||
570 | .apicid_to_node = summit_apicid_to_node, | ||
571 | .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, | ||
572 | .cpu_present_to_apicid = summit_cpu_present_to_apicid, | ||
573 | .apicid_to_cpu_present = summit_apicid_to_cpu_present, | ||
574 | .setup_portio_remap = NULL, | ||
575 | .check_phys_apicid_present = summit_check_phys_apicid_present, | ||
576 | .enable_apic_mode = NULL, | ||
577 | .phys_pkg_id = summit_phys_pkg_id, | ||
578 | .mps_oem_check = summit_mps_oem_check, | ||
579 | |||
580 | .get_apic_id = summit_get_apic_id, | ||
581 | .set_apic_id = NULL, | ||
582 | .apic_id_mask = 0xFF << 24, | ||
583 | |||
584 | .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, | ||
585 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, | ||
586 | |||
587 | .send_IPI_mask = summit_send_IPI_mask, | ||
588 | .send_IPI_mask_allbutself = NULL, | ||
589 | .send_IPI_allbutself = summit_send_IPI_allbutself, | ||
590 | .send_IPI_all = summit_send_IPI_all, | ||
591 | .send_IPI_self = default_send_IPI_self, | ||
592 | |||
593 | .wakeup_cpu = NULL, | ||
594 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
595 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
596 | |||
597 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
598 | |||
599 | .smp_callin_clear_local_apic = NULL, | ||
600 | .store_NMI_vector = NULL, | ||
601 | .inquire_remote_apic = default_inquire_remote_apic, | ||
602 | }; | ||
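apic_summit is picked like the other 32-bit genapic drivers: generic probe code walks a table of driver pointers, trying ->probe() first and the ->mps_oem_check()/->acpi_madt_oem_check() hooks afterwards — which is why probe_summit() can return 0 and Summit still wins on its OEM strings. A sketch of that selection loop, assuming a NULL-terminated apic_probe[] table (table and function names assumed, not shown in this diff):

        extern struct genapic *apic;
        extern struct genapic apic_summit;

        static struct genapic *apic_probe[] __initdata = {
                &apic_summit,
                /* ... remaining drivers ... */
                NULL,
        };

        static void __init demo_pick_genapic(void)
        {
                int i;

                for (i = 0; apic_probe[i]; i++) {
                        if (apic_probe[i]->probe()) {
                                apic = apic_probe[i];
                                printk(KERN_INFO "Using APIC driver %s\n", apic->name);
                                break;
                        }
                }
        }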
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395ff34c3..3bdb64829b82 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -1,7 +1,7 @@ | |||
1 | ENTRY(sys_call_table) | 1 | ENTRY(sys_call_table) |
2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | 2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ |
3 | .long sys_exit | 3 | .long sys_exit |
4 | .long sys_fork | 4 | .long ptregs_fork |
5 | .long sys_read | 5 | .long sys_read |
6 | .long sys_write | 6 | .long sys_write |
7 | .long sys_open /* 5 */ | 7 | .long sys_open /* 5 */ |
@@ -10,7 +10,7 @@ ENTRY(sys_call_table) | |||
10 | .long sys_creat | 10 | .long sys_creat |
11 | .long sys_link | 11 | .long sys_link |
12 | .long sys_unlink /* 10 */ | 12 | .long sys_unlink /* 10 */ |
13 | .long sys_execve | 13 | .long ptregs_execve |
14 | .long sys_chdir | 14 | .long sys_chdir |
15 | .long sys_time | 15 | .long sys_time |
16 | .long sys_mknod | 16 | .long sys_mknod |
@@ -88,7 +88,7 @@ ENTRY(sys_call_table) | |||
88 | .long sys_uselib | 88 | .long sys_uselib |
89 | .long sys_swapon | 89 | .long sys_swapon |
90 | .long sys_reboot | 90 | .long sys_reboot |
91 | .long old_readdir | 91 | .long sys_old_readdir |
92 | .long old_mmap /* 90 */ | 92 | .long old_mmap /* 90 */ |
93 | .long sys_munmap | 93 | .long sys_munmap |
94 | .long sys_truncate | 94 | .long sys_truncate |
@@ -109,17 +109,17 @@ ENTRY(sys_call_table) | |||
109 | .long sys_newlstat | 109 | .long sys_newlstat |
110 | .long sys_newfstat | 110 | .long sys_newfstat |
111 | .long sys_uname | 111 | .long sys_uname |
112 | .long sys_iopl /* 110 */ | 112 | .long ptregs_iopl /* 110 */ |
113 | .long sys_vhangup | 113 | .long sys_vhangup |
114 | .long sys_ni_syscall /* old "idle" system call */ | 114 | .long sys_ni_syscall /* old "idle" system call */ |
115 | .long sys_vm86old | 115 | .long ptregs_vm86old |
116 | .long sys_wait4 | 116 | .long sys_wait4 |
117 | .long sys_swapoff /* 115 */ | 117 | .long sys_swapoff /* 115 */ |
118 | .long sys_sysinfo | 118 | .long sys_sysinfo |
119 | .long sys_ipc | 119 | .long sys_ipc |
120 | .long sys_fsync | 120 | .long sys_fsync |
121 | .long sys_sigreturn | 121 | .long ptregs_sigreturn |
122 | .long sys_clone /* 120 */ | 122 | .long ptregs_clone /* 120 */ |
123 | .long sys_setdomainname | 123 | .long sys_setdomainname |
124 | .long sys_newuname | 124 | .long sys_newuname |
125 | .long sys_modify_ldt | 125 | .long sys_modify_ldt |
@@ -165,14 +165,14 @@ ENTRY(sys_call_table) | |||
165 | .long sys_mremap | 165 | .long sys_mremap |
166 | .long sys_setresuid16 | 166 | .long sys_setresuid16 |
167 | .long sys_getresuid16 /* 165 */ | 167 | .long sys_getresuid16 /* 165 */ |
168 | .long sys_vm86 | 168 | .long ptregs_vm86 |
169 | .long sys_ni_syscall /* Old sys_query_module */ | 169 | .long sys_ni_syscall /* Old sys_query_module */ |
170 | .long sys_poll | 170 | .long sys_poll |
171 | .long sys_nfsservctl | 171 | .long sys_nfsservctl |
172 | .long sys_setresgid16 /* 170 */ | 172 | .long sys_setresgid16 /* 170 */ |
173 | .long sys_getresgid16 | 173 | .long sys_getresgid16 |
174 | .long sys_prctl | 174 | .long sys_prctl |
175 | .long sys_rt_sigreturn | 175 | .long ptregs_rt_sigreturn |
176 | .long sys_rt_sigaction | 176 | .long sys_rt_sigaction |
177 | .long sys_rt_sigprocmask /* 175 */ | 177 | .long sys_rt_sigprocmask /* 175 */ |
178 | .long sys_rt_sigpending | 178 | .long sys_rt_sigpending |
@@ -185,11 +185,11 @@ ENTRY(sys_call_table) | |||
185 | .long sys_getcwd | 185 | .long sys_getcwd |
186 | .long sys_capget | 186 | .long sys_capget |
187 | .long sys_capset /* 185 */ | 187 | .long sys_capset /* 185 */ |
188 | .long sys_sigaltstack | 188 | .long ptregs_sigaltstack |
189 | .long sys_sendfile | 189 | .long sys_sendfile |
190 | .long sys_ni_syscall /* reserved for streams1 */ | 190 | .long sys_ni_syscall /* reserved for streams1 */ |
191 | .long sys_ni_syscall /* reserved for streams2 */ | 191 | .long sys_ni_syscall /* reserved for streams2 */ |
192 | .long sys_vfork /* 190 */ | 192 | .long ptregs_vfork /* 190 */ |
193 | .long sys_getrlimit | 193 | .long sys_getrlimit |
194 | .long sys_mmap2 | 194 | .long sys_mmap2 |
195 | .long sys_truncate64 | 195 | .long sys_truncate64 |
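The renamed entries route system calls that need the full register frame — the fork/clone/vfork/execve family, iopl, vm86, sigaltstack and the sigreturn pair — through ptregs_* assembly wrappers, so the C handlers can take a struct pt_regs * directly instead of fishing it off the stack. A sketch of the C side such a wrapper jumps to, with the signature assumed from this era of the tree rather than shown in this hunk:

        int sys_fork(struct pt_regs *regs)
        {
                /* the child inherits a copy of the interrupted user frame */
                return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
        }

The ptregs_fork stub itself lives in entry_32.S and only materializes the pt_regs pointer before jumping here.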
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 77b400f06ea2..764c74e871f2 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -38,7 +38,7 @@ | |||
38 | #include <asm/time.h> | 38 | #include <asm/time.h> |
39 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
40 | 40 | ||
41 | #include "do_timer.h" | 41 | #include <asm/do_timer.h> |
42 | 42 | ||
43 | int timer_ack; | 43 | int timer_ack; |
44 | 44 | ||
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc); | |||
75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 75 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
76 | { | 76 | { |
77 | /* Keep nmi watchdog up to date */ | 77 | /* Keep nmi watchdog up to date */ |
78 | per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; | 78 | inc_irq_stat(irq0_irqs); |
79 | 79 | ||
80 | #ifdef CONFIG_X86_IO_APIC | 80 | #ifdef CONFIG_X86_IO_APIC |
81 | if (timer_ack) { | 81 | if (timer_ack) { |
@@ -105,8 +105,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
105 | high bit of the PPI port B (0x61). Note that some PS/2s, | 105 | high bit of the PPI port B (0x61). Note that some PS/2s, |
106 | notably the 55SX, work fine if this is removed. */ | 106 | notably the 55SX, work fine if this is removed. */ |
107 | 107 | ||
108 | u8 irq_v = inb_p( 0x61 ); /* read the current state */ | 108 | u8 irq_v = inb_p(0x61); /* read the current state */ |
109 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ | 109 | outb_p(irq_v | 0x80, 0x61); /* reset the IRQ */ |
110 | } | 110 | } |
111 | #endif | 111 | #endif |
112 | 112 | ||
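inc_irq_stat() gives both bitnesses one spelling for bumping per-cpu interrupt counters, hiding the 32-bit irq_stat percpu struct behind the same name as the 64-bit PDA field. A sketch of what the helper plausibly expands to at this point in the series (definitions assumed from the contemporaneous hardirq headers, not from this hunk):

        #ifdef CONFIG_X86_64
        # define inc_irq_stat(member)   add_pda(member, 1)
        #else
        # define inc_irq_stat(member)   (__get_cpu_var(irq_stat).member++)
        #endif

        /* usage, as in timer_interrupt() above: */
        inc_irq_stat(irq0_irqs);

The time_64.c hunk below makes the identical substitution for the 64-bit timer path.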
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index cb19d650c216..e6e695acd725 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -17,10 +17,10 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/time.h> | 18 | #include <linux/time.h> |
19 | #include <linux/mca.h> | 19 | #include <linux/mca.h> |
20 | #include <linux/nmi.h> | ||
20 | 21 | ||
21 | #include <asm/i8253.h> | 22 | #include <asm/i8253.h> |
22 | #include <asm/hpet.h> | 23 | #include <asm/hpet.h> |
23 | #include <asm/nmi.h> | ||
24 | #include <asm/vgtod.h> | 24 | #include <asm/vgtod.h> |
25 | #include <asm/time.h> | 25 | #include <asm/time.h> |
26 | #include <asm/timer.h> | 26 | #include <asm/timer.h> |
@@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
49 | } | 49 | } |
50 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
51 | 51 | ||
52 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) |
53 | { | 53 | { |
54 | add_pda(irq0_irqs, 1); | 54 | inc_irq_stat(irq0_irqs); |
55 | 55 | ||
56 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
57 | 57 | ||
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void) | |||
80 | break; | 80 | break; |
81 | no_ctr_free = (i == 4); | 81 | no_ctr_free = (i == 4); |
82 | if (no_ctr_free) { | 82 | if (no_ctr_free) { |
83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
84 | "cpu_khz value may be incorrect.\n"); | ||
83 | i = 3; | 85 | i = 3; |
84 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | 86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); |
85 | wrmsrl(MSR_K7_EVNTSEL3, 0); | 87 | wrmsrl(MSR_K7_EVNTSEL3, 0); |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index f4049f3513b6..000000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null | |||
@@ -1,257 +0,0 @@ | |||
1 | #include <linux/spinlock.h> | ||
2 | #include <linux/cpu.h> | ||
3 | #include <linux/interrupt.h> | ||
4 | |||
5 | #include <asm/tlbflush.h> | ||
6 | |||
7 | DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) | ||
8 | ____cacheline_aligned = { &init_mm, 0, }; | ||
9 | |||
10 | /* must come after the send_IPI functions above for inlining */ | ||
11 | #include <mach_ipi.h> | ||
12 | |||
13 | /* | ||
14 | * Smarter SMP flushing macros. | ||
15 | * c/o Linus Torvalds. | ||
16 | * | ||
17 | * These mean you can really definitely utterly forget about | ||
18 | * writing to user space from interrupts. (It's not allowed anyway). | ||
19 | * | ||
20 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
21 | */ | ||
22 | |||
23 | static cpumask_t flush_cpumask; | ||
24 | static struct mm_struct *flush_mm; | ||
25 | static unsigned long flush_va; | ||
26 | static DEFINE_SPINLOCK(tlbstate_lock); | ||
27 | |||
28 | /* | ||
29 | * We cannot call mmdrop() because we are in interrupt context, | ||
30 | * instead update mm->cpu_vm_mask. | ||
31 | * | ||
32 | * We need to reload %cr3 since the page tables may be going | ||
33 | * away from under us.. | ||
34 | */ | ||
35 | void leave_mm(int cpu) | ||
36 | { | ||
37 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | ||
38 | BUG(); | ||
39 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | ||
40 | load_cr3(swapper_pg_dir); | ||
41 | } | ||
42 | EXPORT_SYMBOL_GPL(leave_mm); | ||
43 | |||
44 | /* | ||
45 | * | ||
46 | * The flush IPI assumes that a thread switch happens in this order: | ||
47 | * [cpu0: the cpu that switches] | ||
48 | * 1) switch_mm() either 1a) or 1b) | ||
49 | * 1a) thread switch to a different mm | ||
50 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
51 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
52 | * the other cpus, but smp_invalidate_interrupt ignores flush ipis | ||
53 | * for the wrong mm, and in the worst case we perform a superfluous | ||
54 | * tlb flush. | ||
55 | * 1a2) set cpu_tlbstate to TLBSTATE_OK | ||
56 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
57 | * was in lazy tlb mode. | ||
58 | * 1a3) update cpu_tlbstate[].active_mm | ||
59 | * Now cpu0 accepts tlb flushes for the new mm. | ||
60 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
61 | * Now the other cpus will send tlb flush ipis. | ||
62 | * 1a5) change cr3. | ||
63 | * 1b) thread switch without mm change | ||
64 | * cpu_tlbstate[].active_mm is correct, cpu0 already handles | ||
65 | * flush ipis. | ||
66 | * 1b1) set cpu_tlbstate to TLBSTATE_OK | ||
67 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
68 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
69 | * and test the bit. | ||
70 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
71 | * 2) switch %%esp, ie current | ||
72 | * | ||
73 | * The interrupt must handle 2 special cases: | ||
74 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | ||
75 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
76 | * runs in kernel space, the cpu could load tlb entries for user space | ||
77 | * pages. | ||
78 | * | ||
79 | * The good news is that cpu_tlbstate is local to each cpu, no | ||
80 | * write/read ordering problems. | ||
81 | */ | ||
82 | |||
83 | /* | ||
84 | * TLB flush IPI: | ||
85 | * | ||
86 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
87 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
88 | */ | ||
89 | |||
90 | void smp_invalidate_interrupt(struct pt_regs *regs) | ||
91 | { | ||
92 | unsigned long cpu; | ||
93 | |||
94 | cpu = get_cpu(); | ||
95 | |||
96 | if (!cpu_isset(cpu, flush_cpumask)) | ||
97 | goto out; | ||
98 | /* | ||
99 | * This was a BUG() but until someone can quote me the | ||
100 | * line from the Intel manual that guarantees an IPI to | ||
101 | * multiple CPUs is retried _only_ on the erroring CPUs, | ||
102 | * it's staying as a return | ||
103 | * | ||
104 | * BUG(); | ||
105 | */ | ||
106 | |||
107 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | ||
108 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | ||
109 | if (flush_va == TLB_FLUSH_ALL) | ||
110 | local_flush_tlb(); | ||
111 | else | ||
112 | __flush_tlb_one(flush_va); | ||
113 | } else | ||
114 | leave_mm(cpu); | ||
115 | } | ||
116 | ack_APIC_irq(); | ||
117 | smp_mb__before_clear_bit(); | ||
118 | cpu_clear(cpu, flush_cpumask); | ||
119 | smp_mb__after_clear_bit(); | ||
120 | out: | ||
121 | put_cpu_no_resched(); | ||
122 | __get_cpu_var(irq_stat).irq_tlb_count++; | ||
123 | } | ||
124 | |||
125 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | ||
126 | unsigned long va) | ||
127 | { | ||
128 | cpumask_t cpumask = *cpumaskp; | ||
129 | |||
130 | /* | ||
131 | * A couple of (to be removed) sanity checks: | ||
132 | * | ||
133 | * - current CPU must not be in mask | ||
134 | * - mask must exist :) | ||
135 | */ | ||
136 | BUG_ON(cpus_empty(cpumask)); | ||
137 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | ||
138 | BUG_ON(!mm); | ||
139 | |||
140 | #ifdef CONFIG_HOTPLUG_CPU | ||
141 | /* If a CPU which we ran on has gone down, OK. */ | ||
142 | cpus_and(cpumask, cpumask, cpu_online_map); | ||
143 | if (unlikely(cpus_empty(cpumask))) | ||
144 | return; | ||
145 | #endif | ||
146 | |||
147 | /* | ||
148 | * I'm not happy about this global shared spinlock in the | ||
149 | * MM hot path, but we'll see how contended it is. | ||
150 | * AK: x86-64 has a faster method that could be ported. | ||
151 | */ | ||
152 | spin_lock(&tlbstate_lock); | ||
153 | |||
154 | flush_mm = mm; | ||
155 | flush_va = va; | ||
156 | cpus_or(flush_cpumask, cpumask, flush_cpumask); | ||
157 | |||
158 | /* | ||
159 | * Make the above memory operations globally visible before | ||
160 | * sending the IPI. | ||
161 | */ | ||
162 | smp_mb(); | ||
163 | /* | ||
164 | * We have to send the IPI only to | ||
165 | * CPUs affected. | ||
166 | */ | ||
167 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); | ||
168 | |||
169 | while (!cpus_empty(flush_cpumask)) | ||
170 | /* nothing. lockup detection does not belong here */ | ||
171 | cpu_relax(); | ||
172 | |||
173 | flush_mm = NULL; | ||
174 | flush_va = 0; | ||
175 | spin_unlock(&tlbstate_lock); | ||
176 | } | ||
177 | |||
178 | void flush_tlb_current_task(void) | ||
179 | { | ||
180 | struct mm_struct *mm = current->mm; | ||
181 | cpumask_t cpu_mask; | ||
182 | |||
183 | preempt_disable(); | ||
184 | cpu_mask = mm->cpu_vm_mask; | ||
185 | cpu_clear(smp_processor_id(), cpu_mask); | ||
186 | |||
187 | local_flush_tlb(); | ||
188 | if (!cpus_empty(cpu_mask)) | ||
189 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
190 | preempt_enable(); | ||
191 | } | ||
192 | |||
193 | void flush_tlb_mm(struct mm_struct *mm) | ||
194 | { | ||
195 | cpumask_t cpu_mask; | ||
196 | |||
197 | preempt_disable(); | ||
198 | cpu_mask = mm->cpu_vm_mask; | ||
199 | cpu_clear(smp_processor_id(), cpu_mask); | ||
200 | |||
201 | if (current->active_mm == mm) { | ||
202 | if (current->mm) | ||
203 | local_flush_tlb(); | ||
204 | else | ||
205 | leave_mm(smp_processor_id()); | ||
206 | } | ||
207 | if (!cpus_empty(cpu_mask)) | ||
208 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
209 | |||
210 | preempt_enable(); | ||
211 | } | ||
212 | |||
213 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | ||
214 | { | ||
215 | struct mm_struct *mm = vma->vm_mm; | ||
216 | cpumask_t cpu_mask; | ||
217 | |||
218 | preempt_disable(); | ||
219 | cpu_mask = mm->cpu_vm_mask; | ||
220 | cpu_clear(smp_processor_id(), cpu_mask); | ||
221 | |||
222 | if (current->active_mm == mm) { | ||
223 | if (current->mm) | ||
224 | __flush_tlb_one(va); | ||
225 | else | ||
226 | leave_mm(smp_processor_id()); | ||
227 | } | ||
228 | |||
229 | if (!cpus_empty(cpu_mask)) | ||
230 | flush_tlb_others(cpu_mask, mm, va); | ||
231 | |||
232 | preempt_enable(); | ||
233 | } | ||
234 | EXPORT_SYMBOL(flush_tlb_page); | ||
235 | |||
236 | static void do_flush_tlb_all(void *info) | ||
237 | { | ||
238 | unsigned long cpu = smp_processor_id(); | ||
239 | |||
240 | __flush_tlb_all(); | ||
241 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) | ||
242 | leave_mm(cpu); | ||
243 | } | ||
244 | |||
245 | void flush_tlb_all(void) | ||
246 | { | ||
247 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
248 | } | ||
249 | |||
250 | void reset_lazy_tlbstate(void) | ||
251 | { | ||
252 | int cpu = raw_smp_processor_id(); | ||
253 | |||
254 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
255 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
256 | } | ||
257 | |||
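The ordering comment in the file deleted above is easier to follow condensed into pseudo-code. A sketch of case 1a of switch_mm(), with names taken from that comment rather than from the patch:

    /* Pseudo-code: cpu switches from old_mm to new_mm (case 1a). */
    cpu_clear(cpu, old_mm->cpu_vm_mask);            /* 1a1: stop old-mm flush IPIs */
    per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; /* 1a2: leave lazy mode        */
    per_cpu(cpu_tlbstate, cpu).active_mm = new_mm;  /* 1a3: accept new-mm flushes  */
    cpu_set(cpu, new_mm->cpu_vm_mask);              /* 1a4: others now IPI us      */
    load_cr3(new_mm->pgd);                          /* 1a5: switch page tables     */

Under this ordering a racing flush is at worst superfluous, never lost, which is why smp_invalidate_interrupt() can simply ignore IPIs that arrive for the wrong mm.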
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c deleted file mode 100644 index 8f919ca69494..000000000000 --- a/arch/x86/kernel/tlb_64.c +++ /dev/null | |||
@@ -1,284 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | |||
3 | #include <linux/mm.h> | ||
4 | #include <linux/delay.h> | ||
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/kernel_stat.h> | ||
8 | #include <linux/mc146818rtc.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | |||
11 | #include <asm/mtrr.h> | ||
12 | #include <asm/pgalloc.h> | ||
13 | #include <asm/tlbflush.h> | ||
14 | #include <asm/mmu_context.h> | ||
15 | #include <asm/proto.h> | ||
16 | #include <asm/apicdef.h> | ||
17 | #include <asm/idle.h> | ||
18 | #include <asm/uv/uv_hub.h> | ||
19 | #include <asm/uv/uv_bau.h> | ||
20 | |||
21 | #include <mach_ipi.h> | ||
22 | /* | ||
23 | * Smarter SMP flushing macros. | ||
24 | * c/o Linus Torvalds. | ||
25 | * | ||
26 | * These mean you can really definitely utterly forget about | ||
27 | * writing to user space from interrupts. (It's not allowed anyway). | ||
28 | * | ||
29 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
30 | * | ||
31 | * More scalable flush, from Andi Kleen | ||
32 | * | ||
33 | * To avoid global state use 8 different call vectors. | ||
34 | * Each CPU uses a specific vector to trigger flushes on other | ||
35 | * CPUs. Depending on the received vector the target CPUs look into | ||
36 | * the right per cpu variable for the flush data. | ||
37 | * | ||
38 | * With more than 8 CPUs they are hashed to the 8 available | ||
39 | * vectors. The limited global vector space forces us to this right now. | ||
40 | * In future when interrupts are split into per CPU domains this could be | ||
41 | * fixed, at the cost of triggering multiple IPIs in some cases. | ||
42 | */ | ||
43 | |||
44 | union smp_flush_state { | ||
45 | struct { | ||
46 | cpumask_t flush_cpumask; | ||
47 | struct mm_struct *flush_mm; | ||
48 | unsigned long flush_va; | ||
49 | spinlock_t tlbstate_lock; | ||
50 | }; | ||
51 | char pad[SMP_CACHE_BYTES]; | ||
52 | } ____cacheline_aligned; | ||
53 | |||
54 | /* State is put into the per CPU data section, but padded | ||
55 | to a full cache line because other CPUs can access it and we don't | ||
56 | want false sharing in the per cpu data segment. */ | ||
57 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | ||
58 | |||
59 | /* | ||
60 | * We cannot call mmdrop() because we are in interrupt context, | ||
61 | * instead update mm->cpu_vm_mask. | ||
62 | */ | ||
63 | void leave_mm(int cpu) | ||
64 | { | ||
65 | if (read_pda(mmu_state) == TLBSTATE_OK) | ||
66 | BUG(); | ||
67 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | ||
68 | load_cr3(swapper_pg_dir); | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(leave_mm); | ||
71 | |||
72 | /* | ||
73 | * | ||
74 | * The flush IPI assumes that a thread switch happens in this order: | ||
75 | * [cpu0: the cpu that switches] | ||
76 | * 1) switch_mm() either 1a) or 1b) | ||
77 | * 1a) thread switch to a different mm | ||
78 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
79 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
80 | * the other cpus, but smp_invalidate_interrupt ignores flush ipis | ||
81 | * for the wrong mm, and in the worst case we perform a superfluous | ||
82 | * tlb flush. | ||
83 | * 1a2) set cpu mmu_state to TLBSTATE_OK | ||
84 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
85 | * was in lazy tlb mode. | ||
86 | * 1a3) update cpu active_mm | ||
87 | * Now cpu0 accepts tlb flushes for the new mm. | ||
88 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
89 | * Now the other cpus will send tlb flush ipis. | ||
90 | * 1a5) change cr3. | ||
91 | * 1b) thread switch without mm change | ||
92 | * cpu active_mm is correct, cpu0 already handles | ||
93 | * flush ipis. | ||
94 | * 1b1) set cpu mmu_state to TLBSTATE_OK | ||
95 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
96 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
97 | * and test the bit. | ||
98 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
99 | * 2) switch %%esp, ie current | ||
100 | * | ||
101 | * The interrupt must handle 2 special cases: | ||
102 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | ||
103 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
104 | * runs in kernel space, the cpu could load tlb entries for user space | ||
105 | * pages. | ||
106 | * | ||
107 | * The good news is that cpu mmu_state is local to each cpu, no | ||
108 | * write/read ordering problems. | ||
109 | */ | ||
110 | |||
111 | /* | ||
112 | * TLB flush IPI: | ||
113 | * | ||
114 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
115 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
116 | * | ||
117 | * Interrupts are disabled. | ||
118 | */ | ||
119 | |||
120 | asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | ||
121 | { | ||
122 | int cpu; | ||
123 | int sender; | ||
124 | union smp_flush_state *f; | ||
125 | |||
126 | cpu = smp_processor_id(); | ||
127 | /* | ||
128 | * orig_rax contains the negated interrupt vector. | ||
129 | * Use that to determine where the sender put the data. | ||
130 | */ | ||
131 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | ||
132 | f = &per_cpu(flush_state, sender); | ||
133 | |||
134 | if (!cpu_isset(cpu, f->flush_cpumask)) | ||
135 | goto out; | ||
136 | /* | ||
137 | * This was a BUG() but until someone can quote me the | ||
138 | * line from the Intel manual that guarantees an IPI to | ||
139 | * multiple CPUs is retried _only_ on the erroring CPUs, | ||
140 | * it's staying as a return | ||
141 | * | ||
142 | * BUG(); | ||
143 | */ | ||
144 | |||
145 | if (f->flush_mm == read_pda(active_mm)) { | ||
146 | if (read_pda(mmu_state) == TLBSTATE_OK) { | ||
147 | if (f->flush_va == TLB_FLUSH_ALL) | ||
148 | local_flush_tlb(); | ||
149 | else | ||
150 | __flush_tlb_one(f->flush_va); | ||
151 | } else | ||
152 | leave_mm(cpu); | ||
153 | } | ||
154 | out: | ||
155 | ack_APIC_irq(); | ||
156 | cpu_clear(cpu, f->flush_cpumask); | ||
157 | add_pda(irq_tlb_count, 1); | ||
158 | } | ||
159 | |||
160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | ||
161 | unsigned long va) | ||
162 | { | ||
163 | int sender; | ||
164 | union smp_flush_state *f; | ||
165 | cpumask_t cpumask = *cpumaskp; | ||
166 | |||
167 | if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) | ||
168 | return; | ||
169 | |||
170 | /* Caller has disabled preemption */ | ||
171 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | ||
172 | f = &per_cpu(flush_state, sender); | ||
173 | |||
174 | /* | ||
175 | * Could avoid this lock when | ||
176 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | ||
177 | * probably not worth checking this for a cache-hot lock. | ||
178 | */ | ||
179 | spin_lock(&f->tlbstate_lock); | ||
180 | |||
181 | f->flush_mm = mm; | ||
182 | f->flush_va = va; | ||
183 | cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); | ||
184 | |||
185 | /* | ||
186 | * Make the above memory operations globally visible before | ||
187 | * sending the IPI. | ||
188 | */ | ||
189 | smp_mb(); | ||
190 | /* | ||
191 | * We have to send the IPI only to | ||
192 | * CPUs affected. | ||
193 | */ | ||
194 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); | ||
195 | |||
196 | while (!cpus_empty(f->flush_cpumask)) | ||
197 | cpu_relax(); | ||
198 | |||
199 | f->flush_mm = NULL; | ||
200 | f->flush_va = 0; | ||
201 | spin_unlock(&f->tlbstate_lock); | ||
202 | } | ||
203 | |||
204 | static int __cpuinit init_smp_flush(void) | ||
205 | { | ||
206 | int i; | ||
207 | |||
208 | for_each_possible_cpu(i) | ||
209 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | core_initcall(init_smp_flush); | ||
214 | |||
215 | void flush_tlb_current_task(void) | ||
216 | { | ||
217 | struct mm_struct *mm = current->mm; | ||
218 | cpumask_t cpu_mask; | ||
219 | |||
220 | preempt_disable(); | ||
221 | cpu_mask = mm->cpu_vm_mask; | ||
222 | cpu_clear(smp_processor_id(), cpu_mask); | ||
223 | |||
224 | local_flush_tlb(); | ||
225 | if (!cpus_empty(cpu_mask)) | ||
226 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
227 | preempt_enable(); | ||
228 | } | ||
229 | |||
230 | void flush_tlb_mm(struct mm_struct *mm) | ||
231 | { | ||
232 | cpumask_t cpu_mask; | ||
233 | |||
234 | preempt_disable(); | ||
235 | cpu_mask = mm->cpu_vm_mask; | ||
236 | cpu_clear(smp_processor_id(), cpu_mask); | ||
237 | |||
238 | if (current->active_mm == mm) { | ||
239 | if (current->mm) | ||
240 | local_flush_tlb(); | ||
241 | else | ||
242 | leave_mm(smp_processor_id()); | ||
243 | } | ||
244 | if (!cpus_empty(cpu_mask)) | ||
245 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
246 | |||
247 | preempt_enable(); | ||
248 | } | ||
249 | |||
250 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | ||
251 | { | ||
252 | struct mm_struct *mm = vma->vm_mm; | ||
253 | cpumask_t cpu_mask; | ||
254 | |||
255 | preempt_disable(); | ||
256 | cpu_mask = mm->cpu_vm_mask; | ||
257 | cpu_clear(smp_processor_id(), cpu_mask); | ||
258 | |||
259 | if (current->active_mm == mm) { | ||
260 | if (current->mm) | ||
261 | __flush_tlb_one(va); | ||
262 | else | ||
263 | leave_mm(smp_processor_id()); | ||
264 | } | ||
265 | |||
266 | if (!cpus_empty(cpu_mask)) | ||
267 | flush_tlb_others(cpu_mask, mm, va); | ||
268 | |||
269 | preempt_enable(); | ||
270 | } | ||
271 | |||
272 | static void do_flush_tlb_all(void *info) | ||
273 | { | ||
274 | unsigned long cpu = smp_processor_id(); | ||
275 | |||
276 | __flush_tlb_all(); | ||
277 | if (read_pda(mmu_state) == TLBSTATE_LAZY) | ||
278 | leave_mm(cpu); | ||
279 | } | ||
280 | |||
281 | void flush_tlb_all(void) | ||
282 | { | ||
283 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
284 | } | ||
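The multi-vector scheme described at the top of the deleted tlb_64.c pairs sender and receiver like this (condensed from the listing above; both halves survive in the consolidated TLB code):

    /* Sender: hash onto one of the 8 invalidate vectors and publish
     * the flush data in that vector's per-cpu slot. */
    sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
    f = &per_cpu(flush_state, sender);
    send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);

    /* Receiver: recover the slot from the vector number that entry.S
     * stashed, negated, in orig_ax. */
    sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
    f = &per_cpu(flush_state, sender);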
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 04431f34fd16..f396e61bcb34 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | 12 | ||
13 | #include <asm/mmu_context.h> | 13 | #include <asm/mmu_context.h> |
14 | #include <asm/uv/uv.h> | ||
14 | #include <asm/uv/uv_mmrs.h> | 15 | #include <asm/uv/uv_mmrs.h> |
15 | #include <asm/uv/uv_hub.h> | 16 | #include <asm/uv/uv_hub.h> |
16 | #include <asm/uv/uv_bau.h> | 17 | #include <asm/uv/uv_bau.h> |
@@ -19,7 +20,7 @@ | |||
19 | #include <asm/tsc.h> | 20 | #include <asm/tsc.h> |
20 | #include <asm/irq_vectors.h> | 21 | #include <asm/irq_vectors.h> |
21 | 22 | ||
22 | #include <mach_apic.h> | 23 | #include <asm/genapic.h> |
23 | 24 | ||
24 | static struct bau_control **uv_bau_table_bases __read_mostly; | 25 | static struct bau_control **uv_bau_table_bases __read_mostly; |
25 | static int uv_bau_retry_limit __read_mostly; | 26 | static int uv_bau_retry_limit __read_mostly; |
@@ -200,6 +201,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
200 | destination_timeouts = 0; | 201 | destination_timeouts = 0; |
201 | } | 202 | } |
202 | } | 203 | } |
204 | cpu_relax(); | ||
203 | } | 205 | } |
204 | return FLUSH_COMPLETE; | 206 | return FLUSH_COMPLETE; |
205 | } | 207 | } |
@@ -209,14 +211,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
209 | * | 211 | * |
210 | * Send a broadcast and wait for a broadcast message to complete. | 212 | * Send a broadcast and wait for a broadcast message to complete. |
211 | * | 213 | * |
212 | * The cpumaskp mask contains the cpus the broadcast was sent to. | 214 | * The flush_mask contains the cpus the broadcast was sent to. |
213 | * | 215 | * |
214 | * Returns 1 if all remote flushing was done. The mask is zeroed. | 216 | * Returns NULL if all remote flushing was done. The mask is zeroed. |
215 | * Returns 0 if some remote flushing remains to be done. The mask is left | 217 | * Returns @flush_mask if some remote flushing remains to be done. The |
216 | * unchanged. | 218 | * mask will have some bits still set. |
217 | */ | 219 | */ |
218 | int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, | 220 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, |
219 | cpumask_t *cpumaskp) | 221 | struct bau_desc *bau_desc, |
222 | struct cpumask *flush_mask) | ||
220 | { | 223 | { |
221 | int completion_status = 0; | 224 | int completion_status = 0; |
222 | int right_shift; | 225 | int right_shift; |
@@ -256,66 +259,76 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, | |||
256 | * the cpus, all of which are still in the mask. | 259 | * the cpus, all of which are still in the mask. |
257 | */ | 260 | */ |
258 | __get_cpu_var(ptcstats).ptc_i++; | 261 | __get_cpu_var(ptcstats).ptc_i++; |
259 | return 0; | 262 | return flush_mask; |
260 | } | 263 | } |
261 | 264 | ||
262 | /* | 265 | /* |
263 | * Success, so clear the remote cpu's from the mask so we don't | 266 | * Success, so clear the remote cpu's from the mask so we don't |
264 | * use the IPI method of shootdown on them. | 267 | * use the IPI method of shootdown on them. |
265 | */ | 268 | */ |
266 | for_each_cpu_mask(bit, *cpumaskp) { | 269 | for_each_cpu(bit, flush_mask) { |
267 | blade = uv_cpu_to_blade_id(bit); | 270 | blade = uv_cpu_to_blade_id(bit); |
268 | if (blade == this_blade) | 271 | if (blade == this_blade) |
269 | continue; | 272 | continue; |
270 | cpu_clear(bit, *cpumaskp); | 273 | cpumask_clear_cpu(bit, flush_mask); |
271 | } | 274 | } |
272 | if (!cpus_empty(*cpumaskp)) | 275 | if (!cpumask_empty(flush_mask)) |
273 | return 0; | 276 | return flush_mask; |
274 | return 1; | 277 | return NULL; |
275 | } | 278 | } |
276 | 279 | ||
277 | /** | 280 | /** |
278 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 281 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
279 | * address or all TLB's | 282 | * address or all TLB's |
280 | * @cpumaskp: mask of all cpu's in which the address is to be removed | 283 | * @cpumask: mask of all cpu's in which the address is to be removed |
281 | * @mm: mm_struct containing virtual address range | 284 | * @mm: mm_struct containing virtual address range |
282 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) | 285 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) |
286 | * @cpu: the current cpu | ||
283 | * | 287 | * |
284 | * This is the entry point for initiating any UV global TLB shootdown. | 288 | * This is the entry point for initiating any UV global TLB shootdown. |
285 | * | 289 | * |
286 | * Purges the translation caches of all specified processors of the given | 290 | * Purges the translation caches of all specified processors of the given |
287 | * virtual address, or purges all TLB's on specified processors. | 291 | * virtual address, or purges all TLB's on specified processors. |
288 | * | 292 | * |
289 | * The caller has derived the cpumaskp from the mm_struct and has subtracted | 293 | * The caller has derived the cpumask from the mm_struct. This function |
290 | * the local cpu from the mask. This function is called only if there | 294 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
291 | * are bits set in the mask. (e.g. flush_tlb_page()) | ||
292 | * | 295 | * |
293 | * The cpumaskp is converted into a nodemask of the nodes containing | 296 | * The cpumask is converted into a nodemask of the nodes containing |
294 | * the cpus. | 297 | * the cpus. |
295 | * | 298 | * |
296 | * Returns 1 if all remote flushing was done. | 299 | * Note that this function should be called with preemption disabled. |
297 | * Returns 0 if some remote flushing remains to be done. | 300 | * |
301 | * Returns NULL if all remote flushing was done. | ||
302 | * Returns pointer to cpumask if some remote flushing remains to be | ||
303 | * done. The returned pointer is valid till preemption is re-enabled. | ||
298 | */ | 304 | */ |
299 | int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, | 305 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
300 | unsigned long va) | 306 | struct mm_struct *mm, |
307 | unsigned long va, unsigned int cpu) | ||
301 | { | 308 | { |
309 | static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); | ||
310 | struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); | ||
302 | int i; | 311 | int i; |
303 | int bit; | 312 | int bit; |
304 | int blade; | 313 | int blade; |
305 | int cpu; | 314 | int uv_cpu; |
306 | int this_blade; | 315 | int this_blade; |
307 | int locals = 0; | 316 | int locals = 0; |
308 | struct bau_desc *bau_desc; | 317 | struct bau_desc *bau_desc; |
309 | 318 | ||
310 | cpu = uv_blade_processor_id(); | 319 | WARN_ON(!in_atomic()); |
320 | |||
321 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
322 | |||
323 | uv_cpu = uv_blade_processor_id(); | ||
311 | this_blade = uv_numa_blade_id(); | 324 | this_blade = uv_numa_blade_id(); |
312 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 325 | bau_desc = __get_cpu_var(bau_control).descriptor_base; |
313 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; | 326 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; |
314 | 327 | ||
315 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 328 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
316 | 329 | ||
317 | i = 0; | 330 | i = 0; |
318 | for_each_cpu_mask(bit, *cpumaskp) { | 331 | for_each_cpu(bit, flush_mask) { |
319 | blade = uv_cpu_to_blade_id(bit); | 332 | blade = uv_cpu_to_blade_id(bit); |
320 | BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); | 333 | BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); |
321 | if (blade == this_blade) { | 334 | if (blade == this_blade) { |
@@ -330,17 +343,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, | |||
330 | * no off_node flushing; return status for local node | 343 | * no off_node flushing; return status for local node |
331 | */ | 344 | */ |
332 | if (locals) | 345 | if (locals) |
333 | return 0; | 346 | return flush_mask; |
334 | else | 347 | else |
335 | return 1; | 348 | return NULL; |
336 | } | 349 | } |
337 | __get_cpu_var(ptcstats).requestor++; | 350 | __get_cpu_var(ptcstats).requestor++; |
338 | __get_cpu_var(ptcstats).ntargeted += i; | 351 | __get_cpu_var(ptcstats).ntargeted += i; |
339 | 352 | ||
340 | bau_desc->payload.address = va; | 353 | bau_desc->payload.address = va; |
341 | bau_desc->payload.sending_cpu = smp_processor_id(); | 354 | bau_desc->payload.sending_cpu = cpu; |
342 | 355 | ||
343 | return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); | 356 | return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); |
344 | } | 357 | } |
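With the new contract a NULL return means UV flushed everything remote; otherwise the returned mask holds the cpus still pending. A hypothetical caller, with the fallback helper name assumed rather than taken from this hunk:

    /* Hypothetical caller: try the UV broadcast, IPI the leftovers. */
    const struct cpumask *remain;

    remain = uv_flush_tlb_others(cpumask, mm, va, cpu);
    if (remain)
    	flush_tlb_others_ipi(remain, mm, va);	/* assumed fallback */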
345 | 358 | ||
346 | /* | 359 | /* |
@@ -566,14 +579,10 @@ static int __init uv_ptc_init(void) | |||
566 | if (!is_uv_system()) | 579 | if (!is_uv_system()) |
567 | return 0; | 580 | return 0; |
568 | 581 | ||
569 | if (!proc_mkdir("sgi_uv", NULL)) | ||
570 | return -EINVAL; | ||
571 | |||
572 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 582 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); |
573 | if (!proc_uv_ptc) { | 583 | if (!proc_uv_ptc) { |
574 | printk(KERN_ERR "unable to create %s proc entry\n", | 584 | printk(KERN_ERR "unable to create %s proc entry\n", |
575 | UV_PTC_BASENAME); | 585 | UV_PTC_BASENAME); |
576 | remove_proc_entry("sgi_uv", NULL); | ||
577 | return -EINVAL; | 586 | return -EINVAL; |
578 | } | 587 | } |
579 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | 588 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; |
@@ -586,7 +595,6 @@ static int __init uv_ptc_init(void) | |||
586 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | 595 | static struct bau_control * __init uv_table_bases_init(int blade, int node) |
587 | { | 596 | { |
588 | int i; | 597 | int i; |
589 | int *ip; | ||
590 | struct bau_msg_status *msp; | 598 | struct bau_msg_status *msp; |
591 | struct bau_control *bau_tabp; | 599 | struct bau_control *bau_tabp; |
592 | 600 | ||
@@ -603,13 +611,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) | |||
603 | bau_cpubits_clear(&msp->seen_by, (int) | 611 | bau_cpubits_clear(&msp->seen_by, (int) |
604 | uv_blade_nr_possible_cpus(blade)); | 612 | uv_blade_nr_possible_cpus(blade)); |
605 | 613 | ||
606 | bau_tabp->watching = | ||
607 | kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); | ||
608 | BUG_ON(!bau_tabp->watching); | ||
609 | |||
610 | for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) | ||
611 | *ip = 0; | ||
612 | |||
613 | uv_bau_table_bases[blade] = bau_tabp; | 614 | uv_bau_table_bases[blade] = bau_tabp; |
614 | 615 | ||
615 | return bau_tabp; | 616 | return bau_tabp; |
@@ -632,7 +633,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, | |||
632 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | 633 | bcp->bau_msg_head = bau_tablesp->va_queue_first; |
633 | bcp->va_queue_first = bau_tablesp->va_queue_first; | 634 | bcp->va_queue_first = bau_tablesp->va_queue_first; |
634 | bcp->va_queue_last = bau_tablesp->va_queue_last; | 635 | bcp->va_queue_last = bau_tablesp->va_queue_last; |
635 | bcp->watching = bau_tablesp->watching; | ||
636 | bcp->msg_statuses = bau_tablesp->msg_statuses; | 636 | bcp->msg_statuses = bau_tablesp->msg_statuses; |
637 | bcp->descriptor_base = adp; | 637 | bcp->descriptor_base = adp; |
638 | } | 638 | } |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 1106fac6024d..808031a5ba19 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -1,10 +1,26 @@ | |||
1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
2 | 2 | ||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | #include <asm/e820.h> | ||
4 | 5 | ||
5 | /* ready for x86_64 and x86 */ | 6 | /* ready for x86_64 and x86 */ |
6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 7 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
7 | 8 | ||
9 | void __init reserve_trampoline_memory(void) | ||
10 | { | ||
11 | #ifdef CONFIG_X86_32 | ||
12 | /* | ||
13 | * But first pinch a few for the stack/trampoline stuff | ||
14 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
15 | * trampoline before removing it. (see the GDT stuff) | ||
16 | */ | ||
17 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
18 | #endif | ||
19 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
20 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | ||
21 | "TRAMPOLINE"); | ||
22 | } | ||
23 | |||
8 | /* | 24 | /* |
9 | * Currently trivial. Write the real->protected mode | 25 | * Currently trivial. Write the real->protected mode |
10 | * bootstrap into the page concerned. The caller | 26 | * bootstrap into the page concerned. The caller |
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | |||
12 | */ | 28 | */ |
13 | unsigned long setup_trampoline(void) | 29 | unsigned long setup_trampoline(void) |
14 | { | 30 | { |
15 | memcpy(trampoline_base, trampoline_data, | 31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
16 | trampoline_end - trampoline_data); | ||
17 | return virt_to_phys(trampoline_base); | 32 | return virt_to_phys(trampoline_base); |
18 | } | 33 | } |
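setup_trampoline() now copies a fixed TRAMPOLINE_SIZE and still returns the stub's physical address, which the reservation above keeps below 1 MiB so real-mode APs can execute it. A hypothetical sketch of the bring-up call site (the wakeup function's name is assumed):

    /* Hypothetical caller in the SMP boot path. */
    unsigned long start_eip = setup_trampoline();
    wakeup_secondary_cpu_via_init(apicid, start_eip);	/* assumed name */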
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 894293c598db..95a012a4664e 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/page.h> | 29 | #include <asm/page.h> |
30 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
31 | #include <asm/segment.h> | 31 | #include <asm/segment.h> |
32 | #include <asm/processor-flags.h> | ||
32 | 33 | ||
33 | .section .rodata, "a", @progbits | 34 | .section .rodata, "a", @progbits |
34 | 35 | ||
@@ -37,7 +38,7 @@ | |||
37 | ENTRY(trampoline_data) | 38 | ENTRY(trampoline_data) |
38 | r_base = . | 39 | r_base = . |
39 | cli # We should be safe anyway | 40 | cli # We should be safe anyway |
40 | wbinvd | 41 | wbinvd |
41 | mov %cs, %ax # Code and data in the same place | 42 | mov %cs, %ax # Code and data in the same place |
42 | mov %ax, %ds | 43 | mov %ax, %ds |
43 | mov %ax, %es | 44 | mov %ax, %es |
@@ -73,9 +74,8 @@ r_base = . | |||
73 | lidtl tidt - r_base # load idt with 0, 0 | 74 | lidtl tidt - r_base # load idt with 0, 0 |
74 | lgdtl tgdt - r_base # load gdt with whatever is appropriate | 75 | lgdtl tgdt - r_base # load gdt with whatever is appropriate |
75 | 76 | ||
76 | xor %ax, %ax | 77 | mov $X86_CR0_PE, %ax # protected mode (PE) bit |
77 | inc %ax # protected mode (PE) bit | 78 | lmsw %ax # into protected mode |
78 | lmsw %ax # into protected mode | ||
79 | 79 | ||
80 | # flush prefetch and jump to startup_32 | 80 | # flush prefetch and jump to startup_32 |
81 | ljmpl *(startup_32_vector - r_base) | 81 | ljmpl *(startup_32_vector - r_base) |
@@ -86,9 +86,8 @@ startup_32: | |||
86 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register | 86 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register |
87 | movl %eax, %ds | 87 | movl %eax, %ds |
88 | 88 | ||
89 | xorl %eax, %eax | 89 | movl $X86_CR4_PAE, %eax |
90 | btsl $5, %eax # Enable PAE mode | 90 | movl %eax, %cr4 # Enable PAE mode |
91 | movl %eax, %cr4 | ||
92 | 91 | ||
93 | # Setup trampoline 4 level pagetables | 92 | # Setup trampoline 4 level pagetables |
94 | leal (trampoline_level4_pgt - r_base)(%esi), %eax | 93 | leal (trampoline_level4_pgt - r_base)(%esi), %eax |
@@ -99,9 +98,9 @@ startup_32: | |||
99 | xorl %edx, %edx | 98 | xorl %edx, %edx |
100 | wrmsr | 99 | wrmsr |
101 | 100 | ||
102 | xorl %eax, %eax | 101 | # Enable paging and in turn activate Long Mode |
103 | btsl $31, %eax # Enable paging and in turn activate Long Mode | 102 | # Enable protected mode |
104 | btsl $0, %eax # Enable protected mode | 103 | movl $(X86_CR0_PG | X86_CR0_PE), %eax |
105 | movl %eax, %cr0 | 104 | movl %eax, %cr0 |
106 | 105 | ||
107 | /* | 106 | /* |
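The rewrites above trade bit-twiddling (xor/inc, btsl) for named constants from the newly included <asm/processor-flags.h>. For reference, the architectural bits involved:

    /* Control-register bits used by the trampoline (architectural values): */
    #define X86_CR0_PE	0x00000001	/* Protection Enable, bit 0          */
    #define X86_CR0_PG	0x80000000	/* Paging, bit 31                    */
    #define X86_CR4_PAE	0x00000020	/* Physical Address Extension, bit 5 */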
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab0161..bde57f0f1616 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/unwind.h> | ||
24 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
25 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
26 | #include <linux/kexec.h> | 25 | #include <linux/kexec.h> |
@@ -51,30 +50,22 @@ | |||
51 | #include <asm/debugreg.h> | 50 | #include <asm/debugreg.h> |
52 | #include <asm/atomic.h> | 51 | #include <asm/atomic.h> |
53 | #include <asm/system.h> | 52 | #include <asm/system.h> |
54 | #include <asm/unwind.h> | ||
55 | #include <asm/traps.h> | 53 | #include <asm/traps.h> |
56 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
57 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
58 | 56 | ||
59 | #include <mach_traps.h> | 57 | #include <asm/mach_traps.h> |
60 | 58 | ||
61 | #ifdef CONFIG_X86_64 | 59 | #ifdef CONFIG_X86_64 |
62 | #include <asm/pgalloc.h> | 60 | #include <asm/pgalloc.h> |
63 | #include <asm/proto.h> | 61 | #include <asm/proto.h> |
64 | #include <asm/pda.h> | ||
65 | #else | 62 | #else |
66 | #include <asm/processor-flags.h> | 63 | #include <asm/processor-flags.h> |
67 | #include <asm/arch_hooks.h> | 64 | #include <asm/arch_hooks.h> |
68 | #include <asm/nmi.h> | ||
69 | #include <asm/smp.h> | ||
70 | #include <asm/io.h> | ||
71 | #include <asm/traps.h> | 65 | #include <asm/traps.h> |
72 | 66 | ||
73 | #include "cpu/mcheck/mce.h" | 67 | #include "cpu/mcheck/mce.h" |
74 | 68 | ||
75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
76 | EXPORT_SYMBOL_GPL(used_vectors); | ||
77 | |||
78 | asmlinkage int system_call(void); | 69 | asmlinkage int system_call(void); |
79 | 70 | ||
80 | /* Do we ignore FPU interrupts ? */ | 71 | /* Do we ignore FPU interrupts ? */ |
@@ -89,6 +80,9 @@ gate_desc idt_table[256] | |||
89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
90 | #endif | 81 | #endif |
91 | 82 | ||
83 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
84 | EXPORT_SYMBOL_GPL(used_vectors); | ||
85 | |||
92 | static int ignore_nmis; | 86 | static int ignore_nmis; |
93 | 87 | ||
94 | static inline void conditional_sti(struct pt_regs *regs) | 88 | static inline void conditional_sti(struct pt_regs *regs) |
@@ -292,8 +286,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
292 | tsk->thread.error_code = error_code; | 286 | tsk->thread.error_code = error_code; |
293 | tsk->thread.trap_no = 8; | 287 | tsk->thread.trap_no = 8; |
294 | 288 | ||
295 | /* This is always a kernel trap and never fixable (and thus must | 289 | /* |
296 | never return). */ | 290 | * This is always a kernel trap and never fixable (and thus must |
291 | * never return). | ||
292 | */ | ||
297 | for (;;) | 293 | for (;;) |
298 | die(str, regs, error_code); | 294 | die(str, regs, error_code); |
299 | } | 295 | } |
@@ -481,11 +477,7 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
481 | { | 477 | { |
482 | nmi_enter(); | 478 | nmi_enter(); |
483 | 479 | ||
484 | #ifdef CONFIG_X86_32 | 480 | inc_irq_stat(__nmi_count); |
485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
486 | #else | ||
487 | add_pda(__nmi_count, 1); | ||
488 | #endif | ||
489 | 481 | ||
490 | if (!ignore_nmis) | 482 | if (!ignore_nmis) |
491 | default_do_nmi(regs); | 483 | default_do_nmi(regs); |
@@ -524,9 +516,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
524 | } | 516 | } |
525 | 517 | ||
526 | #ifdef CONFIG_X86_64 | 518 | #ifdef CONFIG_X86_64 |
527 | /* Help handler running on IST stack to switch back to user stack | 519 | /* |
528 | for scheduling or signal handling. The actual stack switch is done in | 520 | * Help handler running on IST stack to switch back to user stack |
529 | entry.S */ | 521 | * for scheduling or signal handling. The actual stack switch is done in |
522 | * entry.S | ||
523 | */ | ||
530 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | 524 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) |
531 | { | 525 | { |
532 | struct pt_regs *regs = eregs; | 526 | struct pt_regs *regs = eregs; |
@@ -536,8 +530,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
536 | /* Exception from user space */ | 530 | /* Exception from user space */ |
537 | else if (user_mode(eregs)) | 531 | else if (user_mode(eregs)) |
538 | regs = task_pt_regs(current); | 532 | regs = task_pt_regs(current); |
539 | /* Exception from kernel and interrupts are enabled. Move to | 533 | /* |
540 | kernel process stack. */ | 534 | * Exception from kernel and interrupts are enabled. Move to |
535 | * kernel process stack. | ||
536 | */ | ||
541 | else if (eregs->flags & X86_EFLAGS_IF) | 537 | else if (eregs->flags & X86_EFLAGS_IF) |
542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | 538 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); |
543 | if (eregs != regs) | 539 | if (eregs != regs) |
@@ -664,7 +660,7 @@ void math_error(void __user *ip) | |||
664 | { | 660 | { |
665 | struct task_struct *task; | 661 | struct task_struct *task; |
666 | siginfo_t info; | 662 | siginfo_t info; |
667 | unsigned short cwd, swd; | 663 | unsigned short cwd, swd, err; |
668 | 664 | ||
669 | /* | 665 | /* |
670 | * Save the info for the exception handler and clear the error. | 666 | * Save the info for the exception handler and clear the error. |
@@ -675,7 +671,6 @@ void math_error(void __user *ip) | |||
675 | task->thread.error_code = 0; | 671 | task->thread.error_code = 0; |
676 | info.si_signo = SIGFPE; | 672 | info.si_signo = SIGFPE; |
677 | info.si_errno = 0; | 673 | info.si_errno = 0; |
678 | info.si_code = __SI_FAULT; | ||
679 | info.si_addr = ip; | 674 | info.si_addr = ip; |
680 | /* | 675 | /* |
681 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 676 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
@@ -689,34 +684,30 @@ void math_error(void __user *ip) | |||
689 | */ | 684 | */ |
690 | cwd = get_fpu_cwd(task); | 685 | cwd = get_fpu_cwd(task); |
691 | swd = get_fpu_swd(task); | 686 | swd = get_fpu_swd(task); |
692 | switch (swd & ~cwd & 0x3f) { | 687 | |
693 | case 0x000: /* No unmasked exception */ | 688 | err = swd & ~cwd; |
694 | #ifdef CONFIG_X86_32 | 689 | |
695 | return; | 690 | if (err & 0x001) { /* Invalid op */ |
696 | #endif | ||
697 | default: /* Multiple exceptions */ | ||
698 | break; | ||
699 | case 0x001: /* Invalid Op */ | ||
700 | /* | 691 | /* |
701 | * swd & 0x240 == 0x040: Stack Underflow | 692 | * swd & 0x240 == 0x040: Stack Underflow |
702 | * swd & 0x240 == 0x240: Stack Overflow | 693 | * swd & 0x240 == 0x240: Stack Overflow |
703 | * User must clear the SF bit (0x40) if set | 694 | * User must clear the SF bit (0x40) if set |
704 | */ | 695 | */ |
705 | info.si_code = FPE_FLTINV; | 696 | info.si_code = FPE_FLTINV; |
706 | break; | 697 | } else if (err & 0x004) { /* Divide by Zero */ |
707 | case 0x002: /* Denormalize */ | ||
708 | case 0x010: /* Underflow */ | ||
709 | info.si_code = FPE_FLTUND; | ||
710 | break; | ||
711 | case 0x004: /* Zero Divide */ | ||
712 | info.si_code = FPE_FLTDIV; | 698 | info.si_code = FPE_FLTDIV; |
713 | break; | 699 | } else if (err & 0x008) { /* Overflow */ |
714 | case 0x008: /* Overflow */ | ||
715 | info.si_code = FPE_FLTOVF; | 700 | info.si_code = FPE_FLTOVF; |
716 | break; | 701 | } else if (err & 0x012) { /* Denormal, Underflow */ |
717 | case 0x020: /* Precision */ | 702 | info.si_code = FPE_FLTUND; |
703 | } else if (err & 0x020) { /* Precision */ | ||
718 | info.si_code = FPE_FLTRES; | 704 | info.si_code = FPE_FLTRES; |
719 | break; | 705 | } else { |
706 | /* | ||
707 | * If we're using IRQ 13, or supposedly even some trap 16 | ||
708 | * implementations, it's possible we get a spurious trap... | ||
709 | */ | ||
710 | return; /* Spurious trap, no error */ | ||
720 | } | 711 | } |
721 | force_sig_info(SIGFPE, &info, task); | 712 | force_sig_info(SIGFPE, &info, task); |
722 | } | 713 | } |
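The rewritten decode computes err = swd & ~cwd, so only exceptions the task left unmasked survive, then tests them in priority order. A small worked sketch with illustrative register values (not from the patch):

    /* Illustrative only: IE unmasked in the control word, IE plus
     * stack-fault bits pending in the status word. */
    unsigned short cwd = 0x0372;		/* 0x037f with IE, ZE, OE unmasked */
    unsigned short swd = 0x0241;		/* IE, SF and C1 set               */
    unsigned short err = swd & ~cwd;	/* = 0x0001 -> FPE_FLTINV          */

Here swd & 0x240 == 0x240, the stack-overflow signature called out in the comment above.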
@@ -904,7 +895,7 @@ asmlinkage void math_state_restore(void) | |||
904 | EXPORT_SYMBOL_GPL(math_state_restore); | 895 | EXPORT_SYMBOL_GPL(math_state_restore); |
905 | 896 | ||
906 | #ifndef CONFIG_MATH_EMULATION | 897 | #ifndef CONFIG_MATH_EMULATION |
907 | asmlinkage void math_emulate(long arg) | 898 | void math_emulate(struct math_emu_info *info) |
908 | { | 899 | { |
909 | printk(KERN_EMERG | 900 | printk(KERN_EMERG |
910 | "math-emulation not enabled and no coprocessor found.\n"); | 901 | "math-emulation not enabled and no coprocessor found.\n"); |
@@ -915,12 +906,16 @@ asmlinkage void math_emulate(long arg) | |||
915 | #endif /* CONFIG_MATH_EMULATION */ | 906 | #endif /* CONFIG_MATH_EMULATION */ |
916 | 907 | ||
917 | dotraplinkage void __kprobes | 908 | dotraplinkage void __kprobes |
918 | do_device_not_available(struct pt_regs *regs, long error) | 909 | do_device_not_available(struct pt_regs *regs, long error_code) |
919 | { | 910 | { |
920 | #ifdef CONFIG_X86_32 | 911 | #ifdef CONFIG_X86_32 |
921 | if (read_cr0() & X86_CR0_EM) { | 912 | if (read_cr0() & X86_CR0_EM) { |
913 | struct math_emu_info info = { }; | ||
914 | |||
922 | conditional_sti(regs); | 915 | conditional_sti(regs); |
923 | math_emulate(0); | 916 | |
917 | info.regs = regs; | ||
918 | math_emulate(&info); | ||
924 | } else { | 919 | } else { |
925 | math_state_restore(); /* interrupts still off */ | 920 | math_state_restore(); /* interrupts still off */ |
926 | conditional_sti(regs); | 921 | conditional_sti(regs); |
@@ -949,9 +944,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
949 | 944 | ||
950 | void __init trap_init(void) | 945 | void __init trap_init(void) |
951 | { | 946 | { |
952 | #ifdef CONFIG_X86_32 | ||
953 | int i; | 947 | int i; |
954 | #endif | ||
955 | 948 | ||
956 | #ifdef CONFIG_EISA | 949 | #ifdef CONFIG_EISA |
957 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | 950 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
@@ -1008,11 +1001,15 @@ void __init trap_init(void) | |||
1008 | } | 1001 | } |
1009 | 1002 | ||
1010 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 1003 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
1004 | #endif | ||
1011 | 1005 | ||
1012 | /* Reserve all the builtin and the syscall vector: */ | 1006 | /* Reserve all the builtin and the syscall vector: */ |
1013 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | 1007 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) |
1014 | set_bit(i, used_vectors); | 1008 | set_bit(i, used_vectors); |
1015 | 1009 | ||
1010 | #ifdef CONFIG_X86_64 | ||
1011 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
1012 | #else | ||
1016 | set_bit(SYSCALL_VECTOR, used_vectors); | 1013 | set_bit(SYSCALL_VECTOR, used_vectors); |
1017 | #endif | 1014 | #endif |
1018 | /* | 1015 | /* |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 424093b157d3..83d53ce5d4c4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/vgtod.h> | 15 | #include <asm/vgtod.h> |
16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
17 | #include <asm/delay.h> | 17 | #include <asm/delay.h> |
18 | #include <asm/hypervisor.h> | ||
18 | 19 | ||
19 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 20 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
20 | EXPORT_SYMBOL(cpu_khz); | 21 | EXPORT_SYMBOL(cpu_khz); |
@@ -31,6 +32,7 @@ static int tsc_unstable; | |||
31 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 32 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
32 | static int tsc_disabled = -1; | 33 | static int tsc_disabled = -1; |
33 | 34 | ||
35 | static int tsc_clocksource_reliable; | ||
34 | /* | 36 | /* |
35 | * Scheduler clock - returns current time in nanosec units. | 37 | * Scheduler clock - returns current time in nanosec units. |
36 | */ | 38 | */ |
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str) | |||
98 | 100 | ||
99 | __setup("notsc", notsc_setup); | 101 | __setup("notsc", notsc_setup); |
100 | 102 | ||
103 | static int __init tsc_setup(char *str) | ||
104 | { | ||
105 | if (!strcmp(str, "reliable")) | ||
106 | tsc_clocksource_reliable = 1; | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | __setup("tsc=", tsc_setup); | ||
111 | |||
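Usage sketch: with the hook above, a platform whose TSC is trusted can skip the clocksource watchdog (and, further below, the boot-time sync checks) by appending the new parameter to the kernel command line. An illustrative bootloader entry:

    linux /boot/vmlinuz root=/dev/sda1 ro quiet tsc=reliable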
101 | #define MAX_RETRIES 5 | 112 | #define MAX_RETRIES 5 |
102 | #define SMI_TRESHOLD 50000 | 113 | #define SMI_TRESHOLD 50000 |
103 | 114 | ||
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void) | |||
352 | { | 363 | { |
353 | u64 tsc1, tsc2, delta, ref1, ref2; | 364 | u64 tsc1, tsc2, delta, ref1, ref2; |
354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 365 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
355 | unsigned long flags, latch, ms, fast_calibrate; | 366 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; |
356 | int hpet = is_hpet_enabled(), i, loopmin; | 367 | int hpet = is_hpet_enabled(), i, loopmin; |
357 | 368 | ||
369 | tsc_khz = get_hypervisor_tsc_freq(); | ||
370 | if (tsc_khz) { | ||
371 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
372 | return tsc_khz; | ||
373 | } | ||
374 | |||
358 | local_irq_save(flags); | 375 | local_irq_save(flags); |
359 | fast_calibrate = quick_pit_calibrate(); | 376 | fast_calibrate = quick_pit_calibrate(); |
360 | local_irq_restore(flags); | 377 | local_irq_restore(flags); |
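A plausible shape for the hypervisor hook consulted above, hedged since only the call is visible in this hunk: at this point in the series it forwarded to the VMware backend when that hypervisor had been detected and returned 0 otherwise, letting calibration fall through to the PIT/HPET path.

    /* Sketch: zero means "no hypervisor-provided frequency". */
    unsigned long get_hypervisor_tsc_freq(void)
    {
    	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
    		return vmware_get_tsc_khz();
    	return 0;
    }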
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
731 | {} | 748 | {} |
732 | }; | 749 | }; |
733 | 750 | ||
734 | /* | 751 | static void __init check_system_tsc_reliable(void) |
735 | * Geode_LX - the OLPC CPU possibly has a very reliable TSC | 752 | { |
736 | */ | ||
737 | #ifdef CONFIG_MGEODE_LX | 753 | #ifdef CONFIG_MGEODE_LX |
738 | /* RTSC counts during suspend */ | 754 | /* RTSC counts during suspend */ |
739 | #define RTSC_SUSP 0x100 | 755 | #define RTSC_SUSP 0x100 |
740 | |||
741 | static void __init check_geode_tsc_reliable(void) | ||
742 | { | ||
743 | unsigned long res_low, res_high; | 756 | unsigned long res_low, res_high; |
744 | 757 | ||
745 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 758 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
759 | /* Geode_LX - the OLPC CPU possibly has a very reliable TSC */ | ||
746 | if (res_low & RTSC_SUSP) | 760 | if (res_low & RTSC_SUSP) |
747 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 761 | tsc_clocksource_reliable = 1; |
748 | } | ||
749 | #else | ||
750 | static inline void check_geode_tsc_reliable(void) { } | ||
751 | #endif | 762 | #endif |
763 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | ||
764 | tsc_clocksource_reliable = 1; | ||
765 | } | ||
752 | 766 | ||
753 | /* | 767 | /* |
754 | * Make an educated guess if the TSC is trustworthy and synchronized | 768 | * Make an educated guess if the TSC is trustworthy and synchronized |
@@ -759,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void) | |||
759 | if (!cpu_has_tsc || tsc_unstable) | 773 | if (!cpu_has_tsc || tsc_unstable) |
760 | return 1; | 774 | return 1; |
761 | 775 | ||
762 | #ifdef CONFIG_X86_SMP | 776 | #ifdef CONFIG_SMP |
763 | if (apic_is_clustered_box()) | 777 | if (apic_is_clustered_box()) |
764 | return 1; | 778 | return 1; |
765 | #endif | 779 | #endif |
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void) | |||
783 | { | 797 | { |
784 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | 798 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
785 | clocksource_tsc.shift); | 799 | clocksource_tsc.shift); |
800 | if (tsc_clocksource_reliable) | ||
801 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
786 | /* lower the rating if we already know its unstable: */ | 802 | /* lower the rating if we already know its unstable: */ |
787 | if (check_tsc_unstable()) { | 803 | if (check_tsc_unstable()) { |
788 | clocksource_tsc.rating = 0; | 804 | clocksource_tsc.rating = 0; |
@@ -843,7 +859,7 @@ void __init tsc_init(void) | |||
843 | if (unsynchronized_tsc()) | 859 | if (unsynchronized_tsc()) |
844 | mark_tsc_unstable("TSCs unsynchronized"); | 860 | mark_tsc_unstable("TSCs unsynchronized"); |
845 | 861 | ||
846 | check_geode_tsc_reliable(); | 862 | check_system_tsc_reliable(); |
847 | init_tsc_clocksource(); | 863 | init_tsc_clocksource(); |
848 | } | 864 | } |
849 | 865 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9ffb01c31c40..bf36328f6ef9 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void) | |||
46 | cycles_t start, now, prev, end; | 46 | cycles_t start, now, prev, end; |
47 | int i; | 47 | int i; |
48 | 48 | ||
49 | rdtsc_barrier(); | ||
49 | start = get_cycles(); | 50 | start = get_cycles(); |
51 | rdtsc_barrier(); | ||
50 | /* | 52 | /* |
51 | * The measurement runs for 20 msecs: | 53 | * The measurement runs for 20 msecs: |
52 | */ | 54 | */ |
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void) | |||
61 | */ | 63 | */ |
62 | __raw_spin_lock(&sync_lock); | 64 | __raw_spin_lock(&sync_lock); |
63 | prev = last_tsc; | 65 | prev = last_tsc; |
66 | rdtsc_barrier(); | ||
64 | now = get_cycles(); | 67 | now = get_cycles(); |
68 | rdtsc_barrier(); | ||
65 | last_tsc = now; | 69 | last_tsc = now; |
66 | __raw_spin_unlock(&sync_lock); | 70 | __raw_spin_unlock(&sync_lock); |
67 | 71 | ||
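The pattern added above fences the TSC read on both sides: without the barriers, out-of-order execution can sample rdtsc earlier or later than program order suggests and inflate the apparent warp. Condensed:

    /* Fenced TSC read; rdtsc_barrier() is an lfence/mfence alternative
     * picked per CPU vendor. */
    rdtsc_barrier();	/* the read cannot execute ahead of prior code */
    now = get_cycles();	/* rdtsc                                       */
    rdtsc_barrier();	/* the read cannot drift past following code   */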
@@ -108,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
108 | if (unsynchronized_tsc()) | 112 | if (unsynchronized_tsc()) |
109 | return; | 113 | return; |
110 | 114 | ||
115 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | ||
116 | printk(KERN_INFO | ||
117 | "Skipping synchronization checks as TSC is reliable.\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
111 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | 121 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", |
112 | smp_processor_id(), cpu); | 122 | smp_processor_id(), cpu); |
113 | 123 | ||
@@ -161,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
161 | { | 171 | { |
162 | int cpus = 2; | 172 | int cpus = 2; |
163 | 173 | ||
164 | if (unsynchronized_tsc()) | 174 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
165 | return; | 175 | return; |
166 | 176 | ||
167 | /* | 177 | /* |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 0c9667f0752a..4fd646e6dd43 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -32,9 +32,9 @@ | |||
32 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
34 | 34 | ||
35 | #include <mach_ipi.h> | 35 | #include <asm/genapic.h> |
36 | 36 | ||
37 | #include "mach_apic.h" | 37 | #include <asm/genapic.h> |
38 | 38 | ||
39 | #include <linux/kernel_stat.h> | 39 | #include <linux/kernel_stat.h> |
40 | 40 | ||
@@ -176,33 +176,31 @@ static int __init visws_get_smp_config(unsigned int early) | |||
176 | * No problem for Linux. | 176 | * No problem for Linux. |
177 | */ | 177 | */ |
178 | 178 | ||
179 | static void __init MP_processor_info(struct mpc_config_processor *m) | 179 | static void __init MP_processor_info(struct mpc_cpu *m) |
180 | { | 180 | { |
181 | int ver, logical_apicid; | 181 | int ver, logical_apicid; |
182 | physid_mask_t apic_cpus; | 182 | physid_mask_t apic_cpus; |
183 | 183 | ||
184 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | 184 | if (!(m->cpuflag & CPU_ENABLED)) |
185 | return; | 185 | return; |
186 | 186 | ||
187 | logical_apicid = m->mpc_apicid; | 187 | logical_apicid = m->apicid; |
188 | printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", | 188 | printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", |
189 | m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", | 189 | m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", |
190 | m->mpc_apicid, | 190 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, |
191 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | 191 | (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); |
192 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
193 | m->mpc_apicver); | ||
194 | 192 | ||
195 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) | 193 | if (m->cpuflag & CPU_BOOTPROCESSOR) |
196 | boot_cpu_physical_apicid = m->mpc_apicid; | 194 | boot_cpu_physical_apicid = m->apicid; |
197 | 195 | ||
198 | ver = m->mpc_apicver; | 196 | ver = m->apicver; |
199 | if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { | 197 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { |
200 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | 198 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
201 | m->mpc_apicid, MAX_APICS); | 199 | m->apicid, MAX_APICS); |
202 | return; | 200 | return; |
203 | } | 201 | } |
204 | 202 | ||
205 | apic_cpus = apicid_to_cpu_present(m->mpc_apicid); | 203 | apic_cpus = apic->apicid_to_cpu_present(m->apicid); |
206 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | 204 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); |
207 | /* | 205 | /* |
208 | * Validate version | 206 | * Validate version |
@@ -210,15 +208,15 @@ static void __init MP_processor_info(struct mpc_config_processor *m) | |||
210 | if (ver == 0x0) { | 208 | if (ver == 0x0) { |
211 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " | 209 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " |
212 | "fixing up to 0x10. (tell your hw vendor)\n", | 210 | "fixing up to 0x10. (tell your hw vendor)\n", |
213 | m->mpc_apicid); | 211 | m->apicid); |
214 | ver = 0x10; | 212 | ver = 0x10; |
215 | } | 213 | } |
216 | apic_version[m->mpc_apicid] = ver; | 214 | apic_version[m->apicid] = ver; |
217 | } | 215 | } |
218 | 216 | ||
219 | static int __init visws_find_smp_config(unsigned int reserve) | 217 | static int __init visws_find_smp_config(unsigned int reserve) |
220 | { | 218 | { |
221 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 219 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
222 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 220 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
223 | 221 | ||
224 | if (ncpus > CO_CPU_MAX) { | 222 | if (ncpus > CO_CPU_MAX) { |
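
For orientation, the visws hunk above is mostly a mechanical rename: struct mpc_config_processor becomes struct mpc_cpu, the mpc_ field prefixes drop, and the mach_apic.h macro apicid_to_cpu_present() turns into a call through the apic genapic pointer. A minimal sketch of the before/after access pattern; the struct is abbreviated, not the full mpspec definition:

	/* Abbreviated sketch of the renamed MP-table CPU entry. */
	struct mpc_cpu {
		unsigned char type;
		unsigned char apicid;		/* was mpc_apicid */
		unsigned char apicver;		/* was mpc_apicver */
		unsigned char cpuflag;		/* was mpc_cpuflag */
		unsigned int  cpufeature;	/* was mpc_cpufeature */
		/* ... remaining fields unchanged in meaning ... */
	};

	static void sketch_mark_present(struct mpc_cpu *m)
	{
		physid_mask_t mask;

		if (!(m->cpuflag & CPU_ENABLED))
			return;
		/* old: apicid_to_cpu_present(m->mpc_apicid), a mach_apic.h macro
		 * new: one indirection through the active genapic driver:      */
		mask = apic->apicid_to_cpu_present(m->apicid);
		physids_or(phys_cpu_present_map, phys_cpu_present_map, mask);
	}
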
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720d..d7ac84e7fc1c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
158 | ret = KVM86->regs32; | 158 | ret = KVM86->regs32; |
159 | 159 | ||
160 | ret->fs = current->thread.saved_fs; | 160 | ret->fs = current->thread.saved_fs; |
161 | loadsegment(gs, current->thread.saved_gs); | 161 | set_user_gs(ret, current->thread.saved_gs); |
162 | 162 | ||
163 | return ret; | 163 | return ret; |
164 | } | 164 | } |
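
The savesegment()/loadsegment() pair gives way to get_user_gs()/set_user_gs() here because 32-bit user %gs can now be handled lazily: depending on configuration it stays live in the hardware register across kernel entry, or is spilled into the saved pt_regs frame, and the helpers hide which. A hedged sketch of the idea; illustrative shapes, not the exact header definitions:

	/* Illustrative only; the real helpers differ in detail. */
	#ifdef CONFIG_X86_32_LAZY_GS
	/* user %gs is left live in the register across kernel entry */
	#define get_user_gs(regs)	({ unsigned long v; savesegment(gs, v); v; })
	#define set_user_gs(regs, v)	loadsegment(gs, (v))
	#else
	/* user %gs is spilled to and reloaded from the pt_regs frame */
	#define get_user_gs(regs)	((regs)->gs)
	#define set_user_gs(regs, v)	((regs)->gs = (v))
	#endif
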
@@ -197,9 +197,9 @@ out: | |||
197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
199 | 199 | ||
200 | asmlinkage int sys_vm86old(struct pt_regs regs) | 200 | int sys_vm86old(struct pt_regs *regs) |
201 | { | 201 | { |
202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; | 202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; |
203 | struct kernel_vm86_struct info; /* declare this _on top_, | 203 | struct kernel_vm86_struct info; /* declare this _on top_, |
204 | * this avoids wasting of stack space. | 204 | * this avoids wasting of stack space. |
205 | * This remains on the stack until we | 205 | * This remains on the stack until we |
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) | |||
218 | if (tmp) | 218 | if (tmp) |
219 | goto out; | 219 | goto out; |
220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | 220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); |
221 | info.regs32 = ®s; | 221 | info.regs32 = regs; |
222 | tsk->thread.vm86_info = v86; | 222 | tsk->thread.vm86_info = v86; |
223 | do_sys_vm86(&info, tsk); | 223 | do_sys_vm86(&info, tsk); |
224 | ret = 0; /* we never return here */ | 224 | ret = 0; /* we never return here */ |
@@ -227,7 +227,7 @@ out: | |||
227 | } | 227 | } |
228 | 228 | ||
229 | 229 | ||
230 | asmlinkage int sys_vm86(struct pt_regs regs) | 230 | int sys_vm86(struct pt_regs *regs) |
231 | { | 231 | { |
232 | struct kernel_vm86_struct info; /* declare this _on top_, | 232 | struct kernel_vm86_struct info; /* declare this _on top_, |
233 | * this avoids wasting of stack space. | 233 | * this avoids wasting of stack space. |
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
239 | struct vm86plus_struct __user *v86; | 239 | struct vm86plus_struct __user *v86; |
240 | 240 | ||
241 | tsk = current; | 241 | tsk = current; |
242 | switch (regs.bx) { | 242 | switch (regs->bx) { |
243 | case VM86_REQUEST_IRQ: | 243 | case VM86_REQUEST_IRQ: |
244 | case VM86_FREE_IRQ: | 244 | case VM86_FREE_IRQ: |
245 | case VM86_GET_IRQ_BITS: | 245 | case VM86_GET_IRQ_BITS: |
246 | case VM86_GET_AND_RESET_IRQ: | 246 | case VM86_GET_AND_RESET_IRQ: |
247 | ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); | 247 | ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); |
248 | goto out; | 248 | goto out; |
249 | case VM86_PLUS_INSTALL_CHECK: | 249 | case VM86_PLUS_INSTALL_CHECK: |
250 | /* | 250 | /* |
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
261 | ret = -EPERM; | 261 | ret = -EPERM; |
262 | if (tsk->thread.saved_sp0) | 262 | if (tsk->thread.saved_sp0) |
263 | goto out; | 263 | goto out; |
264 | v86 = (struct vm86plus_struct __user *)regs.cx; | 264 | v86 = (struct vm86plus_struct __user *)regs->cx; |
265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
266 | offsetof(struct kernel_vm86_struct, regs32) - | 266 | offsetof(struct kernel_vm86_struct, regs32) - |
267 | sizeof(info.regs)); | 267 | sizeof(info.regs)); |
268 | ret = -EFAULT; | 268 | ret = -EFAULT; |
269 | if (tmp) | 269 | if (tmp) |
270 | goto out; | 270 | goto out; |
271 | info.regs32 = ®s; | 271 | info.regs32 = regs; |
272 | info.vm86plus.is_vm86pus = 1; | 272 | info.vm86plus.is_vm86pus = 1; |
273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | 273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; |
274 | do_sys_vm86(&info, tsk); | 274 | do_sys_vm86(&info, tsk); |
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
323 | info->regs32->ax = 0; | 323 | info->regs32->ax = 0; |
324 | tsk->thread.saved_sp0 = tsk->thread.sp0; | 324 | tsk->thread.saved_sp0 = tsk->thread.sp0; |
325 | tsk->thread.saved_fs = info->regs32->fs; | 325 | tsk->thread.saved_fs = info->regs32->fs; |
326 | savesegment(gs, tsk->thread.saved_gs); | 326 | tsk->thread.saved_gs = get_user_gs(info->regs32); |
327 | 327 | ||
328 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; |
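
Both vm86 entry points switch from taking struct pt_regs by value to taking a pointer, matching the reworked 32-bit entry code, so info.regs32 = regs replaces the old &regs trick. A short sketch of the contrast; sys_foo_old/sys_foo_new and do_foo() are hypothetical names for illustration:

	/* Old convention: the asm stub laid out the stack so the "by value"
	 * argument overlaid the saved register frame; &regs reached it. */
	asmlinkage int sys_foo_old(struct pt_regs regs)		/* hypothetical */
	{
		return do_foo(&regs);
	}

	/* New convention: the stub hands over an explicit pointer to the
	 * frame, so no pt_regs-shaped argument area is needed at all. */
	int sys_foo_new(struct pt_regs *regs)			/* hypothetical */
	{
		return do_foo(regs);
	}
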
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 8b6c393ab9fd..f052c84ecbe4 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -266,109 +266,6 @@ static void vmi_nop(void) | |||
266 | { | 266 | { |
267 | } | 267 | } |
268 | 268 | ||
269 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
270 | |||
271 | #ifdef CONFIG_X86_PAE | ||
272 | #define MAX_BOOT_PTS (2048+4+1) | ||
273 | #else | ||
274 | #define MAX_BOOT_PTS (1024+1) | ||
275 | #endif | ||
276 | |||
277 | /* | ||
278 | * During boot, mem_map is not yet available in paging_init, so stash | ||
279 | * all the boot page allocations here. | ||
280 | */ | ||
281 | static struct { | ||
282 | u32 pfn; | ||
283 | int type; | ||
284 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
285 | static int num_boot_page_allocations; | ||
286 | static int boot_allocations_applied; | ||
287 | |||
288 | void vmi_apply_boot_page_allocations(void) | ||
289 | { | ||
290 | int i; | ||
291 | BUG_ON(!mem_map); | ||
292 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
293 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
294 | page->type = boot_page_allocations[i].type; | ||
295 | page->type = boot_page_allocations[i].type & | ||
296 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
297 | } | ||
298 | boot_allocations_applied = 1; | ||
299 | } | ||
300 | |||
301 | static void record_page_type(u32 pfn, int type) | ||
302 | { | ||
303 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
304 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
305 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
306 | num_boot_page_allocations++; | ||
307 | } | ||
308 | |||
309 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
310 | { | ||
311 | u32 *ptr; | ||
312 | int i; | ||
313 | int limit = PAGE_SIZE / sizeof(int); | ||
314 | |||
315 | if (page_address(page)) | ||
316 | ptr = (u32 *)page_address(page); | ||
317 | else | ||
318 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
319 | /* | ||
320 | * When cloning the root in non-PAE mode, only the userspace | ||
321 | * pdes need to be zeroed. | ||
322 | */ | ||
323 | if (type & VMI_PAGE_CLONE) | ||
324 | limit = KERNEL_PGD_BOUNDARY; | ||
325 | for (i = 0; i < limit; i++) | ||
326 | BUG_ON(ptr[i]); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * We stash the page type into struct page so we can verify the page | ||
331 | * types are used properly. | ||
332 | */ | ||
333 | static void vmi_set_page_type(u32 pfn, int type) | ||
334 | { | ||
335 | /* PAE can have multiple roots per page - don't track */ | ||
336 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
337 | return; | ||
338 | |||
339 | if (boot_allocations_applied) { | ||
340 | struct page *page = pfn_to_page(pfn); | ||
341 | if (type != VMI_PAGE_NORMAL) | ||
342 | BUG_ON(page->type); | ||
343 | else | ||
344 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
345 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
346 | if (type & VMI_PAGE_ZEROED) | ||
347 | check_zeroed_page(pfn, type, page); | ||
348 | } else { | ||
349 | record_page_type(pfn, type); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static void vmi_check_page_type(u32 pfn, int type) | ||
354 | { | ||
355 | /* PAE can have multiple roots per page - skip checks */ | ||
356 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
357 | return; | ||
358 | |||
359 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
360 | if (boot_allocations_applied) { | ||
361 | struct page *page = pfn_to_page(pfn); | ||
362 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
363 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
364 | BUG_ON((type & page->type) == 0); | ||
365 | } | ||
366 | } | ||
367 | #else | ||
368 | #define vmi_set_page_type(p,t) do { } while (0) | ||
369 | #define vmi_check_page_type(p,t) do { } while (0) | ||
370 | #endif | ||
371 | |||
372 | #ifdef CONFIG_HIGHPTE | 269 | #ifdef CONFIG_HIGHPTE |
373 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | 270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) |
374 | { | 271 | { |
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
395 | 292 | ||
396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
397 | { | 294 | { |
398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
400 | } | 296 | } |
401 | 297 | ||
@@ -406,27 +302,32 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | |||
406 | * It is called only for swapper_pg_dir, which already has | 302 | * It is called only for swapper_pg_dir, which already has |
407 | * data on it. | 303 | * data on it. |
408 | */ | 304 | */ |
409 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 305 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
411 | } | 306 | } |
412 | 307 | ||
413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | 308 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
414 | { | 309 | { |
415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 310 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
418 | } | 311 | } |
419 | 312 | ||
420 | static void vmi_release_pte(unsigned long pfn) | 313 | static void vmi_release_pte(unsigned long pfn) |
421 | { | 314 | { |
422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 315 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
424 | } | 316 | } |
425 | 317 | ||
426 | static void vmi_release_pmd(unsigned long pfn) | 318 | static void vmi_release_pmd(unsigned long pfn) |
427 | { | 319 | { |
428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 320 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 321 | } |
322 | |||
323 | /* | ||
324 | * We use the pgd_free hook for releasing the pgd page: | ||
325 | */ | ||
326 | static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
327 | { | ||
328 | unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; | ||
329 | |||
330 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
430 | } | 331 | } |
431 | 332 | ||
432 | /* | 333 | /* |
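
The new vmi_pgd_free() hangs off pv_mmu_ops.pgd_free, a hook the generic page-table code runs when a pgd page is torn down; VMI uses it to tell the hypervisor the L2 page is going away. A simplified sketch of the call path, not a verbatim copy of the pgtable code:

	/* Simplified: how the hook is reached on the teardown side. */
	void pgd_free(struct mm_struct *mm, pgd_t *pgd)
	{
		/* ... mop up pmds, run the pgd destructor ... */
		paravirt_pgd_free(mm, pgd);	/* -> pv_mmu_ops.pgd_free */
		free_page((unsigned long)pgd);
	}

	/* With VMI active, pv_mmu_ops.pgd_free is vmi_pgd_free, which maps
	 * the pgd back to its pfn and releases it as a VMI_PAGE_L2 page. */
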
@@ -450,26 +351,22 @@ static void vmi_release_pmd(unsigned long pfn) | |||
450 | 351 | ||
451 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 352 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
452 | { | 353 | { |
453 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
454 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 354 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
455 | } | 355 | } |
456 | 356 | ||
457 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 357 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
458 | { | 358 | { |
459 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
460 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 359 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
461 | } | 360 | } |
462 | 361 | ||
463 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | 362 | static void vmi_set_pte(pte_t *ptep, pte_t pte) |
464 | { | 363 | { |
465 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | 364 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ |
466 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
467 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 365 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
468 | } | 366 | } |
469 | 367 | ||
470 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 368 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
471 | { | 369 | { |
472 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
473 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 370 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
474 | } | 371 | } |
475 | 372 | ||
@@ -477,10 +374,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
477 | { | 374 | { |
478 | #ifdef CONFIG_X86_PAE | 375 | #ifdef CONFIG_X86_PAE |
479 | const pte_t pte = { .pte = pmdval.pmd }; | 376 | const pte_t pte = { .pte = pmdval.pmd }; |
480 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
481 | #else | 377 | #else |
482 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 378 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
483 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
484 | #endif | 379 | #endif |
485 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | 380 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); |
486 | } | 381 | } |
@@ -502,7 +397,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | |||
502 | 397 | ||
503 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 398 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
504 | { | 399 | { |
505 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
506 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | 400 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); |
507 | } | 401 | } |
508 | 402 | ||
@@ -510,21 +404,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval) | |||
510 | { | 404 | { |
511 | /* Um, eww */ | 405 | /* Um, eww */ |
512 | const pte_t pte = { .pte = pudval.pgd.pgd }; | 406 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
513 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
514 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 407 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
515 | } | 408 | } |
516 | 409 | ||
517 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 410 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
518 | { | 411 | { |
519 | const pte_t pte = { .pte = 0 }; | 412 | const pte_t pte = { .pte = 0 }; |
520 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
521 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 413 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
522 | } | 414 | } |
523 | 415 | ||
524 | static void vmi_pmd_clear(pmd_t *pmd) | 416 | static void vmi_pmd_clear(pmd_t *pmd) |
525 | { | 417 | { |
526 | const pte_t pte = { .pte = 0 }; | 418 | const pte_t pte = { .pte = 0 }; |
527 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
528 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 419 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
529 | } | 420 | } |
530 | #endif | 421 | #endif |
@@ -789,10 +680,11 @@ static inline int __init activate_vmi(void) | |||
789 | para_fill(pv_mmu_ops.write_cr2, SetCR2); | 680 | para_fill(pv_mmu_ops.write_cr2, SetCR2); |
790 | para_fill(pv_mmu_ops.write_cr3, SetCR3); | 681 | para_fill(pv_mmu_ops.write_cr3, SetCR3); |
791 | para_fill(pv_cpu_ops.write_cr4, SetCR4); | 682 | para_fill(pv_cpu_ops.write_cr4, SetCR4); |
792 | para_fill(pv_irq_ops.save_fl, GetInterruptMask); | 683 | |
793 | para_fill(pv_irq_ops.restore_fl, SetInterruptMask); | 684 | para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); |
794 | para_fill(pv_irq_ops.irq_disable, DisableInterrupts); | 685 | para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); |
795 | para_fill(pv_irq_ops.irq_enable, EnableInterrupts); | 686 | para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); |
687 | para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); | ||
796 | 688 | ||
797 | para_fill(pv_cpu_ops.wbinvd, WBINVD); | 689 | para_fill(pv_cpu_ops.wbinvd, WBINVD); |
798 | para_fill(pv_cpu_ops.read_tsc, RDTSC); | 690 | para_fill(pv_cpu_ops.read_tsc, RDTSC); |
@@ -881,6 +773,7 @@ static inline int __init activate_vmi(void) | |||
881 | if (vmi_ops.release_page) { | 773 | if (vmi_ops.release_page) { |
882 | pv_mmu_ops.release_pte = vmi_release_pte; | 774 | pv_mmu_ops.release_pte = vmi_release_pte; |
883 | pv_mmu_ops.release_pmd = vmi_release_pmd; | 775 | pv_mmu_ops.release_pmd = vmi_release_pmd; |
776 | pv_mmu_ops.pgd_free = vmi_pgd_free; | ||
884 | } | 777 | } |
885 | 778 | ||
886 | /* Set linear is needed in all cases */ | 779 | /* Set linear is needed in all cases */ |
@@ -960,8 +853,6 @@ static inline int __init activate_vmi(void) | |||
960 | 853 | ||
961 | void __init vmi_init(void) | 854 | void __init vmi_init(void) |
962 | { | 855 | { |
963 | unsigned long flags; | ||
964 | |||
965 | if (!vmi_rom) | 856 | if (!vmi_rom) |
966 | probe_vmi_rom(); | 857 | probe_vmi_rom(); |
967 | else | 858 | else |
@@ -973,13 +864,21 @@ void __init vmi_init(void) | |||
973 | 864 | ||
974 | reserve_top_address(-vmi_rom->virtual_top); | 865 | reserve_top_address(-vmi_rom->virtual_top); |
975 | 866 | ||
976 | local_irq_save(flags); | ||
977 | activate_vmi(); | ||
978 | |||
979 | #ifdef CONFIG_X86_IO_APIC | 867 | #ifdef CONFIG_X86_IO_APIC |
980 | /* This is virtual hardware; timer routing is wired correctly */ | 868 | /* This is virtual hardware; timer routing is wired correctly */ |
981 | no_timer_check = 1; | 869 | no_timer_check = 1; |
982 | #endif | 870 | #endif |
871 | } | ||
872 | |||
873 | void __init vmi_activate(void) | ||
874 | { | ||
875 | unsigned long flags; | ||
876 | |||
877 | if (!vmi_rom) | ||
878 | return; | ||
879 | |||
880 | local_irq_save(flags); | ||
881 | activate_vmi(); | ||
983 | local_irq_restore(flags & X86_EFLAGS_IF); | 882 | local_irq_restore(flags & X86_EFLAGS_IF); |
984 | } | 883 | } |
985 | 884 | ||
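
Splitting vmi_activate() out of vmi_init() keeps ROM probing and reserve_top_address() very early while deferring the pv-ops patching to a later, explicit call; note that local_irq_restore(flags & X86_EFLAGS_IF) restores only the interrupt-enable bit of the saved flags. A sketch of the intended ordering; the caller name is hypothetical, not the actual call site:

	void __init boot_sketch(void)		/* hypothetical caller */
	{
		vmi_init();	/* find the VMI ROM, reserve the fixmap top;
				 * pv-ops are not touched yet */
		/* ... early setup that must run before patching ... */
		vmi_activate();	/* quietly returns if no ROM was found;
				 * otherwise patches pv-ops with IRQs off */
	}
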
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 254ee07f8635..a4791ef412d1 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | 226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ |
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | 227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); |
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
229 | evt->cpumask = cpumask_of_cpu(cpu); | 229 | evt->cpumask = cpumask_of(cpu); |
230 | 230 | ||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", |
232 | evt->name, evt->mult, evt->shift); | 232 | evt->name, evt->mult, evt->shift); |
@@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void) | |||
256 | */ | 256 | */ |
257 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 257 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
258 | local_irq_disable(); | 258 | local_irq_disable(); |
259 | #ifdef CONFIG_X86_SMP | 259 | #ifdef CONFIG_SMP |
260 | /* | 260 | /* |
261 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | 261 | * XXX handle_percpu_irq only defined for SMP; we need to switch over |
262 | * to using it, since this is a local interrupt, which each CPU must | 262 | * to using it, since this is a local interrupt, which each CPU must |
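
The vmiclock change reflects the cpumask API rework: cpumask_of_cpu(cpu) produced a whole cpumask_t by value, which scales badly as NR_CPUS grows, while cpumask_of(cpu) returns a const struct cpumask * into a constant table, and clock event devices now store that pointer. A small sketch of the change in shape:

	static void sketch_bind_event(struct clock_event_device *evt, int cpu)
	{
		/* old: cpumask_t m = cpumask_of_cpu(cpu);  -- copies
		 *      NR_CPUS bits onto the stack just to assign them */

		/* new: no copy; point at the shared read-only mask */
		evt->cpumask = cpumask_of(cpu);
	}
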
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc2..3eba7f7bac05 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -44,6 +44,7 @@ SECTIONS | |||
44 | SCHED_TEXT | 44 | SCHED_TEXT |
45 | LOCK_TEXT | 45 | LOCK_TEXT |
46 | KPROBES_TEXT | 46 | KPROBES_TEXT |
47 | IRQENTRY_TEXT | ||
47 | *(.fixup) | 48 | *(.fixup) |
48 | *(.gnu.warning) | 49 | *(.gnu.warning) |
49 | _etext = .; /* End of text section */ | 50 | _etext = .; /* End of text section */ |
@@ -177,14 +178,7 @@ SECTIONS | |||
177 | __initramfs_end = .; | 178 | __initramfs_end = .; |
178 | } | 179 | } |
179 | #endif | 180 | #endif |
180 | . = ALIGN(PAGE_SIZE); | 181 | PERCPU(PAGE_SIZE) |
181 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { | ||
182 | __per_cpu_start = .; | ||
183 | *(.data.percpu.page_aligned) | ||
184 | *(.data.percpu) | ||
185 | *(.data.percpu.shared_aligned) | ||
186 | __per_cpu_end = .; | ||
187 | } | ||
188 | . = ALIGN(PAGE_SIZE); | 182 | . = ALIGN(PAGE_SIZE); |
189 | /* freed after init ends here */ | 183 | /* freed after init ends here */ |
190 | 184 | ||
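
The hand-rolled .data.percpu output section is replaced by the PERCPU(PAGE_SIZE) helper from asm-generic/vmlinux.lds.h, which emits the same __per_cpu_start/__per_cpu_end bracketing. Those two symbols delimit the per-cpu template that boot code clones once per CPU; a hedged C sketch of that consumer, simplified from the generic setup path:

	extern char __per_cpu_start[], __per_cpu_end[];

	static void __init sketch_setup_per_cpu_areas(void)
	{
		size_t size = __per_cpu_end - __per_cpu_start;
		int cpu;

		for_each_possible_cpu(cpu) {
			char *ptr = alloc_bootmem(size);	/* one area per CPU */

			memcpy(ptr, __per_cpu_start, size);	/* clone template */
			/* ptr - __per_cpu_start becomes this CPU's offset */
		}
	}
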
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405b..087a7f2c639b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -5,6 +5,7 @@ | |||
5 | #define LOAD_OFFSET __START_KERNEL_map | 5 | #define LOAD_OFFSET __START_KERNEL_map |
6 | 6 | ||
7 | #include <asm-generic/vmlinux.lds.h> | 7 | #include <asm-generic/vmlinux.lds.h> |
8 | #include <asm/asm-offsets.h> | ||
8 | #include <asm/page.h> | 9 | #include <asm/page.h> |
9 | 10 | ||
10 | #undef i386 /* in case the preprocessor is a 32bit one */ | 11 | #undef i386 /* in case the preprocessor is a 32bit one */ |
@@ -13,12 +14,15 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | |||
13 | OUTPUT_ARCH(i386:x86-64) | 14 | OUTPUT_ARCH(i386:x86-64) |
14 | ENTRY(phys_startup_64) | 15 | ENTRY(phys_startup_64) |
15 | jiffies_64 = jiffies; | 16 | jiffies_64 = jiffies; |
16 | _proxy_pda = 1; | ||
17 | PHDRS { | 17 | PHDRS { |
18 | text PT_LOAD FLAGS(5); /* R_E */ | 18 | text PT_LOAD FLAGS(5); /* R_E */ |
19 | data PT_LOAD FLAGS(7); /* RWE */ | 19 | data PT_LOAD FLAGS(7); /* RWE */ |
20 | user PT_LOAD FLAGS(7); /* RWE */ | 20 | user PT_LOAD FLAGS(7); /* RWE */ |
21 | data.init PT_LOAD FLAGS(7); /* RWE */ | 21 | data.init PT_LOAD FLAGS(7); /* RWE */ |
22 | #ifdef CONFIG_SMP | ||
23 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
24 | #endif | ||
25 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | ||
22 | note PT_NOTE FLAGS(0); /* ___ */ | 26 | note PT_NOTE FLAGS(0); /* ___ */ |
23 | } | 27 | } |
24 | SECTIONS | 28 | SECTIONS |
@@ -35,6 +39,7 @@ SECTIONS | |||
35 | SCHED_TEXT | 39 | SCHED_TEXT |
36 | LOCK_TEXT | 40 | LOCK_TEXT |
37 | KPROBES_TEXT | 41 | KPROBES_TEXT |
42 | IRQENTRY_TEXT | ||
38 | *(.fixup) | 43 | *(.fixup) |
39 | *(.gnu.warning) | 44 | *(.gnu.warning) |
40 | _etext = .; /* End of text section */ | 45 | _etext = .; /* End of text section */ |
@@ -207,14 +212,28 @@ SECTIONS | |||
207 | __initramfs_end = .; | 212 | __initramfs_end = .; |
208 | #endif | 213 | #endif |
209 | 214 | ||
215 | #ifdef CONFIG_SMP | ||
216 | /* | ||
217 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
218 | * output PHDR, so the next output section - __data_nosave - should | ||
219 | * start another section data.init2. Also, pda should be at the head of | ||
220 | * percpu area. Preallocate it and define the percpu offset symbol | ||
221 | * so that it can be accessed as a percpu variable. | ||
222 | */ | ||
223 | . = ALIGN(PAGE_SIZE); | ||
224 | PERCPU_VADDR(0, :percpu) | ||
225 | #else | ||
210 | PERCPU(PAGE_SIZE) | 226 | PERCPU(PAGE_SIZE) |
227 | #endif | ||
211 | 228 | ||
212 | . = ALIGN(PAGE_SIZE); | 229 | . = ALIGN(PAGE_SIZE); |
213 | __init_end = .; | 230 | __init_end = .; |
214 | 231 | ||
215 | . = ALIGN(PAGE_SIZE); | 232 | . = ALIGN(PAGE_SIZE); |
216 | __nosave_begin = .; | 233 | __nosave_begin = .; |
217 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } | 234 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
235 | *(.data.nosave) | ||
236 | } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ | ||
218 | . = ALIGN(PAGE_SIZE); | 237 | . = ALIGN(PAGE_SIZE); |
219 | __nosave_end = .; | 238 | __nosave_end = .; |
220 | 239 | ||
@@ -238,8 +257,21 @@ SECTIONS | |||
238 | DWARF_DEBUG | 257 | DWARF_DEBUG |
239 | } | 258 | } |
240 | 259 | ||
260 | /* | ||
261 | * Per-cpu symbols which need to be offset from __per_cpu_load | ||
262 | * for the boot processor. | ||
263 | */ | ||
264 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | ||
265 | INIT_PER_CPU(gdt_page); | ||
266 | INIT_PER_CPU(irq_stack_union); | ||
267 | |||
241 | /* | 268 | /* |
242 | * Build-time check on the image size: | 269 | * Build-time check on the image size: |
243 | */ | 270 | */ |
244 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | 271 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), |
245 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 272 | "kernel image bigger than KERNEL_IMAGE_SIZE") |
273 | |||
274 | #ifdef CONFIG_SMP | ||
275 | ASSERT((per_cpu__irq_stack_union == 0), | ||
276 | "irq_stack_union is not at start of per-cpu area"); | ||
277 | #endif | ||
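
With PERCPU_VADDR(0, :percpu) the per-cpu section links at virtual address 0, so every per_cpu__foo symbol is a plain offset to be added to %gs at run time. The INIT_PER_CPU() definitions exist because very early boot must reach the load-time copy before any per-cpu area is set up, and the closing ASSERT pins irq_stack_union to offset 0. A comment-level sketch of the arithmetic involved:

	/*
	 * Zero-based per-cpu sketch:
	 *
	 *   per_cpu__gdt_page       -- offset of gdt_page within a per-cpu area
	 *   __per_cpu_load          -- where the template landed in the image
	 *   init_per_cpu__gdt_page  == per_cpu__gdt_page + __per_cpu_load
	 *
	 * Head code can therefore use init_per_cpu__gdt_page as an ordinary
	 * linear address before %gs points at a real per-cpu area; once it
	 * does, a variable is reached as "%gs:per_cpu__gdt_page".
	 */
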
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec2..c609205df594 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) | |||
37 | flags &= ~X86_EFLAGS_IF; | 37 | flags &= ~X86_EFLAGS_IF; |
38 | return flags; | 38 | return flags; |
39 | } | 39 | } |
40 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); | ||
40 | 41 | ||
41 | static void vsmp_restore_fl(unsigned long flags) | 42 | static void vsmp_restore_fl(unsigned long flags) |
42 | { | 43 | { |
@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) | |||
46 | flags |= X86_EFLAGS_AC; | 47 | flags |= X86_EFLAGS_AC; |
47 | native_restore_fl(flags); | 48 | native_restore_fl(flags); |
48 | } | 49 | } |
50 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); | ||
49 | 51 | ||
50 | static void vsmp_irq_disable(void) | 52 | static void vsmp_irq_disable(void) |
51 | { | 53 | { |
@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) | |||
53 | 55 | ||
54 | native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); | 56 | native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); |
55 | } | 57 | } |
58 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); | ||
56 | 59 | ||
57 | static void vsmp_irq_enable(void) | 60 | static void vsmp_irq_enable(void) |
58 | { | 61 | { |
@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) | |||
60 | 63 | ||
61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 64 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
62 | } | 65 | } |
66 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); | ||
63 | 67 | ||
64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 68 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
65 | unsigned long addr, unsigned len) | 69 | unsigned long addr, unsigned len) |
@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) | |||
90 | cap, ctl); | 94 | cap, ctl); |
91 | if (cap & ctl & (1 << 4)) { | 95 | if (cap & ctl & (1 << 4)) { |
92 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ | 96 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ |
93 | pv_irq_ops.irq_disable = vsmp_irq_disable; | 97 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); |
94 | pv_irq_ops.irq_enable = vsmp_irq_enable; | 98 | pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); |
95 | pv_irq_ops.save_fl = vsmp_save_fl; | 99 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); |
96 | pv_irq_ops.restore_fl = vsmp_restore_fl; | 100 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); |
97 | pv_init_ops.patch = vsmp_patch; | 101 | pv_init_ops.patch = vsmp_patch; |
98 | 102 | ||
99 | ctl &= ~(1 << 4); | 103 | ctl &= ~(1 << 4); |
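
The PV_CALLEE_SAVE_REGS_THUNK() lines pair with the PV_CALLEE_SAVE() wrappers in the hunk above: the irq ops move to a lighter calling convention in which an asm thunk preserves the caller-clobbered registers around the real C function, so patched call sites do not have to. An abbreviated sketch of the plumbing, not the verbatim paravirt headers:

	/* Abbreviated; see the paravirt headers for the real thing. */
	struct paravirt_callee_save {
		void *func;	/* points at the register-preserving thunk */
	};

	/*
	 * PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl) emits, in asm, roughly:
	 *
	 *   __raw_callee_save_vsmp_save_fl:
	 *	push the caller-clobbered registers (minus the return reg)
	 *	call vsmp_save_fl
	 *	pop them back; ret
	 *
	 * PV_CALLEE_SAVE(vsmp_save_fl) then wraps that thunk in a
	 * struct paravirt_callee_save for the ops table, which is why the
	 * assignments in set_vsmp_pv_ops() gain the PV_CALLEE_SAVE(...)
	 * decoration.
	 */
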
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..44153afc9067 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -17,6 +17,9 @@ | |||
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* Disable profiling for userspace code: */ | ||
21 | #define DISABLE_BRANCH_PROFILING | ||
22 | |||
20 | #include <linux/time.h> | 23 | #include <linux/time.h> |
21 | #include <linux/init.h> | 24 | #include <linux/init.h> |
22 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
@@ -128,7 +131,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
128 | gettimeofday(tv,NULL); | 131 | gettimeofday(tv,NULL); |
129 | return; | 132 | return; |
130 | } | 133 | } |
134 | |||
135 | /* | ||
136 | * Surround the RDTSC by barriers, to make sure it's not | ||
137 | * speculated to outside the seqlock critical section and | ||
138 | * does not cause time warps: | ||
139 | */ | ||
140 | rdtsc_barrier(); | ||
131 | now = vread(); | 141 | now = vread(); |
142 | rdtsc_barrier(); | ||
143 | |||
132 | base = __vsyscall_gtod_data.clock.cycle_last; | 144 | base = __vsyscall_gtod_data.clock.cycle_last; |
133 | mask = __vsyscall_gtod_data.clock.mask; | 145 | mask = __vsyscall_gtod_data.clock.mask; |
134 | mult = __vsyscall_gtod_data.clock.mult; | 146 | mult = __vsyscall_gtod_data.clock.mult; |
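
The paired rdtsc_barrier() calls fence the TSC read so it can be neither speculated ahead of the seqlock-protected loads nor drift past them, which is what produced the observed time warps; on real hardware the barrier is an alternatives-patched LFENCE or MFENCE. A sketch of the read-loop pattern this protects, condensed from the surrounding function:

	do {
		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
		/* ... check the clock mode, fall back to the syscall ... */
		rdtsc_barrier();	/* keep RDTSC inside the section */
		now = vread();
		rdtsc_barrier();	/* and stop it leaking out the far side */
		base = __vsyscall_gtod_data.clock.cycle_last;
		/* ... mask, mult, shift, then scale (now - base) ... */
	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
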
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa354..3909e3ba5ce3 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); | |||
58 | EXPORT_SYMBOL(empty_zero_page); | 58 | EXPORT_SYMBOL(empty_zero_page); |
59 | EXPORT_SYMBOL(init_level4_pgt); | 59 | EXPORT_SYMBOL(init_level4_pgt); |
60 | EXPORT_SYMBOL(load_gs_index); | 60 | EXPORT_SYMBOL(load_gs_index); |
61 | |||
62 | EXPORT_SYMBOL(_proxy_pda); | ||
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb75e822..2b54fe002e94 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) | |||
159 | * Restore the extended state if present. Otherwise, restore the FP/SSE | 159 | * Restore the extended state if present. Otherwise, restore the FP/SSE |
160 | * state. | 160 | * state. |
161 | */ | 161 | */ |
162 | int restore_user_xstate(void __user *buf) | 162 | static int restore_user_xstate(void __user *buf) |
163 | { | 163 | { |
164 | struct _fpx_sw_bytes fx_sw_user; | 164 | struct _fpx_sw_bytes fx_sw_user; |
165 | u64 mask; | 165 | u64 mask; |
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) | |||
310 | /* | 310 | /* |
311 | * Enable and initialize the xsave feature. | 311 | * Enable and initialize the xsave feature. |
312 | */ | 312 | */ |
313 | void __init xsave_cntxt_init(void) | 313 | void __ref xsave_cntxt_init(void) |
314 | { | 314 | { |
315 | unsigned int eax, ebx, ecx, edx; | 315 | unsigned int eax, ebx, ecx, edx; |
316 | 316 | ||
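
Two linkage tweaks close the section: restore_user_xstate() becomes static since it has no users outside this file, and xsave_cntxt_init() is re-annotated __ref, telling modpost that its references into __init code (the setup_xstate_init() path) are intentional rather than a section mismatch. A sketch of what __ref asserts, with hypothetical function names:

	static void __init one_time_setup(void)		/* hypothetical */
	{
		/* boot-time-only work, discarded after init */
	}

	void __ref entry_point(void)			/* hypothetical */
	{
		/* calling into __init is legal only while still booting;
		 * __ref suppresses the section-mismatch warning for it */
		one_time_setup();
	}
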