diff options
Diffstat (limited to 'arch/x86_64/kernel')
42 files changed, 1950 insertions, 2005 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index b5aaeafc1cd3..3c7cbff04d3d 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
11 | pci-dma.o pci-nommu.o alternative.o | 11 | pci-dma.o pci-nommu.o alternative.o |
12 | 12 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
14 | obj-$(CONFIG_X86_MCE) += mce.o | 14 | obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o |
15 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 15 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
16 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 16 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
17 | obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ | 17 | obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ |
@@ -20,8 +20,8 @@ obj-$(CONFIG_X86_MSR) += msr.o | |||
20 | obj-$(CONFIG_MICROCODE) += microcode.o | 20 | obj-$(CONFIG_MICROCODE) += microcode.o |
21 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 21 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o | 22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o |
23 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 23 | obj-y += apic.o nmi.o |
24 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ | 24 | obj-y += io_apic.o mpparse.o \ |
25 | genapic.o genapic_cluster.o genapic_flat.o | 25 | genapic.o genapic_cluster.o genapic_flat.o |
26 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o | 26 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o |
27 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 27 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
@@ -39,12 +39,14 @@ obj-$(CONFIG_K8_NB) += k8.o | |||
39 | obj-$(CONFIG_AUDIT) += audit.o | 39 | obj-$(CONFIG_AUDIT) += audit.o |
40 | 40 | ||
41 | obj-$(CONFIG_MODULES) += module.o | 41 | obj-$(CONFIG_MODULES) += module.o |
42 | obj-$(CONFIG_PCI) += early-quirks.o | ||
42 | 43 | ||
43 | obj-y += topology.o | 44 | obj-y += topology.o |
44 | obj-y += intel_cacheinfo.o | 45 | obj-y += intel_cacheinfo.o |
45 | 46 | ||
46 | CFLAGS_vsyscall.o := $(PROFILING) -g0 | 47 | CFLAGS_vsyscall.o := $(PROFILING) -g0 |
47 | 48 | ||
49 | therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o | ||
48 | bootflag-y += ../../i386/kernel/bootflag.o | 50 | bootflag-y += ../../i386/kernel/bootflag.o |
49 | cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o | 51 | cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o |
50 | topology-y += ../../i386/kernel/topology.o | 52 | topology-y += ../../i386/kernel/topology.o |
@@ -54,4 +56,3 @@ quirks-y += ../../i386/kernel/quirks.o | |||
54 | i8237-y += ../../i386/kernel/i8237.o | 56 | i8237-y += ../../i386/kernel/i8237.o |
55 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o | 57 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o |
56 | alternative-y += ../../i386/kernel/alternative.o | 58 | alternative-y += ../../i386/kernel/alternative.o |
57 | |||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 58af8e73738b..b487396c4c5b 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/pci_ids.h> | 17 | #include <linux/pci_ids.h> |
18 | #include <linux/pci.h> | 18 | #include <linux/pci.h> |
19 | #include <linux/bitops.h> | 19 | #include <linux/bitops.h> |
20 | #include <linux/ioport.h> | ||
20 | #include <asm/e820.h> | 21 | #include <asm/e820.h> |
21 | #include <asm/io.h> | 22 | #include <asm/io.h> |
22 | #include <asm/proto.h> | 23 | #include <asm/proto.h> |
@@ -33,6 +34,18 @@ int fallback_aper_force __initdata = 0; | |||
33 | 34 | ||
34 | int fix_aperture __initdata = 1; | 35 | int fix_aperture __initdata = 1; |
35 | 36 | ||
37 | static struct resource gart_resource = { | ||
38 | .name = "GART", | ||
39 | .flags = IORESOURCE_MEM, | ||
40 | }; | ||
41 | |||
42 | static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) | ||
43 | { | ||
44 | gart_resource.start = aper_base; | ||
45 | gart_resource.end = aper_base + aper_size - 1; | ||
46 | insert_resource(&iomem_resource, &gart_resource); | ||
47 | } | ||
48 | |||
36 | /* This code runs before the PCI subsystem is initialized, so just | 49 | /* This code runs before the PCI subsystem is initialized, so just |
37 | access the northbridge directly. */ | 50 | access the northbridge directly. */ |
38 | 51 | ||
@@ -48,7 +61,7 @@ static u32 __init allocate_aperture(void) | |||
48 | 61 | ||
49 | /* | 62 | /* |
50 | * Aperture has to be naturally aligned. This means an 2GB aperture won't | 63 | * Aperture has to be naturally aligned. This means an 2GB aperture won't |
51 | * have much chances to find a place in the lower 4GB of memory. | 64 | * have much chance of finding a place in the lower 4GB of memory. |
52 | * Unfortunately we cannot move it up because that would make the | 65 | * Unfortunately we cannot move it up because that would make the |
53 | * IOMMU useless. | 66 | * IOMMU useless. |
54 | */ | 67 | */ |
@@ -62,6 +75,7 @@ static u32 __init allocate_aperture(void) | |||
62 | } | 75 | } |
63 | printk("Mapping aperture over %d KB of RAM @ %lx\n", | 76 | printk("Mapping aperture over %d KB of RAM @ %lx\n", |
64 | aper_size >> 10, __pa(p)); | 77 | aper_size >> 10, __pa(p)); |
78 | insert_aperture_resource((u32)__pa(p), aper_size); | ||
65 | return (u32)__pa(p); | 79 | return (u32)__pa(p); |
66 | } | 80 | } |
67 | 81 | ||
@@ -198,7 +212,7 @@ void __init iommu_hole_init(void) | |||
198 | u64 aper_base, last_aper_base = 0; | 212 | u64 aper_base, last_aper_base = 0; |
199 | int valid_agp = 0; | 213 | int valid_agp = 0; |
200 | 214 | ||
201 | if (iommu_aperture_disabled || !fix_aperture) | 215 | if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) |
202 | return; | 216 | return; |
203 | 217 | ||
204 | printk("Checking aperture...\n"); | 218 | printk("Checking aperture...\n"); |
@@ -233,8 +247,13 @@ void __init iommu_hole_init(void) | |||
233 | last_aper_base = aper_base; | 247 | last_aper_base = aper_base; |
234 | } | 248 | } |
235 | 249 | ||
236 | if (!fix && !fallback_aper_force) | 250 | if (!fix && !fallback_aper_force) { |
251 | if (last_aper_base) { | ||
252 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; | ||
253 | insert_aperture_resource((u32)last_aper_base, n); | ||
254 | } | ||
237 | return; | 255 | return; |
256 | } | ||
238 | 257 | ||
239 | if (!fallback_aper_force) | 258 | if (!fallback_aper_force) |
240 | aper_alloc = search_agp_bridge(&aper_order, &valid_agp); | 259 | aper_alloc = search_agp_bridge(&aper_order, &valid_agp); |
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 2b8cef037a65..135ff25e6b44 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kernel_stat.h> | 25 | #include <linux/kernel_stat.h> |
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/ioport.h> | ||
28 | 29 | ||
29 | #include <asm/atomic.h> | 30 | #include <asm/atomic.h> |
30 | #include <asm/smp.h> | 31 | #include <asm/smp.h> |
@@ -36,13 +37,20 @@ | |||
36 | #include <asm/idle.h> | 37 | #include <asm/idle.h> |
37 | #include <asm/proto.h> | 38 | #include <asm/proto.h> |
38 | #include <asm/timex.h> | 39 | #include <asm/timex.h> |
40 | #include <asm/apic.h> | ||
39 | 41 | ||
42 | int apic_mapped; | ||
40 | int apic_verbosity; | 43 | int apic_verbosity; |
41 | int apic_runs_main_timer; | 44 | int apic_runs_main_timer; |
42 | int apic_calibrate_pmtmr __initdata; | 45 | int apic_calibrate_pmtmr __initdata; |
43 | 46 | ||
44 | int disable_apic_timer __initdata; | 47 | int disable_apic_timer __initdata; |
45 | 48 | ||
49 | static struct resource lapic_resource = { | ||
50 | .name = "Local APIC", | ||
51 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, | ||
52 | }; | ||
53 | |||
46 | /* | 54 | /* |
47 | * cpu_mask that denotes the CPUs that needs timer interrupt coming in as | 55 | * cpu_mask that denotes the CPUs that needs timer interrupt coming in as |
48 | * IPIs in place of local APIC timers | 56 | * IPIs in place of local APIC timers |
@@ -136,72 +144,40 @@ void clear_local_APIC(void) | |||
136 | apic_read(APIC_ESR); | 144 | apic_read(APIC_ESR); |
137 | } | 145 | } |
138 | 146 | ||
139 | void __init connect_bsp_APIC(void) | ||
140 | { | ||
141 | if (pic_mode) { | ||
142 | /* | ||
143 | * Do not trust the local APIC being empty at bootup. | ||
144 | */ | ||
145 | clear_local_APIC(); | ||
146 | /* | ||
147 | * PIC mode, enable APIC mode in the IMCR, i.e. | ||
148 | * connect BSP's local APIC to INT and NMI lines. | ||
149 | */ | ||
150 | apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n"); | ||
151 | outb(0x70, 0x22); | ||
152 | outb(0x01, 0x23); | ||
153 | } | ||
154 | } | ||
155 | |||
156 | void disconnect_bsp_APIC(int virt_wire_setup) | 147 | void disconnect_bsp_APIC(int virt_wire_setup) |
157 | { | 148 | { |
158 | if (pic_mode) { | 149 | /* Go back to Virtual Wire compatibility mode */ |
159 | /* | 150 | unsigned long value; |
160 | * Put the board back into PIC mode (has an effect | 151 | |
161 | * only on certain older boards). Note that APIC | 152 | /* For the spurious interrupt use vector F, and enable it */ |
162 | * interrupts, including IPIs, won't work beyond | 153 | value = apic_read(APIC_SPIV); |
163 | * this point! The only exception are INIT IPIs. | 154 | value &= ~APIC_VECTOR_MASK; |
164 | */ | 155 | value |= APIC_SPIV_APIC_ENABLED; |
165 | apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n"); | 156 | value |= 0xf; |
166 | outb(0x70, 0x22); | 157 | apic_write(APIC_SPIV, value); |
167 | outb(0x00, 0x23); | ||
168 | } | ||
169 | else { | ||
170 | /* Go back to Virtual Wire compatibility mode */ | ||
171 | unsigned long value; | ||
172 | |||
173 | /* For the spurious interrupt use vector F, and enable it */ | ||
174 | value = apic_read(APIC_SPIV); | ||
175 | value &= ~APIC_VECTOR_MASK; | ||
176 | value |= APIC_SPIV_APIC_ENABLED; | ||
177 | value |= 0xf; | ||
178 | apic_write(APIC_SPIV, value); | ||
179 | |||
180 | if (!virt_wire_setup) { | ||
181 | /* For LVT0 make it edge triggered, active high, external and enabled */ | ||
182 | value = apic_read(APIC_LVT0); | ||
183 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
184 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
185 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | ||
186 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
187 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
188 | apic_write(APIC_LVT0, value); | ||
189 | } | ||
190 | else { | ||
191 | /* Disable LVT0 */ | ||
192 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
193 | } | ||
194 | 158 | ||
195 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | 159 | if (!virt_wire_setup) { |
196 | value = apic_read(APIC_LVT1); | 160 | /* For LVT0 make it edge triggered, active high, external and enabled */ |
197 | value &= ~( | 161 | value = apic_read(APIC_LVT0); |
198 | APIC_MODE_MASK | APIC_SEND_PENDING | | 162 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
199 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | 163 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
200 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 164 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); |
201 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 165 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
202 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 166 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
203 | apic_write(APIC_LVT1, value); | 167 | apic_write(APIC_LVT0, value); |
168 | } else { | ||
169 | /* Disable LVT0 */ | ||
170 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
204 | } | 171 | } |
172 | |||
173 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
174 | value = apic_read(APIC_LVT1); | ||
175 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
176 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
177 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
178 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
179 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
180 | apic_write(APIC_LVT1, value); | ||
205 | } | 181 | } |
206 | 182 | ||
207 | void disable_local_APIC(void) | 183 | void disable_local_APIC(void) |
@@ -297,8 +273,6 @@ void __init sync_Arb_IDs(void) | |||
297 | | APIC_DM_INIT); | 273 | | APIC_DM_INIT); |
298 | } | 274 | } |
299 | 275 | ||
300 | extern void __error_in_apic_c (void); | ||
301 | |||
302 | /* | 276 | /* |
303 | * An initial setup of the virtual wire mode. | 277 | * An initial setup of the virtual wire mode. |
304 | */ | 278 | */ |
@@ -345,8 +319,7 @@ void __cpuinit setup_local_APIC (void) | |||
345 | 319 | ||
346 | value = apic_read(APIC_LVR); | 320 | value = apic_read(APIC_LVR); |
347 | 321 | ||
348 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) | 322 | BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); |
349 | __error_in_apic_c(); | ||
350 | 323 | ||
351 | /* | 324 | /* |
352 | * Double-check whether this APIC is really registered. | 325 | * Double-check whether this APIC is really registered. |
@@ -399,32 +372,8 @@ void __cpuinit setup_local_APIC (void) | |||
399 | */ | 372 | */ |
400 | value |= APIC_SPIV_APIC_ENABLED; | 373 | value |= APIC_SPIV_APIC_ENABLED; |
401 | 374 | ||
402 | /* | 375 | /* We always use processor focus */ |
403 | * Some unknown Intel IO/APIC (or APIC) errata is biting us with | 376 | |
404 | * certain networking cards. If high frequency interrupts are | ||
405 | * happening on a particular IOAPIC pin, plus the IOAPIC routing | ||
406 | * entry is masked/unmasked at a high rate as well then sooner or | ||
407 | * later IOAPIC line gets 'stuck', no more interrupts are received | ||
408 | * from the device. If focus CPU is disabled then the hang goes | ||
409 | * away, oh well :-( | ||
410 | * | ||
411 | * [ This bug can be reproduced easily with a level-triggered | ||
412 | * PCI Ne2000 networking cards and PII/PIII processors, dual | ||
413 | * BX chipset. ] | ||
414 | */ | ||
415 | /* | ||
416 | * Actually disabling the focus CPU check just makes the hang less | ||
417 | * frequent as it makes the interrupt distributon model be more | ||
418 | * like LRU than MRU (the short-term load is more even across CPUs). | ||
419 | * See also the comment in end_level_ioapic_irq(). --macro | ||
420 | */ | ||
421 | #if 1 | ||
422 | /* Enable focus processor (bit==0) */ | ||
423 | value &= ~APIC_SPIV_FOCUS_DISABLED; | ||
424 | #else | ||
425 | /* Disable focus processor (bit==1) */ | ||
426 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
427 | #endif | ||
428 | /* | 377 | /* |
429 | * Set spurious IRQ vector | 378 | * Set spurious IRQ vector |
430 | */ | 379 | */ |
@@ -442,7 +391,7 @@ void __cpuinit setup_local_APIC (void) | |||
442 | * TODO: set up through-local-APIC from through-I/O-APIC? --macro | 391 | * TODO: set up through-local-APIC from through-I/O-APIC? --macro |
443 | */ | 392 | */ |
444 | value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; | 393 | value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; |
445 | if (!smp_processor_id() && (pic_mode || !value)) { | 394 | if (!smp_processor_id() && !value) { |
446 | value = APIC_DM_EXTINT; | 395 | value = APIC_DM_EXTINT; |
447 | apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id()); | 396 | apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id()); |
448 | } else { | 397 | } else { |
@@ -479,8 +428,7 @@ void __cpuinit setup_local_APIC (void) | |||
479 | } | 428 | } |
480 | 429 | ||
481 | nmi_watchdog_default(); | 430 | nmi_watchdog_default(); |
482 | if (nmi_watchdog == NMI_LOCAL_APIC) | 431 | setup_apic_nmi_watchdog(NULL); |
483 | setup_apic_nmi_watchdog(); | ||
484 | apic_pm_activate(); | 432 | apic_pm_activate(); |
485 | } | 433 | } |
486 | 434 | ||
@@ -527,8 +475,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
527 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | 475 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); |
528 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | 476 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); |
529 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | 477 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); |
530 | local_save_flags(flags); | 478 | local_irq_save(flags); |
531 | local_irq_disable(); | ||
532 | disable_local_APIC(); | 479 | disable_local_APIC(); |
533 | local_irq_restore(flags); | 480 | local_irq_restore(flags); |
534 | return 0; | 481 | return 0; |
@@ -606,18 +553,24 @@ static void apic_pm_activate(void) { } | |||
606 | 553 | ||
607 | static int __init apic_set_verbosity(char *str) | 554 | static int __init apic_set_verbosity(char *str) |
608 | { | 555 | { |
556 | if (str == NULL) { | ||
557 | skip_ioapic_setup = 0; | ||
558 | ioapic_force = 1; | ||
559 | return 0; | ||
560 | } | ||
609 | if (strcmp("debug", str) == 0) | 561 | if (strcmp("debug", str) == 0) |
610 | apic_verbosity = APIC_DEBUG; | 562 | apic_verbosity = APIC_DEBUG; |
611 | else if (strcmp("verbose", str) == 0) | 563 | else if (strcmp("verbose", str) == 0) |
612 | apic_verbosity = APIC_VERBOSE; | 564 | apic_verbosity = APIC_VERBOSE; |
613 | else | 565 | else { |
614 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | 566 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" |
615 | " use apic=verbose or apic=debug", str); | 567 | " use apic=verbose or apic=debug\n", str); |
568 | return -EINVAL; | ||
569 | } | ||
616 | 570 | ||
617 | return 1; | 571 | return 0; |
618 | } | 572 | } |
619 | 573 | early_param("apic", apic_set_verbosity); | |
620 | __setup("apic=", apic_set_verbosity); | ||
621 | 574 | ||
622 | /* | 575 | /* |
623 | * Detect and enable local APICs on non-SMP boards. | 576 | * Detect and enable local APICs on non-SMP boards. |
@@ -638,6 +591,40 @@ static int __init detect_init_APIC (void) | |||
638 | return 0; | 591 | return 0; |
639 | } | 592 | } |
640 | 593 | ||
594 | #ifdef CONFIG_X86_IO_APIC | ||
595 | static struct resource * __init ioapic_setup_resources(void) | ||
596 | { | ||
597 | #define IOAPIC_RESOURCE_NAME_SIZE 11 | ||
598 | unsigned long n; | ||
599 | struct resource *res; | ||
600 | char *mem; | ||
601 | int i; | ||
602 | |||
603 | if (nr_ioapics <= 0) | ||
604 | return NULL; | ||
605 | |||
606 | n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); | ||
607 | n *= nr_ioapics; | ||
608 | |||
609 | res = alloc_bootmem(n); | ||
610 | |||
611 | if (!res) | ||
612 | return NULL; | ||
613 | |||
614 | memset(res, 0, n); | ||
615 | mem = (void *)&res[nr_ioapics]; | ||
616 | |||
617 | for (i = 0; i < nr_ioapics; i++) { | ||
618 | res[i].name = mem; | ||
619 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
620 | snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); | ||
621 | mem += IOAPIC_RESOURCE_NAME_SIZE; | ||
622 | } | ||
623 | |||
624 | return res; | ||
625 | } | ||
626 | #endif | ||
627 | |||
641 | void __init init_apic_mappings(void) | 628 | void __init init_apic_mappings(void) |
642 | { | 629 | { |
643 | unsigned long apic_phys; | 630 | unsigned long apic_phys; |
@@ -654,19 +641,26 @@ void __init init_apic_mappings(void) | |||
654 | apic_phys = mp_lapic_addr; | 641 | apic_phys = mp_lapic_addr; |
655 | 642 | ||
656 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | 643 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); |
644 | apic_mapped = 1; | ||
657 | apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); | 645 | apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); |
658 | 646 | ||
647 | /* Put local APIC into the resource map. */ | ||
648 | lapic_resource.start = apic_phys; | ||
649 | lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; | ||
650 | insert_resource(&iomem_resource, &lapic_resource); | ||
651 | |||
659 | /* | 652 | /* |
660 | * Fetch the APIC ID of the BSP in case we have a | 653 | * Fetch the APIC ID of the BSP in case we have a |
661 | * default configuration (or the MP table is broken). | 654 | * default configuration (or the MP table is broken). |
662 | */ | 655 | */ |
663 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); | 656 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); |
664 | 657 | ||
665 | #ifdef CONFIG_X86_IO_APIC | ||
666 | { | 658 | { |
667 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | 659 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; |
668 | int i; | 660 | int i; |
661 | struct resource *ioapic_res; | ||
669 | 662 | ||
663 | ioapic_res = ioapic_setup_resources(); | ||
670 | for (i = 0; i < nr_ioapics; i++) { | 664 | for (i = 0; i < nr_ioapics; i++) { |
671 | if (smp_found_config) { | 665 | if (smp_found_config) { |
672 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | 666 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; |
@@ -678,9 +672,15 @@ void __init init_apic_mappings(void) | |||
678 | apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", | 672 | apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", |
679 | __fix_to_virt(idx), ioapic_phys); | 673 | __fix_to_virt(idx), ioapic_phys); |
680 | idx++; | 674 | idx++; |
675 | |||
676 | if (ioapic_res) { | ||
677 | ioapic_res->start = ioapic_phys; | ||
678 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | ||
679 | insert_resource(&iomem_resource, ioapic_res); | ||
680 | ioapic_res++; | ||
681 | } | ||
681 | } | 682 | } |
682 | } | 683 | } |
683 | #endif | ||
684 | } | 684 | } |
685 | 685 | ||
686 | /* | 686 | /* |
@@ -951,7 +951,7 @@ void smp_local_timer_interrupt(struct pt_regs *regs) | |||
951 | * We take the 'long' return path, and there every subsystem | 951 | * We take the 'long' return path, and there every subsystem |
952 | * grabs the appropriate locks (kernel lock/ irq lock). | 952 | * grabs the appropriate locks (kernel lock/ irq lock). |
953 | * | 953 | * |
954 | * we might want to decouple profiling from the 'long path', | 954 | * We might want to decouple profiling from the 'long path', |
955 | * and do the profiling totally in assembly. | 955 | * and do the profiling totally in assembly. |
956 | * | 956 | * |
957 | * Currently this isn't too much of an issue (performance wise), | 957 | * Currently this isn't too much of an issue (performance wise), |
@@ -1123,19 +1123,15 @@ int __init APIC_init_uniprocessor (void) | |||
1123 | 1123 | ||
1124 | verify_local_APIC(); | 1124 | verify_local_APIC(); |
1125 | 1125 | ||
1126 | connect_bsp_APIC(); | ||
1127 | |||
1128 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | 1126 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); |
1129 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); | 1127 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); |
1130 | 1128 | ||
1131 | setup_local_APIC(); | 1129 | setup_local_APIC(); |
1132 | 1130 | ||
1133 | #ifdef CONFIG_X86_IO_APIC | ||
1134 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | 1131 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) |
1135 | setup_IO_APIC(); | 1132 | setup_IO_APIC(); |
1136 | else | 1133 | else |
1137 | nr_ioapics = 0; | 1134 | nr_ioapics = 0; |
1138 | #endif | ||
1139 | setup_boot_APIC_clock(); | 1135 | setup_boot_APIC_clock(); |
1140 | check_nmi_watchdog(); | 1136 | check_nmi_watchdog(); |
1141 | return 0; | 1137 | return 0; |
@@ -1144,14 +1140,17 @@ int __init APIC_init_uniprocessor (void) | |||
1144 | static __init int setup_disableapic(char *str) | 1140 | static __init int setup_disableapic(char *str) |
1145 | { | 1141 | { |
1146 | disable_apic = 1; | 1142 | disable_apic = 1; |
1147 | return 1; | 1143 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
1148 | } | 1144 | return 0; |
1145 | } | ||
1146 | early_param("disableapic", setup_disableapic); | ||
1149 | 1147 | ||
1148 | /* same as disableapic, for compatibility */ | ||
1150 | static __init int setup_nolapic(char *str) | 1149 | static __init int setup_nolapic(char *str) |
1151 | { | 1150 | { |
1152 | disable_apic = 1; | 1151 | return setup_disableapic(str); |
1153 | return 1; | ||
1154 | } | 1152 | } |
1153 | early_param("nolapic", setup_nolapic); | ||
1155 | 1154 | ||
1156 | static __init int setup_noapictimer(char *str) | 1155 | static __init int setup_noapictimer(char *str) |
1157 | { | 1156 | { |
@@ -1184,11 +1183,5 @@ static __init int setup_apicpmtimer(char *s) | |||
1184 | } | 1183 | } |
1185 | __setup("apicpmtimer", setup_apicpmtimer); | 1184 | __setup("apicpmtimer", setup_apicpmtimer); |
1186 | 1185 | ||
1187 | /* dummy parsing: see setup.c */ | ||
1188 | |||
1189 | __setup("disableapic", setup_disableapic); | ||
1190 | __setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */ | ||
1191 | |||
1192 | __setup("noapictimer", setup_noapictimer); | 1186 | __setup("noapictimer", setup_noapictimer); |
1193 | 1187 | ||
1194 | /* no "lapic" flag - we only use the lapic when the BIOS tells us so. */ | ||
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index d8d5750d6106..3525f884af82 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/nmi.h> | 23 | #include <asm/nmi.h> |
24 | #include <asm/hw_irq.h> | 24 | #include <asm/hw_irq.h> |
25 | #include <asm/mach_apic.h> | 25 | #include <asm/mach_apic.h> |
26 | #include <asm/kdebug.h> | ||
26 | 27 | ||
27 | /* This keeps a track of which one is crashing cpu. */ | 28 | /* This keeps a track of which one is crashing cpu. */ |
28 | static int crashing_cpu; | 29 | static int crashing_cpu; |
@@ -68,7 +69,7 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) | |||
68 | * for the data I pass, and I need tags | 69 | * for the data I pass, and I need tags |
69 | * on the data to indicate what information I have | 70 | * on the data to indicate what information I have |
70 | * squirrelled away. ELF notes happen to provide | 71 | * squirrelled away. ELF notes happen to provide |
71 | * all of that that no need to invent something new. | 72 | * all of that, no need to invent something new. |
72 | */ | 73 | */ |
73 | 74 | ||
74 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); | 75 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); |
@@ -95,15 +96,25 @@ static void crash_save_self(struct pt_regs *regs) | |||
95 | #ifdef CONFIG_SMP | 96 | #ifdef CONFIG_SMP |
96 | static atomic_t waiting_for_crash_ipi; | 97 | static atomic_t waiting_for_crash_ipi; |
97 | 98 | ||
98 | static int crash_nmi_callback(struct pt_regs *regs, int cpu) | 99 | static int crash_nmi_callback(struct notifier_block *self, |
100 | unsigned long val, void *data) | ||
99 | { | 101 | { |
102 | struct pt_regs *regs; | ||
103 | int cpu; | ||
104 | |||
105 | if (val != DIE_NMI_IPI) | ||
106 | return NOTIFY_OK; | ||
107 | |||
108 | regs = ((struct die_args *)data)->regs; | ||
109 | cpu = raw_smp_processor_id(); | ||
110 | |||
100 | /* | 111 | /* |
101 | * Don't do anything if this handler is invoked on crashing cpu. | 112 | * Don't do anything if this handler is invoked on crashing cpu. |
102 | * Otherwise, system will completely hang. Crashing cpu can get | 113 | * Otherwise, system will completely hang. Crashing cpu can get |
103 | * an NMI if system was initially booted with nmi_watchdog parameter. | 114 | * an NMI if system was initially booted with nmi_watchdog parameter. |
104 | */ | 115 | */ |
105 | if (cpu == crashing_cpu) | 116 | if (cpu == crashing_cpu) |
106 | return 1; | 117 | return NOTIFY_STOP; |
107 | local_irq_disable(); | 118 | local_irq_disable(); |
108 | 119 | ||
109 | crash_save_this_cpu(regs, cpu); | 120 | crash_save_this_cpu(regs, cpu); |
@@ -127,12 +138,17 @@ static void smp_send_nmi_allbutself(void) | |||
127 | * cpu hotplug shouldn't matter. | 138 | * cpu hotplug shouldn't matter. |
128 | */ | 139 | */ |
129 | 140 | ||
141 | static struct notifier_block crash_nmi_nb = { | ||
142 | .notifier_call = crash_nmi_callback, | ||
143 | }; | ||
144 | |||
130 | static void nmi_shootdown_cpus(void) | 145 | static void nmi_shootdown_cpus(void) |
131 | { | 146 | { |
132 | unsigned long msecs; | 147 | unsigned long msecs; |
133 | 148 | ||
134 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | 149 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); |
135 | set_nmi_callback(crash_nmi_callback); | 150 | if (register_die_notifier(&crash_nmi_nb)) |
151 | return; /* return what? */ | ||
136 | 152 | ||
137 | /* | 153 | /* |
138 | * Ensure the new callback function is set before sending | 154 | * Ensure the new callback function is set before sending |
@@ -178,9 +194,7 @@ void machine_crash_shutdown(struct pt_regs *regs) | |||
178 | if(cpu_has_apic) | 194 | if(cpu_has_apic) |
179 | disable_local_APIC(); | 195 | disable_local_APIC(); |
180 | 196 | ||
181 | #if defined(CONFIG_X86_IO_APIC) | ||
182 | disable_IO_APIC(); | 197 | disable_IO_APIC(); |
183 | #endif | ||
184 | 198 | ||
185 | crash_save_self(regs); | 199 | crash_save_self(regs); |
186 | } | 200 | } |
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 708a3cd9a27e..c0af3828df45 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <asm/bootsetup.h> | 25 | #include <asm/bootsetup.h> |
26 | #include <asm/sections.h> | 26 | #include <asm/sections.h> |
27 | 27 | ||
28 | struct e820map e820 __initdata; | ||
29 | |||
28 | /* | 30 | /* |
29 | * PFN of last memory page. | 31 | * PFN of last memory page. |
30 | */ | 32 | */ |
@@ -41,7 +43,7 @@ unsigned long end_pfn_map; | |||
41 | /* | 43 | /* |
42 | * Last pfn which the user wants to use. | 44 | * Last pfn which the user wants to use. |
43 | */ | 45 | */ |
44 | unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; | 46 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; |
45 | 47 | ||
46 | extern struct resource code_resource, data_resource; | 48 | extern struct resource code_resource, data_resource; |
47 | 49 | ||
@@ -70,12 +72,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
70 | return 1; | 72 | return 1; |
71 | } | 73 | } |
72 | #endif | 74 | #endif |
73 | /* kernel code + 640k memory hole (later should not be needed, but | 75 | /* kernel code */ |
74 | be paranoid for now) */ | ||
75 | if (last >= 640*1024 && addr < 1024*1024) { | ||
76 | *addrp = 1024*1024; | ||
77 | return 1; | ||
78 | } | ||
79 | if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { | 76 | if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { |
80 | *addrp = __pa_symbol(&_end); | 77 | *addrp = __pa_symbol(&_end); |
81 | return 1; | 78 | return 1; |
@@ -565,13 +562,6 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
565 | * If we're lucky and live on a modern system, the setup code | 562 | * If we're lucky and live on a modern system, the setup code |
566 | * will have given us a memory map that we can use to properly | 563 | * will have given us a memory map that we can use to properly |
567 | * set up memory. If we aren't, we'll fake a memory map. | 564 | * set up memory. If we aren't, we'll fake a memory map. |
568 | * | ||
569 | * We check to see that the memory map contains at least 2 elements | ||
570 | * before we'll use it, because the detection code in setup.S may | ||
571 | * not be perfect and most every PC known to man has two memory | ||
572 | * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
573 | * thinkpad 560x, for example, does not cooperate with the memory | ||
574 | * detection code.) | ||
575 | */ | 565 | */ |
576 | static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | 566 | static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) |
577 | { | 567 | { |
@@ -589,34 +579,19 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
589 | if (start > end) | 579 | if (start > end) |
590 | return -1; | 580 | return -1; |
591 | 581 | ||
592 | /* | ||
593 | * Some BIOSes claim RAM in the 640k - 1M region. | ||
594 | * Not right. Fix it up. | ||
595 | * | ||
596 | * This should be removed on Hammer which is supposed to not | ||
597 | * have non e820 covered ISA mappings there, but I had some strange | ||
598 | * problems so it stays for now. -AK | ||
599 | */ | ||
600 | if (type == E820_RAM) { | ||
601 | if (start < 0x100000ULL && end > 0xA0000ULL) { | ||
602 | if (start < 0xA0000ULL) | ||
603 | add_memory_region(start, 0xA0000ULL-start, type); | ||
604 | if (end <= 0x100000ULL) | ||
605 | continue; | ||
606 | start = 0x100000ULL; | ||
607 | size = end - start; | ||
608 | } | ||
609 | } | ||
610 | |||
611 | add_memory_region(start, size, type); | 582 | add_memory_region(start, size, type); |
612 | } while (biosmap++,--nr_map); | 583 | } while (biosmap++,--nr_map); |
613 | return 0; | 584 | return 0; |
614 | } | 585 | } |
615 | 586 | ||
616 | void __init setup_memory_region(void) | 587 | void early_panic(char *msg) |
617 | { | 588 | { |
618 | char *who = "BIOS-e820"; | 589 | early_printk(msg); |
590 | panic(msg); | ||
591 | } | ||
619 | 592 | ||
593 | void __init setup_memory_region(void) | ||
594 | { | ||
620 | /* | 595 | /* |
621 | * Try to copy the BIOS-supplied E820-map. | 596 | * Try to copy the BIOS-supplied E820-map. |
622 | * | 597 | * |
@@ -624,51 +599,70 @@ void __init setup_memory_region(void) | |||
624 | * the next section from 1mb->appropriate_mem_k | 599 | * the next section from 1mb->appropriate_mem_k |
625 | */ | 600 | */ |
626 | sanitize_e820_map(E820_MAP, &E820_MAP_NR); | 601 | sanitize_e820_map(E820_MAP, &E820_MAP_NR); |
627 | if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { | 602 | if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) |
628 | unsigned long mem_size; | 603 | early_panic("Cannot find a valid memory map"); |
629 | |||
630 | /* compare results from other methods and take the greater */ | ||
631 | if (ALT_MEM_K < EXT_MEM_K) { | ||
632 | mem_size = EXT_MEM_K; | ||
633 | who = "BIOS-88"; | ||
634 | } else { | ||
635 | mem_size = ALT_MEM_K; | ||
636 | who = "BIOS-e801"; | ||
637 | } | ||
638 | |||
639 | e820.nr_map = 0; | ||
640 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | ||
641 | add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | ||
642 | } | ||
643 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 604 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
644 | e820_print_map(who); | 605 | e820_print_map("BIOS-e820"); |
645 | } | 606 | } |
646 | 607 | ||
647 | void __init parse_memopt(char *p, char **from) | 608 | static int __init parse_memopt(char *p) |
648 | { | 609 | { |
649 | end_user_pfn = memparse(p, from); | 610 | if (!p) |
611 | return -EINVAL; | ||
612 | end_user_pfn = memparse(p, &p); | ||
650 | end_user_pfn >>= PAGE_SHIFT; | 613 | end_user_pfn >>= PAGE_SHIFT; |
614 | return 0; | ||
651 | } | 615 | } |
616 | early_param("mem", parse_memopt); | ||
652 | 617 | ||
653 | void __init parse_memmapopt(char *p, char **from) | 618 | static int userdef __initdata; |
619 | |||
620 | static int __init parse_memmap_opt(char *p) | ||
654 | { | 621 | { |
622 | char *oldp; | ||
655 | unsigned long long start_at, mem_size; | 623 | unsigned long long start_at, mem_size; |
656 | 624 | ||
657 | mem_size = memparse(p, from); | 625 | if (!strcmp(p, "exactmap")) { |
658 | p = *from; | 626 | #ifdef CONFIG_CRASH_DUMP |
627 | /* If we are doing a crash dump, we | ||
628 | * still need to know the real mem | ||
629 | * size before original memory map is | ||
630 | * reset. | ||
631 | */ | ||
632 | saved_max_pfn = e820_end_of_ram(); | ||
633 | #endif | ||
634 | end_pfn_map = 0; | ||
635 | e820.nr_map = 0; | ||
636 | userdef = 1; | ||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | oldp = p; | ||
641 | mem_size = memparse(p, &p); | ||
642 | if (p == oldp) | ||
643 | return -EINVAL; | ||
659 | if (*p == '@') { | 644 | if (*p == '@') { |
660 | start_at = memparse(p+1, from); | 645 | start_at = memparse(p+1, &p); |
661 | add_memory_region(start_at, mem_size, E820_RAM); | 646 | add_memory_region(start_at, mem_size, E820_RAM); |
662 | } else if (*p == '#') { | 647 | } else if (*p == '#') { |
663 | start_at = memparse(p+1, from); | 648 | start_at = memparse(p+1, &p); |
664 | add_memory_region(start_at, mem_size, E820_ACPI); | 649 | add_memory_region(start_at, mem_size, E820_ACPI); |
665 | } else if (*p == '$') { | 650 | } else if (*p == '$') { |
666 | start_at = memparse(p+1, from); | 651 | start_at = memparse(p+1, &p); |
667 | add_memory_region(start_at, mem_size, E820_RESERVED); | 652 | add_memory_region(start_at, mem_size, E820_RESERVED); |
668 | } else { | 653 | } else { |
669 | end_user_pfn = (mem_size >> PAGE_SHIFT); | 654 | end_user_pfn = (mem_size >> PAGE_SHIFT); |
670 | } | 655 | } |
671 | p = *from; | 656 | return *p == '\0' ? 0 : -EINVAL; |
657 | } | ||
658 | early_param("memmap", parse_memmap_opt); | ||
659 | |||
660 | void finish_e820_parsing(void) | ||
661 | { | ||
662 | if (userdef) { | ||
663 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
664 | e820_print_map("user"); | ||
665 | } | ||
672 | } | 666 | } |
673 | 667 | ||
674 | unsigned long pci_mem_start = 0xaeedbabe; | 668 | unsigned long pci_mem_start = 0xaeedbabe; |
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c new file mode 100644 index 000000000000..208e38a372c1 --- /dev/null +++ b/arch/x86_64/kernel/early-quirks.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* Various workarounds for chipset bugs. | |
2 | This code runs very early and can't use the regular PCI subsystem. | |
3 | The entries are keyed to PCI bridges, which usually identify chipsets | |
4 | uniquely. | |
5 | This is only for whole classes of chipsets with specific problems which | |
6 | need early invasive action (e.g. before the timers are initialized). | |
7 | Most PCI device specific workarounds can be done later and should be | |
8 | in standard PCI quirks. | |
9 | Mainboard specific bugs should be handled by DMI entries. | |
10 | CPU specific bugs belong in setup.c. */ | |
11 | |||
12 | #include <linux/pci.h> | ||
13 | #include <linux/acpi.h> | ||
14 | #include <linux/pci_ids.h> | ||
15 | #include <asm/pci-direct.h> | ||
16 | #include <asm/proto.h> | ||
17 | #include <asm/dma.h> | ||
18 | |||
19 | static void via_bugs(void) | ||
20 | { | ||
21 | #ifdef CONFIG_IOMMU | ||
22 | if ((end_pfn > MAX_DMA32_PFN || force_iommu) && | ||
23 | !iommu_aperture_allowed) { | ||
24 | printk(KERN_INFO | ||
25 | "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n"); | ||
26 | iommu_aperture_disabled = 1; | ||
27 | } | ||
28 | #endif | ||
29 | } | ||
30 | |||
31 | #ifdef CONFIG_ACPI | ||
32 | |||
33 | static int nvidia_hpet_detected __initdata; | ||
34 | |||
35 | static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) | ||
36 | { | ||
37 | nvidia_hpet_detected = 1; | ||
38 | return 0; | ||
39 | } | ||
40 | #endif | ||
41 | |||
42 | static void nvidia_bugs(void) | ||
43 | { | ||
44 | #ifdef CONFIG_ACPI | ||
45 | /* | ||
46 | * All timer overrides on Nvidia are | ||
47 | * wrong unless HPET is enabled. | ||
48 | */ | ||
49 | nvidia_hpet_detected = 0; | ||
50 | acpi_table_parse(ACPI_HPET, nvidia_hpet_check); | ||
51 | if (nvidia_hpet_detected == 0) { | ||
52 | acpi_skip_timer_override = 1; | ||
53 | printk(KERN_INFO "Nvidia board " | ||
54 | "detected. Ignoring ACPI " | ||
55 | "timer override.\n"); | ||
56 | } | ||
57 | #endif | ||
58 | /* RED-PEN skip them on mptables too? */ | ||
59 | |||
60 | } | ||
61 | |||
62 | static void ati_bugs(void) | ||
63 | { | ||
64 | #if 1 /* for testing */ | ||
65 | printk("ATI board detected\n"); | ||
66 | #endif | ||
67 | /* No bugs right now */ | ||
68 | } | ||
69 | |||
70 | struct chipset { | ||
71 | u16 vendor; | ||
72 | void (*f)(void); | ||
73 | }; | ||
74 | |||
75 | static struct chipset early_qrk[] = { | ||
76 | { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, | ||
77 | { PCI_VENDOR_ID_VIA, via_bugs }, | ||
78 | { PCI_VENDOR_ID_ATI, ati_bugs }, | ||
79 | {} | ||
80 | }; | ||
81 | |||
82 | void __init early_quirks(void) | ||
83 | { | ||
84 | int num, slot, func; | ||
85 | |||
86 | if (!early_pci_allowed()) | ||
87 | return; | ||
88 | |||
89 | /* Poor man's PCI discovery */ | ||
90 | for (num = 0; num < 32; num++) { | ||
91 | for (slot = 0; slot < 32; slot++) { | ||
92 | for (func = 0; func < 8; func++) { | ||
93 | u32 class; | ||
94 | u32 vendor; | ||
95 | u8 type; | ||
96 | int i; | ||
97 | class = read_pci_config(num,slot,func, | ||
98 | PCI_CLASS_REVISION); | ||
99 | if (class == 0xffffffff) | ||
100 | break; | ||
101 | |||
102 | if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) | ||
103 | continue; | ||
104 | |||
105 | vendor = read_pci_config(num, slot, func, | ||
106 | PCI_VENDOR_ID); | ||
107 | vendor &= 0xffff; | ||
108 | |||
109 | for (i = 0; early_qrk[i].f; i++) | ||
110 | if (early_qrk[i].vendor == vendor) { | ||
111 | early_qrk[i].f(); | ||
112 | return; | ||
113 | } | ||
114 | |||
115 | type = read_pci_config_byte(num, slot, func, | ||
116 | PCI_HEADER_TYPE); | ||
117 | if (!(type & 0x80)) | ||
118 | break; | ||
119 | } | ||
120 | } | ||
121 | } | ||
122 | } | ||
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 140051e07fa6..e22ecd54870d 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c | |||
@@ -215,20 +215,16 @@ void early_printk(const char *fmt, ...) | |||
215 | 215 | ||
216 | static int __initdata keep_early; | 216 | static int __initdata keep_early; |
217 | 217 | ||
218 | int __init setup_early_printk(char *opt) | 218 | static int __init setup_early_printk(char *buf) |
219 | { | 219 | { |
220 | char *space; | 220 | if (!buf) |
221 | char buf[256]; | 221 | return 0; |
222 | 222 | ||
223 | if (early_console_initialized) | 223 | if (early_console_initialized) |
224 | return 1; | 224 | return 0; |
225 | 225 | early_console_initialized = 1; | |
226 | strlcpy(buf,opt,sizeof(buf)); | ||
227 | space = strchr(buf, ' '); | ||
228 | if (space) | ||
229 | *space = 0; | ||
230 | 226 | ||
231 | if (strstr(buf,"keep")) | 227 | if (!strcmp(buf,"keep")) |
232 | keep_early = 1; | 228 | keep_early = 1; |
233 | 229 | ||
234 | if (!strncmp(buf, "serial", 6)) { | 230 | if (!strncmp(buf, "serial", 6)) { |
@@ -248,11 +244,12 @@ int __init setup_early_printk(char *opt) | |||
248 | early_console = &simnow_console; | 244 | early_console = &simnow_console; |
249 | keep_early = 1; | 245 | keep_early = 1; |
250 | } | 246 | } |
251 | early_console_initialized = 1; | ||
252 | register_console(early_console); | 247 | register_console(early_console); |
253 | return 0; | 248 | return 0; |
254 | } | 249 | } |
255 | 250 | ||
251 | early_param("earlyprintk", setup_early_printk); | ||
252 | |||
256 | void __init disable_early_printk(void) | 253 | void __init disable_early_printk(void) |
257 | { | 254 | { |
258 | if (!early_console_initialized || !early_console) | 255 | if (!early_console_initialized || !early_console) |
@@ -266,4 +263,3 @@ void __init disable_early_printk(void) | |||
266 | } | 263 | } |
267 | } | 264 | } |
268 | 265 | ||
269 | __setup("earlyprintk=", setup_early_printk); | ||
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index aa8d8939abc1..2802524104f3 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -4,8 +4,6 @@ | |||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | 5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
7 | * | ||
8 | * $Id$ | ||
9 | */ | 7 | */ |
10 | 8 | ||
11 | /* | 9 | /* |
@@ -22,15 +20,25 @@ | |||
22 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
23 | * - partial stack frame: partially saved registers up to R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
24 | * - full stack frame: Like partial stack frame, but all registers saved. | 22 | * - full stack frame: Like partial stack frame, but all registers saved. |
25 | * | 23 | * |
26 | * TODO: | 24 | * Some macro usage: |
27 | * - schedule it carefully for the final hardware. | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
26 | * backtraces. They don't change any code. | ||
27 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers | ||
28 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. | ||
29 | * There are unfortunately lots of special cases where some registers | ||
30 | * are not touched. The macro is a big mess that should be cleaned up. | ||
31 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. | ||
32 | * Gives a full stack frame. | ||
33 | * - ENTRY/END Define functions in the symbol table. | ||
34 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack | ||
35 | * frame that is otherwise undefined after a SYSCALL | ||
36 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. | ||
37 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. | ||
28 | */ | 38 | */ |
29 | 39 | ||
30 | #define ASSEMBLY 1 | ||
31 | #include <linux/linkage.h> | 40 | #include <linux/linkage.h> |
32 | #include <asm/segment.h> | 41 | #include <asm/segment.h> |
33 | #include <asm/smp.h> | ||
34 | #include <asm/cache.h> | 42 | #include <asm/cache.h> |
35 | #include <asm/errno.h> | 43 | #include <asm/errno.h> |
36 | #include <asm/dwarf2.h> | 44 | #include <asm/dwarf2.h> |
@@ -115,6 +123,7 @@ | |||
115 | .macro CFI_DEFAULT_STACK start=1 | 123 | .macro CFI_DEFAULT_STACK start=1 |
116 | .if \start | 124 | .if \start |
117 | CFI_STARTPROC simple | 125 | CFI_STARTPROC simple |
126 | CFI_SIGNAL_FRAME | ||
118 | CFI_DEF_CFA rsp,SS+8 | 127 | CFI_DEF_CFA rsp,SS+8 |
119 | .else | 128 | .else |
120 | CFI_DEF_CFA_OFFSET SS+8 | 129 | CFI_DEF_CFA_OFFSET SS+8 |
@@ -146,6 +155,10 @@ | |||
146 | /* rdi: prev */ | 155 | /* rdi: prev */ |
147 | ENTRY(ret_from_fork) | 156 | ENTRY(ret_from_fork) |
148 | CFI_DEFAULT_STACK | 157 | CFI_DEFAULT_STACK |
158 | push kernel_eflags(%rip) | ||
159 | CFI_ADJUST_CFA_OFFSET 4 | ||
160 | popf # reset kernel eflags | ||
161 | CFI_ADJUST_CFA_OFFSET -4 | ||
149 | call schedule_tail | 162 | call schedule_tail |
150 | GET_THREAD_INFO(%rcx) | 163 | GET_THREAD_INFO(%rcx) |
151 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) | 164 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) |
@@ -199,6 +212,7 @@ END(ret_from_fork) | |||
199 | 212 | ||
200 | ENTRY(system_call) | 213 | ENTRY(system_call) |
201 | CFI_STARTPROC simple | 214 | CFI_STARTPROC simple |
215 | CFI_SIGNAL_FRAME | ||
202 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 216 | CFI_DEF_CFA rsp,PDA_STACKOFFSET |
203 | CFI_REGISTER rip,rcx | 217 | CFI_REGISTER rip,rcx |
204 | /*CFI_REGISTER rflags,r11*/ | 218 | /*CFI_REGISTER rflags,r11*/ |
@@ -316,6 +330,7 @@ END(system_call) | |||
316 | */ | 330 | */ |
317 | ENTRY(int_ret_from_sys_call) | 331 | ENTRY(int_ret_from_sys_call) |
318 | CFI_STARTPROC simple | 332 | CFI_STARTPROC simple |
333 | CFI_SIGNAL_FRAME | ||
319 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET | 334 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET |
320 | /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | 335 | /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ |
321 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 336 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
@@ -476,6 +491,7 @@ END(stub_rt_sigreturn) | |||
476 | */ | 491 | */ |
477 | .macro _frame ref | 492 | .macro _frame ref |
478 | CFI_STARTPROC simple | 493 | CFI_STARTPROC simple |
494 | CFI_SIGNAL_FRAME | ||
479 | CFI_DEF_CFA rsp,SS+8-\ref | 495 | CFI_DEF_CFA rsp,SS+8-\ref |
480 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 496 | /*CFI_REL_OFFSET ss,SS-\ref*/ |
481 | CFI_REL_OFFSET rsp,RSP-\ref | 497 | CFI_REL_OFFSET rsp,RSP-\ref |
@@ -511,7 +527,12 @@ END(stub_rt_sigreturn) | |||
511 | testl $3,CS(%rdi) | 527 | testl $3,CS(%rdi) |
512 | je 1f | 528 | je 1f |
513 | swapgs | 529 | swapgs |
514 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count | 530 | /* irqcount is used to check if a CPU is already on an interrupt |
531 | stack or not. While this is essentially redundant with preempt_count | ||
532 | it is a little cheaper to use a separate counter in the PDA | ||
533 | (short of moving irq_enter into assembly, which would be too | ||
534 | much work) */ | ||
535 | 1: incl %gs:pda_irqcount | ||
515 | cmoveq %gs:pda_irqstackptr,%rsp | 536 | cmoveq %gs:pda_irqstackptr,%rsp |
516 | push %rbp # backlink for old unwinder | 537 | push %rbp # backlink for old unwinder |
517 | /* | 538 | /* |
@@ -619,8 +640,7 @@ retint_signal: | |||
619 | #ifdef CONFIG_PREEMPT | 640 | #ifdef CONFIG_PREEMPT |
620 | /* Returning to kernel space. Check if we need preemption */ | 641 | /* Returning to kernel space. Check if we need preemption */ |
621 | /* rcx: threadinfo. interrupts off. */ | 642 | /* rcx: threadinfo. interrupts off. */ |
622 | .p2align | 643 | ENTRY(retint_kernel) |
623 | retint_kernel: | ||
624 | cmpl $0,threadinfo_preempt_count(%rcx) | 644 | cmpl $0,threadinfo_preempt_count(%rcx) |
625 | jnz retint_restore_args | 645 | jnz retint_restore_args |
626 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) | 646 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) |
@@ -679,7 +699,6 @@ ENTRY(call_function_interrupt) | |||
679 | END(call_function_interrupt) | 699 | END(call_function_interrupt) |
680 | #endif | 700 | #endif |
681 | 701 | ||
682 | #ifdef CONFIG_X86_LOCAL_APIC | ||
683 | ENTRY(apic_timer_interrupt) | 702 | ENTRY(apic_timer_interrupt) |
684 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 703 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt |
685 | END(apic_timer_interrupt) | 704 | END(apic_timer_interrupt) |
@@ -691,7 +710,6 @@ END(error_interrupt) | |||
691 | ENTRY(spurious_interrupt) | 710 | ENTRY(spurious_interrupt) |
692 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | 711 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt |
693 | END(spurious_interrupt) | 712 | END(spurious_interrupt) |
694 | #endif | ||
695 | 713 | ||
696 | /* | 714 | /* |
697 | * Exception entry points. | 715 | * Exception entry points. |
@@ -768,7 +786,9 @@ paranoid_exit\trace: | |||
768 | testl $3,CS(%rsp) | 786 | testl $3,CS(%rsp) |
769 | jnz paranoid_userspace\trace | 787 | jnz paranoid_userspace\trace |
770 | paranoid_swapgs\trace: | 788 | paranoid_swapgs\trace: |
789 | .if \trace | ||
771 | TRACE_IRQS_IRETQ 0 | 790 | TRACE_IRQS_IRETQ 0 |
791 | .endif | ||
772 | swapgs | 792 | swapgs |
773 | paranoid_restore\trace: | 793 | paranoid_restore\trace: |
774 | RESTORE_ALL 8 | 794 | RESTORE_ALL 8 |
@@ -814,7 +834,7 @@ paranoid_schedule\trace: | |||
814 | * Exception entry point. This expects an error code/orig_rax on the stack | 834 | * Exception entry point. This expects an error code/orig_rax on the stack |
815 | * and the exception handler in %rax. | 835 | * and the exception handler in %rax. |
816 | */ | 836 | */ |
817 | ENTRY(error_entry) | 837 | KPROBE_ENTRY(error_entry) |
818 | _frame RDI | 838 | _frame RDI |
819 | /* rdi slot contains rax, oldrax contains error code */ | 839 | /* rdi slot contains rax, oldrax contains error code */ |
820 | cld | 840 | cld |
@@ -898,7 +918,7 @@ error_kernelspace: | |||
898 | cmpq $gs_change,RIP(%rsp) | 918 | cmpq $gs_change,RIP(%rsp) |
899 | je error_swapgs | 919 | je error_swapgs |
900 | jmp error_sti | 920 | jmp error_sti |
901 | END(error_entry) | 921 | KPROBE_END(error_entry) |
902 | 922 | ||
903 | /* Reload gs selector with exception handling */ | 923 | /* Reload gs selector with exception handling */ |
904 | /* edi: new selector */ | 924 | /* edi: new selector */ |
@@ -1020,8 +1040,7 @@ ENDPROC(execve) | |||
1020 | 1040 | ||
1021 | KPROBE_ENTRY(page_fault) | 1041 | KPROBE_ENTRY(page_fault) |
1022 | errorentry do_page_fault | 1042 | errorentry do_page_fault |
1023 | END(page_fault) | 1043 | KPROBE_END(page_fault) |
1024 | .previous .text | ||
1025 | 1044 | ||
1026 | ENTRY(coprocessor_error) | 1045 | ENTRY(coprocessor_error) |
1027 | zeroentry do_coprocessor_error | 1046 | zeroentry do_coprocessor_error |
@@ -1042,8 +1061,7 @@ KPROBE_ENTRY(debug) | |||
1042 | CFI_ADJUST_CFA_OFFSET 8 | 1061 | CFI_ADJUST_CFA_OFFSET 8 |
1043 | paranoidentry do_debug, DEBUG_STACK | 1062 | paranoidentry do_debug, DEBUG_STACK |
1044 | paranoidexit | 1063 | paranoidexit |
1045 | END(debug) | 1064 | KPROBE_END(debug) |
1046 | .previous .text | ||
1047 | 1065 | ||
1048 | /* runs on exception stack */ | 1066 | /* runs on exception stack */ |
1049 | KPROBE_ENTRY(nmi) | 1067 | KPROBE_ENTRY(nmi) |
@@ -1057,8 +1075,7 @@ KPROBE_ENTRY(nmi) | |||
1057 | jmp paranoid_exit1 | 1075 | jmp paranoid_exit1 |
1058 | CFI_ENDPROC | 1076 | CFI_ENDPROC |
1059 | #endif | 1077 | #endif |
1060 | END(nmi) | 1078 | KPROBE_END(nmi) |
1061 | .previous .text | ||
1062 | 1079 | ||
1063 | KPROBE_ENTRY(int3) | 1080 | KPROBE_ENTRY(int3) |
1064 | INTR_FRAME | 1081 | INTR_FRAME |
@@ -1067,8 +1084,7 @@ KPROBE_ENTRY(int3) | |||
1067 | paranoidentry do_int3, DEBUG_STACK | 1084 | paranoidentry do_int3, DEBUG_STACK |
1068 | jmp paranoid_exit1 | 1085 | jmp paranoid_exit1 |
1069 | CFI_ENDPROC | 1086 | CFI_ENDPROC |
1070 | END(int3) | 1087 | KPROBE_END(int3) |
1071 | .previous .text | ||
1072 | 1088 | ||
1073 | ENTRY(overflow) | 1089 | ENTRY(overflow) |
1074 | zeroentry do_overflow | 1090 | zeroentry do_overflow |
@@ -1116,8 +1132,7 @@ END(stack_segment) | |||
1116 | 1132 | ||
1117 | KPROBE_ENTRY(general_protection) | 1133 | KPROBE_ENTRY(general_protection) |
1118 | errorentry do_general_protection | 1134 | errorentry do_general_protection |
1119 | END(general_protection) | 1135 | KPROBE_END(general_protection) |
1120 | .previous .text | ||
1121 | 1136 | ||
1122 | ENTRY(alignment_check) | 1137 | ENTRY(alignment_check) |
1123 | errorentry do_alignment_check | 1138 | errorentry do_alignment_check |
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index 3020917546de..cdb90e671b88 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c | |||
@@ -118,7 +118,6 @@ struct genapic apic_cluster = { | |||
118 | .name = "clustered", | 118 | .name = "clustered", |
119 | .int_delivery_mode = dest_Fixed, | 119 | .int_delivery_mode = dest_Fixed, |
120 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 120 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), |
121 | .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED, | ||
122 | .target_cpus = cluster_target_cpus, | 121 | .target_cpus = cluster_target_cpus, |
123 | .apic_id_registered = cluster_apic_id_registered, | 122 | .apic_id_registered = cluster_apic_id_registered, |
124 | .init_apic_ldr = cluster_init_apic_ldr, | 123 | .init_apic_ldr = cluster_init_apic_ldr, |
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index eb86d374813a..50ad153eaac4 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c | |||
@@ -49,8 +49,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
49 | unsigned long cfg; | 49 | unsigned long cfg; |
50 | unsigned long flags; | 50 | unsigned long flags; |
51 | 51 | ||
52 | local_save_flags(flags); | 52 | local_irq_save(flags); |
53 | local_irq_disable(); | ||
54 | 53 | ||
55 | /* | 54 | /* |
56 | * Wait for idle. | 55 | * Wait for idle. |
@@ -121,7 +120,6 @@ struct genapic apic_flat = { | |||
121 | .name = "flat", | 120 | .name = "flat", |
122 | .int_delivery_mode = dest_LowestPrio, | 121 | .int_delivery_mode = dest_LowestPrio, |
123 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 122 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), |
124 | .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, | ||
125 | .target_cpus = flat_target_cpus, | 123 | .target_cpus = flat_target_cpus, |
126 | .apic_id_registered = flat_apic_id_registered, | 124 | .apic_id_registered = flat_apic_id_registered, |
127 | .init_apic_ldr = flat_init_apic_ldr, | 125 | .init_apic_ldr = flat_init_apic_ldr, |
@@ -180,7 +178,6 @@ struct genapic apic_physflat = { | |||
180 | .name = "physical flat", | 178 | .name = "physical flat", |
181 | .int_delivery_mode = dest_Fixed, | 179 | .int_delivery_mode = dest_Fixed, |
182 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 180 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), |
183 | .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED, | ||
184 | .target_cpus = physflat_target_cpus, | 181 | .target_cpus = physflat_target_cpus, |
185 | .apic_id_registered = flat_apic_id_registered, | 182 | .apic_id_registered = flat_apic_id_registered, |
186 | .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ | 183 | .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ |
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index c9739ca81d06..1e6f80870679 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -5,8 +5,6 @@ | |||
5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
6 | * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> | 6 | * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> |
7 | * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> | 7 | * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> |
8 | * | ||
9 | * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $ | ||
10 | */ | 8 | */ |
11 | 9 | ||
12 | 10 | ||
@@ -187,12 +185,15 @@ startup_64: | |||
187 | 185 | ||
188 | /* Finally jump to run C code and to be on real kernel address | 186 | /* Finally jump to run C code and to be on real kernel address |
189 | * Since we are running on identity-mapped space we have to jump | 187 | * Since we are running on identity-mapped space we have to jump |
190 | * to the full 64bit address , this is only possible as indirect | 188 | * to the full 64bit address, this is only possible as indirect |
191 | * jump | 189 | * jump. In addition we need to ensure %cs is set so we make this |
190 | * a far return. | ||
192 | */ | 191 | */ |
193 | movq initial_code(%rip),%rax | 192 | movq initial_code(%rip),%rax |
194 | pushq $0 # fake return address | 193 | pushq $0 # fake return address to stop unwinder |
195 | jmp *%rax | 194 | pushq $__KERNEL_CS # set correct cs |
195 | pushq %rax # target address in negative space | ||
196 | lretq | ||
196 | 197 | ||
197 | /* SMP bootup changes these two */ | 198 | /* SMP bootup changes these two */ |
198 | .align 8 | 199 | .align 8 |
@@ -371,7 +372,7 @@ ENTRY(cpu_gdt_table) | |||
371 | .quad 0,0 /* TSS */ | 372 | .quad 0,0 /* TSS */ |
372 | .quad 0,0 /* LDT */ | 373 | .quad 0,0 /* LDT */ |
373 | .quad 0,0,0 /* three TLS descriptors */ | 374 | .quad 0,0,0 /* three TLS descriptors */ |
374 | .quad 0 /* unused */ | 375 | .quad 0x0000f40000000000 /* node/CPU stored in limit */ |
375 | gdt_end: | 376 | gdt_end: |
376 | /* asm/segment.h:GDT_ENTRIES must match this */ | 377 | /* asm/segment.h:GDT_ENTRIES must match this */ |
377 | /* This should be a multiple of the cache line size */ | 378 | /* This should be a multiple of the cache line size */ |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 36647ce6aecb..9561eb3c5b5c 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
@@ -45,38 +45,16 @@ static void __init copy_bootdata(char *real_mode_data) | |||
45 | new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); | 45 | new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); |
46 | if (!new_data) { | 46 | if (!new_data) { |
47 | if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { | 47 | if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { |
48 | printk("so old bootloader that it does not support commandline?!\n"); | ||
49 | return; | 48 | return; |
50 | } | 49 | } |
51 | new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; | 50 | new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; |
52 | printk("old bootloader convention, maybe loadlin?\n"); | ||
53 | } | 51 | } |
54 | command_line = (char *) ((u64)(new_data)); | 52 | command_line = (char *) ((u64)(new_data)); |
55 | memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); | 53 | memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); |
56 | printk("Bootdata ok (command line is %s)\n", saved_command_line); | ||
57 | } | ||
58 | |||
59 | static void __init setup_boot_cpu_data(void) | ||
60 | { | ||
61 | unsigned int dummy, eax; | ||
62 | |||
63 | /* get vendor info */ | ||
64 | cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level, | ||
65 | (unsigned int *)&boot_cpu_data.x86_vendor_id[0], | ||
66 | (unsigned int *)&boot_cpu_data.x86_vendor_id[8], | ||
67 | (unsigned int *)&boot_cpu_data.x86_vendor_id[4]); | ||
68 | |||
69 | /* get cpu type */ | ||
70 | cpuid(1, &eax, &dummy, &dummy, | ||
71 | (unsigned int *) &boot_cpu_data.x86_capability); | ||
72 | boot_cpu_data.x86 = (eax >> 8) & 0xf; | ||
73 | boot_cpu_data.x86_model = (eax >> 4) & 0xf; | ||
74 | boot_cpu_data.x86_mask = eax & 0xf; | ||
75 | } | 54 | } |
76 | 55 | ||
77 | void __init x86_64_start_kernel(char * real_mode_data) | 56 | void __init x86_64_start_kernel(char * real_mode_data) |
78 | { | 57 | { |
79 | char *s; | ||
80 | int i; | 58 | int i; |
81 | 59 | ||
82 | for (i = 0; i < 256; i++) | 60 | for (i = 0; i < 256; i++) |
@@ -84,10 +62,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
84 | asm volatile("lidt %0" :: "m" (idt_descr)); | 62 | asm volatile("lidt %0" :: "m" (idt_descr)); |
85 | clear_bss(); | 63 | clear_bss(); |
86 | 64 | ||
87 | /* | 65 | early_printk("Kernel alive\n"); |
88 | * This must be called really, really early: | ||
89 | */ | ||
90 | lockdep_init(); | ||
91 | 66 | ||
92 | /* | 67 | /* |
93 | * switch to init_level4_pgt from boot_level4_pgt | 68 | * switch to init_level4_pgt from boot_level4_pgt |
@@ -103,22 +78,5 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
103 | #ifdef CONFIG_SMP | 78 | #ifdef CONFIG_SMP |
104 | cpu_set(0, cpu_online_map); | 79 | cpu_set(0, cpu_online_map); |
105 | #endif | 80 | #endif |
106 | s = strstr(saved_command_line, "earlyprintk="); | ||
107 | if (s != NULL) | ||
108 | setup_early_printk(strchr(s, '=') + 1); | ||
109 | #ifdef CONFIG_NUMA | ||
110 | s = strstr(saved_command_line, "numa="); | ||
111 | if (s != NULL) | ||
112 | numa_setup(s+5); | ||
113 | #endif | ||
114 | #ifdef CONFIG_X86_IO_APIC | ||
115 | if (strstr(saved_command_line, "disableapic")) | ||
116 | disable_apic = 1; | ||
117 | #endif | ||
118 | /* You need early console to see that */ | ||
119 | if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE) | ||
120 | panic("Kernel too big for kernel mapping\n"); | ||
121 | |||
122 | setup_boot_cpu_data(); | ||
123 | start_kernel(); | 81 | start_kernel(); |
124 | } | 82 | } |
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 0434b1f8e3dd..2dd51f364ea2 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c | |||
@@ -55,7 +55,6 @@ | |||
55 | */ | 55 | */ |
56 | BUILD_16_IRQS(0x0) | 56 | BUILD_16_IRQS(0x0) |
57 | 57 | ||
58 | #ifdef CONFIG_X86_LOCAL_APIC | ||
59 | /* | 58 | /* |
60 | * The IO-APIC gives us many more interrupt sources. Most of these | 59 | * The IO-APIC gives us many more interrupt sources. Most of these |
61 | * are unused but an SMP system is supposed to have enough memory ... | 60 | * are unused but an SMP system is supposed to have enough memory ... |
@@ -75,8 +74,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) | |||
75 | BUILD_15_IRQS(0xe) | 74 | BUILD_15_IRQS(0xe) |
76 | #endif | 75 | #endif |
77 | 76 | ||
78 | #endif | ||
79 | |||
80 | #undef BUILD_16_IRQS | 77 | #undef BUILD_16_IRQS |
81 | #undef BUILD_15_IRQS | 78 | #undef BUILD_15_IRQS |
82 | #undef BI | 79 | #undef BI |
@@ -100,7 +97,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) | |||
100 | void (*interrupt[NR_IRQS])(void) = { | 97 | void (*interrupt[NR_IRQS])(void) = { |
101 | IRQLIST_16(0x0), | 98 | IRQLIST_16(0x0), |
102 | 99 | ||
103 | #ifdef CONFIG_X86_IO_APIC | ||
104 | IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), | 100 | IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), |
105 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | 101 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), |
106 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | 102 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), |
@@ -110,7 +106,6 @@ void (*interrupt[NR_IRQS])(void) = { | |||
110 | , IRQLIST_15(0xe) | 106 | , IRQLIST_15(0xe) |
111 | #endif | 107 | #endif |
112 | 108 | ||
113 | #endif | ||
114 | }; | 109 | }; |
115 | 110 | ||
116 | #undef IRQ | 111 | #undef IRQ |
@@ -128,6 +123,8 @@ void (*interrupt[NR_IRQS])(void) = { | |||
128 | 123 | ||
129 | DEFINE_SPINLOCK(i8259A_lock); | 124 | DEFINE_SPINLOCK(i8259A_lock); |
130 | 125 | ||
126 | static int i8259A_auto_eoi; | ||
127 | |||
131 | static void end_8259A_irq (unsigned int irq) | 128 | static void end_8259A_irq (unsigned int irq) |
132 | { | 129 | { |
133 | if (irq > 256) { | 130 | if (irq > 256) { |
@@ -341,6 +338,8 @@ void init_8259A(int auto_eoi) | |||
341 | { | 338 | { |
342 | unsigned long flags; | 339 | unsigned long flags; |
343 | 340 | ||
341 | i8259A_auto_eoi = auto_eoi; | ||
342 | |||
344 | spin_lock_irqsave(&i8259A_lock, flags); | 343 | spin_lock_irqsave(&i8259A_lock, flags); |
345 | 344 | ||
346 | outb(0xff, 0x21); /* mask all of 8259A-1 */ | 345 | outb(0xff, 0x21); /* mask all of 8259A-1 */ |
@@ -399,7 +398,7 @@ static void save_ELCR(char *trigger) | |||
399 | 398 | ||
400 | static int i8259A_resume(struct sys_device *dev) | 399 | static int i8259A_resume(struct sys_device *dev) |
401 | { | 400 | { |
402 | init_8259A(0); | 401 | init_8259A(i8259A_auto_eoi); |
403 | restore_ELCR(irq_trigger); | 402 | restore_ELCR(irq_trigger); |
404 | return 0; | 403 | return 0; |
405 | } | 404 | } |
@@ -453,9 +452,7 @@ void __init init_ISA_irqs (void) | |||
453 | { | 452 | { |
454 | int i; | 453 | int i; |
455 | 454 | ||
456 | #ifdef CONFIG_X86_LOCAL_APIC | ||
457 | init_bsp_APIC(); | 455 | init_bsp_APIC(); |
458 | #endif | ||
459 | init_8259A(0); | 456 | init_8259A(0); |
460 | 457 | ||
461 | for (i = 0; i < NR_IRQS; i++) { | 458 | for (i = 0; i < NR_IRQS; i++) { |
@@ -581,14 +578,12 @@ void __init init_IRQ(void) | |||
581 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 578 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
582 | set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 579 | set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
583 | 580 | ||
584 | #ifdef CONFIG_X86_LOCAL_APIC | ||
585 | /* self generated IPI for local APIC timer */ | 581 | /* self generated IPI for local APIC timer */ |
586 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 582 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
587 | 583 | ||
588 | /* IPI vectors for APIC spurious and error interrupts */ | 584 | /* IPI vectors for APIC spurious and error interrupts */ |
589 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 585 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
590 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 586 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
591 | #endif | ||
592 | 587 | ||
593 | /* | 588 | /* |
594 | * Set the clock to HZ Hz, we already have a valid | 589 | * Set the clock to HZ Hz, we already have a valid |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 924a4a332954..0491019d4c8d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -48,7 +48,7 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ | |||
48 | 48 | ||
49 | static int no_timer_check; | 49 | static int no_timer_check; |
50 | 50 | ||
51 | int disable_timer_pin_1 __initdata; | 51 | static int disable_timer_pin_1 __initdata; |
52 | 52 | ||
53 | int timer_over_8254 __initdata = 0; | 53 | int timer_over_8254 __initdata = 0; |
54 | 54 | ||
@@ -111,6 +111,33 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; | |||
111 | FINAL; \ | 111 | FINAL; \ |
112 | } | 112 | } |
113 | 113 | ||
114 | union entry_union { | ||
115 | struct { u32 w1, w2; }; | ||
116 | struct IO_APIC_route_entry entry; | ||
117 | }; | ||
118 | |||
119 | static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | ||
120 | { | ||
121 | union entry_union eu; | ||
122 | unsigned long flags; | ||
123 | spin_lock_irqsave(&ioapic_lock, flags); | ||
124 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | ||
125 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | ||
126 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
127 | return eu.entry; | ||
128 | } | ||
129 | |||
130 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
131 | { | ||
132 | unsigned long flags; | ||
133 | union entry_union eu; | ||
134 | eu.entry = e; | ||
135 | spin_lock_irqsave(&ioapic_lock, flags); | ||
136 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
137 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
138 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
139 | } | ||
140 | |||
114 | #ifdef CONFIG_SMP | 141 | #ifdef CONFIG_SMP |
115 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 142 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) |
116 | { | 143 | { |
@@ -196,13 +223,9 @@ static void unmask_IO_APIC_irq (unsigned int irq) | |||
196 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 223 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
197 | { | 224 | { |
198 | struct IO_APIC_route_entry entry; | 225 | struct IO_APIC_route_entry entry; |
199 | unsigned long flags; | ||
200 | 226 | ||
201 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | 227 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ |
202 | spin_lock_irqsave(&ioapic_lock, flags); | 228 | entry = ioapic_read_entry(apic, pin); |
203 | *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
204 | *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
205 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
206 | if (entry.delivery_mode == dest_SMI) | 229 | if (entry.delivery_mode == dest_SMI) |
207 | return; | 230 | return; |
208 | /* | 231 | /* |
@@ -210,10 +233,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
210 | */ | 233 | */ |
211 | memset(&entry, 0, sizeof(entry)); | 234 | memset(&entry, 0, sizeof(entry)); |
212 | entry.mask = 1; | 235 | entry.mask = 1; |
213 | spin_lock_irqsave(&ioapic_lock, flags); | 236 | ioapic_write_entry(apic, pin, entry); |
214 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); | ||
215 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); | ||
216 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
217 | } | 237 | } |
218 | 238 | ||
219 | static void clear_IO_APIC (void) | 239 | static void clear_IO_APIC (void) |
@@ -225,14 +245,6 @@ static void clear_IO_APIC (void) | |||
225 | clear_IO_APIC_pin(apic, pin); | 245 | clear_IO_APIC_pin(apic, pin); |
226 | } | 246 | } |
227 | 247 | ||
228 | /* | ||
229 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to | ||
230 | * specific CPU-side IRQs. | ||
231 | */ | ||
232 | |||
233 | #define MAX_PIRQS 8 | ||
234 | static int pirq_entries [MAX_PIRQS]; | ||
235 | static int pirqs_enabled; | ||
236 | int skip_ioapic_setup; | 248 | int skip_ioapic_setup; |
237 | int ioapic_force; | 249 | int ioapic_force; |
238 | 250 | ||
@@ -241,18 +253,17 @@ int ioapic_force; | |||
241 | static int __init disable_ioapic_setup(char *str) | 253 | static int __init disable_ioapic_setup(char *str) |
242 | { | 254 | { |
243 | skip_ioapic_setup = 1; | 255 | skip_ioapic_setup = 1; |
244 | return 1; | 256 | return 0; |
245 | } | 257 | } |
258 | early_param("noapic", disable_ioapic_setup); | ||
246 | 259 | ||
247 | static int __init enable_ioapic_setup(char *str) | 260 | /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ |
261 | static int __init disable_timer_pin_setup(char *arg) | ||
248 | { | 262 | { |
249 | ioapic_force = 1; | 263 | disable_timer_pin_1 = 1; |
250 | skip_ioapic_setup = 0; | ||
251 | return 1; | 264 | return 1; |
252 | } | 265 | } |
253 | 266 | __setup("disable_timer_pin_1", disable_timer_pin_setup); | |
254 | __setup("noapic", disable_ioapic_setup); | ||
255 | __setup("apic", enable_ioapic_setup); | ||
256 | 267 | ||
257 | static int __init setup_disable_8254_timer(char *s) | 268 | static int __init setup_disable_8254_timer(char *s) |
258 | { | 269 | { |
@@ -268,135 +279,6 @@ static int __init setup_enable_8254_timer(char *s) | |||
268 | __setup("disable_8254_timer", setup_disable_8254_timer); | 279 | __setup("disable_8254_timer", setup_disable_8254_timer); |
269 | __setup("enable_8254_timer", setup_enable_8254_timer); | 280 | __setup("enable_8254_timer", setup_enable_8254_timer); |
270 | 281 | ||
271 | #include <asm/pci-direct.h> | ||
272 | #include <linux/pci_ids.h> | ||
273 | #include <linux/pci.h> | ||
274 | |||
275 | |||
276 | #ifdef CONFIG_ACPI | ||
277 | |||
278 | static int nvidia_hpet_detected __initdata; | ||
279 | |||
280 | static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) | ||
281 | { | ||
282 | nvidia_hpet_detected = 1; | ||
283 | return 0; | ||
284 | } | ||
285 | #endif | ||
286 | |||
287 | /* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC | ||
288 | off. Check for an Nvidia or VIA PCI bridge and turn it off. | ||
289 | Use pci direct infrastructure because this runs before the PCI subsystem. | ||
290 | |||
291 | Can be overwritten with "apic" | ||
292 | |||
293 | And another hack to disable the IOMMU on VIA chipsets. | ||
294 | |||
295 | ... and others. Really should move this somewhere else. | ||
296 | |||
297 | Kludge-O-Rama. */ | ||
298 | void __init check_ioapic(void) | ||
299 | { | ||
300 | int num,slot,func; | ||
301 | /* Poor man's PCI discovery */ | ||
302 | for (num = 0; num < 32; num++) { | ||
303 | for (slot = 0; slot < 32; slot++) { | ||
304 | for (func = 0; func < 8; func++) { | ||
305 | u32 class; | ||
306 | u32 vendor; | ||
307 | u8 type; | ||
308 | class = read_pci_config(num,slot,func, | ||
309 | PCI_CLASS_REVISION); | ||
310 | if (class == 0xffffffff) | ||
311 | break; | ||
312 | |||
313 | if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) | ||
314 | continue; | ||
315 | |||
316 | vendor = read_pci_config(num, slot, func, | ||
317 | PCI_VENDOR_ID); | ||
318 | vendor &= 0xffff; | ||
319 | switch (vendor) { | ||
320 | case PCI_VENDOR_ID_VIA: | ||
321 | #ifdef CONFIG_IOMMU | ||
322 | if ((end_pfn > MAX_DMA32_PFN || | ||
323 | force_iommu) && | ||
324 | !iommu_aperture_allowed) { | ||
325 | printk(KERN_INFO | ||
326 | "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); | ||
327 | iommu_aperture_disabled = 1; | ||
328 | } | ||
329 | #endif | ||
330 | return; | ||
331 | case PCI_VENDOR_ID_NVIDIA: | ||
332 | #ifdef CONFIG_ACPI | ||
333 | /* | ||
334 | * All timer overrides on Nvidia are | ||
335 | * wrong unless HPET is enabled. | ||
336 | */ | ||
337 | nvidia_hpet_detected = 0; | ||
338 | acpi_table_parse(ACPI_HPET, | ||
339 | nvidia_hpet_check); | ||
340 | if (nvidia_hpet_detected == 0) { | ||
341 | acpi_skip_timer_override = 1; | ||
342 | printk(KERN_INFO "Nvidia board " | ||
343 | "detected. Ignoring ACPI " | ||
344 | "timer override.\n"); | ||
345 | } | ||
346 | #endif | ||
347 | /* RED-PEN skip them on mptables too? */ | ||
348 | return; | ||
349 | |||
350 | /* This should be actually default, but | ||
351 | for 2.6.16 let's do it for ATI only where | ||
352 | it's really needed. */ | ||
353 | case PCI_VENDOR_ID_ATI: | ||
354 | if (timer_over_8254 == 1) { | ||
355 | timer_over_8254 = 0; | ||
356 | printk(KERN_INFO | ||
357 | "ATI board detected. Disabling timer routing over 8254.\n"); | ||
358 | } | ||
359 | return; | ||
360 | } | ||
361 | |||
362 | |||
363 | /* No multi-function device? */ | ||
364 | type = read_pci_config_byte(num,slot,func, | ||
365 | PCI_HEADER_TYPE); | ||
366 | if (!(type & 0x80)) | ||
367 | break; | ||
368 | } | ||
369 | } | ||
370 | } | ||
371 | } | ||
372 | |||
373 | static int __init ioapic_pirq_setup(char *str) | ||
374 | { | ||
375 | int i, max; | ||
376 | int ints[MAX_PIRQS+1]; | ||
377 | |||
378 | get_options(str, ARRAY_SIZE(ints), ints); | ||
379 | |||
380 | for (i = 0; i < MAX_PIRQS; i++) | ||
381 | pirq_entries[i] = -1; | ||
382 | |||
383 | pirqs_enabled = 1; | ||
384 | apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n"); | ||
385 | max = MAX_PIRQS; | ||
386 | if (ints[0] < MAX_PIRQS) | ||
387 | max = ints[0]; | ||
388 | |||
389 | for (i = 0; i < max; i++) { | ||
390 | apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); | ||
391 | /* | ||
392 | * PIRQs are mapped upside down, usually. | ||
393 | */ | ||
394 | pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; | ||
395 | } | ||
396 | return 1; | ||
397 | } | ||
398 | |||
399 | __setup("pirq=", ioapic_pirq_setup); | ||
400 | 282 | ||
401 | /* | 283 | /* |
402 | * Find the IRQ entry number of a certain pin. | 284 | * Find the IRQ entry number of a certain pin. |
@@ -425,9 +307,7 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
425 | for (i = 0; i < mp_irq_entries; i++) { | 307 | for (i = 0; i < mp_irq_entries; i++) { |
426 | int lbus = mp_irqs[i].mpc_srcbus; | 308 | int lbus = mp_irqs[i].mpc_srcbus; |
427 | 309 | ||
428 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | 310 | if (test_bit(lbus, mp_bus_not_pci) && |
429 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
430 | mp_bus_id_to_type[lbus] == MP_BUS_MCA) && | ||
431 | (mp_irqs[i].mpc_irqtype == type) && | 311 | (mp_irqs[i].mpc_irqtype == type) && |
432 | (mp_irqs[i].mpc_srcbusirq == irq)) | 312 | (mp_irqs[i].mpc_srcbusirq == irq)) |
433 | 313 | ||
@@ -443,9 +323,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
443 | for (i = 0; i < mp_irq_entries; i++) { | 323 | for (i = 0; i < mp_irq_entries; i++) { |
444 | int lbus = mp_irqs[i].mpc_srcbus; | 324 | int lbus = mp_irqs[i].mpc_srcbus; |
445 | 325 | ||
446 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | 326 | if (test_bit(lbus, mp_bus_not_pci) && |
447 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
448 | mp_bus_id_to_type[lbus] == MP_BUS_MCA) && | ||
449 | (mp_irqs[i].mpc_irqtype == type) && | 327 | (mp_irqs[i].mpc_irqtype == type) && |
450 | (mp_irqs[i].mpc_srcbusirq == irq)) | 328 | (mp_irqs[i].mpc_srcbusirq == irq)) |
451 | break; | 329 | break; |
@@ -485,7 +363,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
485 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | 363 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) |
486 | break; | 364 | break; |
487 | 365 | ||
488 | if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && | 366 | if (!test_bit(lbus, mp_bus_not_pci) && |
489 | !mp_irqs[i].mpc_irqtype && | 367 | !mp_irqs[i].mpc_irqtype && |
490 | (bus == lbus) && | 368 | (bus == lbus) && |
491 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 369 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { |
@@ -508,27 +386,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
508 | return best_guess; | 386 | return best_guess; |
509 | } | 387 | } |
510 | 388 | ||
511 | /* | ||
512 | * EISA Edge/Level control register, ELCR | ||
513 | */ | ||
514 | static int EISA_ELCR(unsigned int irq) | ||
515 | { | ||
516 | if (irq < 16) { | ||
517 | unsigned int port = 0x4d0 + (irq >> 3); | ||
518 | return (inb(port) >> (irq & 7)) & 1; | ||
519 | } | ||
520 | apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq); | ||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | /* EISA interrupts are always polarity zero and can be edge or level | ||
525 | * trigger depending on the ELCR value. If an interrupt is listed as | ||
526 | * EISA conforming in the MP table, that means its trigger type must | ||
527 | * be read in from the ELCR */ | ||
528 | |||
529 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) | ||
530 | #define default_EISA_polarity(idx) (0) | ||
531 | |||
532 | /* ISA interrupts are always polarity zero edge triggered, | 389 | /* ISA interrupts are always polarity zero edge triggered, |
533 | * when listed as conforming in the MP table. */ | 390 | * when listed as conforming in the MP table. */ |
534 | 391 | ||
@@ -541,12 +398,6 @@ static int EISA_ELCR(unsigned int irq) | |||
541 | #define default_PCI_trigger(idx) (1) | 398 | #define default_PCI_trigger(idx) (1) |
542 | #define default_PCI_polarity(idx) (1) | 399 | #define default_PCI_polarity(idx) (1) |
543 | 400 | ||
544 | /* MCA interrupts are always polarity zero level triggered, | ||
545 | * when listed as conforming in the MP table. */ | ||
546 | |||
547 | #define default_MCA_trigger(idx) (1) | ||
548 | #define default_MCA_polarity(idx) (0) | ||
549 | |||
550 | static int __init MPBIOS_polarity(int idx) | 401 | static int __init MPBIOS_polarity(int idx) |
551 | { | 402 | { |
552 | int bus = mp_irqs[idx].mpc_srcbus; | 403 | int bus = mp_irqs[idx].mpc_srcbus; |
@@ -558,38 +409,11 @@ static int __init MPBIOS_polarity(int idx) | |||
558 | switch (mp_irqs[idx].mpc_irqflag & 3) | 409 | switch (mp_irqs[idx].mpc_irqflag & 3) |
559 | { | 410 | { |
560 | case 0: /* conforms, ie. bus-type dependent polarity */ | 411 | case 0: /* conforms, ie. bus-type dependent polarity */ |
561 | { | 412 | if (test_bit(bus, mp_bus_not_pci)) |
562 | switch (mp_bus_id_to_type[bus]) | 413 | polarity = default_ISA_polarity(idx); |
563 | { | 414 | else |
564 | case MP_BUS_ISA: /* ISA pin */ | 415 | polarity = default_PCI_polarity(idx); |
565 | { | ||
566 | polarity = default_ISA_polarity(idx); | ||
567 | break; | ||
568 | } | ||
569 | case MP_BUS_EISA: /* EISA pin */ | ||
570 | { | ||
571 | polarity = default_EISA_polarity(idx); | ||
572 | break; | ||
573 | } | ||
574 | case MP_BUS_PCI: /* PCI pin */ | ||
575 | { | ||
576 | polarity = default_PCI_polarity(idx); | ||
577 | break; | ||
578 | } | ||
579 | case MP_BUS_MCA: /* MCA pin */ | ||
580 | { | ||
581 | polarity = default_MCA_polarity(idx); | ||
582 | break; | ||
583 | } | ||
584 | default: | ||
585 | { | ||
586 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
587 | polarity = 1; | ||
588 | break; | ||
589 | } | ||
590 | } | ||
591 | break; | 416 | break; |
592 | } | ||
593 | case 1: /* high active */ | 417 | case 1: /* high active */ |
594 | { | 418 | { |
595 | polarity = 0; | 419 | polarity = 0; |
@@ -627,38 +451,11 @@ static int MPBIOS_trigger(int idx) | |||
627 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | 451 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) |
628 | { | 452 | { |
629 | case 0: /* conforms, ie. bus-type dependent */ | 453 | case 0: /* conforms, ie. bus-type dependent */ |
630 | { | 454 | if (test_bit(bus, mp_bus_not_pci)) |
631 | switch (mp_bus_id_to_type[bus]) | 455 | trigger = default_ISA_trigger(idx); |
632 | { | 456 | else |
633 | case MP_BUS_ISA: /* ISA pin */ | 457 | trigger = default_PCI_trigger(idx); |
634 | { | ||
635 | trigger = default_ISA_trigger(idx); | ||
636 | break; | ||
637 | } | ||
638 | case MP_BUS_EISA: /* EISA pin */ | ||
639 | { | ||
640 | trigger = default_EISA_trigger(idx); | ||
641 | break; | ||
642 | } | ||
643 | case MP_BUS_PCI: /* PCI pin */ | ||
644 | { | ||
645 | trigger = default_PCI_trigger(idx); | ||
646 | break; | ||
647 | } | ||
648 | case MP_BUS_MCA: /* MCA pin */ | ||
649 | { | ||
650 | trigger = default_MCA_trigger(idx); | ||
651 | break; | ||
652 | } | ||
653 | default: | ||
654 | { | ||
655 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
656 | trigger = 1; | ||
657 | break; | ||
658 | } | ||
659 | } | ||
660 | break; | 458 | break; |
661 | } | ||
662 | case 1: /* edge */ | 459 | case 1: /* edge */ |
663 | { | 460 | { |
664 | trigger = 0; | 461 | trigger = 0; |
@@ -764,49 +561,17 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
764 | if (mp_irqs[idx].mpc_dstirq != pin) | 561 | if (mp_irqs[idx].mpc_dstirq != pin) |
765 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 562 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
766 | 563 | ||
767 | switch (mp_bus_id_to_type[bus]) | 564 | if (test_bit(bus, mp_bus_not_pci)) { |
768 | { | 565 | irq = mp_irqs[idx].mpc_srcbusirq; |
769 | case MP_BUS_ISA: /* ISA pin */ | 566 | } else { |
770 | case MP_BUS_EISA: | 567 | /* |
771 | case MP_BUS_MCA: | 568 | * PCI IRQs are mapped in order |
772 | { | 569 | */ |
773 | irq = mp_irqs[idx].mpc_srcbusirq; | 570 | i = irq = 0; |
774 | break; | 571 | while (i < apic) |
775 | } | 572 | irq += nr_ioapic_registers[i++]; |
776 | case MP_BUS_PCI: /* PCI pin */ | 573 | irq += pin; |
777 | { | 574 | irq = gsi_irq_sharing(irq); |
778 | /* | ||
779 | * PCI IRQs are mapped in order | ||
780 | */ | ||
781 | i = irq = 0; | ||
782 | while (i < apic) | ||
783 | irq += nr_ioapic_registers[i++]; | ||
784 | irq += pin; | ||
785 | irq = gsi_irq_sharing(irq); | ||
786 | break; | ||
787 | } | ||
788 | default: | ||
789 | { | ||
790 | printk(KERN_ERR "unknown bus type %d.\n",bus); | ||
791 | irq = 0; | ||
792 | break; | ||
793 | } | ||
794 | } | ||
795 | BUG_ON(irq >= NR_IRQS); | ||
796 | |||
797 | /* | ||
798 | * PCI IRQ command line redirection. Yes, limits are hardcoded. | ||
799 | */ | ||
800 | if ((pin >= 16) && (pin <= 23)) { | ||
801 | if (pirq_entries[pin-16] != -1) { | ||
802 | if (!pirq_entries[pin-16]) { | ||
803 | apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16); | ||
804 | } else { | ||
805 | irq = pirq_entries[pin-16]; | ||
806 | apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n", | ||
807 | pin-16, irq); | ||
808 | } | ||
809 | } | ||
810 | } | 575 | } |
811 | BUG_ON(irq >= NR_IRQS); | 576 | BUG_ON(irq >= NR_IRQS); |
812 | return irq; | 577 | return irq; |
@@ -943,9 +708,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
943 | if (!apic && (irq < 16)) | 708 | if (!apic && (irq < 16)) |
944 | disable_8259A_irq(irq); | 709 | disable_8259A_irq(irq); |
945 | } | 710 | } |
711 | ioapic_write_entry(apic, pin, entry); | ||
712 | |||
946 | spin_lock_irqsave(&ioapic_lock, flags); | 713 | spin_lock_irqsave(&ioapic_lock, flags); |
947 | io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
948 | io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
949 | set_native_irq_info(irq, TARGET_CPUS); | 714 | set_native_irq_info(irq, TARGET_CPUS); |
950 | spin_unlock_irqrestore(&ioapic_lock, flags); | 715 | spin_unlock_irqrestore(&ioapic_lock, flags); |
951 | } | 716 | } |
@@ -1083,10 +848,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
1083 | for (i = 0; i <= reg_01.bits.entries; i++) { | 848 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1084 | struct IO_APIC_route_entry entry; | 849 | struct IO_APIC_route_entry entry; |
1085 | 850 | ||
1086 | spin_lock_irqsave(&ioapic_lock, flags); | 851 | entry = ioapic_read_entry(apic, i); |
1087 | *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); | ||
1088 | *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); | ||
1089 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1090 | 852 | ||
1091 | printk(KERN_DEBUG " %02x %03X %02X ", | 853 | printk(KERN_DEBUG " %02x %03X %02X ", |
1092 | i, | 854 | i, |
@@ -1281,9 +1043,6 @@ static void __init enable_IO_APIC(void) | |||
1281 | irq_2_pin[i].pin = -1; | 1043 | irq_2_pin[i].pin = -1; |
1282 | irq_2_pin[i].next = 0; | 1044 | irq_2_pin[i].next = 0; |
1283 | } | 1045 | } |
1284 | if (!pirqs_enabled) | ||
1285 | for (i = 0; i < MAX_PIRQS; i++) | ||
1286 | pirq_entries[i] = -1; | ||
1287 | 1046 | ||
1288 | /* | 1047 | /* |
1289 | * The number of IO-APIC IRQ registers (== #pins): | 1048 | * The number of IO-APIC IRQ registers (== #pins): |
@@ -1299,11 +1058,7 @@ static void __init enable_IO_APIC(void) | |||
1299 | /* See if any of the pins is in ExtINT mode */ | 1058 | /* See if any of the pins is in ExtINT mode */ |
1300 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1059 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
1301 | struct IO_APIC_route_entry entry; | 1060 | struct IO_APIC_route_entry entry; |
1302 | spin_lock_irqsave(&ioapic_lock, flags); | 1061 | entry = ioapic_read_entry(apic, pin); |
1303 | *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
1304 | *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
1305 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1306 | |||
1307 | 1062 | ||
1308 | /* If the interrupt line is enabled and in ExtInt mode | 1063 | /* If the interrupt line is enabled and in ExtInt mode |
1309 | * I have found the pin where the i8259 is connected. | 1064 | * I have found the pin where the i8259 is connected. |
@@ -1355,7 +1110,6 @@ void disable_IO_APIC(void) | |||
1355 | */ | 1110 | */ |
1356 | if (ioapic_i8259.pin != -1) { | 1111 | if (ioapic_i8259.pin != -1) { |
1357 | struct IO_APIC_route_entry entry; | 1112 | struct IO_APIC_route_entry entry; |
1358 | unsigned long flags; | ||
1359 | 1113 | ||
1360 | memset(&entry, 0, sizeof(entry)); | 1114 | memset(&entry, 0, sizeof(entry)); |
1361 | entry.mask = 0; /* Enabled */ | 1115 | entry.mask = 0; /* Enabled */ |
@@ -1372,84 +1126,13 @@ void disable_IO_APIC(void) | |||
1372 | /* | 1126 | /* |
1373 | * Add it to the IO-APIC irq-routing table: | 1127 | * Add it to the IO-APIC irq-routing table: |
1374 | */ | 1128 | */ |
1375 | spin_lock_irqsave(&ioapic_lock, flags); | 1129 | ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); |
1376 | io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, | ||
1377 | *(((int *)&entry)+1)); | ||
1378 | io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, | ||
1379 | *(((int *)&entry)+0)); | ||
1380 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1381 | } | 1130 | } |
1382 | 1131 | ||
1383 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); | 1132 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
1384 | } | 1133 | } |
1385 | 1134 | ||
1386 | /* | 1135 | /* |
1387 | * function to set the IO-APIC physical IDs based on the | ||
1388 | * values stored in the MPC table. | ||
1389 | * | ||
1390 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | ||
1391 | */ | ||
1392 | |||
1393 | static void __init setup_ioapic_ids_from_mpc (void) | ||
1394 | { | ||
1395 | union IO_APIC_reg_00 reg_00; | ||
1396 | int apic; | ||
1397 | int i; | ||
1398 | unsigned char old_id; | ||
1399 | unsigned long flags; | ||
1400 | |||
1401 | /* | ||
1402 | * Set the IOAPIC ID to the value stored in the MPC table. | ||
1403 | */ | ||
1404 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1405 | |||
1406 | /* Read the register 0 value */ | ||
1407 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1408 | reg_00.raw = io_apic_read(apic, 0); | ||
1409 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1410 | |||
1411 | old_id = mp_ioapics[apic].mpc_apicid; | ||
1412 | |||
1413 | |||
1414 | printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); | ||
1415 | |||
1416 | |||
1417 | /* | ||
1418 | * We need to adjust the IRQ routing table | ||
1419 | * if the ID changed. | ||
1420 | */ | ||
1421 | if (old_id != mp_ioapics[apic].mpc_apicid) | ||
1422 | for (i = 0; i < mp_irq_entries; i++) | ||
1423 | if (mp_irqs[i].mpc_dstapic == old_id) | ||
1424 | mp_irqs[i].mpc_dstapic | ||
1425 | = mp_ioapics[apic].mpc_apicid; | ||
1426 | |||
1427 | /* | ||
1428 | * Read the right value from the MPC table and | ||
1429 | * write it into the ID register. | ||
1430 | */ | ||
1431 | apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", | ||
1432 | mp_ioapics[apic].mpc_apicid); | ||
1433 | |||
1434 | reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; | ||
1435 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1436 | io_apic_write(apic, 0, reg_00.raw); | ||
1437 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1438 | |||
1439 | /* | ||
1440 | * Sanity check | ||
1441 | */ | ||
1442 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1443 | reg_00.raw = io_apic_read(apic, 0); | ||
1444 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1445 | if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) | ||
1446 | printk("could not set ID!\n"); | ||
1447 | else | ||
1448 | apic_printk(APIC_VERBOSE," ok.\n"); | ||
1449 | } | ||
1450 | } | ||
1451 | |||
1452 | /* | ||
1453 | * There is a nasty bug in some older SMP boards, their mptable lies | 1136 | * There is a nasty bug in some older SMP boards, their mptable lies |
1454 | * about the timer IRQ. We do the following to work around the situation: | 1137 | * about the timer IRQ. We do the following to work around the situation: |
1455 | * | 1138 | * |
@@ -1964,11 +1647,6 @@ void __init setup_IO_APIC(void) | |||
1964 | 1647 | ||
1965 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 1648 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
1966 | 1649 | ||
1967 | /* | ||
1968 | * Set up the IO-APIC IRQ routing table. | ||
1969 | */ | ||
1970 | if (!acpi_ioapic) | ||
1971 | setup_ioapic_ids_from_mpc(); | ||
1972 | sync_Arb_IDs(); | 1650 | sync_Arb_IDs(); |
1973 | setup_IO_APIC_irqs(); | 1651 | setup_IO_APIC_irqs(); |
1974 | init_IO_APIC_traps(); | 1652 | init_IO_APIC_traps(); |
@@ -1987,17 +1665,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) | |||
1987 | { | 1665 | { |
1988 | struct IO_APIC_route_entry *entry; | 1666 | struct IO_APIC_route_entry *entry; |
1989 | struct sysfs_ioapic_data *data; | 1667 | struct sysfs_ioapic_data *data; |
1990 | unsigned long flags; | ||
1991 | int i; | 1668 | int i; |
1992 | 1669 | ||
1993 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 1670 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
1994 | entry = data->entry; | 1671 | entry = data->entry; |
1995 | spin_lock_irqsave(&ioapic_lock, flags); | 1672 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) |
1996 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { | 1673 | *entry = ioapic_read_entry(dev->id, i); |
1997 | *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); | ||
1998 | *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); | ||
1999 | } | ||
2000 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2001 | 1674 | ||
2002 | return 0; | 1675 | return 0; |
2003 | } | 1676 | } |
@@ -2019,11 +1692,9 @@ static int ioapic_resume(struct sys_device *dev) | |||
2019 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 1692 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; |
2020 | io_apic_write(dev->id, 0, reg_00.raw); | 1693 | io_apic_write(dev->id, 0, reg_00.raw); |
2021 | } | 1694 | } |
2022 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { | ||
2023 | io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); | ||
2024 | io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); | ||
2025 | } | ||
2026 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1695 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1696 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | ||
1697 | ioapic_write_entry(dev->id, i, entry[i]); | ||
2027 | 1698 | ||
2028 | return 0; | 1699 | return 0; |
2029 | } | 1700 | } |
@@ -2077,19 +1748,6 @@ device_initcall(ioapic_init_sysfs); | |||
2077 | 1748 | ||
2078 | #define IO_APIC_MAX_ID 0xFE | 1749 | #define IO_APIC_MAX_ID 0xFE |
2079 | 1750 | ||
2080 | int __init io_apic_get_version (int ioapic) | ||
2081 | { | ||
2082 | union IO_APIC_reg_01 reg_01; | ||
2083 | unsigned long flags; | ||
2084 | |||
2085 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2086 | reg_01.raw = io_apic_read(ioapic, 1); | ||
2087 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2088 | |||
2089 | return reg_01.bits.version; | ||
2090 | } | ||
2091 | |||
2092 | |||
2093 | int __init io_apic_get_redir_entries (int ioapic) | 1751 | int __init io_apic_get_redir_entries (int ioapic) |
2094 | { | 1752 | { |
2095 | union IO_APIC_reg_01 reg_01; | 1753 | union IO_APIC_reg_01 reg_01; |
@@ -2148,10 +1806,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p | |||
2148 | if (!ioapic && (irq < 16)) | 1806 | if (!ioapic && (irq < 16)) |
2149 | disable_8259A_irq(irq); | 1807 | disable_8259A_irq(irq); |
2150 | 1808 | ||
1809 | ioapic_write_entry(ioapic, pin, entry); | ||
1810 | |||
2151 | spin_lock_irqsave(&ioapic_lock, flags); | 1811 | spin_lock_irqsave(&ioapic_lock, flags); |
2152 | io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); | 1812 | set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); |
2153 | io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
2154 | set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); | ||
2155 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1813 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2156 | 1814 | ||
2157 | return 0; | 1815 | return 0; |
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c index b81614970ecc..fe063d3cfe42 100644 --- a/arch/x86_64/kernel/ioport.c +++ b/arch/x86_64/kernel/ioport.c | |||
@@ -56,6 +56,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
56 | 56 | ||
57 | memset(bitmap, 0xff, IO_BITMAP_BYTES); | 57 | memset(bitmap, 0xff, IO_BITMAP_BYTES); |
58 | t->io_bitmap_ptr = bitmap; | 58 | t->io_bitmap_ptr = bitmap; |
59 | set_thread_flag(TIF_IO_BITMAP); | ||
59 | } | 60 | } |
60 | 61 | ||
61 | /* | 62 | /* |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 5221a53e90c1..b3677e6ccc6e 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -20,11 +20,6 @@ | |||
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | 21 | ||
22 | atomic_t irq_err_count; | 22 | atomic_t irq_err_count; |
23 | #ifdef CONFIG_X86_IO_APIC | ||
24 | #ifdef APIC_MISMATCH_DEBUG | ||
25 | atomic_t irq_mis_count; | ||
26 | #endif | ||
27 | #endif | ||
28 | 23 | ||
29 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 24 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
30 | /* | 25 | /* |
@@ -92,18 +87,11 @@ skip: | |||
92 | for_each_online_cpu(j) | 87 | for_each_online_cpu(j) |
93 | seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); | 88 | seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); |
94 | seq_putc(p, '\n'); | 89 | seq_putc(p, '\n'); |
95 | #ifdef CONFIG_X86_LOCAL_APIC | ||
96 | seq_printf(p, "LOC: "); | 90 | seq_printf(p, "LOC: "); |
97 | for_each_online_cpu(j) | 91 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); | 92 | seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); |
99 | seq_putc(p, '\n'); | 93 | seq_putc(p, '\n'); |
100 | #endif | ||
101 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | 94 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); |
102 | #ifdef CONFIG_X86_IO_APIC | ||
103 | #ifdef APIC_MISMATCH_DEBUG | ||
104 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | ||
105 | #endif | ||
106 | #endif | ||
107 | } | 95 | } |
108 | return 0; | 96 | return 0; |
109 | } | 97 | } |
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 106076b370fc..0497e3bd5bff 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c | |||
@@ -15,6 +15,15 @@ | |||
15 | #include <asm/mmu_context.h> | 15 | #include <asm/mmu_context.h> |
16 | #include <asm/io.h> | 16 | #include <asm/io.h> |
17 | 17 | ||
18 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
19 | static u64 kexec_pgd[512] PAGE_ALIGNED; | ||
20 | static u64 kexec_pud0[512] PAGE_ALIGNED; | ||
21 | static u64 kexec_pmd0[512] PAGE_ALIGNED; | ||
22 | static u64 kexec_pte0[512] PAGE_ALIGNED; | ||
23 | static u64 kexec_pud1[512] PAGE_ALIGNED; | ||
24 | static u64 kexec_pmd1[512] PAGE_ALIGNED; | ||
25 | static u64 kexec_pte1[512] PAGE_ALIGNED; | ||
26 | |||
18 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | 27 | static void init_level2_page(pmd_t *level2p, unsigned long addr) |
19 | { | 28 | { |
20 | unsigned long end_addr; | 29 | unsigned long end_addr; |
@@ -144,32 +153,19 @@ static void load_segments(void) | |||
144 | ); | 153 | ); |
145 | } | 154 | } |
146 | 155 | ||
147 | typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, | ||
148 | unsigned long control_code_buffer, | ||
149 | unsigned long start_address, | ||
150 | unsigned long pgtable) ATTRIB_NORET; | ||
151 | |||
152 | extern const unsigned char relocate_new_kernel[]; | ||
153 | extern const unsigned long relocate_new_kernel_size; | ||
154 | |||
155 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
156 | { | 157 | { |
157 | unsigned long start_pgtable, control_code_buffer; | 158 | unsigned long start_pgtable; |
158 | int result; | 159 | int result; |
159 | 160 | ||
160 | /* Calculate the offsets */ | 161 | /* Calculate the offsets */ |
161 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 162 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; |
162 | control_code_buffer = start_pgtable + PAGE_SIZE; | ||
163 | 163 | ||
164 | /* Setup the identity mapped 64bit page table */ | 164 | /* Setup the identity mapped 64bit page table */ |
165 | result = init_pgtable(image, start_pgtable); | 165 | result = init_pgtable(image, start_pgtable); |
166 | if (result) | 166 | if (result) |
167 | return result; | 167 | return result; |
168 | 168 | ||
169 | /* Place the code in the reboot code buffer */ | ||
170 | memcpy(__va(control_code_buffer), relocate_new_kernel, | ||
171 | relocate_new_kernel_size); | ||
172 | |||
173 | return 0; | 169 | return 0; |
174 | } | 170 | } |
175 | 171 | ||
@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage *image) | |||
184 | */ | 180 | */ |
185 | NORET_TYPE void machine_kexec(struct kimage *image) | 181 | NORET_TYPE void machine_kexec(struct kimage *image) |
186 | { | 182 | { |
187 | unsigned long page_list; | 183 | unsigned long page_list[PAGES_NR]; |
188 | unsigned long control_code_buffer; | 184 | void *control_page; |
189 | unsigned long start_pgtable; | ||
190 | relocate_new_kernel_t rnk; | ||
191 | 185 | ||
192 | /* Interrupts aren't acceptable while we reboot */ | 186 | /* Interrupts aren't acceptable while we reboot */ |
193 | local_irq_disable(); | 187 | local_irq_disable(); |
194 | 188 | ||
195 | /* Calculate the offsets */ | 189 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
196 | page_list = image->head; | 190 | memcpy(control_page, relocate_kernel, PAGE_SIZE); |
197 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; | 191 | |
198 | control_code_buffer = start_pgtable + PAGE_SIZE; | 192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
199 | 193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | |
200 | /* Set the low half of the page table to my identity mapped | 194 | page_list[PA_PGD] = __pa(kexec_pgd); |
201 | * page table for kexec. Leave the high half pointing at the | 195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
202 | * kernel pages. Don't bother to flush the global pages | 196 | page_list[PA_PUD_0] = __pa(kexec_pud0); |
203 | * as that will happen when I fully switch to my identity mapped | 197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; |
204 | * page table anyway. | 198 | page_list[PA_PMD_0] = __pa(kexec_pmd0); |
205 | */ | 199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
206 | memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); | 200 | page_list[PA_PTE_0] = __pa(kexec_pte0); |
207 | __flush_tlb(); | 201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
208 | 202 | page_list[PA_PUD_1] = __pa(kexec_pud1); | |
203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | ||
204 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
206 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
208 | |||
209 | page_list[PA_TABLE_PAGE] = | ||
210 | (unsigned long)__pa(page_address(image->control_code_page)); | ||
209 | 211 | ||
210 | /* The segment registers are funny things, they have both a | 212 | /* The segment registers are funny things, they have both a |
211 | * visible and an invisible part. Whenever the visible part is | 213 | * visible and an invisible part. Whenever the visible part is |
@@ -222,7 +224,36 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
222 | */ | 224 | */ |
223 | set_gdt(phys_to_virt(0),0); | 225 | set_gdt(phys_to_virt(0),0); |
224 | set_idt(phys_to_virt(0),0); | 226 | set_idt(phys_to_virt(0),0); |
227 | |||
225 | /* now call it */ | 228 | /* now call it */ |
226 | rnk = (relocate_new_kernel_t) control_code_buffer; | 229 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, |
227 | (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); | 230 | image->start); |
228 | } | 231 | } |
232 | |||
233 | /* crashkernel=size@addr specifies the location to reserve for | ||
234 | * a crash kernel. By reserving this memory we guarantee | ||
235 | * that Linux never sets it up as a DMA target. | ||
236 | * Useful for holding code to do something appropriate | ||
237 | * after a kernel panic. | ||
238 | */ | ||
239 | static int __init setup_crashkernel(char *arg) | ||
240 | { | ||
241 | unsigned long size, base; | ||
242 | char *p; | ||
243 | if (!arg) | ||
244 | return -EINVAL; | ||
245 | size = memparse(arg, &p); | ||
246 | if (arg == p) | ||
247 | return -EINVAL; | ||
248 | if (*p == '@') { | ||
249 | base = memparse(p+1, &p); | ||
250 | /* FIXME: Do I want a sanity check to validate the | ||
251 | * memory range? Yes you do, but it's too early for | ||
252 | * e820 -AK */ | ||
253 | crashk_res.start = base; | ||
254 | crashk_res.end = base + size - 1; | ||
255 | } | ||
256 | return 0; | ||
257 | } | ||
258 | early_param("crashkernel", setup_crashkernel); | ||
259 | |||
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 4e017fb30fb3..bbea88801d88 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -182,7 +182,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
182 | goto out2; | 182 | goto out2; |
183 | 183 | ||
184 | memset(&m, 0, sizeof(struct mce)); | 184 | memset(&m, 0, sizeof(struct mce)); |
185 | m.cpu = safe_smp_processor_id(); | 185 | m.cpu = smp_processor_id(); |
186 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 186 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
187 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | 187 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
188 | kill_it = 1; | 188 | kill_it = 1; |
@@ -274,6 +274,33 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
274 | atomic_dec(&mce_entry); | 274 | atomic_dec(&mce_entry); |
275 | } | 275 | } |
276 | 276 | ||
277 | #ifdef CONFIG_X86_MCE_INTEL | ||
278 | /*** | ||
279 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
280 | * @cpu: The CPU on which the event occurred. | ||
281 | * @status: Event status information | ||
282 | * | ||
283 | * This function should be called by the thermal interrupt after the | ||
284 | * event has been processed and the decision was made to log the event | ||
285 | * further. | ||
286 | * | ||
287 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
288 | * and historically has been the register value of the | ||
289 | * MSR_IA32_THERM_STATUS (Intel) MSR. | ||
290 | */ | ||
291 | void mce_log_therm_throt_event(unsigned int cpu, __u64 status) | ||
292 | { | ||
293 | struct mce m; | ||
294 | |||
295 | memset(&m, 0, sizeof(m)); | ||
296 | m.cpu = cpu; | ||
297 | m.bank = MCE_THERMAL_BANK; | ||
298 | m.status = status; | ||
299 | rdtscll(m.tsc); | ||
300 | mce_log(&m); | ||
301 | } | ||
302 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
303 | |||
277 | /* | 304 | /* |
278 | * Periodic polling timer for "silent" machine check errors. | 305 | * Periodic polling timer for "silent" machine check errors. |
279 | */ | 306 | */ |
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c index 8f533d2c40cb..6551505d8a2c 100644 --- a/arch/x86_64/kernel/mce_intel.c +++ b/arch/x86_64/kernel/mce_intel.c | |||
@@ -11,36 +11,21 @@ | |||
11 | #include <asm/mce.h> | 11 | #include <asm/mce.h> |
12 | #include <asm/hw_irq.h> | 12 | #include <asm/hw_irq.h> |
13 | #include <asm/idle.h> | 13 | #include <asm/idle.h> |
14 | 14 | #include <asm/therm_throt.h> | |
15 | static DEFINE_PER_CPU(unsigned long, next_check); | ||
16 | 15 | ||
17 | asmlinkage void smp_thermal_interrupt(void) | 16 | asmlinkage void smp_thermal_interrupt(void) |
18 | { | 17 | { |
19 | struct mce m; | 18 | __u64 msr_val; |
20 | 19 | ||
21 | ack_APIC_irq(); | 20 | ack_APIC_irq(); |
22 | 21 | ||
23 | exit_idle(); | 22 | exit_idle(); |
24 | irq_enter(); | 23 | irq_enter(); |
25 | if (time_before(jiffies, __get_cpu_var(next_check))) | ||
26 | goto done; | ||
27 | |||
28 | __get_cpu_var(next_check) = jiffies + HZ*300; | ||
29 | memset(&m, 0, sizeof(m)); | ||
30 | m.cpu = smp_processor_id(); | ||
31 | m.bank = MCE_THERMAL_BANK; | ||
32 | rdtscll(m.tsc); | ||
33 | rdmsrl(MSR_IA32_THERM_STATUS, m.status); | ||
34 | if (m.status & 0x1) { | ||
35 | printk(KERN_EMERG | ||
36 | "CPU%d: Temperature above threshold, cpu clock throttled\n", m.cpu); | ||
37 | add_taint(TAINT_MACHINE_CHECK); | ||
38 | } else { | ||
39 | printk(KERN_EMERG "CPU%d: Temperature/speed normal\n", m.cpu); | ||
40 | } | ||
41 | 24 | ||
42 | mce_log(&m); | 25 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
43 | done: | 26 | if (therm_throt_process(msr_val & 1)) |
27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | ||
28 | |||
44 | irq_exit(); | 29 | irq_exit(); |
45 | } | 30 | } |
46 | 31 | ||
@@ -92,6 +77,9 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) | |||
92 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 77 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
93 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 78 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", |
94 | cpu, tm2 ? "TM2" : "TM1"); | 79 | cpu, tm2 ? "TM2" : "TM1"); |
80 | |||
81 | /* enable thermal throttle processing */ | ||
82 | atomic_set(&therm_throt_en, 1); | ||
95 | return; | 83 | return; |
96 | } | 84 | } |
97 | 85 | ||
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index a1ab4197f8a1..20e88f4b564b 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c | |||
@@ -41,8 +41,7 @@ int acpi_found_madt; | |||
41 | * Various Linux-internal data structures created from the | 41 | * Various Linux-internal data structures created from the |
42 | * MP-table. | 42 | * MP-table. |
43 | */ | 43 | */ |
44 | unsigned char apic_version [MAX_APICS]; | 44 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); |
45 | unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | ||
46 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | 45 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
47 | 46 | ||
48 | static int mp_current_pci_id = 0; | 47 | static int mp_current_pci_id = 0; |
@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
56 | int mp_irq_entries; | 55 | int mp_irq_entries; |
57 | 56 | ||
58 | int nr_ioapics; | 57 | int nr_ioapics; |
59 | int pic_mode; | ||
60 | unsigned long mp_lapic_addr = 0; | 58 | unsigned long mp_lapic_addr = 0; |
61 | 59 | ||
62 | 60 | ||
@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata; | |||
71 | /* Bitmask of physically existing CPUs */ | 69 | /* Bitmask of physically existing CPUs */ |
72 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; | 70 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
73 | 71 | ||
74 | /* ACPI MADT entry parsing functions */ | ||
75 | #ifdef CONFIG_ACPI | ||
76 | extern struct acpi_boot_flags acpi_boot; | ||
77 | #ifdef CONFIG_X86_LOCAL_APIC | ||
78 | extern int acpi_parse_lapic (acpi_table_entry_header *header); | ||
79 | extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header); | ||
80 | extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); | ||
81 | #endif /*CONFIG_X86_LOCAL_APIC*/ | ||
82 | #ifdef CONFIG_X86_IO_APIC | ||
83 | extern int acpi_parse_ioapic (acpi_table_entry_header *header); | ||
84 | #endif /*CONFIG_X86_IO_APIC*/ | ||
85 | #endif /*CONFIG_ACPI*/ | ||
86 | |||
87 | u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | 72 | u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; |
88 | 73 | ||
89 | 74 | ||
@@ -108,24 +93,20 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
108 | static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | 93 | static void __cpuinit MP_processor_info (struct mpc_config_processor *m) |
109 | { | 94 | { |
110 | int cpu; | 95 | int cpu; |
111 | unsigned char ver; | ||
112 | cpumask_t tmp_map; | 96 | cpumask_t tmp_map; |
97 | char *bootup_cpu = ""; | ||
113 | 98 | ||
114 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { | 99 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { |
115 | disabled_cpus++; | 100 | disabled_cpus++; |
116 | return; | 101 | return; |
117 | } | 102 | } |
118 | |||
119 | printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", | ||
120 | m->mpc_apicid, | ||
121 | (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8, | ||
122 | (m->mpc_cpufeature & CPU_MODEL_MASK)>>4, | ||
123 | m->mpc_apicver); | ||
124 | |||
125 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 103 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
126 | Dprintk(" Bootup CPU\n"); | 104 | bootup_cpu = " (Bootup-CPU)"; |
127 | boot_cpu_id = m->mpc_apicid; | 105 | boot_cpu_id = m->mpc_apicid; |
128 | } | 106 | } |
107 | |||
108 | printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); | ||
109 | |||
129 | if (num_processors >= NR_CPUS) { | 110 | if (num_processors >= NR_CPUS) { |
130 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 111 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." |
131 | " Processor ignored.\n", NR_CPUS); | 112 | " Processor ignored.\n", NR_CPUS); |
@@ -136,24 +117,7 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | |||
136 | cpus_complement(tmp_map, cpu_present_map); | 117 | cpus_complement(tmp_map, cpu_present_map); |
137 | cpu = first_cpu(tmp_map); | 118 | cpu = first_cpu(tmp_map); |
138 | 119 | ||
139 | #if MAX_APICS < 255 | ||
140 | if ((int)m->mpc_apicid > MAX_APICS) { | ||
141 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | ||
142 | m->mpc_apicid, MAX_APICS); | ||
143 | return; | ||
144 | } | ||
145 | #endif | ||
146 | ver = m->mpc_apicver; | ||
147 | |||
148 | physid_set(m->mpc_apicid, phys_cpu_present_map); | 120 | physid_set(m->mpc_apicid, phys_cpu_present_map); |
149 | /* | ||
150 | * Validate version | ||
151 | */ | ||
152 | if (ver == 0x0) { | ||
153 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); | ||
154 | ver = 0x10; | ||
155 | } | ||
156 | apic_version[m->mpc_apicid] = ver; | ||
157 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 121 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
158 | /* | 122 | /* |
159 | * bios_cpu_apicid is required to have processors listed | 123 | * bios_cpu_apicid is required to have processors listed |
@@ -178,15 +142,11 @@ static void __init MP_bus_info (struct mpc_config_bus *m) | |||
178 | Dprintk("Bus #%d is %s\n", m->mpc_busid, str); | 142 | Dprintk("Bus #%d is %s\n", m->mpc_busid, str); |
179 | 143 | ||
180 | if (strncmp(str, "ISA", 3) == 0) { | 144 | if (strncmp(str, "ISA", 3) == 0) { |
181 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 145 | set_bit(m->mpc_busid, mp_bus_not_pci); |
182 | } else if (strncmp(str, "EISA", 4) == 0) { | ||
183 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; | ||
184 | } else if (strncmp(str, "PCI", 3) == 0) { | 146 | } else if (strncmp(str, "PCI", 3) == 0) { |
185 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 147 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
186 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; | 148 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; |
187 | mp_current_pci_id++; | 149 | mp_current_pci_id++; |
188 | } else if (strncmp(str, "MCA", 3) == 0) { | ||
189 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; | ||
190 | } else { | 150 | } else { |
191 | printk(KERN_ERR "Unknown bustype %s\n", str); | 151 | printk(KERN_ERR "Unknown bustype %s\n", str); |
192 | } | 152 | } |
@@ -197,8 +157,8 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) | |||
197 | if (!(m->mpc_flags & MPC_APIC_USABLE)) | 157 | if (!(m->mpc_flags & MPC_APIC_USABLE)) |
198 | return; | 158 | return; |
199 | 159 | ||
200 | printk("I/O APIC #%d Version %d at 0x%X.\n", | 160 | printk("I/O APIC #%d at 0x%X.\n", |
201 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); | 161 | m->mpc_apicid, m->mpc_apicaddr); |
202 | if (nr_ioapics >= MAX_IO_APICS) { | 162 | if (nr_ioapics >= MAX_IO_APICS) { |
203 | printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", | 163 | printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", |
204 | MAX_IO_APICS, nr_ioapics); | 164 | MAX_IO_APICS, nr_ioapics); |
@@ -232,19 +192,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) | |||
232 | m->mpc_irqtype, m->mpc_irqflag & 3, | 192 | m->mpc_irqtype, m->mpc_irqflag & 3, |
233 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, | 193 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, |
234 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | 194 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
235 | /* | ||
236 | * Well it seems all SMP boards in existence | ||
237 | * use ExtINT/LVT1 == LINT0 and | ||
238 | * NMI/LVT2 == LINT1 - the following check | ||
239 | * will show us if this assumptions is false. | ||
240 | * Until then we do not have to add baggage. | ||
241 | */ | ||
242 | if ((m->mpc_irqtype == mp_ExtINT) && | ||
243 | (m->mpc_destapiclint != 0)) | ||
244 | BUG(); | ||
245 | if ((m->mpc_irqtype == mp_NMI) && | ||
246 | (m->mpc_destapiclint != 1)) | ||
247 | BUG(); | ||
248 | } | 195 | } |
249 | 196 | ||
250 | /* | 197 | /* |
@@ -258,7 +205,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
258 | unsigned char *mpt=((unsigned char *)mpc)+count; | 205 | unsigned char *mpt=((unsigned char *)mpc)+count; |
259 | 206 | ||
260 | if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { | 207 | if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { |
261 | printk("SMP mptable: bad signature [%c%c%c%c]!\n", | 208 | printk("MPTABLE: bad signature [%c%c%c%c]!\n", |
262 | mpc->mpc_signature[0], | 209 | mpc->mpc_signature[0], |
263 | mpc->mpc_signature[1], | 210 | mpc->mpc_signature[1], |
264 | mpc->mpc_signature[2], | 211 | mpc->mpc_signature[2], |
@@ -266,31 +213,31 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
266 | return 0; | 213 | return 0; |
267 | } | 214 | } |
268 | if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { | 215 | if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { |
269 | printk("SMP mptable: checksum error!\n"); | 216 | printk("MPTABLE: checksum error!\n"); |
270 | return 0; | 217 | return 0; |
271 | } | 218 | } |
272 | if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { | 219 | if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { |
273 | printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", | 220 | printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", |
274 | mpc->mpc_spec); | 221 | mpc->mpc_spec); |
275 | return 0; | 222 | return 0; |
276 | } | 223 | } |
277 | if (!mpc->mpc_lapic) { | 224 | if (!mpc->mpc_lapic) { |
278 | printk(KERN_ERR "SMP mptable: null local APIC address!\n"); | 225 | printk(KERN_ERR "MPTABLE: null local APIC address!\n"); |
279 | return 0; | 226 | return 0; |
280 | } | 227 | } |
281 | memcpy(str,mpc->mpc_oem,8); | 228 | memcpy(str,mpc->mpc_oem,8); |
282 | str[8]=0; | 229 | str[8] = 0; |
283 | printk(KERN_INFO "OEM ID: %s ",str); | 230 | printk(KERN_INFO "MPTABLE: OEM ID: %s ",str); |
284 | 231 | ||
285 | memcpy(str,mpc->mpc_productid,12); | 232 | memcpy(str,mpc->mpc_productid,12); |
286 | str[12]=0; | 233 | str[12] = 0; |
287 | printk("Product ID: %s ",str); | 234 | printk("MPTABLE: Product ID: %s ",str); |
288 | 235 | ||
289 | printk("APIC at: 0x%X\n",mpc->mpc_lapic); | 236 | printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic); |
290 | 237 | ||
291 | /* save the local APIC address, it might be non-default */ | 238 | /* save the local APIC address, it might be non-default */ |
292 | if (!acpi_lapic) | 239 | if (!acpi_lapic) |
293 | mp_lapic_addr = mpc->mpc_lapic; | 240 | mp_lapic_addr = mpc->mpc_lapic; |
294 | 241 | ||
295 | /* | 242 | /* |
296 | * Now process the configuration blocks. | 243 | * Now process the configuration blocks. |
@@ -302,7 +249,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
302 | struct mpc_config_processor *m= | 249 | struct mpc_config_processor *m= |
303 | (struct mpc_config_processor *)mpt; | 250 | (struct mpc_config_processor *)mpt; |
304 | if (!acpi_lapic) | 251 | if (!acpi_lapic) |
305 | MP_processor_info(m); | 252 | MP_processor_info(m); |
306 | mpt += sizeof(*m); | 253 | mpt += sizeof(*m); |
307 | count += sizeof(*m); | 254 | count += sizeof(*m); |
308 | break; | 255 | break; |
@@ -321,8 +268,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
321 | struct mpc_config_ioapic *m= | 268 | struct mpc_config_ioapic *m= |
322 | (struct mpc_config_ioapic *)mpt; | 269 | (struct mpc_config_ioapic *)mpt; |
323 | MP_ioapic_info(m); | 270 | MP_ioapic_info(m); |
324 | mpt+=sizeof(*m); | 271 | mpt += sizeof(*m); |
325 | count+=sizeof(*m); | 272 | count += sizeof(*m); |
326 | break; | 273 | break; |
327 | } | 274 | } |
328 | case MP_INTSRC: | 275 | case MP_INTSRC: |
@@ -331,8 +278,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
331 | (struct mpc_config_intsrc *)mpt; | 278 | (struct mpc_config_intsrc *)mpt; |
332 | 279 | ||
333 | MP_intsrc_info(m); | 280 | MP_intsrc_info(m); |
334 | mpt+=sizeof(*m); | 281 | mpt += sizeof(*m); |
335 | count+=sizeof(*m); | 282 | count += sizeof(*m); |
336 | break; | 283 | break; |
337 | } | 284 | } |
338 | case MP_LINTSRC: | 285 | case MP_LINTSRC: |
@@ -340,15 +287,15 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
340 | struct mpc_config_lintsrc *m= | 287 | struct mpc_config_lintsrc *m= |
341 | (struct mpc_config_lintsrc *)mpt; | 288 | (struct mpc_config_lintsrc *)mpt; |
342 | MP_lintsrc_info(m); | 289 | MP_lintsrc_info(m); |
343 | mpt+=sizeof(*m); | 290 | mpt += sizeof(*m); |
344 | count+=sizeof(*m); | 291 | count += sizeof(*m); |
345 | break; | 292 | break; |
346 | } | 293 | } |
347 | } | 294 | } |
348 | } | 295 | } |
349 | clustered_apic_check(); | 296 | clustered_apic_check(); |
350 | if (!num_processors) | 297 | if (!num_processors) |
351 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); | 298 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
352 | return num_processors; | 299 | return num_processors; |
353 | } | 300 | } |
354 | 301 | ||
@@ -444,13 +391,10 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
444 | * 2 CPUs, numbered 0 & 1. | 391 | * 2 CPUs, numbered 0 & 1. |
445 | */ | 392 | */ |
446 | processor.mpc_type = MP_PROCESSOR; | 393 | processor.mpc_type = MP_PROCESSOR; |
447 | /* Either an integrated APIC or a discrete 82489DX. */ | 394 | processor.mpc_apicver = 0; |
448 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
449 | processor.mpc_cpuflag = CPU_ENABLED; | 395 | processor.mpc_cpuflag = CPU_ENABLED; |
450 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | 396 | processor.mpc_cpufeature = 0; |
451 | (boot_cpu_data.x86_model << 4) | | 397 | processor.mpc_featureflag = 0; |
452 | boot_cpu_data.x86_mask; | ||
453 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
454 | processor.mpc_reserved[0] = 0; | 398 | processor.mpc_reserved[0] = 0; |
455 | processor.mpc_reserved[1] = 0; | 399 | processor.mpc_reserved[1] = 0; |
456 | for (i = 0; i < 2; i++) { | 400 | for (i = 0; i < 2; i++) { |
@@ -469,14 +413,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
469 | case 5: | 413 | case 5: |
470 | memcpy(bus.mpc_bustype, "ISA ", 6); | 414 | memcpy(bus.mpc_bustype, "ISA ", 6); |
471 | break; | 415 | break; |
472 | case 2: | ||
473 | case 6: | ||
474 | case 3: | ||
475 | memcpy(bus.mpc_bustype, "EISA ", 6); | ||
476 | break; | ||
477 | case 4: | ||
478 | case 7: | ||
479 | memcpy(bus.mpc_bustype, "MCA ", 6); | ||
480 | } | 416 | } |
481 | MP_bus_info(&bus); | 417 | MP_bus_info(&bus); |
482 | if (mpc_default_type > 4) { | 418 | if (mpc_default_type > 4) { |
@@ -487,7 +423,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
487 | 423 | ||
488 | ioapic.mpc_type = MP_IOAPIC; | 424 | ioapic.mpc_type = MP_IOAPIC; |
489 | ioapic.mpc_apicid = 2; | 425 | ioapic.mpc_apicid = 2; |
490 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 426 | ioapic.mpc_apicver = 0; |
491 | ioapic.mpc_flags = MPC_APIC_USABLE; | 427 | ioapic.mpc_flags = MPC_APIC_USABLE; |
492 | ioapic.mpc_apicaddr = 0xFEC00000; | 428 | ioapic.mpc_apicaddr = 0xFEC00000; |
493 | MP_ioapic_info(&ioapic); | 429 | MP_ioapic_info(&ioapic); |
@@ -530,13 +466,6 @@ void __init get_smp_config (void) | |||
530 | printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); | 466 | printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); |
531 | 467 | ||
532 | printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); | 468 | printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); |
533 | if (mpf->mpf_feature2 & (1<<7)) { | ||
534 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | ||
535 | pic_mode = 1; | ||
536 | } else { | ||
537 | printk(KERN_INFO " Virtual Wire compatibility mode.\n"); | ||
538 | pic_mode = 0; | ||
539 | } | ||
540 | 469 | ||
541 | /* | 470 | /* |
542 | * Now see if we need to read further. | 471 | * Now see if we need to read further. |
@@ -616,7 +545,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length) | |||
616 | return 0; | 545 | return 0; |
617 | } | 546 | } |
618 | 547 | ||
619 | void __init find_intel_smp (void) | 548 | void __init find_smp_config(void) |
620 | { | 549 | { |
621 | unsigned int address; | 550 | unsigned int address; |
622 | 551 | ||
@@ -633,9 +562,7 @@ void __init find_intel_smp (void) | |||
633 | smp_scan_config(0xF0000,0x10000)) | 562 | smp_scan_config(0xF0000,0x10000)) |
634 | return; | 563 | return; |
635 | /* | 564 | /* |
636 | * If it is an SMP machine we should know now, unless the | 565 | * If it is an SMP machine we should know now. |
637 | * configuration is in an EISA/MCA bus machine with an | ||
638 | * extended bios data area. | ||
639 | * | 566 | * |
640 | * there is a real-mode segmented pointer pointing to the | 567 | * there is a real-mode segmented pointer pointing to the |
641 | * 4K EBDA area at 0x40E, calculate and scan it here. | 568 | * 4K EBDA area at 0x40E, calculate and scan it here. |
@@ -656,69 +583,41 @@ void __init find_intel_smp (void) | |||
656 | printk(KERN_INFO "No mptable found.\n"); | 583 | printk(KERN_INFO "No mptable found.\n"); |
657 | } | 584 | } |
658 | 585 | ||
659 | /* | ||
660 | * - Intel MP Configuration Table | ||
661 | */ | ||
662 | void __init find_smp_config (void) | ||
663 | { | ||
664 | #ifdef CONFIG_X86_LOCAL_APIC | ||
665 | find_intel_smp(); | ||
666 | #endif | ||
667 | } | ||
668 | |||
669 | |||
670 | /* -------------------------------------------------------------------------- | 586 | /* -------------------------------------------------------------------------- |
671 | ACPI-based MP Configuration | 587 | ACPI-based MP Configuration |
672 | -------------------------------------------------------------------------- */ | 588 | -------------------------------------------------------------------------- */ |
673 | 589 | ||
674 | #ifdef CONFIG_ACPI | 590 | #ifdef CONFIG_ACPI |
675 | 591 | ||
676 | void __init mp_register_lapic_address ( | 592 | void __init mp_register_lapic_address(u64 address) |
677 | u64 address) | ||
678 | { | 593 | { |
679 | mp_lapic_addr = (unsigned long) address; | 594 | mp_lapic_addr = (unsigned long) address; |
680 | |||
681 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); | 595 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); |
682 | |||
683 | if (boot_cpu_id == -1U) | 596 | if (boot_cpu_id == -1U) |
684 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); | 597 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); |
685 | |||
686 | Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); | ||
687 | } | 598 | } |
688 | 599 | ||
689 | 600 | void __cpuinit mp_register_lapic (u8 id, u8 enabled) | |
690 | void __cpuinit mp_register_lapic ( | ||
691 | u8 id, | ||
692 | u8 enabled) | ||
693 | { | 601 | { |
694 | struct mpc_config_processor processor; | 602 | struct mpc_config_processor processor; |
695 | int boot_cpu = 0; | 603 | int boot_cpu = 0; |
696 | 604 | ||
697 | if (id >= MAX_APICS) { | 605 | if (id == boot_cpu_id) |
698 | printk(KERN_WARNING "Processor #%d invalid (max %d)\n", | ||
699 | id, MAX_APICS); | ||
700 | return; | ||
701 | } | ||
702 | |||
703 | if (id == boot_cpu_physical_apicid) | ||
704 | boot_cpu = 1; | 606 | boot_cpu = 1; |
705 | 607 | ||
706 | processor.mpc_type = MP_PROCESSOR; | 608 | processor.mpc_type = MP_PROCESSOR; |
707 | processor.mpc_apicid = id; | 609 | processor.mpc_apicid = id; |
708 | processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); | 610 | processor.mpc_apicver = 0; |
709 | processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); | 611 | processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); |
710 | processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); | 612 | processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); |
711 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | 613 | processor.mpc_cpufeature = 0; |
712 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | 614 | processor.mpc_featureflag = 0; |
713 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
714 | processor.mpc_reserved[0] = 0; | 615 | processor.mpc_reserved[0] = 0; |
715 | processor.mpc_reserved[1] = 0; | 616 | processor.mpc_reserved[1] = 0; |
716 | 617 | ||
717 | MP_processor_info(&processor); | 618 | MP_processor_info(&processor); |
718 | } | 619 | } |
719 | 620 | ||
720 | #ifdef CONFIG_X86_IO_APIC | ||
721 | |||
722 | #define MP_ISA_BUS 0 | 621 | #define MP_ISA_BUS 0 |
723 | #define MP_MAX_IOAPIC_PIN 127 | 622 | #define MP_MAX_IOAPIC_PIN 127 |
724 | 623 | ||
@@ -729,11 +628,9 @@ static struct mp_ioapic_routing { | |||
729 | u32 pin_programmed[4]; | 628 | u32 pin_programmed[4]; |
730 | } mp_ioapic_routing[MAX_IO_APICS]; | 629 | } mp_ioapic_routing[MAX_IO_APICS]; |
731 | 630 | ||
732 | 631 | static int mp_find_ioapic(int gsi) | |
733 | static int mp_find_ioapic ( | ||
734 | int gsi) | ||
735 | { | 632 | { |
736 | int i = 0; | 633 | int i = 0; |
737 | 634 | ||
738 | /* Find the IOAPIC that manages this GSI. */ | 635 | /* Find the IOAPIC that manages this GSI. */ |
739 | for (i = 0; i < nr_ioapics; i++) { | 636 | for (i = 0; i < nr_ioapics; i++) { |
@@ -743,17 +640,12 @@ static int mp_find_ioapic ( | |||
743 | } | 640 | } |
744 | 641 | ||
745 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | 642 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); |
746 | |||
747 | return -1; | 643 | return -1; |
748 | } | 644 | } |
749 | |||
750 | 645 | ||
751 | void __init mp_register_ioapic ( | 646 | void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) |
752 | u8 id, | ||
753 | u32 address, | ||
754 | u32 gsi_base) | ||
755 | { | 647 | { |
756 | int idx = 0; | 648 | int idx = 0; |
757 | 649 | ||
758 | if (nr_ioapics >= MAX_IO_APICS) { | 650 | if (nr_ioapics >= MAX_IO_APICS) { |
759 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | 651 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " |
@@ -774,7 +666,7 @@ void __init mp_register_ioapic ( | |||
774 | 666 | ||
775 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 667 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
776 | mp_ioapics[idx].mpc_apicid = id; | 668 | mp_ioapics[idx].mpc_apicid = id; |
777 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | 669 | mp_ioapics[idx].mpc_apicver = 0; |
778 | 670 | ||
779 | /* | 671 | /* |
780 | * Build basic IRQ lookup table to facilitate gsi->io_apic lookups | 672 | * Build basic IRQ lookup table to facilitate gsi->io_apic lookups |
@@ -785,21 +677,15 @@ void __init mp_register_ioapic ( | |||
785 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 677 | mp_ioapic_routing[idx].gsi_end = gsi_base + |
786 | io_apic_get_redir_entries(idx); | 678 | io_apic_get_redir_entries(idx); |
787 | 679 | ||
788 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 680 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " |
789 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | 681 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, |
790 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | 682 | mp_ioapics[idx].mpc_apicaddr, |
791 | mp_ioapic_routing[idx].gsi_start, | 683 | mp_ioapic_routing[idx].gsi_start, |
792 | mp_ioapic_routing[idx].gsi_end); | 684 | mp_ioapic_routing[idx].gsi_end); |
793 | |||
794 | return; | ||
795 | } | 685 | } |
796 | 686 | ||
797 | 687 | void __init | |
798 | void __init mp_override_legacy_irq ( | 688 | mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) |
799 | u8 bus_irq, | ||
800 | u8 polarity, | ||
801 | u8 trigger, | ||
802 | u32 gsi) | ||
803 | { | 689 | { |
804 | struct mpc_config_intsrc intsrc; | 690 | struct mpc_config_intsrc intsrc; |
805 | int ioapic = -1; | 691 | int ioapic = -1; |
@@ -837,22 +723,18 @@ void __init mp_override_legacy_irq ( | |||
837 | mp_irqs[mp_irq_entries] = intsrc; | 723 | mp_irqs[mp_irq_entries] = intsrc; |
838 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 724 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
839 | panic("Max # of irq sources exceeded!\n"); | 725 | panic("Max # of irq sources exceeded!\n"); |
840 | |||
841 | return; | ||
842 | } | 726 | } |
843 | 727 | ||
844 | 728 | void __init mp_config_acpi_legacy_irqs(void) | |
845 | void __init mp_config_acpi_legacy_irqs (void) | ||
846 | { | 729 | { |
847 | struct mpc_config_intsrc intsrc; | 730 | struct mpc_config_intsrc intsrc; |
848 | int i = 0; | 731 | int i = 0; |
849 | int ioapic = -1; | 732 | int ioapic = -1; |
850 | 733 | ||
851 | /* | 734 | /* |
852 | * Fabricate the legacy ISA bus (bus #31). | 735 | * Fabricate the legacy ISA bus (bus #31). |
853 | */ | 736 | */ |
854 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 737 | set_bit(MP_ISA_BUS, mp_bus_not_pci); |
855 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | ||
856 | 738 | ||
857 | /* | 739 | /* |
858 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | 740 | * Locate the IOAPIC that manages the ISA IRQs (0-15). |
@@ -905,24 +787,22 @@ void __init mp_config_acpi_legacy_irqs (void) | |||
905 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 787 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
906 | panic("Max # of irq sources exceeded!\n"); | 788 | panic("Max # of irq sources exceeded!\n"); |
907 | } | 789 | } |
908 | |||
909 | return; | ||
910 | } | 790 | } |
911 | 791 | ||
912 | #define MAX_GSI_NUM 4096 | 792 | #define MAX_GSI_NUM 4096 |
913 | 793 | ||
914 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | 794 | int mp_register_gsi(u32 gsi, int triggering, int polarity) |
915 | { | 795 | { |
916 | int ioapic = -1; | 796 | int ioapic = -1; |
917 | int ioapic_pin = 0; | 797 | int ioapic_pin = 0; |
918 | int idx, bit = 0; | 798 | int idx, bit = 0; |
919 | static int pci_irq = 16; | 799 | static int pci_irq = 16; |
920 | /* | 800 | /* |
921 | * Mapping between Global System Interrupts, which | 801 | * Mapping between Global System Interrupts, which |
922 | * represent all possible interrupts, to the IRQs | 802 | * represent all possible interrupts, to the IRQs |
923 | * assigned to actual devices. | 803 | * assigned to actual devices. |
924 | */ | 804 | */ |
925 | static int gsi_to_irq[MAX_GSI_NUM]; | 805 | static int gsi_to_irq[MAX_GSI_NUM]; |
926 | 806 | ||
927 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | 807 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) |
928 | return gsi; | 808 | return gsi; |
@@ -996,6 +876,4 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
996 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 876 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
997 | return gsi; | 877 | return gsi; |
998 | } | 878 | } |
999 | |||
1000 | #endif /*CONFIG_X86_IO_APIC*/ | ||
1001 | #endif /*CONFIG_ACPI*/ | 879 | #endif /*CONFIG_ACPI*/ |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 5baa0c726e97..4d6fb047952e 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -28,71 +28,138 @@ | |||
28 | #include <asm/mce.h> | 28 | #include <asm/mce.h> |
29 | #include <asm/intel_arch_perfmon.h> | 29 | #include <asm/intel_arch_perfmon.h> |
30 | 30 | ||
31 | /* | 31 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
32 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 32 | * evntsel_nmi_owner tracks the ownership of the event selection |
33 | * - it may be reserved by some other driver, or not | 33 | * - different performance counters/event selections may be reserved for |
34 | * - when not reserved by some other driver, it may be used for | 34 | * different subsystems; this reservation system just tries to coordinate |
35 | * the NMI watchdog, or not | 35 | * things a little |
36 | * | ||
37 | * This is maintained separately from nmi_active because the NMI | ||
38 | * watchdog may also be driven from the I/O APIC timer. | ||
39 | */ | 36 | */ |
40 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 37 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); |
41 | static unsigned int lapic_nmi_owner; | 38 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); |
42 | #define LAPIC_NMI_WATCHDOG (1<<0) | 39 | |
43 | #define LAPIC_NMI_RESERVED (1<<1) | 40 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
41 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
42 | */ | ||
43 | #define NMI_MAX_COUNTER_BITS 66 | ||
44 | 44 | ||
45 | /* nmi_active: | 45 | /* nmi_active: |
46 | * +1: the lapic NMI watchdog is active, but can be disabled | 46 | * >0: the lapic NMI watchdog is active, but can be disabled |
47 | * 0: the lapic NMI watchdog has not been set up, and cannot | 47 | * <0: the lapic NMI watchdog has not been set up, and cannot |
48 | * be enabled | 48 | * be enabled |
49 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 49 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
50 | */ | 50 | */ |
51 | int nmi_active; /* oprofile uses this */ | 51 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
52 | int panic_on_timeout; | 52 | int panic_on_timeout; |
53 | 53 | ||
54 | unsigned int nmi_watchdog = NMI_DEFAULT; | 54 | unsigned int nmi_watchdog = NMI_DEFAULT; |
55 | static unsigned int nmi_hz = HZ; | 55 | static unsigned int nmi_hz = HZ; |
56 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | ||
57 | static unsigned int nmi_p4_cccr_val; | ||
58 | 56 | ||
59 | /* Note that these events don't tick when the CPU idles. This means | 57 | struct nmi_watchdog_ctlblk { |
60 | the frequency varies with CPU load. */ | 58 | int enabled; |
59 | u64 check_bit; | ||
60 | unsigned int cccr_msr; | ||
61 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
62 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
63 | }; | ||
64 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
61 | 65 | ||
62 | #define K7_EVNTSEL_ENABLE (1 << 22) | 66 | /* local prototypes */ |
63 | #define K7_EVNTSEL_INT (1 << 20) | 67 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
64 | #define K7_EVNTSEL_OS (1 << 17) | ||
65 | #define K7_EVNTSEL_USR (1 << 16) | ||
66 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
67 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
68 | 68 | ||
69 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 69 | /* converts an msr to an appropriate reservation bit */ |
70 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 70 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
71 | { | ||
72 | /* returns the bit offset of the performance counter register */ | ||
73 | switch (boot_cpu_data.x86_vendor) { | ||
74 | case X86_VENDOR_AMD: | ||
75 | return (msr - MSR_K7_PERFCTR0); | ||
76 | case X86_VENDOR_INTEL: | ||
77 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
78 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
79 | else | ||
80 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
81 | } | ||
82 | return 0; | ||
83 | } | ||
71 | 84 | ||
72 | #define MSR_P4_MISC_ENABLE 0x1A0 | 85 | /* converts an msr to an appropriate reservation bit */ |
73 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 86 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) |
74 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | 87 | { |
75 | #define MSR_P4_PERFCTR0 0x300 | 88 | /* returns the bit offset of the event selection register */ |
76 | #define MSR_P4_CCCR0 0x360 | 89 | switch (boot_cpu_data.x86_vendor) { |
77 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | 90 | case X86_VENDOR_AMD: |
78 | #define P4_ESCR_OS (1<<3) | 91 | return (msr - MSR_K7_EVNTSEL0); |
79 | #define P4_ESCR_USR (1<<2) | 92 | case X86_VENDOR_INTEL: |
80 | #define P4_CCCR_OVF_PMI0 (1<<26) | 93 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
81 | #define P4_CCCR_OVF_PMI1 (1<<27) | 94 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); |
82 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | 95 | else |
83 | #define P4_CCCR_COMPLEMENT (1<<19) | 96 | return (msr - MSR_P4_BSU_ESCR0); |
84 | #define P4_CCCR_COMPARE (1<<18) | 97 | } |
85 | #define P4_CCCR_REQUIRED (3<<16) | 98 | return 0; |
86 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | 99 | } |
87 | #define P4_CCCR_ENABLE (1<<12) | 100 | |
88 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 101 | /* checks for a bit availability (hack for oprofile) */ |
89 | CRU_ESCR0 (with any non-null event selector) through a complemented | 102 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) |
90 | max threshold. [IA32-Vol3, Section 14.9.9] */ | 103 | { |
91 | #define MSR_P4_IQ_COUNTER0 0x30C | 104 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
92 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | 105 | |
93 | #define P4_NMI_IQ_CCCR0 \ | 106 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); |
94 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | 107 | } |
95 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | 108 | |
109 | /* checks an msr for availability */ | |
110 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
111 | { | ||
112 | unsigned int counter; | ||
113 | |||
114 | counter = nmi_perfctr_msr_to_bit(msr); | ||
115 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
116 | |||
117 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
118 | } | ||
119 | |||
120 | int reserve_perfctr_nmi(unsigned int msr) | ||
121 | { | ||
122 | unsigned int counter; | ||
123 | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | ||
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
126 | |||
127 | if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) | ||
128 | return 1; | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | void release_perfctr_nmi(unsigned int msr) | ||
133 | { | ||
134 | unsigned int counter; | ||
135 | |||
136 | counter = nmi_perfctr_msr_to_bit(msr); | ||
137 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
138 | |||
139 | clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); | ||
140 | } | ||
141 | |||
142 | int reserve_evntsel_nmi(unsigned int msr) | ||
143 | { | ||
144 | unsigned int counter; | ||
145 | |||
146 | counter = nmi_evntsel_msr_to_bit(msr); | ||
147 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
148 | |||
149 | if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner))) | ||
150 | return 1; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | void release_evntsel_nmi(unsigned int msr) | ||
155 | { | ||
156 | unsigned int counter; | ||
157 | |||
158 | counter = nmi_evntsel_msr_to_bit(msr); | ||
159 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
160 | |||
161 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)); | ||
162 | } | ||
96 | 163 | ||
97 | static __cpuinit inline int nmi_known_cpu(void) | 164 | static __cpuinit inline int nmi_known_cpu(void) |
98 | { | 165 | { |
@@ -109,7 +176,7 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
109 | } | 176 | } |
110 | 177 | ||
111 | /* Run after command line and cpu_init init, but before all other checks */ | 178 | /* Run after command line and cpu_init init, but before all other checks */ |
112 | void __cpuinit nmi_watchdog_default(void) | 179 | void nmi_watchdog_default(void) |
113 | { | 180 | { |
114 | if (nmi_watchdog != NMI_DEFAULT) | 181 | if (nmi_watchdog != NMI_DEFAULT) |
115 | return; | 182 | return; |
@@ -145,6 +212,12 @@ int __init check_nmi_watchdog (void) | |||
145 | int *counts; | 212 | int *counts; |
146 | int cpu; | 213 | int cpu; |
147 | 214 | ||
215 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) | ||
216 | return 0; | ||
217 | |||
218 | if (!atomic_read(&nmi_active)) | ||
219 | return 0; | ||
220 | |||
148 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 221 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
149 | if (!counts) | 222 | if (!counts) |
150 | return -1; | 223 | return -1; |
@@ -162,26 +235,43 @@ int __init check_nmi_watchdog (void) | |||
162 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 235 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
163 | 236 | ||
164 | for_each_online_cpu(cpu) { | 237 | for_each_online_cpu(cpu) { |
238 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
239 | continue; | ||
165 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { | 240 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { |
166 | endflag = 1; | ||
167 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 241 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
168 | cpu, | 242 | cpu, |
169 | counts[cpu], | 243 | counts[cpu], |
170 | cpu_pda(cpu)->__nmi_count); | 244 | cpu_pda(cpu)->__nmi_count); |
171 | nmi_active = 0; | 245 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
172 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 246 | atomic_dec(&nmi_active); |
173 | nmi_perfctr_msr = 0; | ||
174 | kfree(counts); | ||
175 | return -1; | ||
176 | } | 247 | } |
177 | } | 248 | } |
249 | if (!atomic_read(&nmi_active)) { | ||
250 | kfree(counts); | ||
251 | atomic_set(&nmi_active, -1); | ||
252 | return -1; | ||
253 | } | ||
178 | endflag = 1; | 254 | endflag = 1; |
179 | printk("OK.\n"); | 255 | printk("OK.\n"); |
180 | 256 | ||
181 | /* now that we know it works we can reduce NMI frequency to | 257 | /* now that we know it works we can reduce NMI frequency to |
182 | something more reasonable; makes a difference in some configs */ | 258 | something more reasonable; makes a difference in some configs */ |
183 | if (nmi_watchdog == NMI_LOCAL_APIC) | 259 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
260 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
261 | |||
184 | nmi_hz = 1; | 262 | nmi_hz = 1; |
263 | /* | ||
264 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
265 | * are writable, with higher bits sign extending from bit 31. | ||
266 | * So, we can only program the counter with 31 bit values and | ||
267 | * 32nd bit should be 1, for 33.. to be 1. | ||
268 | * Find the appropriate nmi_hz | ||
269 | */ | ||
270 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
271 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
272 | nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
273 | } | ||
274 | } | ||
185 | 275 | ||
186 | kfree(counts); | 276 | kfree(counts); |
187 | return 0; | 277 | return 0; |
@@ -201,91 +291,65 @@ int __init setup_nmi_watchdog(char *str) | |||
201 | 291 | ||
202 | get_option(&str, &nmi); | 292 | get_option(&str, &nmi); |
203 | 293 | ||
204 | if (nmi >= NMI_INVALID) | 294 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
205 | return 0; | 295 | return 0; |
296 | |||
297 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) | ||
298 | return 0; /* no lapic support */ | ||
206 | nmi_watchdog = nmi; | 299 | nmi_watchdog = nmi; |
207 | return 1; | 300 | return 1; |
208 | } | 301 | } |
209 | 302 | ||
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 303 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
211 | 304 | ||
212 | static void disable_intel_arch_watchdog(void); | ||
213 | |||
214 | static void disable_lapic_nmi_watchdog(void) | 305 | static void disable_lapic_nmi_watchdog(void) |
215 | { | 306 | { |
216 | if (nmi_active <= 0) | 307 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
308 | |||
309 | if (atomic_read(&nmi_active) <= 0) | ||
217 | return; | 310 | return; |
218 | switch (boot_cpu_data.x86_vendor) { | ||
219 | case X86_VENDOR_AMD: | ||
220 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
221 | break; | ||
222 | case X86_VENDOR_INTEL: | ||
223 | if (boot_cpu_data.x86 == 15) { | ||
224 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | ||
225 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
226 | } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
227 | disable_intel_arch_watchdog(); | ||
228 | } | ||
229 | break; | ||
230 | } | ||
231 | nmi_active = -1; | ||
232 | /* tell do_nmi() and others that we're not active any more */ | ||
233 | nmi_watchdog = 0; | ||
234 | } | ||
235 | 311 | ||
236 | static void enable_lapic_nmi_watchdog(void) | 312 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
237 | { | 313 | |
238 | if (nmi_active < 0) { | 314 | BUG_ON(atomic_read(&nmi_active) != 0); |
239 | nmi_watchdog = NMI_LOCAL_APIC; | ||
240 | touch_nmi_watchdog(); | ||
241 | setup_apic_nmi_watchdog(); | ||
242 | } | ||
243 | } | 315 | } |
244 | 316 | ||
245 | int reserve_lapic_nmi(void) | 317 | static void enable_lapic_nmi_watchdog(void) |
246 | { | 318 | { |
247 | unsigned int old_owner; | 319 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
248 | 320 | ||
249 | spin_lock(&lapic_nmi_owner_lock); | 321 | /* are we already enabled */ |
250 | old_owner = lapic_nmi_owner; | 322 | if (atomic_read(&nmi_active) != 0) |
251 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | 323 | return; |
252 | spin_unlock(&lapic_nmi_owner_lock); | ||
253 | if (old_owner & LAPIC_NMI_RESERVED) | ||
254 | return -EBUSY; | ||
255 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
256 | disable_lapic_nmi_watchdog(); | ||
257 | return 0; | ||
258 | } | ||
259 | 324 | ||
260 | void release_lapic_nmi(void) | 325 | /* are we lapic aware */ |
261 | { | 326 | if (nmi_known_cpu() <= 0) |
262 | unsigned int new_owner; | 327 | return; |
263 | 328 | ||
264 | spin_lock(&lapic_nmi_owner_lock); | 329 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
265 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 330 | touch_nmi_watchdog(); |
266 | lapic_nmi_owner = new_owner; | ||
267 | spin_unlock(&lapic_nmi_owner_lock); | ||
268 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
269 | enable_lapic_nmi_watchdog(); | ||
270 | } | 331 | } |
271 | 332 | ||
272 | void disable_timer_nmi_watchdog(void) | 333 | void disable_timer_nmi_watchdog(void) |
273 | { | 334 | { |
274 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 335 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
336 | |||
337 | if (atomic_read(&nmi_active) <= 0) | ||
275 | return; | 338 | return; |
276 | 339 | ||
277 | disable_irq(0); | 340 | disable_irq(0); |
278 | unset_nmi_callback(); | 341 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
279 | nmi_active = -1; | 342 | |
280 | nmi_watchdog = NMI_NONE; | 343 | BUG_ON(atomic_read(&nmi_active) != 0); |
281 | } | 344 | } |
282 | 345 | ||
283 | void enable_timer_nmi_watchdog(void) | 346 | void enable_timer_nmi_watchdog(void) |
284 | { | 347 | { |
285 | if (nmi_active < 0) { | 348 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
286 | nmi_watchdog = NMI_IO_APIC; | 349 | |
350 | if (atomic_read(&nmi_active) == 0) { | ||
287 | touch_nmi_watchdog(); | 351 | touch_nmi_watchdog(); |
288 | nmi_active = 1; | 352 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
289 | enable_irq(0); | 353 | enable_irq(0); |
290 | } | 354 | } |
291 | } | 355 | } |
@@ -296,15 +360,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ | |||
296 | 360 | ||
297 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 361 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
298 | { | 362 | { |
299 | nmi_pm_active = nmi_active; | 363 | /* only CPU0 goes here, other CPUs should be offline */ |
300 | disable_lapic_nmi_watchdog(); | 364 | nmi_pm_active = atomic_read(&nmi_active); |
365 | stop_apic_nmi_watchdog(NULL); | ||
366 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
301 | return 0; | 367 | return 0; |
302 | } | 368 | } |
303 | 369 | ||
304 | static int lapic_nmi_resume(struct sys_device *dev) | 370 | static int lapic_nmi_resume(struct sys_device *dev) |
305 | { | 371 | { |
306 | if (nmi_pm_active > 0) | 372 | /* only CPU0 goes here, other CPUs should be offline */ |
307 | enable_lapic_nmi_watchdog(); | 373 | if (nmi_pm_active > 0) { |
374 | setup_apic_nmi_watchdog(NULL); | ||
375 | touch_nmi_watchdog(); | ||
376 | } | ||
308 | return 0; | 377 | return 0; |
309 | } | 378 | } |
310 | 379 | ||
@@ -323,7 +392,13 @@ static int __init init_lapic_nmi_sysfs(void) | |||
323 | { | 392 | { |
324 | int error; | 393 | int error; |
325 | 394 | ||
326 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 395 | /* should really be a BUG_ON but b/c this is an |
396 | * init call, it just doesn't work. -dcz | ||
397 | */ | ||
398 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
399 | return 0; | ||
400 | |||
401 | if ( atomic_read(&nmi_active) < 0 ) | ||
327 | return 0; | 402 | return 0; |
328 | 403 | ||
329 | error = sysdev_class_register(&nmi_sysclass); | 404 | error = sysdev_class_register(&nmi_sysclass); |
@@ -341,74 +416,209 @@ late_initcall(init_lapic_nmi_sysfs); | |||
341 | * Original code written by Keith Owens. | 416 | * Original code written by Keith Owens. |
342 | */ | 417 | */ |
343 | 418 | ||
344 | static void clear_msr_range(unsigned int base, unsigned int n) | 419 | /* Note that these events don't tick when the CPU idles. This means |
345 | { | 420 | the frequency varies with CPU load. */ |
346 | unsigned int i; | ||
347 | 421 | ||
348 | for(i = 0; i < n; ++i) | 422 | #define K7_EVNTSEL_ENABLE (1 << 22) |
349 | wrmsr(base+i, 0, 0); | 423 | #define K7_EVNTSEL_INT (1 << 20) |
350 | } | 424 | #define K7_EVNTSEL_OS (1 << 17) |
425 | #define K7_EVNTSEL_USR (1 << 16) | ||
426 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
427 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
351 | 428 | ||
352 | static void setup_k7_watchdog(void) | 429 | static int setup_k7_watchdog(void) |
353 | { | 430 | { |
354 | int i; | 431 | unsigned int perfctr_msr, evntsel_msr; |
355 | unsigned int evntsel; | 432 | unsigned int evntsel; |
433 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
356 | 434 | ||
357 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 435 | perfctr_msr = MSR_K7_PERFCTR0; |
436 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
437 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
438 | goto fail; | ||
358 | 439 | ||
359 | for(i = 0; i < 4; ++i) { | 440 | if (!reserve_evntsel_nmi(evntsel_msr)) |
360 | /* Simulator may not support it */ | 441 | goto fail1; |
361 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { | 442 | |
362 | nmi_perfctr_msr = 0; | 443 | /* Simulator may not support it */ |
363 | return; | 444 | if (checking_wrmsrl(evntsel_msr, 0UL)) |
364 | } | 445 | goto fail2; |
365 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); | 446 | wrmsrl(perfctr_msr, 0UL); |
366 | } | ||
367 | 447 | ||
368 | evntsel = K7_EVNTSEL_INT | 448 | evntsel = K7_EVNTSEL_INT |
369 | | K7_EVNTSEL_OS | 449 | | K7_EVNTSEL_OS |
370 | | K7_EVNTSEL_USR | 450 | | K7_EVNTSEL_USR |
371 | | K7_NMI_EVENT; | 451 | | K7_NMI_EVENT; |
372 | 452 | ||
373 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 453 | /* setup the timer */ |
374 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | 454 | wrmsr(evntsel_msr, evntsel, 0); |
455 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
375 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 456 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
376 | evntsel |= K7_EVNTSEL_ENABLE; | 457 | evntsel |= K7_EVNTSEL_ENABLE; |
377 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 458 | wrmsr(evntsel_msr, evntsel, 0); |
459 | |||
460 | wd->perfctr_msr = perfctr_msr; | ||
461 | wd->evntsel_msr = evntsel_msr; | ||
462 | wd->cccr_msr = 0; //unused | ||
463 | wd->check_bit = 1ULL<<63; | ||
464 | return 1; | ||
465 | fail2: | ||
466 | release_evntsel_nmi(evntsel_msr); | ||
467 | fail1: | ||
468 | release_perfctr_nmi(perfctr_msr); | ||
469 | fail: | ||
470 | return 0; | ||
378 | } | 471 | } |
379 | 472 | ||
380 | static void disable_intel_arch_watchdog(void) | 473 | static void stop_k7_watchdog(void) |
381 | { | 474 | { |
382 | unsigned ebx; | 475 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
383 | 476 | ||
384 | /* | 477 | wrmsr(wd->evntsel_msr, 0, 0); |
385 | * Check whether the Architectural PerfMon supports | 478 | |
386 | * Unhalted Core Cycles Event or not. | 479 | release_evntsel_nmi(wd->evntsel_msr); |
387 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 480 | release_perfctr_nmi(wd->perfctr_msr); |
481 | } | ||
482 | |||
483 | /* Note that these events don't tick when the CPU idles. This means | ||
484 | the frequency varies with CPU load. */ | ||
485 | |||
486 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
487 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
488 | #define P4_ESCR_OS (1<<3) | ||
489 | #define P4_ESCR_USR (1<<2) | ||
490 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
491 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
492 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
493 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
494 | #define P4_CCCR_COMPARE (1<<18) | ||
495 | #define P4_CCCR_REQUIRED (3<<16) | ||
496 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
497 | #define P4_CCCR_ENABLE (1<<12) | ||
498 | #define P4_CCCR_OVF (1<<31) | ||
499 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
500 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
501 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
502 | |||
503 | static int setup_p4_watchdog(void) | ||
504 | { | ||
505 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
506 | unsigned int evntsel, cccr_val; | ||
507 | unsigned int misc_enable, dummy; | ||
508 | unsigned int ht_num; | ||
509 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
510 | |||
511 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
512 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
513 | return 0; | ||
514 | |||
515 | #ifdef CONFIG_SMP | ||
516 | /* detect which hyperthread we are on */ | ||
517 | if (smp_num_siblings == 2) { | ||
518 | unsigned int ebx, apicid; | ||
519 | |||
520 | ebx = cpuid_ebx(1); | ||
521 | apicid = (ebx >> 24) & 0xff; | ||
522 | ht_num = apicid & 1; | ||
523 | } else | ||
524 | #endif | ||
525 | ht_num = 0; | ||
526 | |||
527 | /* performance counters are shared resources | ||
528 | * assign each hyperthread its own set | ||
529 | * (re-use the ESCR0 register, seems safe | ||
530 | * and keeps the cccr_val the same) | ||
388 | */ | 531 | */ |
389 | ebx = cpuid_ebx(10); | 532 | if (!ht_num) { |
390 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 533 | /* logical cpu 0 */ |
391 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | 534 | perfctr_msr = MSR_P4_IQ_PERFCTR0; |
535 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
536 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
537 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
538 | } else { | ||
539 | /* logical cpu 1 */ | ||
540 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
541 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
542 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
543 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
544 | } | ||
545 | |||
546 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
547 | goto fail; | ||
548 | |||
549 | if (!reserve_evntsel_nmi(evntsel_msr)) | ||
550 | goto fail1; | ||
551 | |||
552 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
553 | | P4_ESCR_OS | ||
554 | | P4_ESCR_USR; | ||
555 | |||
556 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
557 | | P4_CCCR_COMPLEMENT | ||
558 | | P4_CCCR_COMPARE | ||
559 | | P4_CCCR_REQUIRED; | ||
560 | |||
561 | wrmsr(evntsel_msr, evntsel, 0); | ||
562 | wrmsr(cccr_msr, cccr_val, 0); | ||
563 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
564 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
565 | cccr_val |= P4_CCCR_ENABLE; | ||
566 | wrmsr(cccr_msr, cccr_val, 0); | ||
567 | |||
568 | wd->perfctr_msr = perfctr_msr; | ||
569 | wd->evntsel_msr = evntsel_msr; | ||
570 | wd->cccr_msr = cccr_msr; | ||
571 | wd->check_bit = 1ULL<<39; | ||
572 | return 1; | ||
573 | fail1: | ||
574 | release_perfctr_nmi(perfctr_msr); | ||
575 | fail: | ||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static void stop_p4_watchdog(void) | ||
580 | { | ||
581 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
582 | |||
583 | wrmsr(wd->cccr_msr, 0, 0); | ||
584 | wrmsr(wd->evntsel_msr, 0, 0); | ||
585 | |||
586 | release_evntsel_nmi(wd->evntsel_msr); | ||
587 | release_perfctr_nmi(wd->perfctr_msr); | ||
392 | } | 588 | } |
393 | 589 | ||
590 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
591 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
592 | |||
394 | static int setup_intel_arch_watchdog(void) | 593 | static int setup_intel_arch_watchdog(void) |
395 | { | 594 | { |
595 | unsigned int ebx; | ||
596 | union cpuid10_eax eax; | ||
597 | unsigned int unused; | ||
598 | unsigned int perfctr_msr, evntsel_msr; | ||
396 | unsigned int evntsel; | 599 | unsigned int evntsel; |
397 | unsigned ebx; | 600 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
398 | 601 | ||
399 | /* | 602 | /* |
400 | * Check whether the Architectural PerfMon supports | 603 | * Check whether the Architectural PerfMon supports |
401 | * Unhalted Core Cycles Event or not. | 604 | * Unhalted Core Cycles Event or not. |
402 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 605 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
403 | */ | 606 | */ |
404 | ebx = cpuid_ebx(10); | 607 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
405 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 608 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || |
406 | return 0; | 609 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
610 | goto fail; | ||
611 | |||
612 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
613 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
407 | 614 | ||
408 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | 615 | if (!reserve_perfctr_nmi(perfctr_msr)) |
616 | goto fail; | ||
409 | 617 | ||
410 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | 618 | if (!reserve_evntsel_nmi(evntsel_msr)) |
411 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | 619 | goto fail1; |
620 | |||
621 | wrmsrl(perfctr_msr, 0UL); | ||
412 | 622 | ||
413 | evntsel = ARCH_PERFMON_EVENTSEL_INT | 623 | evntsel = ARCH_PERFMON_EVENTSEL_INT |
414 | | ARCH_PERFMON_EVENTSEL_OS | 624 | | ARCH_PERFMON_EVENTSEL_OS |
@@ -416,84 +626,122 @@ static int setup_intel_arch_watchdog(void) | |||
416 | | ARCH_PERFMON_NMI_EVENT_SEL | 626 | | ARCH_PERFMON_NMI_EVENT_SEL |
417 | | ARCH_PERFMON_NMI_EVENT_UMASK; | 627 | | ARCH_PERFMON_NMI_EVENT_UMASK; |
418 | 628 | ||
419 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 629 | /* setup the timer */ |
420 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | 630 | wrmsr(evntsel_msr, evntsel, 0); |
631 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
632 | |||
421 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 633 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
422 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 634 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
423 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 635 | wrmsr(evntsel_msr, evntsel, 0); |
636 | |||
637 | wd->perfctr_msr = perfctr_msr; | ||
638 | wd->evntsel_msr = evntsel_msr; | ||
639 | wd->cccr_msr = 0; //unused | ||
640 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
424 | return 1; | 641 | return 1; |
642 | fail1: | ||
643 | release_perfctr_nmi(perfctr_msr); | ||
644 | fail: | ||
645 | return 0; | ||
425 | } | 646 | } |
426 | 647 | ||
427 | 648 | static void stop_intel_arch_watchdog(void) | |
428 | static int setup_p4_watchdog(void) | ||
429 | { | 649 | { |
430 | unsigned int misc_enable, dummy; | 650 | unsigned int ebx; |
651 | union cpuid10_eax eax; | ||
652 | unsigned int unused; | ||
653 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
431 | 654 | ||
432 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 655 | /* |
433 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 656 | * Check whether the Architectural PerfMon supports |
434 | return 0; | 657 | * Unhalted Core Cycles Event or not. |
658 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
659 | */ | ||
660 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
661 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
662 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
663 | return; | ||
435 | 664 | ||
436 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | 665 | wrmsr(wd->evntsel_msr, 0, 0); |
437 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
438 | #ifdef CONFIG_SMP | ||
439 | if (smp_num_siblings == 2) | ||
440 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | ||
441 | #endif | ||
442 | 666 | ||
443 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 667 | release_evntsel_nmi(wd->evntsel_msr); |
444 | clear_msr_range(0x3F1, 2); | 668 | release_perfctr_nmi(wd->perfctr_msr); |
445 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | ||
446 | docs doesn't fully define it, so leave it alone for now. */ | ||
447 | if (boot_cpu_data.x86_model >= 0x3) { | ||
448 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | ||
449 | clear_msr_range(0x3A0, 26); | ||
450 | clear_msr_range(0x3BC, 3); | ||
451 | } else { | ||
452 | clear_msr_range(0x3A0, 31); | ||
453 | } | ||
454 | clear_msr_range(0x3C0, 6); | ||
455 | clear_msr_range(0x3C8, 6); | ||
456 | clear_msr_range(0x3E0, 2); | ||
457 | clear_msr_range(MSR_P4_CCCR0, 18); | ||
458 | clear_msr_range(MSR_P4_PERFCTR0, 18); | ||
459 | |||
460 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | ||
461 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | ||
462 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz)); | ||
463 | wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
464 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
465 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
466 | return 1; | ||
467 | } | 669 | } |
468 | 670 | ||
469 | void setup_apic_nmi_watchdog(void) | 671 | void setup_apic_nmi_watchdog(void *unused) |
470 | { | 672 | { |
471 | switch (boot_cpu_data.x86_vendor) { | 673 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
472 | case X86_VENDOR_AMD: | 674 | |
473 | if (boot_cpu_data.x86 != 15) | 675 | /* only support LOCAL and IO APICs for now */ |
474 | return; | 676 | if ((nmi_watchdog != NMI_LOCAL_APIC) && |
475 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | 677 | (nmi_watchdog != NMI_IO_APIC)) |
476 | return; | 678 | return; |
477 | setup_k7_watchdog(); | 679 | |
478 | break; | 680 | if (wd->enabled == 1) |
479 | case X86_VENDOR_INTEL: | 681 | return; |
480 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 682 | |
481 | if (!setup_intel_arch_watchdog()) | 683 | /* cheap hack to support suspend/resume */ |
684 | /* if cpu0 is not active neither should the other cpus */ | ||
685 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | ||
686 | return; | ||
687 | |||
688 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
689 | switch (boot_cpu_data.x86_vendor) { | ||
690 | case X86_VENDOR_AMD: | ||
691 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | ||
482 | return; | 692 | return; |
483 | } else if (boot_cpu_data.x86 == 15) { | 693 | if (!setup_k7_watchdog()) |
694 | return; | ||
695 | break; | ||
696 | case X86_VENDOR_INTEL: | ||
697 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
698 | if (!setup_intel_arch_watchdog()) | ||
699 | return; | ||
700 | break; | ||
701 | } | ||
484 | if (!setup_p4_watchdog()) | 702 | if (!setup_p4_watchdog()) |
485 | return; | 703 | return; |
486 | } else { | 704 | break; |
705 | default: | ||
487 | return; | 706 | return; |
488 | } | 707 | } |
708 | } | ||
709 | wd->enabled = 1; | ||
710 | atomic_inc(&nmi_active); | ||
711 | } | ||
712 | |||
713 | void stop_apic_nmi_watchdog(void *unused) | ||
714 | { | ||
715 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
489 | 716 | ||
490 | break; | 717 | /* only support LOCAL and IO APICs for now */ |
718 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
719 | (nmi_watchdog != NMI_IO_APIC)) | ||
720 | return; | ||
491 | 721 | ||
492 | default: | 722 | if (wd->enabled == 0) |
493 | return; | 723 | return; |
724 | |||
725 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
726 | switch (boot_cpu_data.x86_vendor) { | ||
727 | case X86_VENDOR_AMD: | ||
728 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | ||
729 | return; | ||
730 | stop_k7_watchdog(); | ||
731 | break; | ||
732 | case X86_VENDOR_INTEL: | ||
733 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
734 | stop_intel_arch_watchdog(); | ||
735 | break; | ||
736 | } | ||
737 | stop_p4_watchdog(); | ||
738 | break; | ||
739 | default: | ||
740 | return; | ||
741 | } | ||
494 | } | 742 | } |
495 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 743 | wd->enabled = 0; |
496 | nmi_active = 1; | 744 | atomic_dec(&nmi_active); |
497 | } | 745 | } |
498 | 746 | ||
499 | /* | 747 | /* |
@@ -526,93 +774,109 @@ void touch_nmi_watchdog (void) | |||
526 | touch_softlockup_watchdog(); | 774 | touch_softlockup_watchdog(); |
527 | } | 775 | } |
528 | 776 | ||
529 | void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | 777 | int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
530 | { | 778 | { |
531 | int sum; | 779 | int sum; |
532 | int touched = 0; | 780 | int touched = 0; |
781 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
782 | u64 dummy; | ||
783 | int rc=0; | ||
784 | |||
785 | /* check for other users first */ | ||
786 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
787 | == NOTIFY_STOP) { | ||
788 | rc = 1; | ||
789 | touched = 1; | ||
790 | } | ||
533 | 791 | ||
534 | sum = read_pda(apic_timer_irqs); | 792 | sum = read_pda(apic_timer_irqs); |
535 | if (__get_cpu_var(nmi_touch)) { | 793 | if (__get_cpu_var(nmi_touch)) { |
536 | __get_cpu_var(nmi_touch) = 0; | 794 | __get_cpu_var(nmi_touch) = 0; |
537 | touched = 1; | 795 | touched = 1; |
538 | } | 796 | } |
797 | |||
539 | #ifdef CONFIG_X86_MCE | 798 | #ifdef CONFIG_X86_MCE |
540 | /* Could check oops_in_progress here too, but it's safer | 799 | /* Could check oops_in_progress here too, but it's safer |
541 | not too */ | 800 | not too */ |
542 | if (atomic_read(&mce_entry) > 0) | 801 | if (atomic_read(&mce_entry) > 0) |
543 | touched = 1; | 802 | touched = 1; |
544 | #endif | 803 | #endif |
804 | /* if the apic timer isn't firing, this cpu isn't doing much */ | ||
545 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | 805 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { |
546 | /* | 806 | /* |
547 | * Ayiee, looks like this CPU is stuck ... | 807 | * Ayiee, looks like this CPU is stuck ... |
548 | * wait a few IRQs (5 seconds) before doing the oops ... | 808 | * wait a few IRQs (5 seconds) before doing the oops ... |
549 | */ | 809 | */ |
550 | local_inc(&__get_cpu_var(alert_counter)); | 810 | local_inc(&__get_cpu_var(alert_counter)); |
551 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { | 811 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) |
552 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 812 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, |
553 | == NOTIFY_STOP) { | 813 | panic_on_timeout); |
554 | local_set(&__get_cpu_var(alert_counter), 0); | ||
555 | return; | ||
556 | } | ||
557 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); | ||
558 | } | ||
559 | } else { | 814 | } else { |
560 | __get_cpu_var(last_irq_sum) = sum; | 815 | __get_cpu_var(last_irq_sum) = sum; |
561 | local_set(&__get_cpu_var(alert_counter), 0); | 816 | local_set(&__get_cpu_var(alert_counter), 0); |
562 | } | 817 | } |
563 | if (nmi_perfctr_msr) { | 818 | |
564 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 819 | /* see if the nmi watchdog went off */ |
565 | /* | 820 | if (wd->enabled) { |
566 | * P4 quirks: | 821 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
567 | * - An overflown perfctr will assert its interrupt | 822 | rdmsrl(wd->perfctr_msr, dummy); |
568 | * until the OVF flag in its CCCR is cleared. | 823 | if (dummy & wd->check_bit){ |
569 | * - LVTPC is masked on interrupt and must be | 824 | /* this wasn't a watchdog timer interrupt */ |
570 | * unmasked by the LVTPC handler. | 825 | goto done; |
571 | */ | 826 | } |
572 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 827 | |
573 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 828 | /* only Intel uses the cccr msr */ |
574 | } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 829 | if (wd->cccr_msr != 0) { |
575 | /* | 830 | /* |
576 | * For Intel based architectural perfmon | 831 | * P4 quirks: |
577 | * - LVTPC is masked on interrupt and must be | 832 | * - An overflown perfctr will assert its interrupt |
578 | * unmasked by the LVTPC handler. | 833 | * until the OVF flag in its CCCR is cleared. |
834 | * - LVTPC is masked on interrupt and must be | ||
835 | * unmasked by the LVTPC handler. | ||
836 | */ | ||
837 | rdmsrl(wd->cccr_msr, dummy); | ||
838 | dummy &= ~P4_CCCR_OVF; | ||
839 | wrmsrl(wd->cccr_msr, dummy); | ||
840 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
841 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
842 | /* | ||
843 | * ArchPerfom/Core Duo needs to re-unmask | ||
844 | * the apic vector | ||
845 | */ | ||
846 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
847 | } | ||
848 | /* start the cycle over again */ | ||
849 | wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
850 | rc = 1; | ||
851 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
852 | /* don't know how to accurately check for this. | ||
853 | * just assume it was a watchdog timer interrupt | ||
854 | * This matches the old behaviour. | ||
579 | */ | 855 | */ |
580 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 856 | rc = 1; |
581 | } | 857 | } else |
582 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 858 | printk(KERN_WARNING "Unknown enabled NMI hardware?!\n"); |
583 | } | 859 | } |
860 | done: | ||
861 | return rc; | ||
584 | } | 862 | } |
585 | 863 | ||
586 | static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu) | ||
587 | { | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | ||
592 | |||
593 | asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) | 864 | asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) |
594 | { | 865 | { |
595 | int cpu = safe_smp_processor_id(); | ||
596 | |||
597 | nmi_enter(); | 866 | nmi_enter(); |
598 | add_pda(__nmi_count,1); | 867 | add_pda(__nmi_count,1); |
599 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | 868 | default_do_nmi(regs); |
600 | default_do_nmi(regs); | ||
601 | nmi_exit(); | 869 | nmi_exit(); |
602 | } | 870 | } |
603 | 871 | ||
604 | void set_nmi_callback(nmi_callback_t callback) | 872 | int do_nmi_callback(struct pt_regs * regs, int cpu) |
605 | { | 873 | { |
606 | vmalloc_sync_all(); | 874 | #ifdef CONFIG_SYSCTL |
607 | rcu_assign_pointer(nmi_callback, callback); | 875 | if (unknown_nmi_panic) |
608 | } | 876 | return unknown_nmi_panic_callback(regs, cpu); |
609 | EXPORT_SYMBOL_GPL(set_nmi_callback); | 877 | #endif |
610 | 878 | return 0; | |
611 | void unset_nmi_callback(void) | ||
612 | { | ||
613 | nmi_callback = dummy_nmi_callback; | ||
614 | } | 879 | } |
615 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | ||
616 | 880 | ||
617 | #ifdef CONFIG_SYSCTL | 881 | #ifdef CONFIG_SYSCTL |
618 | 882 | ||
@@ -621,36 +885,42 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
621 | unsigned char reason = get_nmi_reason(); | 885 | unsigned char reason = get_nmi_reason(); |
622 | char buf[64]; | 886 | char buf[64]; |
623 | 887 | ||
624 | if (!(reason & 0xc0)) { | 888 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
625 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 889 | die_nmi(buf, regs, 1); /* Always panic here */ |
626 | die_nmi(buf,regs); | ||
627 | } | ||
628 | return 0; | 890 | return 0; |
629 | } | 891 | } |
630 | 892 | ||
631 | /* | 893 | /* |
632 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 894 | * proc handler for /proc/sys/kernel/nmi |
633 | */ | 895 | */ |
634 | int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, | 896 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, |
635 | void __user *buffer, size_t *length, loff_t *ppos) | 897 | void __user *buffer, size_t *length, loff_t *ppos) |
636 | { | 898 | { |
637 | int old_state; | 899 | int old_state; |
638 | 900 | ||
639 | old_state = unknown_nmi_panic; | 901 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
902 | old_state = nmi_watchdog_enabled; | ||
640 | proc_dointvec(table, write, file, buffer, length, ppos); | 903 | proc_dointvec(table, write, file, buffer, length, ppos); |
641 | if (!!old_state == !!unknown_nmi_panic) | 904 | if (!!old_state == !!nmi_watchdog_enabled) |
642 | return 0; | 905 | return 0; |
643 | 906 | ||
644 | if (unknown_nmi_panic) { | 907 | if (atomic_read(&nmi_active) < 0) { |
645 | if (reserve_lapic_nmi() < 0) { | 908 | printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); |
646 | unknown_nmi_panic = 0; | 909 | return -EIO; |
647 | return -EBUSY; | 910 | } |
648 | } else { | 911 | |
649 | set_nmi_callback(unknown_nmi_panic_callback); | 912 | /* if nmi_watchdog is not set yet, then set it */ |
650 | } | 913 | nmi_watchdog_default(); |
914 | |||
915 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
916 | if (nmi_watchdog_enabled) | ||
917 | enable_lapic_nmi_watchdog(); | ||
918 | else | ||
919 | disable_lapic_nmi_watchdog(); | ||
651 | } else { | 920 | } else { |
652 | release_lapic_nmi(); | 921 | printk( KERN_WARNING |
653 | unset_nmi_callback(); | 922 | "NMI watchdog doesn't know what hardware to touch\n"); |
923 | return -EIO; | ||
654 | } | 924 | } |
655 | return 0; | 925 | return 0; |
656 | } | 926 | } |
@@ -659,8 +929,12 @@ int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file | |||
659 | 929 | ||
660 | EXPORT_SYMBOL(nmi_active); | 930 | EXPORT_SYMBOL(nmi_active); |
661 | EXPORT_SYMBOL(nmi_watchdog); | 931 | EXPORT_SYMBOL(nmi_watchdog); |
662 | EXPORT_SYMBOL(reserve_lapic_nmi); | 932 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
663 | EXPORT_SYMBOL(release_lapic_nmi); | 933 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
934 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
935 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
936 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
937 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
664 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 938 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
665 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 939 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |
666 | EXPORT_SYMBOL(touch_nmi_watchdog); | 940 | EXPORT_SYMBOL(touch_nmi_watchdog); |
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 146924ba5df5..cfb09b07ae99 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c | |||
@@ -86,7 +86,8 @@ | |||
86 | 86 | ||
87 | #define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ | 87 | #define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ |
88 | #define MAX_NUM_CHASSIS 8 /* max number of chassis */ | 88 | #define MAX_NUM_CHASSIS 8 /* max number of chassis */ |
89 | #define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) /* max dev->bus->number */ | 89 | /* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */ |
90 | #define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) | ||
90 | #define PHBS_PER_CALGARY 4 | 91 | #define PHBS_PER_CALGARY 4 |
91 | 92 | ||
92 | /* register offsets in Calgary's internal register space */ | 93 | /* register offsets in Calgary's internal register space */ |
@@ -111,31 +112,49 @@ static const unsigned long phb_offsets[] = { | |||
111 | 0xB000 /* PHB3 */ | 112 | 0xB000 /* PHB3 */ |
112 | }; | 113 | }; |
113 | 114 | ||
114 | static char bus_to_phb[MAX_PHB_BUS_NUM]; | ||
115 | void* tce_table_kva[MAX_PHB_BUS_NUM]; | ||
116 | unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; | 115 | unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; |
117 | static int translate_empty_slots __read_mostly = 0; | 116 | static int translate_empty_slots __read_mostly = 0; |
118 | static int calgary_detected __read_mostly = 0; | 117 | static int calgary_detected __read_mostly = 0; |
119 | 118 | ||
120 | /* | 119 | struct calgary_bus_info { |
121 | * the bitmap of PHBs the user requested that we disable | 120 | void *tce_space; |
122 | * translation on. | 121 | unsigned char translation_disabled; |
123 | */ | 122 | signed char phbid; |
124 | static DECLARE_BITMAP(translation_disabled, MAX_PHB_BUS_NUM); | 123 | }; |
124 | |||
125 | static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; | ||
125 | 126 | ||
126 | static void tce_cache_blast(struct iommu_table *tbl); | 127 | static void tce_cache_blast(struct iommu_table *tbl); |
127 | 128 | ||
128 | /* enable this to stress test the chip's TCE cache */ | 129 | /* enable this to stress test the chip's TCE cache */ |
129 | #ifdef CONFIG_IOMMU_DEBUG | 130 | #ifdef CONFIG_IOMMU_DEBUG |
130 | static inline void tce_cache_blast_stress(struct iommu_table *tbl) | 131 | int debugging __read_mostly = 1; |
132 | |||
133 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | ||
134 | int expected, unsigned long start, unsigned long end) | ||
131 | { | 135 | { |
132 | tce_cache_blast(tbl); | 136 | unsigned long idx = start; |
137 | |||
138 | BUG_ON(start >= end); | ||
139 | |||
140 | while (idx < end) { | ||
141 | if (!!test_bit(idx, bitmap) != expected) | ||
142 | return idx; | ||
143 | ++idx; | ||
144 | } | ||
145 | |||
146 | /* all bits have the expected value */ | ||
147 | return ~0UL; | ||
133 | } | 148 | } |
134 | #else | 149 | #else /* debugging is disabled */ |
135 | static inline void tce_cache_blast_stress(struct iommu_table *tbl) | 150 | int debugging __read_mostly = 0; |
151 | |||
152 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | ||
153 | int expected, unsigned long start, unsigned long end) | ||
136 | { | 154 | { |
155 | return ~0UL; | ||
137 | } | 156 | } |
138 | #endif /* BLAST_TCE_CACHE_ON_UNMAP */ | 157 | #endif /* CONFIG_IOMMU_DEBUG */ |
139 | 158 | ||
140 | static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) | 159 | static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) |
141 | { | 160 | { |
@@ -149,7 +168,7 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) | |||
149 | 168 | ||
150 | static inline int translate_phb(struct pci_dev* dev) | 169 | static inline int translate_phb(struct pci_dev* dev) |
151 | { | 170 | { |
152 | int disabled = test_bit(dev->bus->number, translation_disabled); | 171 | int disabled = bus_info[dev->bus->number].translation_disabled; |
153 | return !disabled; | 172 | return !disabled; |
154 | } | 173 | } |
155 | 174 | ||
@@ -158,6 +177,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
158 | { | 177 | { |
159 | unsigned long index; | 178 | unsigned long index; |
160 | unsigned long end; | 179 | unsigned long end; |
180 | unsigned long badbit; | ||
161 | 181 | ||
162 | index = start_addr >> PAGE_SHIFT; | 182 | index = start_addr >> PAGE_SHIFT; |
163 | 183 | ||
@@ -169,14 +189,15 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
169 | if (end > tbl->it_size) /* don't go off the table */ | 189 | if (end > tbl->it_size) /* don't go off the table */ |
170 | end = tbl->it_size; | 190 | end = tbl->it_size; |
171 | 191 | ||
172 | while (index < end) { | 192 | badbit = verify_bit_range(tbl->it_map, 0, index, end); |
173 | if (test_bit(index, tbl->it_map)) | 193 | if (badbit != ~0UL) { |
194 | if (printk_ratelimit()) | ||
174 | printk(KERN_ERR "Calgary: entry already allocated at " | 195 | printk(KERN_ERR "Calgary: entry already allocated at " |
175 | "0x%lx tbl %p dma 0x%lx npages %u\n", | 196 | "0x%lx tbl %p dma 0x%lx npages %u\n", |
176 | index, tbl, start_addr, npages); | 197 | badbit, tbl, start_addr, npages); |
177 | ++index; | ||
178 | } | 198 | } |
179 | set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages); | 199 | |
200 | set_bit_string(tbl->it_map, index, npages); | ||
180 | } | 201 | } |
181 | 202 | ||
182 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, | 203 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, |
@@ -243,7 +264,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
243 | unsigned int npages) | 264 | unsigned int npages) |
244 | { | 265 | { |
245 | unsigned long entry; | 266 | unsigned long entry; |
246 | unsigned long i; | 267 | unsigned long badbit; |
247 | 268 | ||
248 | entry = dma_addr >> PAGE_SHIFT; | 269 | entry = dma_addr >> PAGE_SHIFT; |
249 | 270 | ||
@@ -251,16 +272,15 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
251 | 272 | ||
252 | tce_free(tbl, entry, npages); | 273 | tce_free(tbl, entry, npages); |
253 | 274 | ||
254 | for (i = 0; i < npages; ++i) { | 275 | badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages); |
255 | if (!test_bit(entry + i, tbl->it_map)) | 276 | if (badbit != ~0UL) { |
277 | if (printk_ratelimit()) | ||
256 | printk(KERN_ERR "Calgary: bit is off at 0x%lx " | 278 | printk(KERN_ERR "Calgary: bit is off at 0x%lx " |
257 | "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", | 279 | "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", |
258 | entry + i, tbl, dma_addr, entry, npages); | 280 | badbit, tbl, dma_addr, entry, npages); |
259 | } | 281 | } |
260 | 282 | ||
261 | __clear_bit_string(tbl->it_map, entry, npages); | 283 | __clear_bit_string(tbl->it_map, entry, npages); |
262 | |||
263 | tce_cache_blast_stress(tbl); | ||
264 | } | 284 | } |
265 | 285 | ||
266 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | 286 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, |
@@ -454,7 +474,7 @@ static struct dma_mapping_ops calgary_dma_ops = { | |||
454 | 474 | ||
455 | static inline int busno_to_phbid(unsigned char num) | 475 | static inline int busno_to_phbid(unsigned char num) |
456 | { | 476 | { |
457 | return bus_to_phb[num]; | 477 | return bus_info[num].phbid; |
458 | } | 478 | } |
459 | 479 | ||
460 | static inline unsigned long split_queue_offset(unsigned char num) | 480 | static inline unsigned long split_queue_offset(unsigned char num) |
@@ -631,6 +651,10 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
631 | if (ret) | 651 | if (ret) |
632 | return ret; | 652 | return ret; |
633 | 653 | ||
654 | tbl = dev->sysdata; | ||
655 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; | ||
656 | tce_free(tbl, 0, tbl->it_size); | ||
657 | |||
634 | calgary_reserve_regions(dev); | 658 | calgary_reserve_regions(dev); |
635 | 659 | ||
636 | /* set TARs for each PHB */ | 660 | /* set TARs for each PHB */ |
@@ -654,11 +678,12 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
654 | return 0; | 678 | return 0; |
655 | } | 679 | } |
656 | 680 | ||
657 | static void __init calgary_free_tar(struct pci_dev *dev) | 681 | static void __init calgary_free_bus(struct pci_dev *dev) |
658 | { | 682 | { |
659 | u64 val64; | 683 | u64 val64; |
660 | struct iommu_table *tbl = dev->sysdata; | 684 | struct iommu_table *tbl = dev->sysdata; |
661 | void __iomem *target; | 685 | void __iomem *target; |
686 | unsigned int bitmapsz; | ||
662 | 687 | ||
663 | target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); | 688 | target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); |
664 | val64 = be64_to_cpu(readq(target)); | 689 | val64 = be64_to_cpu(readq(target)); |
@@ -666,8 +691,15 @@ static void __init calgary_free_tar(struct pci_dev *dev) | |||
666 | writeq(cpu_to_be64(val64), target); | 691 | writeq(cpu_to_be64(val64), target); |
667 | readq(target); /* flush */ | 692 | readq(target); /* flush */ |
668 | 693 | ||
694 | bitmapsz = tbl->it_size / BITS_PER_BYTE; | ||
695 | free_pages((unsigned long)tbl->it_map, get_order(bitmapsz)); | ||
696 | tbl->it_map = NULL; | ||
697 | |||
669 | kfree(tbl); | 698 | kfree(tbl); |
670 | dev->sysdata = NULL; | 699 | dev->sysdata = NULL; |
700 | |||
701 | /* Can't free bootmem allocated memory after system is up :-( */ | ||
702 | bus_info[dev->bus->number].tce_space = NULL; | ||
671 | } | 703 | } |
672 | 704 | ||
673 | static void calgary_watchdog(unsigned long data) | 705 | static void calgary_watchdog(unsigned long data) |
@@ -772,12 +804,11 @@ static inline unsigned int __init locate_register_space(struct pci_dev *dev) | |||
772 | return address; | 804 | return address; |
773 | } | 805 | } |
774 | 806 | ||
775 | static int __init calgary_init_one_nontraslated(struct pci_dev *dev) | 807 | static void __init calgary_init_one_nontraslated(struct pci_dev *dev) |
776 | { | 808 | { |
809 | pci_dev_get(dev); | ||
777 | dev->sysdata = NULL; | 810 | dev->sysdata = NULL; |
778 | dev->bus->self = dev; | 811 | dev->bus->self = dev; |
779 | |||
780 | return 0; | ||
781 | } | 812 | } |
782 | 813 | ||
783 | static int __init calgary_init_one(struct pci_dev *dev) | 814 | static int __init calgary_init_one(struct pci_dev *dev) |
@@ -798,6 +829,7 @@ static int __init calgary_init_one(struct pci_dev *dev) | |||
798 | if (ret) | 829 | if (ret) |
799 | goto iounmap; | 830 | goto iounmap; |
800 | 831 | ||
832 | pci_dev_get(dev); | ||
801 | dev->bus->self = dev; | 833 | dev->bus->self = dev; |
802 | calgary_enable_translation(dev); | 834 | calgary_enable_translation(dev); |
803 | 835 | ||
@@ -824,10 +856,9 @@ static int __init calgary_init(void) | |||
824 | calgary_init_one_nontraslated(dev); | 856 | calgary_init_one_nontraslated(dev); |
825 | continue; | 857 | continue; |
826 | } | 858 | } |
827 | if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) { | 859 | if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) |
828 | pci_dev_put(dev); | ||
829 | continue; | 860 | continue; |
830 | } | 861 | |
831 | ret = calgary_init_one(dev); | 862 | ret = calgary_init_one(dev); |
832 | if (ret) | 863 | if (ret) |
833 | goto error; | 864 | goto error; |
@@ -840,15 +871,18 @@ error: | |||
840 | dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, | 871 | dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, |
841 | PCI_DEVICE_ID_IBM_CALGARY, | 872 | PCI_DEVICE_ID_IBM_CALGARY, |
842 | dev); | 873 | dev); |
874 | if (!dev) | ||
875 | break; | ||
843 | if (!translate_phb(dev)) { | 876 | if (!translate_phb(dev)) { |
844 | pci_dev_put(dev); | 877 | pci_dev_put(dev); |
845 | continue; | 878 | continue; |
846 | } | 879 | } |
847 | if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) | 880 | if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) |
848 | continue; | 881 | continue; |
882 | |||
849 | calgary_disable_translation(dev); | 883 | calgary_disable_translation(dev); |
850 | calgary_free_tar(dev); | 884 | calgary_free_bus(dev); |
851 | pci_dev_put(dev); | 885 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
852 | } | 886 | } |
853 | 887 | ||
854 | return ret; | 888 | return ret; |
@@ -890,13 +924,15 @@ void __init detect_calgary(void) | |||
890 | if (swiotlb || no_iommu || iommu_detected) | 924 | if (swiotlb || no_iommu || iommu_detected) |
891 | return; | 925 | return; |
892 | 926 | ||
927 | if (!early_pci_allowed()) | ||
928 | return; | ||
929 | |||
893 | specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); | 930 | specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); |
894 | 931 | ||
895 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 932 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
896 | int dev; | 933 | int dev; |
897 | 934 | struct calgary_bus_info *info = &bus_info[bus]; | |
898 | tce_table_kva[bus] = NULL; | 935 | info->phbid = -1; |
899 | bus_to_phb[bus] = -1; | ||
900 | 936 | ||
901 | if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) | 937 | if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) |
902 | continue; | 938 | continue; |
@@ -907,12 +943,9 @@ void __init detect_calgary(void) | |||
907 | */ | 943 | */ |
908 | phb = (phb + 1) % PHBS_PER_CALGARY; | 944 | phb = (phb + 1) % PHBS_PER_CALGARY; |
909 | 945 | ||
910 | if (test_bit(bus, translation_disabled)) { | 946 | if (info->translation_disabled) |
911 | printk(KERN_INFO "Calgary: translation is disabled for " | ||
912 | "PHB 0x%x\n", bus); | ||
913 | /* skip this phb, don't allocate a tbl for it */ | ||
914 | continue; | 947 | continue; |
915 | } | 948 | |
916 | /* | 949 | /* |
917 | * Scan the slots of the PCI bus to see if there is a device present. | 950 | * Scan the slots of the PCI bus to see if there is a device present. |
918 | * The parent bus will be the zero-ith device, so start at 1. | 951 | * The parent bus will be the zero-ith device, so start at 1. |
@@ -923,8 +956,8 @@ void __init detect_calgary(void) | |||
923 | tbl = alloc_tce_table(); | 956 | tbl = alloc_tce_table(); |
924 | if (!tbl) | 957 | if (!tbl) |
925 | goto cleanup; | 958 | goto cleanup; |
926 | tce_table_kva[bus] = tbl; | 959 | info->tce_space = tbl; |
927 | bus_to_phb[bus] = phb; | 960 | info->phbid = phb; |
928 | calgary_found = 1; | 961 | calgary_found = 1; |
929 | break; | 962 | break; |
930 | } | 963 | } |
@@ -934,15 +967,20 @@ void __init detect_calgary(void) | |||
934 | if (calgary_found) { | 967 | if (calgary_found) { |
935 | iommu_detected = 1; | 968 | iommu_detected = 1; |
936 | calgary_detected = 1; | 969 | calgary_detected = 1; |
937 | printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. " | 970 | printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); |
938 | "TCE table spec is %d.\n", specified_table_size); | 971 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
972 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | ||
973 | debugging ? "enabled" : "disabled"); | ||
939 | } | 974 | } |
940 | return; | 975 | return; |
941 | 976 | ||
942 | cleanup: | 977 | cleanup: |
943 | for (--bus; bus >= 0; --bus) | 978 | for (--bus; bus >= 0; --bus) { |
944 | if (tce_table_kva[bus]) | 979 | struct calgary_bus_info *info = &bus_info[bus]; |
945 | free_tce_table(tce_table_kva[bus]); | 980 | |
981 | if (info->tce_space) | ||
982 | free_tce_table(info->tce_space); | ||
983 | } | ||
946 | } | 984 | } |
947 | 985 | ||
948 | int __init calgary_iommu_init(void) | 986 | int __init calgary_iommu_init(void) |
@@ -1016,7 +1054,7 @@ static int __init calgary_parse_options(char *p) | |||
1016 | if (bridge < MAX_PHB_BUS_NUM) { | 1054 | if (bridge < MAX_PHB_BUS_NUM) { |
1017 | printk(KERN_INFO "Calgary: disabling " | 1055 | printk(KERN_INFO "Calgary: disabling " |
1018 | "translation for PHB 0x%x\n", bridge); | 1056 | "translation for PHB 0x%x\n", bridge); |
1019 | set_bit(bridge, translation_disabled); | 1057 | bus_info[bridge].translation_disabled = 1; |
1020 | } | 1058 | } |
1021 | } | 1059 | } |
1022 | 1060 | ||
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 9c44f4f2433d..4dcb671bd19f 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -236,6 +236,9 @@ __init int iommu_setup(char *p) | |||
236 | { | 236 | { |
237 | iommu_merge = 1; | 237 | iommu_merge = 1; |
238 | 238 | ||
239 | if (!p) | ||
240 | return -EINVAL; | ||
241 | |||
239 | while (*p) { | 242 | while (*p) { |
240 | if (!strncmp(p,"off",3)) | 243 | if (!strncmp(p,"off",3)) |
241 | no_iommu = 1; | 244 | no_iommu = 1; |
@@ -278,9 +281,9 @@ __init int iommu_setup(char *p) | |||
278 | if (*p == ',') | 281 | if (*p == ',') |
279 | ++p; | 282 | ++p; |
280 | } | 283 | } |
281 | return 1; | 284 | return 0; |
282 | } | 285 | } |
283 | __setup("iommu=", iommu_setup); | 286 | early_param("iommu", iommu_setup); |
284 | 287 | ||
285 | void __init pci_iommu_alloc(void) | 288 | void __init pci_iommu_alloc(void) |
286 | { | 289 | { |
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 6d3e61baf7a0..16261a8a3303 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -239,8 +239,6 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) | |||
239 | { | 239 | { |
240 | unsigned long phys_mem, bus; | 240 | unsigned long phys_mem, bus; |
241 | 241 | ||
242 | BUG_ON(dir == DMA_NONE); | ||
243 | |||
244 | if (!dev) | 242 | if (!dev) |
245 | dev = &fallback_dev; | 243 | dev = &fallback_dev; |
246 | 244 | ||
@@ -383,7 +381,6 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
383 | unsigned long pages = 0; | 381 | unsigned long pages = 0; |
384 | int need = 0, nextneed; | 382 | int need = 0, nextneed; |
385 | 383 | ||
386 | BUG_ON(dir == DMA_NONE); | ||
387 | if (nents == 0) | 384 | if (nents == 0) |
388 | return 0; | 385 | return 0; |
389 | 386 | ||
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index aad7609d8e92..df09ab05a1bd 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c | |||
@@ -59,7 +59,6 @@ int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
59 | { | 59 | { |
60 | int i; | 60 | int i; |
61 | 61 | ||
62 | BUG_ON(direction == DMA_NONE); | ||
63 | for (i = 0; i < nents; i++ ) { | 62 | for (i = 0; i < nents; i++ ) { |
64 | struct scatterlist *s = &sg[i]; | 63 | struct scatterlist *s = &sg[i]; |
65 | BUG_ON(!s->page); | 64 | BUG_ON(!s->page); |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index bb6745d13b8f..458006ae19f3 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -80,25 +80,25 @@ void idle_notifier_unregister(struct notifier_block *n) | |||
80 | } | 80 | } |
81 | EXPORT_SYMBOL(idle_notifier_unregister); | 81 | EXPORT_SYMBOL(idle_notifier_unregister); |
82 | 82 | ||
83 | enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; | ||
84 | static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; | ||
85 | |||
86 | void enter_idle(void) | 83 | void enter_idle(void) |
87 | { | 84 | { |
88 | __get_cpu_var(idle_state) = CPU_IDLE; | 85 | write_pda(isidle, 1); |
89 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | 86 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); |
90 | } | 87 | } |
91 | 88 | ||
92 | static void __exit_idle(void) | 89 | static void __exit_idle(void) |
93 | { | 90 | { |
94 | __get_cpu_var(idle_state) = CPU_NOT_IDLE; | 91 | if (read_pda(isidle) == 0) |
92 | return; | ||
93 | write_pda(isidle, 0); | ||
95 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | 94 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); |
96 | } | 95 | } |
97 | 96 | ||
98 | /* Called from interrupts to signify idle end */ | 97 | /* Called from interrupts to signify idle end */ |
99 | void exit_idle(void) | 98 | void exit_idle(void) |
100 | { | 99 | { |
101 | if (current->pid | read_pda(irqcount)) | 100 | /* idle loop has pid 0 */ |
101 | if (current->pid) | ||
102 | return; | 102 | return; |
103 | __exit_idle(); | 103 | __exit_idle(); |
104 | } | 104 | } |
@@ -220,6 +220,9 @@ void cpu_idle (void) | |||
220 | play_dead(); | 220 | play_dead(); |
221 | enter_idle(); | 221 | enter_idle(); |
222 | idle(); | 222 | idle(); |
223 | /* In many cases the interrupt that ended idle | ||
224 | has already called exit_idle. But some idle | ||
225 | loops can be woken up without interrupt. */ | ||
223 | __exit_idle(); | 226 | __exit_idle(); |
224 | } | 227 | } |
225 | 228 | ||
@@ -350,6 +353,7 @@ void exit_thread(void) | |||
350 | 353 | ||
351 | kfree(t->io_bitmap_ptr); | 354 | kfree(t->io_bitmap_ptr); |
352 | t->io_bitmap_ptr = NULL; | 355 | t->io_bitmap_ptr = NULL; |
356 | clear_thread_flag(TIF_IO_BITMAP); | ||
353 | /* | 357 | /* |
354 | * Careful, clear this in the TSS too: | 358 | * Careful, clear this in the TSS too: |
355 | */ | 359 | */ |
@@ -369,6 +373,7 @@ void flush_thread(void) | |||
369 | if (t->flags & _TIF_IA32) | 373 | if (t->flags & _TIF_IA32) |
370 | current_thread_info()->status |= TS_COMPAT; | 374 | current_thread_info()->status |= TS_COMPAT; |
371 | } | 375 | } |
376 | t->flags &= ~_TIF_DEBUG; | ||
372 | 377 | ||
373 | tsk->thread.debugreg0 = 0; | 378 | tsk->thread.debugreg0 = 0; |
374 | tsk->thread.debugreg1 = 0; | 379 | tsk->thread.debugreg1 = 0; |
@@ -461,7 +466,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
461 | asm("mov %%es,%0" : "=m" (p->thread.es)); | 466 | asm("mov %%es,%0" : "=m" (p->thread.es)); |
462 | asm("mov %%ds,%0" : "=m" (p->thread.ds)); | 467 | asm("mov %%ds,%0" : "=m" (p->thread.ds)); |
463 | 468 | ||
464 | if (unlikely(me->thread.io_bitmap_ptr != NULL)) { | 469 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
465 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 470 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
466 | if (!p->thread.io_bitmap_ptr) { | 471 | if (!p->thread.io_bitmap_ptr) { |
467 | p->thread.io_bitmap_max = 0; | 472 | p->thread.io_bitmap_max = 0; |
@@ -469,6 +474,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
469 | } | 474 | } |
470 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, | 475 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, |
471 | IO_BITMAP_BYTES); | 476 | IO_BITMAP_BYTES); |
477 | set_tsk_thread_flag(p, TIF_IO_BITMAP); | ||
472 | } | 478 | } |
473 | 479 | ||
474 | /* | 480 | /* |
@@ -498,6 +504,40 @@ out: | |||
498 | */ | 504 | */ |
499 | #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) | 505 | #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) |
500 | 506 | ||
507 | static inline void __switch_to_xtra(struct task_struct *prev_p, | ||
508 | struct task_struct *next_p, | ||
509 | struct tss_struct *tss) | ||
510 | { | ||
511 | struct thread_struct *prev, *next; | ||
512 | |||
513 | prev = &prev_p->thread, | ||
514 | next = &next_p->thread; | ||
515 | |||
516 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
517 | loaddebug(next, 0); | ||
518 | loaddebug(next, 1); | ||
519 | loaddebug(next, 2); | ||
520 | loaddebug(next, 3); | ||
521 | /* no 4 and 5 */ | ||
522 | loaddebug(next, 6); | ||
523 | loaddebug(next, 7); | ||
524 | } | ||
525 | |||
526 | if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | ||
527 | /* | ||
528 | * Copy the relevant range of the IO bitmap. | ||
529 | * Normally this is 128 bytes or less: | ||
530 | */ | ||
531 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | ||
532 | max(prev->io_bitmap_max, next->io_bitmap_max)); | ||
533 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { | ||
534 | /* | ||
535 | * Clear any possible leftover bits: | ||
536 | */ | ||
537 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | ||
538 | } | ||
539 | } | ||
540 | |||
501 | /* | 541 | /* |
502 | * switch_to(x,y) should switch tasks from x to y. | 542 | * switch_to(x,y) should switch tasks from x to y. |
503 | * | 543 | * |
@@ -515,6 +555,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
515 | int cpu = smp_processor_id(); | 555 | int cpu = smp_processor_id(); |
516 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 556 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
517 | 557 | ||
558 | /* we're going to use this soon, after a few expensive things */ | ||
559 | if (next_p->fpu_counter>5) | ||
560 | prefetch(&next->i387.fxsave); | ||
561 | |||
518 | /* | 562 | /* |
519 | * Reload esp0, LDT and the page table pointer: | 563 | * Reload esp0, LDT and the page table pointer: |
520 | */ | 564 | */ |
@@ -583,41 +627,29 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
583 | And the AMD workaround requires it to be after DS reload. */ | 627 | And the AMD workaround requires it to be after DS reload. */ |
584 | unlazy_fpu(prev_p); | 628 | unlazy_fpu(prev_p); |
585 | write_pda(kernelstack, | 629 | write_pda(kernelstack, |
586 | task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | 630 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); |
587 | 631 | #ifdef CONFIG_CC_STACKPROTECTOR | |
632 | write_pda(stack_canary, next_p->stack_canary); | ||
588 | /* | 633 | /* |
589 | * Now maybe reload the debug registers | 634 | * Build time only check to make sure the stack_canary is at |
635 | * offset 40 in the pda; this is a gcc ABI requirement | ||
590 | */ | 636 | */ |
591 | if (unlikely(next->debugreg7)) { | 637 | BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); |
592 | loaddebug(next, 0); | 638 | #endif |
593 | loaddebug(next, 1); | ||
594 | loaddebug(next, 2); | ||
595 | loaddebug(next, 3); | ||
596 | /* no 4 and 5 */ | ||
597 | loaddebug(next, 6); | ||
598 | loaddebug(next, 7); | ||
599 | } | ||
600 | |||
601 | 639 | ||
602 | /* | 640 | /* |
603 | * Handle the IO bitmap | 641 | * Now maybe reload the debug registers and handle I/O bitmaps |
604 | */ | 642 | */ |
605 | if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { | 643 | if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) |
606 | if (next->io_bitmap_ptr) | 644 | || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) |
607 | /* | 645 | __switch_to_xtra(prev_p, next_p, tss); |
608 | * Copy the relevant range of the IO bitmap. | ||
609 | * Normally this is 128 bytes or less: | ||
610 | */ | ||
611 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | ||
612 | max(prev->io_bitmap_max, next->io_bitmap_max)); | ||
613 | else { | ||
614 | /* | ||
615 | * Clear any possible leftover bits: | ||
616 | */ | ||
617 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | ||
618 | } | ||
619 | } | ||
620 | 646 | ||
647 | /* If the task has used fpu the last 5 timeslices, just do a full | ||
648 | * restore of the math state immediately to avoid the trap; the | ||
649 | * chances of needing FPU soon are obviously high now | ||
650 | */ | ||
651 | if (next_p->fpu_counter>5) | ||
652 | math_state_restore(); | ||
621 | return prev_p; | 653 | return prev_p; |
622 | } | 654 | } |
623 | 655 | ||
@@ -834,7 +866,7 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | |||
834 | 866 | ||
835 | unsigned long arch_align_stack(unsigned long sp) | 867 | unsigned long arch_align_stack(unsigned long sp) |
836 | { | 868 | { |
837 | if (randomize_va_space) | 869 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
838 | sp -= get_random_int() % 8192; | 870 | sp -= get_random_int() % 8192; |
839 | return sp & ~0xf; | 871 | return sp & ~0xf; |
840 | } | 872 | } |
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 2d50024c9f30..addc14af0c56 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -116,17 +116,17 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r | |||
116 | return addr; | 116 | return addr; |
117 | } | 117 | } |
118 | 118 | ||
119 | static int is_at_popf(struct task_struct *child, struct pt_regs *regs) | 119 | static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) |
120 | { | 120 | { |
121 | int i, copied; | 121 | int i, copied; |
122 | unsigned char opcode[16]; | 122 | unsigned char opcode[15]; |
123 | unsigned long addr = convert_rip_to_linear(child, regs); | 123 | unsigned long addr = convert_rip_to_linear(child, regs); |
124 | 124 | ||
125 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | 125 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); |
126 | for (i = 0; i < copied; i++) { | 126 | for (i = 0; i < copied; i++) { |
127 | switch (opcode[i]) { | 127 | switch (opcode[i]) { |
128 | /* popf */ | 128 | /* popf and iret */ |
129 | case 0x9d: | 129 | case 0x9d: case 0xcf: |
130 | return 1; | 130 | return 1; |
131 | 131 | ||
132 | /* CHECKME: 64 65 */ | 132 | /* CHECKME: 64 65 */ |
@@ -138,14 +138,17 @@ static int is_at_popf(struct task_struct *child, struct pt_regs *regs) | |||
138 | case 0x26: case 0x2e: | 138 | case 0x26: case 0x2e: |
139 | case 0x36: case 0x3e: | 139 | case 0x36: case 0x3e: |
140 | case 0x64: case 0x65: | 140 | case 0x64: case 0x65: |
141 | case 0xf0: case 0xf2: case 0xf3: | 141 | case 0xf2: case 0xf3: |
142 | continue; | 142 | continue; |
143 | 143 | ||
144 | /* REX prefixes */ | ||
145 | case 0x40 ... 0x4f: | 144 | case 0x40 ... 0x4f: |
145 | if (regs->cs != __USER_CS) | ||
146 | /* 32-bit mode: register increment */ | ||
147 | return 0; | ||
148 | /* 64-bit mode: REX prefix */ | ||
146 | continue; | 149 | continue; |
147 | 150 | ||
148 | /* CHECKME: f0, f2, f3 */ | 151 | /* CHECKME: f2, f3 */ |
149 | 152 | ||
150 | /* | 153 | /* |
151 | * pushf: NOTE! We should probably not let | 154 | * pushf: NOTE! We should probably not let |
@@ -186,10 +189,8 @@ static void set_singlestep(struct task_struct *child) | |||
186 | * ..but if TF is changed by the instruction we will trace, | 189 | * ..but if TF is changed by the instruction we will trace, |
187 | * don't mark it as being "us" that set it, so that we | 190 | * don't mark it as being "us" that set it, so that we |
188 | * won't clear it by hand later. | 191 | * won't clear it by hand later. |
189 | * | ||
190 | * AK: this is not enough, LAHF and IRET can change TF in user space too. | ||
191 | */ | 192 | */ |
192 | if (is_at_popf(child, regs)) | 193 | if (is_setting_trap_flag(child, regs)) |
193 | return; | 194 | return; |
194 | 195 | ||
195 | child->ptrace |= PT_DTRACE; | 196 | child->ptrace |= PT_DTRACE; |
@@ -420,9 +421,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
420 | if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 421 | if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) |
421 | break; | 422 | break; |
422 | if (i == 4) { | 423 | if (i == 4) { |
423 | child->thread.debugreg7 = data; | 424 | child->thread.debugreg7 = data; |
425 | if (data) | ||
426 | set_tsk_thread_flag(child, TIF_DEBUG); | ||
427 | else | ||
428 | clear_tsk_thread_flag(child, TIF_DEBUG); | ||
424 | ret = 0; | 429 | ret = 0; |
425 | } | 430 | } |
426 | break; | 431 | break; |
427 | } | 432 | } |
428 | break; | 433 | break; |
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S index d24fa9b72a2b..14e95872c6a3 100644 --- a/arch/x86_64/kernel/relocate_kernel.S +++ b/arch/x86_64/kernel/relocate_kernel.S | |||
@@ -7,31 +7,169 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/page.h> | ||
11 | #include <asm/kexec.h> | ||
10 | 12 | ||
11 | /* | 13 | /* |
12 | * Must be relocatable PIC code callable as a C function, that once | 14 | * Must be relocatable PIC code callable as a C function |
13 | * it starts can not use the previous processes stack. | 15 | */ |
14 | */ | 16 | |
15 | .globl relocate_new_kernel | 17 | #define PTR(x) (x << 3) |
18 | #define PAGE_ALIGNED (1 << PAGE_SHIFT) | ||
19 | #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ | ||
20 | |||
21 | .text | ||
22 | .align PAGE_ALIGNED | ||
16 | .code64 | 23 | .code64 |
24 | .globl relocate_kernel | ||
25 | relocate_kernel: | ||
26 | /* %rdi indirection_page | ||
27 | * %rsi page_list | ||
28 | * %rdx start address | ||
29 | */ | ||
30 | |||
31 | /* map the control page at its virtual address */ | ||
32 | |||
33 | movq $0x0000ff8000000000, %r10 /* mask */ | ||
34 | mov $(39 - 3), %cl /* bits to shift */ | ||
35 | movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | ||
36 | |||
37 | movq %r11, %r9 | ||
38 | andq %r10, %r9 | ||
39 | shrq %cl, %r9 | ||
40 | |||
41 | movq PTR(VA_PGD)(%rsi), %r8 | ||
42 | addq %r8, %r9 | ||
43 | movq PTR(PA_PUD_0)(%rsi), %r8 | ||
44 | orq $PAGE_ATTR, %r8 | ||
45 | movq %r8, (%r9) | ||
46 | |||
47 | shrq $9, %r10 | ||
48 | sub $9, %cl | ||
49 | |||
50 | movq %r11, %r9 | ||
51 | andq %r10, %r9 | ||
52 | shrq %cl, %r9 | ||
53 | |||
54 | movq PTR(VA_PUD_0)(%rsi), %r8 | ||
55 | addq %r8, %r9 | ||
56 | movq PTR(PA_PMD_0)(%rsi), %r8 | ||
57 | orq $PAGE_ATTR, %r8 | ||
58 | movq %r8, (%r9) | ||
59 | |||
60 | shrq $9, %r10 | ||
61 | sub $9, %cl | ||
62 | |||
63 | movq %r11, %r9 | ||
64 | andq %r10, %r9 | ||
65 | shrq %cl, %r9 | ||
66 | |||
67 | movq PTR(VA_PMD_0)(%rsi), %r8 | ||
68 | addq %r8, %r9 | ||
69 | movq PTR(PA_PTE_0)(%rsi), %r8 | ||
70 | orq $PAGE_ATTR, %r8 | ||
71 | movq %r8, (%r9) | ||
72 | |||
73 | shrq $9, %r10 | ||
74 | sub $9, %cl | ||
75 | |||
76 | movq %r11, %r9 | ||
77 | andq %r10, %r9 | ||
78 | shrq %cl, %r9 | ||
79 | |||
80 | movq PTR(VA_PTE_0)(%rsi), %r8 | ||
81 | addq %r8, %r9 | ||
82 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
83 | orq $PAGE_ATTR, %r8 | ||
84 | movq %r8, (%r9) | ||
85 | |||
86 | /* identity map the control page at its physical address */ | ||
87 | |||
88 | movq $0x0000ff8000000000, %r10 /* mask */ | ||
89 | mov $(39 - 3), %cl /* bits to shift */ | ||
90 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | ||
91 | |||
92 | movq %r11, %r9 | ||
93 | andq %r10, %r9 | ||
94 | shrq %cl, %r9 | ||
95 | |||
96 | movq PTR(VA_PGD)(%rsi), %r8 | ||
97 | addq %r8, %r9 | ||
98 | movq PTR(PA_PUD_1)(%rsi), %r8 | ||
99 | orq $PAGE_ATTR, %r8 | ||
100 | movq %r8, (%r9) | ||
101 | |||
102 | shrq $9, %r10 | ||
103 | sub $9, %cl | ||
104 | |||
105 | movq %r11, %r9 | ||
106 | andq %r10, %r9 | ||
107 | shrq %cl, %r9 | ||
108 | |||
109 | movq PTR(VA_PUD_1)(%rsi), %r8 | ||
110 | addq %r8, %r9 | ||
111 | movq PTR(PA_PMD_1)(%rsi), %r8 | ||
112 | orq $PAGE_ATTR, %r8 | ||
113 | movq %r8, (%r9) | ||
114 | |||
115 | shrq $9, %r10 | ||
116 | sub $9, %cl | ||
117 | |||
118 | movq %r11, %r9 | ||
119 | andq %r10, %r9 | ||
120 | shrq %cl, %r9 | ||
121 | |||
122 | movq PTR(VA_PMD_1)(%rsi), %r8 | ||
123 | addq %r8, %r9 | ||
124 | movq PTR(PA_PTE_1)(%rsi), %r8 | ||
125 | orq $PAGE_ATTR, %r8 | ||
126 | movq %r8, (%r9) | ||
127 | |||
128 | shrq $9, %r10 | ||
129 | sub $9, %cl | ||
130 | |||
131 | movq %r11, %r9 | ||
132 | andq %r10, %r9 | ||
133 | shrq %cl, %r9 | ||
134 | |||
135 | movq PTR(VA_PTE_1)(%rsi), %r8 | ||
136 | addq %r8, %r9 | ||
137 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
138 | orq $PAGE_ATTR, %r8 | ||
139 | movq %r8, (%r9) | ||
140 | |||
17 | relocate_new_kernel: | 141 | relocate_new_kernel: |
18 | /* %rdi page_list | 142 | /* %rdi indirection_page |
19 | * %rsi reboot_code_buffer | 143 | * %rsi page_list |
20 | * %rdx start address | 144 | * %rdx start address |
21 | * %rcx page_table | ||
22 | * %r8 arg5 | ||
23 | * %r9 arg6 | ||
24 | */ | 145 | */ |
25 | 146 | ||
26 | /* zero out flags, and disable interrupts */ | 147 | /* zero out flags, and disable interrupts */ |
27 | pushq $0 | 148 | pushq $0 |
28 | popfq | 149 | popfq |
29 | 150 | ||
30 | /* set a new stack at the bottom of our page... */ | 151 | /* get physical address of control page now */ |
31 | lea 4096(%rsi), %rsp | 152 | /* this is impossible after page table switch */ |
153 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
154 | |||
155 | /* get physical address of page table now too */ | ||
156 | movq PTR(PA_TABLE_PAGE)(%rsi), %rcx | ||
32 | 157 | ||
33 | /* store the parameters back on the stack */ | 158 | /* switch to new set of page tables */ |
34 | pushq %rdx /* store the start address */ | 159 | movq PTR(PA_PGD)(%rsi), %r9 |
160 | movq %r9, %cr3 | ||
161 | |||
162 | /* setup a new stack at the end of the physical control page */ | ||
163 | lea 4096(%r8), %rsp | ||
164 | |||
165 | /* jump to identity mapped page */ | ||
166 | addq $(identity_mapped - relocate_kernel), %r8 | ||
167 | pushq %r8 | ||
168 | ret | ||
169 | |||
170 | identity_mapped: | ||
171 | /* store the start address on the stack */ | ||
172 | pushq %rdx | ||
35 | 173 | ||
36 | /* Set cr0 to a known state: | 174 | /* Set cr0 to a known state: |
37 | * 31 1 == Paging enabled | 175 | * 31 1 == Paging enabled |
@@ -136,8 +274,3 @@ relocate_new_kernel: | |||
136 | xorq %r15, %r15 | 274 | xorq %r15, %r15 |
137 | 275 | ||
138 | ret | 276 | ret |
139 | relocate_new_kernel_end: | ||
140 | |||
141 | .globl relocate_new_kernel_size | ||
142 | relocate_new_kernel_size: | ||
143 | .quad relocate_new_kernel_end - relocate_new_kernel | ||
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 4b39f0da17f3..f98e48cae6da 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -74,16 +74,6 @@ EXPORT_SYMBOL(boot_cpu_data); | |||
74 | 74 | ||
75 | unsigned long mmu_cr4_features; | 75 | unsigned long mmu_cr4_features; |
76 | 76 | ||
77 | int acpi_disabled; | ||
78 | EXPORT_SYMBOL(acpi_disabled); | ||
79 | #ifdef CONFIG_ACPI | ||
80 | extern int __initdata acpi_ht; | ||
81 | extern acpi_interrupt_flags acpi_sci_flags; | ||
82 | int __initdata acpi_force = 0; | ||
83 | #endif | ||
84 | |||
85 | int acpi_numa __initdata; | ||
86 | |||
87 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | 77 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ |
88 | int bootloader_type; | 78 | int bootloader_type; |
89 | 79 | ||
@@ -107,7 +97,6 @@ struct sys_desc_table_struct { | |||
107 | 97 | ||
108 | struct edid_info edid_info; | 98 | struct edid_info edid_info; |
109 | EXPORT_SYMBOL_GPL(edid_info); | 99 | EXPORT_SYMBOL_GPL(edid_info); |
110 | struct e820map e820; | ||
111 | 100 | ||
112 | extern int root_mountflags; | 101 | extern int root_mountflags; |
113 | 102 | ||
@@ -276,185 +265,22 @@ static void __init probe_roms(void) | |||
276 | } | 265 | } |
277 | } | 266 | } |
278 | 267 | ||
279 | /* Check for full argument with no trailing characters */ | 268 | #ifdef CONFIG_PROC_VMCORE |
280 | static int fullarg(char *p, char *arg) | 269 | /* elfcorehdr= specifies the location of elf core header |
270 | * stored by the crashed kernel. This option will be passed | ||
271 | * by kexec loader to the capture kernel. | ||
272 | */ | ||
273 | static int __init setup_elfcorehdr(char *arg) | ||
281 | { | 274 | { |
282 | int l = strlen(arg); | 275 | char *end; |
283 | return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); | 276 | if (!arg) |
277 | return -EINVAL; | ||
278 | elfcorehdr_addr = memparse(arg, &end); | ||
279 | return end > arg ? 0 : -EINVAL; | ||
284 | } | 280 | } |
285 | 281 | early_param("elfcorehdr", setup_elfcorehdr); | |
286 | static __init void parse_cmdline_early (char ** cmdline_p) | ||
287 | { | ||
288 | char c = ' ', *to = command_line, *from = COMMAND_LINE; | ||
289 | int len = 0; | ||
290 | int userdef = 0; | ||
291 | |||
292 | for (;;) { | ||
293 | if (c != ' ') | ||
294 | goto next_char; | ||
295 | |||
296 | #ifdef CONFIG_SMP | ||
297 | /* | ||
298 | * If the BIOS enumerates physical processors before logical, | ||
299 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
300 | */ | ||
301 | else if (!memcmp(from, "maxcpus=", 8)) { | ||
302 | extern unsigned int maxcpus; | ||
303 | |||
304 | maxcpus = simple_strtoul(from + 8, NULL, 0); | ||
305 | } | ||
306 | #endif | ||
307 | #ifdef CONFIG_ACPI | ||
308 | /* "acpi=off" disables both ACPI table parsing and interpreter init */ | ||
309 | if (fullarg(from,"acpi=off")) | ||
310 | disable_acpi(); | ||
311 | |||
312 | if (fullarg(from, "acpi=force")) { | ||
313 | /* add later when we do DMI horrors: */ | ||
314 | acpi_force = 1; | ||
315 | acpi_disabled = 0; | ||
316 | } | ||
317 | |||
318 | /* acpi=ht just means: do ACPI MADT parsing | ||
319 | at bootup, but don't enable the full ACPI interpreter */ | ||
320 | if (fullarg(from, "acpi=ht")) { | ||
321 | if (!acpi_force) | ||
322 | disable_acpi(); | ||
323 | acpi_ht = 1; | ||
324 | } | ||
325 | else if (fullarg(from, "pci=noacpi")) | ||
326 | acpi_disable_pci(); | ||
327 | else if (fullarg(from, "acpi=noirq")) | ||
328 | acpi_noirq_set(); | ||
329 | |||
330 | else if (fullarg(from, "acpi_sci=edge")) | ||
331 | acpi_sci_flags.trigger = 1; | ||
332 | else if (fullarg(from, "acpi_sci=level")) | ||
333 | acpi_sci_flags.trigger = 3; | ||
334 | else if (fullarg(from, "acpi_sci=high")) | ||
335 | acpi_sci_flags.polarity = 1; | ||
336 | else if (fullarg(from, "acpi_sci=low")) | ||
337 | acpi_sci_flags.polarity = 3; | ||
338 | |||
339 | /* acpi=strict disables out-of-spec workarounds */ | ||
340 | else if (fullarg(from, "acpi=strict")) { | ||
341 | acpi_strict = 1; | ||
342 | } | ||
343 | #ifdef CONFIG_X86_IO_APIC | ||
344 | else if (fullarg(from, "acpi_skip_timer_override")) | ||
345 | acpi_skip_timer_override = 1; | ||
346 | #endif | ||
347 | #endif | ||
348 | |||
349 | if (fullarg(from, "disable_timer_pin_1")) | ||
350 | disable_timer_pin_1 = 1; | ||
351 | if (fullarg(from, "enable_timer_pin_1")) | ||
352 | disable_timer_pin_1 = -1; | ||
353 | |||
354 | if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { | ||
355 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
356 | disable_apic = 1; | ||
357 | } | ||
358 | |||
359 | if (fullarg(from, "noapic")) | ||
360 | skip_ioapic_setup = 1; | ||
361 | |||
362 | if (fullarg(from,"apic")) { | ||
363 | skip_ioapic_setup = 0; | ||
364 | ioapic_force = 1; | ||
365 | } | ||
366 | |||
367 | if (!memcmp(from, "mem=", 4)) | ||
368 | parse_memopt(from+4, &from); | ||
369 | |||
370 | if (!memcmp(from, "memmap=", 7)) { | ||
371 | /* exactmap option is for used defined memory */ | ||
372 | if (!memcmp(from+7, "exactmap", 8)) { | ||
373 | #ifdef CONFIG_CRASH_DUMP | ||
374 | /* If we are doing a crash dump, we | ||
375 | * still need to know the real mem | ||
376 | * size before original memory map is | ||
377 | * reset. | ||
378 | */ | ||
379 | saved_max_pfn = e820_end_of_ram(); | ||
380 | #endif | ||
381 | from += 8+7; | ||
382 | end_pfn_map = 0; | ||
383 | e820.nr_map = 0; | ||
384 | userdef = 1; | ||
385 | } | ||
386 | else { | ||
387 | parse_memmapopt(from+7, &from); | ||
388 | userdef = 1; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | #ifdef CONFIG_NUMA | ||
393 | if (!memcmp(from, "numa=", 5)) | ||
394 | numa_setup(from+5); | ||
395 | #endif | ||
396 | |||
397 | if (!memcmp(from,"iommu=",6)) { | ||
398 | iommu_setup(from+6); | ||
399 | } | ||
400 | |||
401 | if (fullarg(from,"oops=panic")) | ||
402 | panic_on_oops = 1; | ||
403 | |||
404 | if (!memcmp(from, "noexec=", 7)) | ||
405 | nonx_setup(from + 7); | ||
406 | |||
407 | #ifdef CONFIG_KEXEC | ||
408 | /* crashkernel=size@addr specifies the location to reserve for | ||
409 | * a crash kernel. By reserving this memory we guarantee | ||
410 | * that linux never set's it up as a DMA target. | ||
411 | * Useful for holding code to do something appropriate | ||
412 | * after a kernel panic. | ||
413 | */ | ||
414 | else if (!memcmp(from, "crashkernel=", 12)) { | ||
415 | unsigned long size, base; | ||
416 | size = memparse(from+12, &from); | ||
417 | if (*from == '@') { | ||
418 | base = memparse(from+1, &from); | ||
419 | /* FIXME: Do I want a sanity check | ||
420 | * to validate the memory range? | ||
421 | */ | ||
422 | crashk_res.start = base; | ||
423 | crashk_res.end = base + size - 1; | ||
424 | } | ||
425 | } | ||
426 | #endif | ||
427 | |||
428 | #ifdef CONFIG_PROC_VMCORE | ||
429 | /* elfcorehdr= specifies the location of elf core header | ||
430 | * stored by the crashed kernel. This option will be passed | ||
431 | * by kexec loader to the capture kernel. | ||
432 | */ | ||
433 | else if(!memcmp(from, "elfcorehdr=", 11)) | ||
434 | elfcorehdr_addr = memparse(from+11, &from); | ||
435 | #endif | ||
436 | |||
437 | #ifdef CONFIG_HOTPLUG_CPU | ||
438 | else if (!memcmp(from, "additional_cpus=", 16)) | ||
439 | setup_additional_cpus(from+16); | ||
440 | #endif | 282 | #endif |
441 | 283 | ||
442 | next_char: | ||
443 | c = *(from++); | ||
444 | if (!c) | ||
445 | break; | ||
446 | if (COMMAND_LINE_SIZE <= ++len) | ||
447 | break; | ||
448 | *(to++) = c; | ||
449 | } | ||
450 | if (userdef) { | ||
451 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
452 | e820_print_map("user"); | ||
453 | } | ||
454 | *to = '\0'; | ||
455 | *cmdline_p = command_line; | ||
456 | } | ||
457 | |||
458 | #ifndef CONFIG_NUMA | 284 | #ifndef CONFIG_NUMA |
459 | static void __init | 285 | static void __init |
460 | contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 286 | contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
@@ -521,6 +347,8 @@ static void discover_ebda(void) | |||
521 | 347 | ||
522 | void __init setup_arch(char **cmdline_p) | 348 | void __init setup_arch(char **cmdline_p) |
523 | { | 349 | { |
350 | printk(KERN_INFO "Command line: %s\n", saved_command_line); | ||
351 | |||
524 | ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); | 352 | ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); |
525 | screen_info = SCREEN_INFO; | 353 | screen_info = SCREEN_INFO; |
526 | edid_info = EDID_INFO; | 354 | edid_info = EDID_INFO; |
@@ -547,16 +375,21 @@ void __init setup_arch(char **cmdline_p) | |||
547 | data_resource.start = virt_to_phys(&_etext); | 375 | data_resource.start = virt_to_phys(&_etext); |
548 | data_resource.end = virt_to_phys(&_edata)-1; | 376 | data_resource.end = virt_to_phys(&_edata)-1; |
549 | 377 | ||
550 | parse_cmdline_early(cmdline_p); | ||
551 | |||
552 | early_identify_cpu(&boot_cpu_data); | 378 | early_identify_cpu(&boot_cpu_data); |
553 | 379 | ||
380 | strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); | ||
381 | *cmdline_p = command_line; | ||
382 | |||
383 | parse_early_param(); | ||
384 | |||
385 | finish_e820_parsing(); | ||
386 | |||
554 | /* | 387 | /* |
555 | * partially used pages are not usable - thus | 388 | * partially used pages are not usable - thus |
556 | * we are rounding upwards: | 389 | * we are rounding upwards: |
557 | */ | 390 | */ |
558 | end_pfn = e820_end_of_ram(); | 391 | end_pfn = e820_end_of_ram(); |
559 | num_physpages = end_pfn; /* for pfn_valid */ | 392 | num_physpages = end_pfn; |
560 | 393 | ||
561 | check_efer(); | 394 | check_efer(); |
562 | 395 | ||
@@ -576,6 +409,11 @@ void __init setup_arch(char **cmdline_p) | |||
576 | acpi_boot_table_init(); | 409 | acpi_boot_table_init(); |
577 | #endif | 410 | #endif |
578 | 411 | ||
412 | /* How many end-of-memory variables you have, grandma! */ | ||
413 | max_low_pfn = end_pfn; | ||
414 | max_pfn = end_pfn; | ||
415 | high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; | ||
416 | |||
579 | #ifdef CONFIG_ACPI_NUMA | 417 | #ifdef CONFIG_ACPI_NUMA |
580 | /* | 418 | /* |
581 | * Parse SRAT to discover nodes. | 419 | * Parse SRAT to discover nodes. |
@@ -625,12 +463,10 @@ void __init setup_arch(char **cmdline_p) | |||
625 | */ | 463 | */ |
626 | acpi_reserve_bootmem(); | 464 | acpi_reserve_bootmem(); |
627 | #endif | 465 | #endif |
628 | #ifdef CONFIG_X86_LOCAL_APIC | ||
629 | /* | 466 | /* |
630 | * Find and reserve possible boot-time SMP configuration: | 467 | * Find and reserve possible boot-time SMP configuration: |
631 | */ | 468 | */ |
632 | find_smp_config(); | 469 | find_smp_config(); |
633 | #endif | ||
634 | #ifdef CONFIG_BLK_DEV_INITRD | 470 | #ifdef CONFIG_BLK_DEV_INITRD |
635 | if (LOADER_TYPE && INITRD_START) { | 471 | if (LOADER_TYPE && INITRD_START) { |
636 | if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { | 472 | if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { |
@@ -657,7 +493,9 @@ void __init setup_arch(char **cmdline_p) | |||
657 | 493 | ||
658 | paging_init(); | 494 | paging_init(); |
659 | 495 | ||
660 | check_ioapic(); | 496 | #ifdef CONFIG_PCI |
497 | early_quirks(); | ||
498 | #endif | ||
661 | 499 | ||
662 | /* | 500 | /* |
663 | * set this early, so we dont allocate cpu0 | 501 | * set this early, so we dont allocate cpu0 |
@@ -674,14 +512,12 @@ void __init setup_arch(char **cmdline_p) | |||
674 | 512 | ||
675 | init_cpu_to_node(); | 513 | init_cpu_to_node(); |
676 | 514 | ||
677 | #ifdef CONFIG_X86_LOCAL_APIC | ||
678 | /* | 515 | /* |
679 | * get boot-time SMP configuration: | 516 | * get boot-time SMP configuration: |
680 | */ | 517 | */ |
681 | if (smp_found_config) | 518 | if (smp_found_config) |
682 | get_smp_config(); | 519 | get_smp_config(); |
683 | init_apic_mappings(); | 520 | init_apic_mappings(); |
684 | #endif | ||
685 | 521 | ||
686 | /* | 522 | /* |
687 | * Request address space for all standard RAM and ROM resources | 523 | * Request address space for all standard RAM and ROM resources |
@@ -839,7 +675,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
839 | #endif | 675 | #endif |
840 | } | 676 | } |
841 | 677 | ||
842 | static void __init init_amd(struct cpuinfo_x86 *c) | 678 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
843 | { | 679 | { |
844 | unsigned level; | 680 | unsigned level; |
845 | 681 | ||
@@ -895,6 +731,12 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
895 | 731 | ||
896 | /* Fix cpuid4 emulation for more */ | 732 | /* Fix cpuid4 emulation for more */ |
897 | num_cache_leaves = 3; | 733 | num_cache_leaves = 3; |
734 | |||
735 | /* When there is only one core no need to synchronize RDTSC */ | ||
736 | if (num_possible_cpus() == 1) | ||
737 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
738 | else | ||
739 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
898 | } | 740 | } |
899 | 741 | ||
900 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 742 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -976,8 +818,7 @@ static void srat_detect_node(void) | |||
976 | node = first_node(node_online_map); | 818 | node = first_node(node_online_map); |
977 | numa_set_node(cpu, node); | 819 | numa_set_node(cpu, node); |
978 | 820 | ||
979 | if (acpi_numa > 0) | 821 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); |
980 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
981 | #endif | 822 | #endif |
982 | } | 823 | } |
983 | 824 | ||
@@ -1011,6 +852,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
1011 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 852 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
1012 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 853 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
1013 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 854 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
855 | if (c->x86 == 6) | ||
856 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | ||
1014 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | 857 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
1015 | c->x86_max_cores = intel_num_cpu_cores(c); | 858 | c->x86_max_cores = intel_num_cpu_cores(c); |
1016 | 859 | ||
@@ -1229,8 +1072,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1229 | 1072 | ||
1230 | /* Intel-defined (#2) */ | 1073 | /* Intel-defined (#2) */ |
1231 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 1074 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
1232 | "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, | 1075 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
1233 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1076 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, |
1234 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1077 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1235 | 1078 | ||
1236 | /* VIA/Cyrix/Centaur-defined */ | 1079 | /* VIA/Cyrix/Centaur-defined */ |
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 417de564456e..8c4b80fe71a1 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <asm/proto.h> | 24 | #include <asm/proto.h> |
25 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
26 | 26 | ||
27 | char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; | 27 | char x86_boot_params[BOOT_PARAM_SIZE] __initdata; |
28 | 28 | ||
29 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 29 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
30 | 30 | ||
@@ -46,8 +46,10 @@ Control non executable mappings for 64bit processes. | |||
46 | on Enable(default) | 46 | on Enable(default) |
47 | off Disable | 47 | off Disable |
48 | */ | 48 | */ |
49 | int __init nonx_setup(char *str) | 49 | static int __init nonx_setup(char *str) |
50 | { | 50 | { |
51 | if (!str) | ||
52 | return -EINVAL; | ||
51 | if (!strncmp(str, "on", 2)) { | 53 | if (!strncmp(str, "on", 2)) { |
52 | __supported_pte_mask |= _PAGE_NX; | 54 | __supported_pte_mask |= _PAGE_NX; |
53 | do_not_nx = 0; | 55 | do_not_nx = 0; |
@@ -55,9 +57,9 @@ int __init nonx_setup(char *str) | |||
55 | do_not_nx = 1; | 57 | do_not_nx = 1; |
56 | __supported_pte_mask &= ~_PAGE_NX; | 58 | __supported_pte_mask &= ~_PAGE_NX; |
57 | } | 59 | } |
58 | return 1; | 60 | return 0; |
59 | } | 61 | } |
60 | __setup("noexec=", nonx_setup); /* parsed early actually */ | 62 | early_param("noexec", nonx_setup); |
61 | 63 | ||
62 | int force_personality32 = 0; | 64 | int force_personality32 = 0; |
63 | 65 | ||
@@ -93,12 +95,9 @@ void __init setup_per_cpu_areas(void) | |||
93 | #endif | 95 | #endif |
94 | 96 | ||
95 | /* Copy section for each CPU (we discard the original) */ | 97 | /* Copy section for each CPU (we discard the original) */ |
96 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); | 98 | size = PERCPU_ENOUGH_ROOM; |
97 | #ifdef CONFIG_MODULES | ||
98 | if (size < PERCPU_ENOUGH_ROOM) | ||
99 | size = PERCPU_ENOUGH_ROOM; | ||
100 | #endif | ||
101 | 99 | ||
100 | printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); | ||
102 | for_each_cpu_mask (i, cpu_possible_map) { | 101 | for_each_cpu_mask (i, cpu_possible_map) { |
103 | char *ptr; | 102 | char *ptr; |
104 | 103 | ||
@@ -122,7 +121,10 @@ void pda_init(int cpu) | |||
122 | 121 | ||
123 | /* Setup up data that may be needed in __get_free_pages early */ | 122 | /* Setup up data that may be needed in __get_free_pages early */ |
124 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); | 123 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); |
124 | /* Memory clobbers used to order PDA accessed */ | ||
125 | mb(); | ||
125 | wrmsrl(MSR_GS_BASE, pda); | 126 | wrmsrl(MSR_GS_BASE, pda); |
127 | mb(); | ||
126 | 128 | ||
127 | pda->cpunumber = cpu; | 129 | pda->cpunumber = cpu; |
128 | pda->irqcount = -1; | 130 | pda->irqcount = -1; |
@@ -178,6 +180,8 @@ void __cpuinit check_efer(void) | |||
178 | } | 180 | } |
179 | } | 181 | } |
180 | 182 | ||
183 | unsigned long kernel_eflags; | ||
184 | |||
181 | /* | 185 | /* |
182 | * cpu_init() initializes state that is per-CPU. Some data is already | 186 | * cpu_init() initializes state that is per-CPU. Some data is already |
183 | * initialized (naturally) in the bootstrap process, such as the GDT | 187 | * initialized (naturally) in the bootstrap process, such as the GDT |
@@ -235,28 +239,17 @@ void __cpuinit cpu_init (void) | |||
235 | * set up and load the per-CPU TSS | 239 | * set up and load the per-CPU TSS |
236 | */ | 240 | */ |
237 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 241 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
242 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
243 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
244 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
245 | }; | ||
238 | if (cpu) { | 246 | if (cpu) { |
239 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
240 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
241 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
242 | }; | ||
243 | |||
244 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | 247 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); |
245 | if (!estacks) | 248 | if (!estacks) |
246 | panic("Cannot allocate exception stack %ld %d\n", | 249 | panic("Cannot allocate exception stack %ld %d\n", |
247 | v, cpu); | 250 | v, cpu); |
248 | } | 251 | } |
249 | switch (v + 1) { | 252 | estacks += PAGE_SIZE << order[v]; |
250 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
251 | case DEBUG_STACK: | ||
252 | cpu_pda(cpu)->debugstack = (unsigned long)estacks; | ||
253 | estacks += DEBUG_STKSZ; | ||
254 | break; | ||
255 | #endif | ||
256 | default: | ||
257 | estacks += EXCEPTION_STKSZ; | ||
258 | break; | ||
259 | } | ||
260 | orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; | 253 | orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; |
261 | } | 254 | } |
262 | 255 | ||
@@ -290,4 +283,6 @@ void __cpuinit cpu_init (void) | |||
290 | set_debugreg(0UL, 7); | 283 | set_debugreg(0UL, 7); |
291 | 284 | ||
292 | fpu_init(); | 285 | fpu_init(); |
286 | |||
287 | raw_local_save_flags(kernel_eflags); | ||
293 | } | 288 | } |
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 28161170fb0a..49ec324cd141 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -38,37 +38,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
38 | sigset_t *set, struct pt_regs * regs); | 38 | sigset_t *set, struct pt_regs * regs); |
39 | 39 | ||
40 | asmlinkage long | 40 | asmlinkage long |
41 | sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs) | ||
42 | { | ||
43 | sigset_t saveset, newset; | ||
44 | |||
45 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
46 | if (sigsetsize != sizeof(sigset_t)) | ||
47 | return -EINVAL; | ||
48 | |||
49 | if (copy_from_user(&newset, unewset, sizeof(newset))) | ||
50 | return -EFAULT; | ||
51 | sigdelsetmask(&newset, ~_BLOCKABLE); | ||
52 | |||
53 | spin_lock_irq(&current->sighand->siglock); | ||
54 | saveset = current->blocked; | ||
55 | current->blocked = newset; | ||
56 | recalc_sigpending(); | ||
57 | spin_unlock_irq(&current->sighand->siglock); | ||
58 | #ifdef DEBUG_SIG | ||
59 | printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", | ||
60 | saveset, newset, regs, regs->rip); | ||
61 | #endif | ||
62 | regs->rax = -EINTR; | ||
63 | while (1) { | ||
64 | current->state = TASK_INTERRUPTIBLE; | ||
65 | schedule(); | ||
66 | if (do_signal(regs, &saveset)) | ||
67 | return -EINTR; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | asmlinkage long | ||
72 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 41 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
73 | struct pt_regs *regs) | 42 | struct pt_regs *regs) |
74 | { | 43 | { |
@@ -308,11 +277,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
308 | #endif | 277 | #endif |
309 | 278 | ||
310 | /* Set up registers for signal handler */ | 279 | /* Set up registers for signal handler */ |
311 | { | ||
312 | struct exec_domain *ed = current_thread_info()->exec_domain; | ||
313 | if (unlikely(ed && ed->signal_invmap && sig < 32)) | ||
314 | sig = ed->signal_invmap[sig]; | ||
315 | } | ||
316 | regs->rdi = sig; | 280 | regs->rdi = sig; |
317 | /* In case the signal handler was declared without prototypes */ | 281 | /* In case the signal handler was declared without prototypes */ |
318 | regs->rax = 0; | 282 | regs->rax = 0; |
@@ -341,11 +305,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
341 | current->comm, current->pid, frame, regs->rip, frame->pretcode); | 305 | current->comm, current->pid, frame, regs->rip, frame->pretcode); |
342 | #endif | 306 | #endif |
343 | 307 | ||
344 | return 1; | 308 | return 0; |
345 | 309 | ||
346 | give_sigsegv: | 310 | give_sigsegv: |
347 | force_sigsegv(sig, current); | 311 | force_sigsegv(sig, current); |
348 | return 0; | 312 | return -EFAULT; |
349 | } | 313 | } |
350 | 314 | ||
351 | /* | 315 | /* |
@@ -408,7 +372,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
408 | #endif | 372 | #endif |
409 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | 373 | ret = setup_rt_frame(sig, ka, info, oldset, regs); |
410 | 374 | ||
411 | if (ret) { | 375 | if (ret == 0) { |
412 | spin_lock_irq(&current->sighand->siglock); | 376 | spin_lock_irq(&current->sighand->siglock); |
413 | sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); | 377 | sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); |
414 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 378 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
@@ -425,11 +389,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
425 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 389 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
426 | * mistake. | 390 | * mistake. |
427 | */ | 391 | */ |
428 | int do_signal(struct pt_regs *regs, sigset_t *oldset) | 392 | static void do_signal(struct pt_regs *regs) |
429 | { | 393 | { |
430 | struct k_sigaction ka; | 394 | struct k_sigaction ka; |
431 | siginfo_t info; | 395 | siginfo_t info; |
432 | int signr; | 396 | int signr; |
397 | sigset_t *oldset; | ||
433 | 398 | ||
434 | /* | 399 | /* |
435 | * We want the common case to go fast, which | 400 | * We want the common case to go fast, which |
@@ -438,9 +403,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) | |||
438 | * if so. | 403 | * if so. |
439 | */ | 404 | */ |
440 | if (!user_mode(regs)) | 405 | if (!user_mode(regs)) |
441 | return 1; | 406 | return; |
442 | 407 | ||
443 | if (!oldset) | 408 | if (test_thread_flag(TIF_RESTORE_SIGMASK)) |
409 | oldset = &current->saved_sigmask; | ||
410 | else | ||
444 | oldset = &current->blocked; | 411 | oldset = &current->blocked; |
445 | 412 | ||
446 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 413 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
@@ -454,30 +421,46 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) | |||
454 | set_debugreg(current->thread.debugreg7, 7); | 421 | set_debugreg(current->thread.debugreg7, 7); |
455 | 422 | ||
456 | /* Whee! Actually deliver the signal. */ | 423 | /* Whee! Actually deliver the signal. */ |
457 | return handle_signal(signr, &info, &ka, oldset, regs); | 424 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
425 | /* a signal was successfully delivered; the saved | ||
426 | * sigmask will have been stored in the signal frame, | ||
427 | * and will be restored by sigreturn, so we can simply | ||
428 | * clear the TIF_RESTORE_SIGMASK flag */ | ||
429 | clear_thread_flag(TIF_RESTORE_SIGMASK); | ||
430 | } | ||
431 | return; | ||
458 | } | 432 | } |
459 | 433 | ||
460 | /* Did we come from a system call? */ | 434 | /* Did we come from a system call? */ |
461 | if ((long)regs->orig_rax >= 0) { | 435 | if ((long)regs->orig_rax >= 0) { |
462 | /* Restart the system call - no handlers present */ | 436 | /* Restart the system call - no handlers present */ |
463 | long res = regs->rax; | 437 | long res = regs->rax; |
464 | if (res == -ERESTARTNOHAND || | 438 | switch (res) { |
465 | res == -ERESTARTSYS || | 439 | case -ERESTARTNOHAND: |
466 | res == -ERESTARTNOINTR) { | 440 | case -ERESTARTSYS: |
441 | case -ERESTARTNOINTR: | ||
467 | regs->rax = regs->orig_rax; | 442 | regs->rax = regs->orig_rax; |
468 | regs->rip -= 2; | 443 | regs->rip -= 2; |
469 | } | 444 | break; |
470 | if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { | 445 | case -ERESTART_RESTARTBLOCK: |
471 | regs->rax = test_thread_flag(TIF_IA32) ? | 446 | regs->rax = test_thread_flag(TIF_IA32) ? |
472 | __NR_ia32_restart_syscall : | 447 | __NR_ia32_restart_syscall : |
473 | __NR_restart_syscall; | 448 | __NR_restart_syscall; |
474 | regs->rip -= 2; | 449 | regs->rip -= 2; |
450 | break; | ||
475 | } | 451 | } |
476 | } | 452 | } |
477 | return 0; | 453 | |
454 | /* if there's no signal to deliver, we just put the saved sigmask | ||
455 | back. */ | ||
456 | if (test_thread_flag(TIF_RESTORE_SIGMASK)) { | ||
457 | clear_thread_flag(TIF_RESTORE_SIGMASK); | ||
458 | sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); | ||
459 | } | ||
478 | } | 460 | } |
479 | 461 | ||
480 | void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags) | 462 | void |
463 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
481 | { | 464 | { |
482 | #ifdef DEBUG_SIG | 465 | #ifdef DEBUG_SIG |
483 | printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", | 466 | printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", |
@@ -491,8 +474,8 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_ | |||
491 | } | 474 | } |
492 | 475 | ||
493 | /* deal with pending signal delivery */ | 476 | /* deal with pending signal delivery */ |
494 | if (thread_info_flags & _TIF_SIGPENDING) | 477 | if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) |
495 | do_signal(regs,oldset); | 478 | do_signal(regs); |
496 | } | 479 | } |
497 | 480 | ||
498 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 481 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 06af6ca60129..4f67697f5036 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -522,26 +522,3 @@ asmlinkage void smp_call_function_interrupt(void) | |||
522 | } | 522 | } |
523 | } | 523 | } |
524 | 524 | ||
525 | int safe_smp_processor_id(void) | ||
526 | { | ||
527 | unsigned apicid, i; | ||
528 | |||
529 | if (disable_apic) | ||
530 | return 0; | ||
531 | |||
532 | apicid = hard_smp_processor_id(); | ||
533 | if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid) | ||
534 | return apicid; | ||
535 | |||
536 | for (i = 0; i < NR_CPUS; ++i) { | ||
537 | if (x86_cpu_to_apicid[i] == apicid) | ||
538 | return i; | ||
539 | } | ||
540 | |||
541 | /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI, | ||
542 | * or called too early. Either way, we must be CPU 0. */ | ||
543 | if (x86_cpu_to_apicid[0] == BAD_APICID) | ||
544 | return 0; | ||
545 | |||
546 | return 0; /* Should not happen */ | ||
547 | } | ||
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 3ae9ffddddc0..7b7a6870288a 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -1091,7 +1091,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
1091 | /* | 1091 | /* |
1092 | * Switch from PIC to APIC mode. | 1092 | * Switch from PIC to APIC mode. |
1093 | */ | 1093 | */ |
1094 | connect_bsp_APIC(); | ||
1095 | setup_local_APIC(); | 1094 | setup_local_APIC(); |
1096 | 1095 | ||
1097 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { | 1096 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { |
@@ -1176,12 +1175,9 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1176 | void __init smp_cpus_done(unsigned int max_cpus) | 1175 | void __init smp_cpus_done(unsigned int max_cpus) |
1177 | { | 1176 | { |
1178 | smp_cleanup_boot(); | 1177 | smp_cleanup_boot(); |
1179 | |||
1180 | #ifdef CONFIG_X86_IO_APIC | ||
1181 | setup_ioapic_dest(); | 1178 | setup_ioapic_dest(); |
1182 | #endif | ||
1183 | |||
1184 | check_nmi_watchdog(); | 1179 | check_nmi_watchdog(); |
1180 | time_init_gtod(); | ||
1185 | } | 1181 | } |
1186 | 1182 | ||
1187 | #ifdef CONFIG_HOTPLUG_CPU | 1183 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1234,6 +1230,8 @@ int __cpu_disable(void) | |||
1234 | if (cpu == 0) | 1230 | if (cpu == 0) |
1235 | return -EBUSY; | 1231 | return -EBUSY; |
1236 | 1232 | ||
1233 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1234 | stop_apic_nmi_watchdog(NULL); | ||
1237 | clear_local_APIC(); | 1235 | clear_local_APIC(); |
1238 | 1236 | ||
1239 | /* | 1237 | /* |
@@ -1273,11 +1271,11 @@ void __cpu_die(unsigned int cpu) | |||
1273 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1271 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1274 | } | 1272 | } |
1275 | 1273 | ||
1276 | __init int setup_additional_cpus(char *s) | 1274 | static __init int setup_additional_cpus(char *s) |
1277 | { | 1275 | { |
1278 | return get_option(&s, &additional_cpus); | 1276 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; |
1279 | } | 1277 | } |
1280 | __setup("additional_cpus=", setup_additional_cpus); | 1278 | early_param("additional_cpus", setup_additional_cpus); |
1281 | 1279 | ||
1282 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1280 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1283 | 1281 | ||
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c index 32cf55eb9af8..6026b31d037e 100644 --- a/arch/x86_64/kernel/stacktrace.c +++ b/arch/x86_64/kernel/stacktrace.c | |||
@@ -7,215 +7,49 @@ | |||
7 | */ | 7 | */ |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/stacktrace.h> | 9 | #include <linux/stacktrace.h> |
10 | #include <linux/module.h> | ||
11 | #include <asm/stacktrace.h> | ||
10 | 12 | ||
11 | #include <asm/smp.h> | 13 | static void save_stack_warning(void *data, char *msg) |
12 | |||
13 | static inline int | ||
14 | in_range(unsigned long start, unsigned long addr, unsigned long end) | ||
15 | { | 14 | { |
16 | return addr >= start && addr <= end; | ||
17 | } | 15 | } |
18 | 16 | ||
19 | static unsigned long | 17 | static void |
20 | get_stack_end(struct task_struct *task, unsigned long stack) | 18 | save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) |
21 | { | 19 | { |
22 | unsigned long stack_start, stack_end, flags; | ||
23 | int i, cpu; | ||
24 | |||
25 | /* | ||
26 | * The most common case is that we are in the task stack: | ||
27 | */ | ||
28 | stack_start = (unsigned long)task->thread_info; | ||
29 | stack_end = stack_start + THREAD_SIZE; | ||
30 | |||
31 | if (in_range(stack_start, stack, stack_end)) | ||
32 | return stack_end; | ||
33 | |||
34 | /* | ||
35 | * We are in an interrupt if irqstackptr is set: | ||
36 | */ | ||
37 | raw_local_irq_save(flags); | ||
38 | cpu = safe_smp_processor_id(); | ||
39 | stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr; | ||
40 | |||
41 | if (stack_end) { | ||
42 | stack_start = stack_end & ~(IRQSTACKSIZE-1); | ||
43 | if (in_range(stack_start, stack, stack_end)) | ||
44 | goto out_restore; | ||
45 | /* | ||
46 | * We get here if we are in an IRQ context but we | ||
47 | * are also in an exception stack. | ||
48 | */ | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Iterate over all exception stacks, and figure out whether | ||
53 | * 'stack' is in one of them: | ||
54 | */ | ||
55 | for (i = 0; i < N_EXCEPTION_STACKS; i++) { | ||
56 | /* | ||
57 | * set 'end' to the end of the exception stack. | ||
58 | */ | ||
59 | stack_end = per_cpu(init_tss, cpu).ist[i]; | ||
60 | stack_start = stack_end - EXCEPTION_STKSZ; | ||
61 | |||
62 | /* | ||
63 | * Is 'stack' above this exception frame's end? | ||
64 | * If yes then skip to the next frame. | ||
65 | */ | ||
66 | if (stack >= stack_end) | ||
67 | continue; | ||
68 | /* | ||
69 | * Is 'stack' above this exception frame's start address? | ||
70 | * If yes then we found the right frame. | ||
71 | */ | ||
72 | if (stack >= stack_start) | ||
73 | goto out_restore; | ||
74 | |||
75 | /* | ||
76 | * If this is a debug stack, and if it has a larger size than | ||
77 | * the usual exception stacks, then 'stack' might still | ||
78 | * be within the lower portion of the debug stack: | ||
79 | */ | ||
80 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
81 | if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) { | ||
82 | /* | ||
83 | * Black magic. A large debug stack is composed of | ||
84 | * multiple exception stack entries, which we | ||
85 | * iterate through now. Dont look: | ||
86 | */ | ||
87 | do { | ||
88 | stack_end -= EXCEPTION_STKSZ; | ||
89 | stack_start -= EXCEPTION_STKSZ; | ||
90 | } while (stack < stack_start); | ||
91 | |||
92 | goto out_restore; | ||
93 | } | ||
94 | #endif | ||
95 | } | ||
96 | /* | ||
97 | * Ok, 'stack' is not pointing to any of the system stacks. | ||
98 | */ | ||
99 | stack_end = 0; | ||
100 | |||
101 | out_restore: | ||
102 | raw_local_irq_restore(flags); | ||
103 | |||
104 | return stack_end; | ||
105 | } | 20 | } |
106 | 21 | ||
107 | 22 | static int save_stack_stack(void *data, char *name) | |
108 | /* | ||
109 | * Save stack-backtrace addresses into a stack_trace buffer: | ||
110 | */ | ||
111 | static inline unsigned long | ||
112 | save_context_stack(struct stack_trace *trace, unsigned int skip, | ||
113 | unsigned long stack, unsigned long stack_end) | ||
114 | { | 23 | { |
115 | unsigned long addr; | 24 | struct stack_trace *trace = (struct stack_trace *)data; |
116 | 25 | return trace->all_contexts ? 0 : -1; | |
117 | #ifdef CONFIG_FRAME_POINTER | 26 | } |
118 | unsigned long prev_stack = 0; | ||
119 | 27 | ||
120 | while (in_range(prev_stack, stack, stack_end)) { | 28 | static void save_stack_address(void *data, unsigned long addr) |
121 | pr_debug("stack: %p\n", (void *)stack); | 29 | { |
122 | addr = (unsigned long)(((unsigned long *)stack)[1]); | 30 | struct stack_trace *trace = (struct stack_trace *)data; |
123 | pr_debug("addr: %p\n", (void *)addr); | 31 | if (trace->skip > 0) { |
124 | if (!skip) | 32 | trace->skip--; |
125 | trace->entries[trace->nr_entries++] = addr-1; | 33 | return; |
126 | else | ||
127 | skip--; | ||
128 | if (trace->nr_entries >= trace->max_entries) | ||
129 | break; | ||
130 | if (!addr) | ||
131 | return 0; | ||
132 | /* | ||
133 | * Stack frames must go forwards (otherwise a loop could | ||
134 | * happen if the stackframe is corrupted), so we move | ||
135 | * prev_stack forwards: | ||
136 | */ | ||
137 | prev_stack = stack; | ||
138 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
139 | } | ||
140 | pr_debug("invalid: %p\n", (void *)stack); | ||
141 | #else | ||
142 | while (stack < stack_end) { | ||
143 | addr = ((unsigned long *)stack)[0]; | ||
144 | stack += sizeof(long); | ||
145 | if (__kernel_text_address(addr)) { | ||
146 | if (!skip) | ||
147 | trace->entries[trace->nr_entries++] = addr-1; | ||
148 | else | ||
149 | skip--; | ||
150 | if (trace->nr_entries >= trace->max_entries) | ||
151 | break; | ||
152 | } | ||
153 | } | 34 | } |
154 | #endif | 35 | if (trace->nr_entries < trace->max_entries - 1) |
155 | return stack; | 36 | trace->entries[trace->nr_entries++] = addr; |
156 | } | 37 | } |
157 | 38 | ||
158 | #define MAX_STACKS 10 | 39 | static struct stacktrace_ops save_stack_ops = { |
40 | .warning = save_stack_warning, | ||
41 | .warning_symbol = save_stack_warning_symbol, | ||
42 | .stack = save_stack_stack, | ||
43 | .address = save_stack_address, | ||
44 | }; | ||
159 | 45 | ||
160 | /* | 46 | /* |
161 | * Save stack-backtrace addresses into a stack_trace buffer. | 47 | * Save stack-backtrace addresses into a stack_trace buffer. |
162 | * If all_contexts is set, all contexts (hardirq, softirq and process) | ||
163 | * are saved. If not set then only the current context is saved. | ||
164 | */ | 48 | */ |
165 | void save_stack_trace(struct stack_trace *trace, | 49 | void save_stack_trace(struct stack_trace *trace, struct task_struct *task) |
166 | struct task_struct *task, int all_contexts, | ||
167 | unsigned int skip) | ||
168 | { | 50 | { |
169 | unsigned long stack = (unsigned long)&stack; | 51 | dump_trace(task, NULL, NULL, &save_stack_ops, trace); |
170 | int i, nr_stacks = 0, stacks_done[MAX_STACKS]; | 52 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
171 | |||
172 | WARN_ON(trace->nr_entries || !trace->max_entries); | ||
173 | |||
174 | if (!task) | ||
175 | task = current; | ||
176 | |||
177 | pr_debug("task: %p, ti: %p\n", task, task->thread_info); | ||
178 | |||
179 | if (!task || task == current) { | ||
180 | /* Grab rbp right from our regs: */ | ||
181 | asm ("mov %%rbp, %0" : "=r" (stack)); | ||
182 | pr_debug("rbp: %p\n", (void *)stack); | ||
183 | } else { | ||
184 | /* rbp is the last reg pushed by switch_to(): */ | ||
185 | stack = task->thread.rsp; | ||
186 | pr_debug("other task rsp: %p\n", (void *)stack); | ||
187 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
188 | pr_debug("other task rbp: %p\n", (void *)stack); | ||
189 | } | ||
190 | |||
191 | while (1) { | ||
192 | unsigned long stack_end = get_stack_end(task, stack); | ||
193 | |||
194 | pr_debug("stack: %p\n", (void *)stack); | ||
195 | pr_debug("stack end: %p\n", (void *)stack_end); | ||
196 | |||
197 | /* | ||
198 | * Invalid stack addres? | ||
199 | */ | ||
200 | if (!stack_end) | ||
201 | return; | ||
202 | /* | ||
203 | * Were we in this stack already? (recursion) | ||
204 | */ | ||
205 | for (i = 0; i < nr_stacks; i++) | ||
206 | if (stacks_done[i] == stack_end) | ||
207 | return; | ||
208 | stacks_done[nr_stacks] = stack_end; | ||
209 | |||
210 | stack = save_context_stack(trace, skip, stack, stack_end); | ||
211 | if (!all_contexts || !stack || | ||
212 | trace->nr_entries >= trace->max_entries) | ||
213 | return; | ||
214 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
215 | if (trace->nr_entries >= trace->max_entries) | ||
216 | return; | ||
217 | if (++nr_stacks >= MAX_STACKS) | ||
218 | return; | ||
219 | } | ||
220 | } | 53 | } |
54 | EXPORT_SYMBOL(save_stack_trace); | ||
221 | 55 | ||
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c index 5530dda3f27a..cbabfdf78e06 100644 --- a/arch/x86_64/kernel/tce.c +++ b/arch/x86_64/kernel/tce.c | |||
@@ -1,4 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * This file manages the translation entries for the IBM Calgary IOMMU. | ||
3 | * | ||
2 | * Derived from arch/powerpc/platforms/pseries/iommu.c | 4 | * Derived from arch/powerpc/platforms/pseries/iommu.c |
3 | * | 5 | * |
4 | * Copyright (C) IBM Corporation, 2006 | 6 | * Copyright (C) IBM Corporation, 2006 |
@@ -104,14 +106,6 @@ static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) | |||
104 | /* set the tce table size - measured in entries */ | 106 | /* set the tce table size - measured in entries */ |
105 | tbl->it_size = table_size_to_number_of_entries(specified_table_size); | 107 | tbl->it_size = table_size_to_number_of_entries(specified_table_size); |
106 | 108 | ||
107 | tbl->it_base = (unsigned long)tce_table_kva[dev->bus->number]; | ||
108 | if (!tbl->it_base) { | ||
109 | printk(KERN_ERR "Calgary: iommu_table_setparms: " | ||
110 | "no table allocated?!\n"); | ||
111 | ret = -ENOMEM; | ||
112 | goto done; | ||
113 | } | ||
114 | |||
115 | /* | 109 | /* |
116 | * number of bytes needed for the bitmap size in number of | 110 | * number of bytes needed for the bitmap size in number of |
117 | * entries; we need one bit per entry | 111 | * entries; we need one bit per entry |
@@ -160,8 +154,6 @@ int build_tce_table(struct pci_dev *dev, void __iomem *bbar) | |||
160 | if (ret) | 154 | if (ret) |
161 | goto free_tbl; | 155 | goto free_tbl; |
162 | 156 | ||
163 | tce_free(tbl, 0, tbl->it_size); | ||
164 | |||
165 | tbl->bbar = bbar; | 157 | tbl->bbar = bbar; |
166 | 158 | ||
167 | /* | 159 | /* |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7700e6cd2bd9..1c255ee76e7c 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/device.h> | 24 | #include <linux/device.h> |
25 | #include <linux/sysdev.h> | 25 | #include <linux/sysdev.h> |
26 | #include <linux/bcd.h> | 26 | #include <linux/bcd.h> |
27 | #include <linux/notifier.h> | ||
28 | #include <linux/cpu.h> | ||
27 | #include <linux/kallsyms.h> | 29 | #include <linux/kallsyms.h> |
28 | #include <linux/acpi.h> | 30 | #include <linux/acpi.h> |
29 | #ifdef CONFIG_ACPI | 31 | #ifdef CONFIG_ACPI |
@@ -39,9 +41,7 @@ | |||
39 | #include <asm/sections.h> | 41 | #include <asm/sections.h> |
40 | #include <linux/cpufreq.h> | 42 | #include <linux/cpufreq.h> |
41 | #include <linux/hpet.h> | 43 | #include <linux/hpet.h> |
42 | #ifdef CONFIG_X86_LOCAL_APIC | ||
43 | #include <asm/apic.h> | 44 | #include <asm/apic.h> |
44 | #endif | ||
45 | 45 | ||
46 | #ifdef CONFIG_CPU_FREQ | 46 | #ifdef CONFIG_CPU_FREQ |
47 | static void cpufreq_delayed_get(void); | 47 | static void cpufreq_delayed_get(void); |
@@ -49,7 +49,7 @@ static void cpufreq_delayed_get(void); | |||
49 | extern void i8254_timer_resume(void); | 49 | extern void i8254_timer_resume(void); |
50 | extern int using_apic_timer; | 50 | extern int using_apic_timer; |
51 | 51 | ||
52 | static char *time_init_gtod(void); | 52 | static char *timename = NULL; |
53 | 53 | ||
54 | DEFINE_SPINLOCK(rtc_lock); | 54 | DEFINE_SPINLOCK(rtc_lock); |
55 | EXPORT_SYMBOL(rtc_lock); | 55 | EXPORT_SYMBOL(rtc_lock); |
@@ -187,20 +187,15 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
187 | { | 187 | { |
188 | unsigned long pc = instruction_pointer(regs); | 188 | unsigned long pc = instruction_pointer(regs); |
189 | 189 | ||
190 | /* Assume the lock function has either no stack frame or only a single | 190 | /* Assume the lock function has either no stack frame or a copy |
191 | word. This checks if the address on the stack looks like a kernel | 191 | of eflags from PUSHF |
192 | text address. | 192 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ |
193 | There is a small window for false hits, but in that case the tick | ||
194 | is just accounted to the spinlock function. | ||
195 | Better would be to write these functions in assembler again | ||
196 | and check exactly. */ | ||
197 | if (!user_mode(regs) && in_lock_functions(pc)) { | 193 | if (!user_mode(regs) && in_lock_functions(pc)) { |
198 | char *v = *(char **)regs->rsp; | 194 | unsigned long *sp = (unsigned long *)regs->rsp; |
199 | if ((v >= _stext && v <= _etext) || | 195 | if (sp[0] >> 22) |
200 | (v >= _sinittext && v <= _einittext) || | 196 | return sp[0]; |
201 | (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END)) | 197 | if (sp[1] >> 22) |
202 | return (unsigned long)v; | 198 | return sp[1]; |
203 | return ((unsigned long *)regs->rsp)[1]; | ||
204 | } | 199 | } |
205 | return pc; | 200 | return pc; |
206 | } | 201 | } |
@@ -281,6 +276,7 @@ static void set_rtc_mmss(unsigned long nowtime) | |||
281 | * Note: This function is required to return accurate | 276 | * Note: This function is required to return accurate |
282 | * time even in the absence of multiple timer ticks. | 277 | * time even in the absence of multiple timer ticks. |
283 | */ | 278 | */ |
279 | static inline unsigned long long cycles_2_ns(unsigned long long cyc); | ||
284 | unsigned long long monotonic_clock(void) | 280 | unsigned long long monotonic_clock(void) |
285 | { | 281 | { |
286 | unsigned long seq; | 282 | unsigned long seq; |
@@ -305,8 +301,7 @@ unsigned long long monotonic_clock(void) | |||
305 | base = monotonic_base; | 301 | base = monotonic_base; |
306 | } while (read_seqretry(&xtime_lock, seq)); | 302 | } while (read_seqretry(&xtime_lock, seq)); |
307 | this_offset = get_cycles_sync(); | 303 | this_offset = get_cycles_sync(); |
308 | /* FIXME: 1000 or 1000000? */ | 304 | offset = cycles_2_ns(this_offset - last_offset); |
309 | offset = (this_offset - last_offset)*1000 / cpu_khz; | ||
310 | } | 305 | } |
311 | return base + offset; | 306 | return base + offset; |
312 | } | 307 | } |
@@ -410,8 +405,7 @@ void main_timer_handler(struct pt_regs *regs) | |||
410 | offset %= USEC_PER_TICK; | 405 | offset %= USEC_PER_TICK; |
411 | } | 406 | } |
412 | 407 | ||
413 | /* FIXME: 1000 or 1000000? */ | 408 | monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc); |
414 | monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz; | ||
415 | 409 | ||
416 | vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; | 410 | vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; |
417 | 411 | ||
@@ -441,12 +435,8 @@ void main_timer_handler(struct pt_regs *regs) | |||
441 | * have to call the local interrupt handler. | 435 | * have to call the local interrupt handler. |
442 | */ | 436 | */ |
443 | 437 | ||
444 | #ifndef CONFIG_X86_LOCAL_APIC | ||
445 | profile_tick(CPU_PROFILING, regs); | ||
446 | #else | ||
447 | if (!using_apic_timer) | 438 | if (!using_apic_timer) |
448 | smp_local_timer_interrupt(regs); | 439 | smp_local_timer_interrupt(regs); |
449 | #endif | ||
450 | 440 | ||
451 | /* | 441 | /* |
452 | * If we have an externally synchronized Linux clock, then update CMOS clock | 442 | * If we have an externally synchronized Linux clock, then update CMOS clock |
@@ -470,10 +460,8 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
470 | if (apic_runs_main_timer > 1) | 460 | if (apic_runs_main_timer > 1) |
471 | return IRQ_HANDLED; | 461 | return IRQ_HANDLED; |
472 | main_timer_handler(regs); | 462 | main_timer_handler(regs); |
473 | #ifdef CONFIG_X86_LOCAL_APIC | ||
474 | if (using_apic_timer) | 463 | if (using_apic_timer) |
475 | smp_send_timer_broadcast_ipi(); | 464 | smp_send_timer_broadcast_ipi(); |
476 | #endif | ||
477 | return IRQ_HANDLED; | 465 | return IRQ_HANDLED; |
478 | } | 466 | } |
479 | 467 | ||
@@ -893,11 +881,17 @@ static struct irqaction irq0 = { | |||
893 | timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL | 881 | timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL |
894 | }; | 882 | }; |
895 | 883 | ||
896 | void __init time_init(void) | 884 | static int __cpuinit |
885 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) | ||
897 | { | 886 | { |
898 | char *timename; | 887 | unsigned cpu = (unsigned long) hcpu; |
899 | char *gtod; | 888 | if (action == CPU_ONLINE) |
889 | vsyscall_set_cpu(cpu); | ||
890 | return NOTIFY_DONE; | ||
891 | } | ||
900 | 892 | ||
893 | void __init time_init(void) | ||
894 | { | ||
901 | if (nohpet) | 895 | if (nohpet) |
902 | vxtime.hpet_address = 0; | 896 | vxtime.hpet_address = 0; |
903 | 897 | ||
@@ -931,18 +925,17 @@ void __init time_init(void) | |||
931 | } | 925 | } |
932 | 926 | ||
933 | vxtime.mode = VXTIME_TSC; | 927 | vxtime.mode = VXTIME_TSC; |
934 | gtod = time_init_gtod(); | ||
935 | |||
936 | printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", | ||
937 | vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); | ||
938 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | ||
939 | cpu_khz / 1000, cpu_khz % 1000); | ||
940 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; | 928 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; |
941 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; | 929 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; |
942 | vxtime.last_tsc = get_cycles_sync(); | 930 | vxtime.last_tsc = get_cycles_sync(); |
931 | set_cyc2ns_scale(cpu_khz); | ||
943 | setup_irq(0, &irq0); | 932 | setup_irq(0, &irq0); |
933 | hotcpu_notifier(time_cpu_notifier, 0); | ||
934 | time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id()); | ||
944 | 935 | ||
945 | set_cyc2ns_scale(cpu_khz); | 936 | #ifndef CONFIG_SMP |
937 | time_init_gtod(); | ||
938 | #endif | ||
946 | } | 939 | } |
947 | 940 | ||
948 | /* | 941 | /* |
@@ -973,12 +966,18 @@ __cpuinit int unsynchronized_tsc(void) | |||
973 | /* | 966 | /* |
974 | * Decide what mode gettimeofday should use. | 967 | * Decide what mode gettimeofday should use. |
975 | */ | 968 | */ |
976 | __init static char *time_init_gtod(void) | 969 | void time_init_gtod(void) |
977 | { | 970 | { |
978 | char *timetype; | 971 | char *timetype; |
979 | 972 | ||
980 | if (unsynchronized_tsc()) | 973 | if (unsynchronized_tsc()) |
981 | notsc = 1; | 974 | notsc = 1; |
975 | |||
976 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
977 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
978 | else | ||
979 | vgetcpu_mode = VGETCPU_LSL; | ||
980 | |||
982 | if (vxtime.hpet_address && notsc) { | 981 | if (vxtime.hpet_address && notsc) { |
983 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; | 982 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; |
984 | if (hpet_use_timer) | 983 | if (hpet_use_timer) |
@@ -1001,7 +1000,16 @@ __init static char *time_init_gtod(void) | |||
1001 | timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; | 1000 | timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; |
1002 | vxtime.mode = VXTIME_TSC; | 1001 | vxtime.mode = VXTIME_TSC; |
1003 | } | 1002 | } |
1004 | return timetype; | 1003 | |
1004 | printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", | ||
1005 | vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype); | ||
1006 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | ||
1007 | cpu_khz / 1000, cpu_khz % 1000); | ||
1008 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; | ||
1009 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; | ||
1010 | vxtime.last_tsc = get_cycles_sync(); | ||
1011 | |||
1012 | set_cyc2ns_scale(cpu_khz); | ||
1005 | } | 1013 | } |
1006 | 1014 | ||
1007 | __setup("report_lost_ticks", time_setup); | 1015 | __setup("report_lost_ticks", time_setup); |
@@ -1031,8 +1039,16 @@ static int timer_resume(struct sys_device *dev) | |||
1031 | unsigned long flags; | 1039 | unsigned long flags; |
1032 | unsigned long sec; | 1040 | unsigned long sec; |
1033 | unsigned long ctime = get_cmos_time(); | 1041 | unsigned long ctime = get_cmos_time(); |
1034 | unsigned long sleep_length = (ctime - sleep_start) * HZ; | 1042 | long sleep_length = (ctime - sleep_start) * HZ; |
1035 | 1043 | ||
1044 | if (sleep_length < 0) { | ||
1045 | printk(KERN_WARNING "Time skew detected in timer resume!\n"); | ||
1046 | /* The time after the resume must not be earlier than the time | ||
1047 | * before the suspend or some nasty things will happen | ||
1048 | */ | ||
1049 | sleep_length = 0; | ||
1050 | ctime = sleep_start; | ||
1051 | } | ||
1036 | if (vxtime.hpet_address) | 1052 | if (vxtime.hpet_address) |
1037 | hpet_reenable(); | 1053 | hpet_reenable(); |
1038 | else | 1054 | else |
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S index 23a03eb91fc7..c79b99a9e2f6 100644 --- a/arch/x86_64/kernel/trampoline.S +++ b/arch/x86_64/kernel/trampoline.S | |||
@@ -64,7 +64,7 @@ idt_48: | |||
64 | .word 0, 0 # idt base = 0L | 64 | .word 0, 0 # idt base = 0L |
65 | 65 | ||
66 | gdt_48: | 66 | gdt_48: |
67 | .short __KERNEL32_CS + 7 # gdt limit | 67 | .short GDT_ENTRIES*8 - 1 # gdt limit |
68 | .long cpu_gdt_table-__START_KERNEL_map | 68 | .long cpu_gdt_table-__START_KERNEL_map |
69 | 69 | ||
70 | .globl trampoline_end | 70 | .globl trampoline_end |
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b1249774d1e8..01f2a8d254c2 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/pda.h> | 45 | #include <asm/pda.h> |
46 | #include <asm/proto.h> | 46 | #include <asm/proto.h> |
47 | #include <asm/nmi.h> | 47 | #include <asm/nmi.h> |
48 | #include <asm/stacktrace.h> | ||
48 | 49 | ||
49 | asmlinkage void divide_error(void); | 50 | asmlinkage void divide_error(void); |
50 | asmlinkage void debug(void); | 51 | asmlinkage void debug(void); |
@@ -142,7 +143,7 @@ void printk_address(unsigned long address) | |||
142 | #endif | 143 | #endif |
143 | 144 | ||
144 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 145 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
145 | unsigned *usedp, const char **idp) | 146 | unsigned *usedp, char **idp) |
146 | { | 147 | { |
147 | static char ids[][8] = { | 148 | static char ids[][8] = { |
148 | [DEBUG_STACK - 1] = "#DB", | 149 | [DEBUG_STACK - 1] = "#DB", |
@@ -161,26 +162,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
161 | * 'stack' is in one of them: | 162 | * 'stack' is in one of them: |
162 | */ | 163 | */ |
163 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 164 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
164 | unsigned long end; | 165 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; |
165 | |||
166 | /* | ||
167 | * set 'end' to the end of the exception stack. | ||
168 | */ | ||
169 | switch (k + 1) { | ||
170 | /* | ||
171 | * TODO: this block is not needed i think, because | ||
172 | * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] | ||
173 | * properly too. | ||
174 | */ | ||
175 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
176 | case DEBUG_STACK: | ||
177 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; | ||
178 | break; | ||
179 | #endif | ||
180 | default: | ||
181 | end = per_cpu(orig_ist, cpu).ist[k]; | ||
182 | break; | ||
183 | } | ||
184 | /* | 166 | /* |
185 | * Is 'stack' above this exception frame's end? | 167 | * Is 'stack' above this exception frame's end? |
186 | * If yes then skip to the next frame. | 168 | * If yes then skip to the next frame. |
@@ -234,13 +216,19 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
234 | return NULL; | 216 | return NULL; |
235 | } | 217 | } |
236 | 218 | ||
237 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) | 219 | struct ops_and_data { |
220 | struct stacktrace_ops *ops; | ||
221 | void *data; | ||
222 | }; | ||
223 | |||
224 | static int dump_trace_unwind(struct unwind_frame_info *info, void *context) | ||
238 | { | 225 | { |
226 | struct ops_and_data *oad = (struct ops_and_data *)context; | ||
239 | int n = 0; | 227 | int n = 0; |
240 | 228 | ||
241 | while (unwind(info) == 0 && UNW_PC(info)) { | 229 | while (unwind(info) == 0 && UNW_PC(info)) { |
242 | n++; | 230 | n++; |
243 | printk_address(UNW_PC(info)); | 231 | oad->ops->address(oad->data, UNW_PC(info)); |
244 | if (arch_unw_user_mode(info)) | 232 | if (arch_unw_user_mode(info)) |
245 | break; | 233 | break; |
246 | } | 234 | } |
@@ -254,45 +242,53 @@ static int show_trace_unwind(struct unwind_frame_info *info, void *context) | |||
254 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 242 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
255 | */ | 243 | */ |
256 | 244 | ||
257 | void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack) | 245 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, |
246 | struct stacktrace_ops *ops, void *data) | ||
258 | { | 247 | { |
259 | const unsigned cpu = safe_smp_processor_id(); | 248 | const unsigned cpu = smp_processor_id(); |
260 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 249 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
261 | unsigned used = 0; | 250 | unsigned used = 0; |
262 | 251 | ||
263 | printk("\nCall Trace:\n"); | ||
264 | |||
265 | if (!tsk) | 252 | if (!tsk) |
266 | tsk = current; | 253 | tsk = current; |
267 | 254 | ||
268 | if (call_trace >= 0) { | 255 | if (call_trace >= 0) { |
269 | int unw_ret = 0; | 256 | int unw_ret = 0; |
270 | struct unwind_frame_info info; | 257 | struct unwind_frame_info info; |
258 | struct ops_and_data oad = { .ops = ops, .data = data }; | ||
271 | 259 | ||
272 | if (regs) { | 260 | if (regs) { |
273 | if (unwind_init_frame_info(&info, tsk, regs) == 0) | 261 | if (unwind_init_frame_info(&info, tsk, regs) == 0) |
274 | unw_ret = show_trace_unwind(&info, NULL); | 262 | unw_ret = dump_trace_unwind(&info, &oad); |
275 | } else if (tsk == current) | 263 | } else if (tsk == current) |
276 | unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); | 264 | unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
277 | else { | 265 | else { |
278 | if (unwind_init_blocked(&info, tsk) == 0) | 266 | if (unwind_init_blocked(&info, tsk) == 0) |
279 | unw_ret = show_trace_unwind(&info, NULL); | 267 | unw_ret = dump_trace_unwind(&info, &oad); |
280 | } | 268 | } |
281 | if (unw_ret > 0) { | 269 | if (unw_ret > 0) { |
282 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { | 270 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
283 | print_symbol("DWARF2 unwinder stuck at %s\n", | 271 | ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
284 | UNW_PC(&info)); | 272 | UNW_PC(&info)); |
285 | if ((long)UNW_SP(&info) < 0) { | 273 | if ((long)UNW_SP(&info) < 0) { |
286 | printk("Leftover inexact backtrace:\n"); | 274 | ops->warning(data, "Leftover inexact backtrace:\n"); |
287 | stack = (unsigned long *)UNW_SP(&info); | 275 | stack = (unsigned long *)UNW_SP(&info); |
276 | if (!stack) | ||
277 | return; | ||
288 | } else | 278 | } else |
289 | printk("Full inexact backtrace again:\n"); | 279 | ops->warning(data, "Full inexact backtrace again:\n"); |
290 | } else if (call_trace >= 1) | 280 | } else if (call_trace >= 1) |
291 | return; | 281 | return; |
292 | else | 282 | else |
293 | printk("Full inexact backtrace again:\n"); | 283 | ops->warning(data, "Full inexact backtrace again:\n"); |
294 | } else | 284 | } else |
295 | printk("Inexact backtrace:\n"); | 285 | ops->warning(data, "Inexact backtrace:\n"); |
286 | } | ||
287 | if (!stack) { | ||
288 | unsigned long dummy; | ||
289 | stack = &dummy; | ||
290 | if (tsk && tsk != current) | ||
291 | stack = (unsigned long *)tsk->thread.rsp; | ||
296 | } | 292 | } |
297 | 293 | ||
298 | /* | 294 | /* |
@@ -303,7 +299,9 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
303 | #define HANDLE_STACK(cond) \ | 299 | #define HANDLE_STACK(cond) \ |
304 | do while (cond) { \ | 300 | do while (cond) { \ |
305 | unsigned long addr = *stack++; \ | 301 | unsigned long addr = *stack++; \ |
306 | if (kernel_text_address(addr)) { \ | 302 | if (oops_in_progress ? \ |
303 | __kernel_text_address(addr) : \ | ||
304 | kernel_text_address(addr)) { \ | ||
307 | /* \ | 305 | /* \ |
308 | * If the address is either in the text segment of the \ | 306 | * If the address is either in the text segment of the \ |
309 | * kernel, or in the region which contains vmalloc'ed \ | 307 | * kernel, or in the region which contains vmalloc'ed \ |
@@ -312,7 +310,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
312 | * down the cause of the crash will be able to figure \ | 310 | * down the cause of the crash will be able to figure \ |
313 | * out the call path that was taken. \ | 311 | * out the call path that was taken. \ |
314 | */ \ | 312 | */ \ |
315 | printk_address(addr); \ | 313 | ops->address(data, addr); \ |
316 | } \ | 314 | } \ |
317 | } while (0) | 315 | } while (0) |
318 | 316 | ||
@@ -321,16 +319,17 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
321 | * current stack address. If the stacks consist of nested | 319 | * current stack address. If the stacks consist of nested |
322 | * exceptions | 320 | * exceptions |
323 | */ | 321 | */ |
324 | for ( ; ; ) { | 322 | for (;;) { |
325 | const char *id; | 323 | char *id; |
326 | unsigned long *estack_end; | 324 | unsigned long *estack_end; |
327 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 325 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
328 | &used, &id); | 326 | &used, &id); |
329 | 327 | ||
330 | if (estack_end) { | 328 | if (estack_end) { |
331 | printk(" <%s>", id); | 329 | if (ops->stack(data, id) < 0) |
330 | break; | ||
332 | HANDLE_STACK (stack < estack_end); | 331 | HANDLE_STACK (stack < estack_end); |
333 | printk(" <EOE>"); | 332 | ops->stack(data, "<EOE>"); |
334 | /* | 333 | /* |
335 | * We link to the next stack via the | 334 | * We link to the next stack via the |
336 | * second-to-last pointer (index -2 to end) in the | 335 | * second-to-last pointer (index -2 to end) in the |
@@ -345,7 +344,8 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
345 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 344 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
346 | 345 | ||
347 | if (stack >= irqstack && stack < irqstack_end) { | 346 | if (stack >= irqstack && stack < irqstack_end) { |
348 | printk(" <IRQ>"); | 347 | if (ops->stack(data, "IRQ") < 0) |
348 | break; | ||
349 | HANDLE_STACK (stack < irqstack_end); | 349 | HANDLE_STACK (stack < irqstack_end); |
350 | /* | 350 | /* |
351 | * We link to the next stack (which would be | 351 | * We link to the next stack (which would be |
@@ -354,7 +354,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
354 | */ | 354 | */ |
355 | stack = (unsigned long *) (irqstack_end[-1]); | 355 | stack = (unsigned long *) (irqstack_end[-1]); |
356 | irqstack_end = NULL; | 356 | irqstack_end = NULL; |
357 | printk(" <EOI>"); | 357 | ops->stack(data, "EOI"); |
358 | continue; | 358 | continue; |
359 | } | 359 | } |
360 | } | 360 | } |
@@ -362,19 +362,57 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * This prints the process stack: | 365 | * This handles the process stack: |
366 | */ | 366 | */ |
367 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); | 367 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); |
368 | #undef HANDLE_STACK | 368 | #undef HANDLE_STACK |
369 | } | ||
370 | EXPORT_SYMBOL(dump_trace); | ||
371 | |||
372 | static void | ||
373 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
374 | { | ||
375 | print_symbol(msg, symbol); | ||
376 | printk("\n"); | ||
377 | } | ||
378 | |||
379 | static void print_trace_warning(void *data, char *msg) | ||
380 | { | ||
381 | printk("%s\n", msg); | ||
382 | } | ||
383 | |||
384 | static int print_trace_stack(void *data, char *name) | ||
385 | { | ||
386 | printk(" <%s> ", name); | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static void print_trace_address(void *data, unsigned long addr) | ||
391 | { | ||
392 | printk_address(addr); | ||
393 | } | ||
394 | |||
395 | static struct stacktrace_ops print_trace_ops = { | ||
396 | .warning = print_trace_warning, | ||
397 | .warning_symbol = print_trace_warning_symbol, | ||
398 | .stack = print_trace_stack, | ||
399 | .address = print_trace_address, | ||
400 | }; | ||
369 | 401 | ||
402 | void | ||
403 | show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) | ||
404 | { | ||
405 | printk("\nCall Trace:\n"); | ||
406 | dump_trace(tsk, regs, stack, &print_trace_ops, NULL); | ||
370 | printk("\n"); | 407 | printk("\n"); |
371 | } | 408 | } |
372 | 409 | ||
373 | static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) | 410 | static void |
411 | _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) | ||
374 | { | 412 | { |
375 | unsigned long *stack; | 413 | unsigned long *stack; |
376 | int i; | 414 | int i; |
377 | const int cpu = safe_smp_processor_id(); | 415 | const int cpu = smp_processor_id(); |
378 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); | 416 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
379 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | 417 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); |
380 | 418 | ||
@@ -428,7 +466,7 @@ void show_registers(struct pt_regs *regs) | |||
428 | int i; | 466 | int i; |
429 | int in_kernel = !user_mode(regs); | 467 | int in_kernel = !user_mode(regs); |
430 | unsigned long rsp; | 468 | unsigned long rsp; |
431 | const int cpu = safe_smp_processor_id(); | 469 | const int cpu = smp_processor_id(); |
432 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 470 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
433 | 471 | ||
434 | rsp = regs->rsp; | 472 | rsp = regs->rsp; |
@@ -503,9 +541,11 @@ static unsigned int die_nest_count; | |||
503 | 541 | ||
504 | unsigned __kprobes long oops_begin(void) | 542 | unsigned __kprobes long oops_begin(void) |
505 | { | 543 | { |
506 | int cpu = safe_smp_processor_id(); | 544 | int cpu = smp_processor_id(); |
507 | unsigned long flags; | 545 | unsigned long flags; |
508 | 546 | ||
547 | oops_enter(); | ||
548 | |||
509 | /* racy, but better than risking deadlock. */ | 549 | /* racy, but better than risking deadlock. */ |
510 | local_irq_save(flags); | 550 | local_irq_save(flags); |
511 | if (!spin_trylock(&die_lock)) { | 551 | if (!spin_trylock(&die_lock)) { |
@@ -534,6 +574,7 @@ void __kprobes oops_end(unsigned long flags) | |||
534 | spin_unlock_irqrestore(&die_lock, flags); | 574 | spin_unlock_irqrestore(&die_lock, flags); |
535 | if (panic_on_oops) | 575 | if (panic_on_oops) |
536 | panic("Fatal exception"); | 576 | panic("Fatal exception"); |
577 | oops_exit(); | ||
537 | } | 578 | } |
538 | 579 | ||
539 | void __kprobes __die(const char * str, struct pt_regs * regs, long err) | 580 | void __kprobes __die(const char * str, struct pt_regs * regs, long err) |
@@ -570,7 +611,7 @@ void die(const char * str, struct pt_regs * regs, long err) | |||
570 | do_exit(SIGSEGV); | 611 | do_exit(SIGSEGV); |
571 | } | 612 | } |
572 | 613 | ||
573 | void __kprobes die_nmi(char *str, struct pt_regs *regs) | 614 | void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) |
574 | { | 615 | { |
575 | unsigned long flags = oops_begin(); | 616 | unsigned long flags = oops_begin(); |
576 | 617 | ||
@@ -578,13 +619,12 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) | |||
578 | * We are in trouble anyway, lets at least try | 619 | * We are in trouble anyway, lets at least try |
579 | * to get a message out. | 620 | * to get a message out. |
580 | */ | 621 | */ |
581 | printk(str, safe_smp_processor_id()); | 622 | printk(str, smp_processor_id()); |
582 | show_registers(regs); | 623 | show_registers(regs); |
583 | if (kexec_should_crash(current)) | 624 | if (kexec_should_crash(current)) |
584 | crash_kexec(regs); | 625 | crash_kexec(regs); |
585 | if (panic_on_timeout || panic_on_oops) | 626 | if (do_panic || panic_on_oops) |
586 | panic("nmi watchdog"); | 627 | panic("Non maskable interrupt"); |
587 | printk("console shuts up ...\n"); | ||
588 | oops_end(flags); | 628 | oops_end(flags); |
589 | nmi_exit(); | 629 | nmi_exit(); |
590 | local_irq_enable(); | 630 | local_irq_enable(); |
@@ -730,8 +770,15 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
730 | static __kprobes void | 770 | static __kprobes void |
731 | mem_parity_error(unsigned char reason, struct pt_regs * regs) | 771 | mem_parity_error(unsigned char reason, struct pt_regs * regs) |
732 | { | 772 | { |
733 | printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); | 773 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
734 | printk("You probably have a hardware problem with your RAM chips\n"); | 774 | reason); |
775 | printk(KERN_EMERG "You probably have a hardware problem with your " | ||
776 | "RAM chips\n"); | ||
777 | |||
778 | if (panic_on_unrecovered_nmi) | ||
779 | panic("NMI: Not continuing"); | ||
780 | |||
781 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
735 | 782 | ||
736 | /* Clear and disable the memory parity error line. */ | 783 | /* Clear and disable the memory parity error line. */ |
737 | reason = (reason & 0xf) | 4; | 784 | reason = (reason & 0xf) | 4; |
@@ -754,9 +801,15 @@ io_check_error(unsigned char reason, struct pt_regs * regs) | |||
754 | 801 | ||
755 | static __kprobes void | 802 | static __kprobes void |
756 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | 803 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) |
757 | { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); | 804 | { |
758 | printk("Dazed and confused, but trying to continue\n"); | 805 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
759 | printk("Do you have a strange power saving mode enabled?\n"); | 806 | reason); |
807 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | ||
808 | |||
809 | if (panic_on_unrecovered_nmi) | ||
810 | panic("NMI: Not continuing"); | ||
811 | |||
812 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
760 | } | 813 | } |
761 | 814 | ||
762 | /* Runs on IST stack. This code must keep interrupts off all the time. | 815 | /* Runs on IST stack. This code must keep interrupts off all the time. |
@@ -776,17 +829,15 @@ asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) | |||
776 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 829 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
777 | == NOTIFY_STOP) | 830 | == NOTIFY_STOP) |
778 | return; | 831 | return; |
779 | #ifdef CONFIG_X86_LOCAL_APIC | ||
780 | /* | 832 | /* |
781 | * Ok, so this is none of the documented NMI sources, | 833 | * Ok, so this is none of the documented NMI sources, |
782 | * so it must be the NMI watchdog. | 834 | * so it must be the NMI watchdog. |
783 | */ | 835 | */ |
784 | if (nmi_watchdog > 0) { | 836 | if (nmi_watchdog_tick(regs,reason)) |
785 | nmi_watchdog_tick(regs,reason); | ||
786 | return; | 837 | return; |
787 | } | 838 | if (!do_nmi_callback(regs,cpu)) |
788 | #endif | 839 | unknown_nmi_error(reason, regs); |
789 | unknown_nmi_error(reason, regs); | 840 | |
790 | return; | 841 | return; |
791 | } | 842 | } |
792 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 843 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
@@ -1071,6 +1122,7 @@ asmlinkage void math_state_restore(void) | |||
1071 | init_fpu(me); | 1122 | init_fpu(me); |
1072 | restore_fpu_checking(&me->thread.i387.fxsave); | 1123 | restore_fpu_checking(&me->thread.i387.fxsave); |
1073 | task_thread_info(me)->status |= TS_USEDFPU; | 1124 | task_thread_info(me)->status |= TS_USEDFPU; |
1125 | me->fpu_counter++; | ||
1074 | } | 1126 | } |
1075 | 1127 | ||
1076 | void __init trap_init(void) | 1128 | void __init trap_init(void) |
@@ -1109,24 +1161,30 @@ void __init trap_init(void) | |||
1109 | } | 1161 | } |
1110 | 1162 | ||
1111 | 1163 | ||
1112 | /* Actual parsing is done early in setup.c. */ | 1164 | static int __init oops_setup(char *s) |
1113 | static int __init oops_dummy(char *s) | ||
1114 | { | 1165 | { |
1115 | panic_on_oops = 1; | 1166 | if (!s) |
1116 | return 1; | 1167 | return -EINVAL; |
1168 | if (!strcmp(s, "panic")) | ||
1169 | panic_on_oops = 1; | ||
1170 | return 0; | ||
1117 | } | 1171 | } |
1118 | __setup("oops=", oops_dummy); | 1172 | early_param("oops", oops_setup); |
1119 | 1173 | ||
1120 | static int __init kstack_setup(char *s) | 1174 | static int __init kstack_setup(char *s) |
1121 | { | 1175 | { |
1176 | if (!s) | ||
1177 | return -EINVAL; | ||
1122 | kstack_depth_to_print = simple_strtoul(s,NULL,0); | 1178 | kstack_depth_to_print = simple_strtoul(s,NULL,0); |
1123 | return 1; | 1179 | return 0; |
1124 | } | 1180 | } |
1125 | __setup("kstack=", kstack_setup); | 1181 | early_param("kstack", kstack_setup); |
1126 | 1182 | ||
1127 | #ifdef CONFIG_STACK_UNWIND | 1183 | #ifdef CONFIG_STACK_UNWIND |
1128 | static int __init call_trace_setup(char *s) | 1184 | static int __init call_trace_setup(char *s) |
1129 | { | 1185 | { |
1186 | if (!s) | ||
1187 | return -EINVAL; | ||
1130 | if (strcmp(s, "old") == 0) | 1188 | if (strcmp(s, "old") == 0) |
1131 | call_trace = -1; | 1189 | call_trace = -1; |
1132 | else if (strcmp(s, "both") == 0) | 1190 | else if (strcmp(s, "both") == 0) |
@@ -1135,7 +1193,7 @@ static int __init call_trace_setup(char *s) | |||
1135 | call_trace = 1; | 1193 | call_trace = 1; |
1136 | else if (strcmp(s, "new") == 0) | 1194 | else if (strcmp(s, "new") == 0) |
1137 | call_trace = 2; | 1195 | call_trace = 2; |
1138 | return 1; | 1196 | return 0; |
1139 | } | 1197 | } |
1140 | __setup("call_trace=", call_trace_setup); | 1198 | early_param("call_trace", call_trace_setup); |
1141 | #endif | 1199 | #endif |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 7c4de31471d4..d0564f1bcb0b 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | |||
13 | OUTPUT_ARCH(i386:x86-64) | 13 | OUTPUT_ARCH(i386:x86-64) |
14 | ENTRY(phys_startup_64) | 14 | ENTRY(phys_startup_64) |
15 | jiffies_64 = jiffies; | 15 | jiffies_64 = jiffies; |
16 | PHDRS { | ||
17 | text PT_LOAD FLAGS(5); /* R_E */ | ||
18 | data PT_LOAD FLAGS(7); /* RWE */ | ||
19 | user PT_LOAD FLAGS(7); /* RWE */ | ||
20 | note PT_NOTE FLAGS(4); /* R__ */ | ||
21 | } | ||
16 | SECTIONS | 22 | SECTIONS |
17 | { | 23 | { |
18 | . = __START_KERNEL; | 24 | . = __START_KERNEL; |
@@ -31,7 +37,7 @@ SECTIONS | |||
31 | KPROBES_TEXT | 37 | KPROBES_TEXT |
32 | *(.fixup) | 38 | *(.fixup) |
33 | *(.gnu.warning) | 39 | *(.gnu.warning) |
34 | } = 0x9090 | 40 | } :text = 0x9090 |
35 | /* out-of-line lock text */ | 41 | /* out-of-line lock text */ |
36 | .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } | 42 | .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } |
37 | 43 | ||
@@ -57,7 +63,7 @@ SECTIONS | |||
57 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 63 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
58 | *(.data) | 64 | *(.data) |
59 | CONSTRUCTORS | 65 | CONSTRUCTORS |
60 | } | 66 | } :data |
61 | 67 | ||
62 | _edata = .; /* End of data section */ | 68 | _edata = .; /* End of data section */ |
63 | 69 | ||
@@ -89,7 +95,7 @@ SECTIONS | |||
89 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | 95 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) |
90 | 96 | ||
91 | . = VSYSCALL_ADDR; | 97 | . = VSYSCALL_ADDR; |
92 | .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } | 98 | .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user |
93 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; | 99 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; |
94 | 100 | ||
95 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 101 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
@@ -99,6 +105,9 @@ SECTIONS | |||
99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } | 105 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } |
100 | vxtime = VVIRT(.vxtime); | 106 | vxtime = VVIRT(.vxtime); |
101 | 107 | ||
108 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | ||
109 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
110 | |||
102 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } | 111 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } |
103 | wall_jiffies = VVIRT(.wall_jiffies); | 112 | wall_jiffies = VVIRT(.wall_jiffies); |
104 | 113 | ||
@@ -132,7 +141,7 @@ SECTIONS | |||
132 | . = ALIGN(8192); /* init_task */ | 141 | . = ALIGN(8192); /* init_task */ |
133 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | 142 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { |
134 | *(.data.init_task) | 143 | *(.data.init_task) |
135 | } | 144 | } :data |
136 | 145 | ||
137 | . = ALIGN(4096); | 146 | . = ALIGN(4096); |
138 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | 147 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { |
@@ -207,14 +216,12 @@ SECTIONS | |||
207 | __initramfs_start = .; | 216 | __initramfs_start = .; |
208 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } | 217 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } |
209 | __initramfs_end = .; | 218 | __initramfs_end = .; |
210 | /* temporary here to work around NR_CPUS. If you see this comment in 2.6.17+ | 219 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
211 | complain */ | ||
212 | . = ALIGN(4096); | ||
213 | __init_end = .; | ||
214 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
215 | __per_cpu_start = .; | 220 | __per_cpu_start = .; |
216 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } | 221 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } |
217 | __per_cpu_end = .; | 222 | __per_cpu_end = .; |
223 | . = ALIGN(4096); | ||
224 | __init_end = .; | ||
218 | 225 | ||
219 | . = ALIGN(4096); | 226 | . = ALIGN(4096); |
220 | __nosave_begin = .; | 227 | __nosave_begin = .; |
diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c index 92f70c74965f..044e852bd25e 100644 --- a/arch/x86_64/kernel/vsmp.c +++ b/arch/x86_64/kernel/vsmp.c | |||
@@ -20,6 +20,9 @@ static int __init vsmp_init(void) | |||
20 | void *address; | 20 | void *address; |
21 | unsigned int cap, ctl; | 21 | unsigned int cap, ctl; |
22 | 22 | ||
23 | if (!early_pci_allowed()) | ||
24 | return 0; | ||
25 | |||
23 | /* Check if we are running on a ScaleMP vSMP box */ | 26 | /* Check if we are running on a ScaleMP vSMP box */ |
24 | if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || | 27 | if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || |
25 | (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) | 28 | (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..ac48c3857ddb 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | #include <linux/getcpu.h> | ||
29 | 30 | ||
30 | #include <asm/vsyscall.h> | 31 | #include <asm/vsyscall.h> |
31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
@@ -33,11 +34,15 @@ | |||
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/errno.h> | 35 | #include <asm/errno.h> |
35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
37 | #include <asm/segment.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/topology.h> | ||
36 | 40 | ||
37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 41 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
38 | 42 | ||
39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 43 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 44 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
45 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
41 | 46 | ||
42 | #include <asm/unistd.h> | 47 | #include <asm/unistd.h> |
43 | 48 | ||
@@ -72,7 +77,8 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
72 | __vxtime.tsc_quot) >> 32; | 77 | __vxtime.tsc_quot) >> 32; |
73 | /* See comment in x86_64 do_gettimeofday. */ | 78 | /* See comment in x86_64 do_gettimeofday. */ |
74 | } else { | 79 | } else { |
75 | usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - | 80 | usec += ((readl((void __iomem *) |
81 | fix_to_virt(VSYSCALL_HPET) + 0xf0) - | ||
76 | __vxtime.last) * __vxtime.quot) >> 32; | 82 | __vxtime.last) * __vxtime.quot) >> 32; |
77 | } | 83 | } |
78 | } while (read_seqretry(&__xtime_lock, sequence)); | 84 | } while (read_seqretry(&__xtime_lock, sequence)); |
@@ -127,9 +133,46 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
127 | return __xtime.tv_sec; | 133 | return __xtime.tv_sec; |
128 | } | 134 | } |
129 | 135 | ||
130 | long __vsyscall(2) venosys_0(void) | 136 | /* Fast way to get current CPU and node. |
137 | This helps to do per node and per CPU caches in user space. | ||
138 | The result is not guaranteed without CPU affinity, but usually | ||
139 | works out because the scheduler tries to keep a thread on the same | ||
140 | CPU. | ||
141 | |||
142 | tcache must point to a two element sized long array. | ||
143 | All arguments can be NULL. */ | ||
144 | long __vsyscall(2) | ||
145 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
131 | { | 146 | { |
132 | return -ENOSYS; | 147 | unsigned int dummy, p; |
148 | unsigned long j = 0; | ||
149 | |||
150 | /* Fast cache - only recompute value once per jiffies and avoid | ||
151 | relatively costly rdtscp/cpuid otherwise. | ||
152 | This works because the scheduler usually keeps the process | ||
153 | on the same CPU and this syscall doesn't guarantee its | ||
154 | results anyways. | ||
155 | We do this here because otherwise user space would do it on | ||
156 | its own in a likely inferior way (no access to jiffies). | ||
157 | If you don't like it pass NULL. */ | ||
158 | if (tcache && tcache->t0 == (j = __jiffies)) { | ||
159 | p = tcache->t1; | ||
160 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | ||
161 | /* Load per CPU data from RDTSCP */ | ||
162 | rdtscp(dummy, dummy, p); | ||
163 | } else { | ||
164 | /* Load per CPU data from GDT */ | ||
165 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
166 | } | ||
167 | if (tcache) { | ||
168 | tcache->t0 = j; | ||
169 | tcache->t1 = p; | ||
170 | } | ||
171 | if (cpu) | ||
172 | *cpu = p & 0xfff; | ||
173 | if (node) | ||
174 | *node = p >> 12; | ||
175 | return 0; | ||
133 | } | 176 | } |
134 | 177 | ||
135 | long __vsyscall(3) venosys_1(void) | 178 | long __vsyscall(3) venosys_1(void) |
@@ -149,7 +192,8 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
149 | void __user *buffer, size_t *lenp, loff_t *ppos) | 192 | void __user *buffer, size_t *lenp, loff_t *ppos) |
150 | { | 193 | { |
151 | extern u16 vsysc1, vsysc2; | 194 | extern u16 vsysc1, vsysc2; |
152 | u16 *map1, *map2; | 195 | u16 __iomem *map1; |
196 | u16 __iomem *map2; | ||
153 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 197 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); |
154 | if (!write) | 198 | if (!write) |
155 | return ret; | 199 | return ret; |
@@ -164,11 +208,11 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
164 | goto out; | 208 | goto out; |
165 | } | 209 | } |
166 | if (!sysctl_vsyscall) { | 210 | if (!sysctl_vsyscall) { |
167 | *map1 = SYSCALL; | 211 | writew(SYSCALL, map1); |
168 | *map2 = SYSCALL; | 212 | writew(SYSCALL, map2); |
169 | } else { | 213 | } else { |
170 | *map1 = NOP2; | 214 | writew(NOP2, map1); |
171 | *map2 = NOP2; | 215 | writew(NOP2, map2); |
172 | } | 216 | } |
173 | iounmap(map2); | 217 | iounmap(map2); |
174 | out: | 218 | out: |
@@ -200,6 +244,43 @@ static ctl_table kernel_root_table2[] = { | |||
200 | 244 | ||
201 | #endif | 245 | #endif |
202 | 246 | ||
247 | static void __cpuinit write_rdtscp_cb(void *info) | ||
248 | { | ||
249 | write_rdtscp_aux((unsigned long)info); | ||
250 | } | ||
251 | |||
252 | void __cpuinit vsyscall_set_cpu(int cpu) | ||
253 | { | ||
254 | unsigned long *d; | ||
255 | unsigned long node = 0; | ||
256 | #ifdef CONFIG_NUMA | ||
257 | node = cpu_to_node[cpu]; | ||
258 | #endif | ||
259 | if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | ||
260 | void *info = (void *)((node << 12) | cpu); | ||
261 | /* Can happen on preemptive kernel */ | ||
262 | if (get_cpu() == cpu) | ||
263 | write_rdtscp_cb(info); | ||
264 | #ifdef CONFIG_SMP | ||
265 | else { | ||
266 | /* the notifier is unfortunately not executed on the | ||
267 | target CPU */ | ||
268 | smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); | ||
269 | } | ||
270 | #endif | ||
271 | put_cpu(); | ||
272 | } | ||
273 | |||
274 | /* Store cpu number in limit so that it can be loaded quickly | ||
275 | in user space in vgetcpu. | ||
276 | 12 bits for the CPU and 8 bits for the node. */ | ||
277 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | ||
278 | *d = 0x0f40000000000ULL; | ||
279 | *d |= cpu; | ||
280 | *d |= (node & 0xf) << 12; | ||
281 | *d |= (node >> 4) << 48; | ||
282 | } | ||
283 | |||
203 | static void __init map_vsyscall(void) | 284 | static void __init map_vsyscall(void) |
204 | { | 285 | { |
205 | extern char __vsyscall_0; | 286 | extern char __vsyscall_0; |
@@ -214,6 +295,7 @@ static int __init vsyscall_init(void) | |||
214 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 295 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
215 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 296 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
216 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 297 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
298 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
217 | map_vsyscall(); | 299 | map_vsyscall(); |
218 | #ifdef CONFIG_SYSCTL | 300 | #ifdef CONFIG_SYSCTL |
219 | register_sysctl_table(kernel_root_table2, 0); | 301 | register_sysctl_table(kernel_root_table2, 0); |
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 370952c4ff22..c3454af5e3a2 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -29,6 +29,7 @@ EXPORT_SYMBOL(__put_user_8); | |||
29 | EXPORT_SYMBOL(copy_user_generic); | 29 | EXPORT_SYMBOL(copy_user_generic); |
30 | EXPORT_SYMBOL(copy_from_user); | 30 | EXPORT_SYMBOL(copy_from_user); |
31 | EXPORT_SYMBOL(copy_to_user); | 31 | EXPORT_SYMBOL(copy_to_user); |
32 | EXPORT_SYMBOL(__copy_from_user_inatomic); | ||
32 | 33 | ||
33 | EXPORT_SYMBOL(copy_page); | 34 | EXPORT_SYMBOL(copy_page); |
34 | EXPORT_SYMBOL(clear_page); | 35 | EXPORT_SYMBOL(clear_page); |