author		Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 22:56:02 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 22:56:02 -0500
commit		4060994c3e337b40e0f6fa8ce2cc178e021baf3d
tree		980297c1747ca89354bc879cc5d17903eacb19e2	/arch/x86_64/kernel
parent		0174f72f848dfe7dc7488799776303c81b181b16
parent		d3ee871e63d0a0c70413dc0aa5534b8d6cd6ec37

Merge x86-64 update from Andi
Diffstat (limited to 'arch/x86_64/kernel')

-rw-r--r--	arch/x86_64/kernel/Makefile		  1
-rw-r--r--	arch/x86_64/kernel/aperture.c		  2
-rw-r--r--	arch/x86_64/kernel/apic.c		 10
-rw-r--r--	arch/x86_64/kernel/e820.c		  3
-rw-r--r--	arch/x86_64/kernel/entry.S		  3
-rw-r--r--	arch/x86_64/kernel/head.S		 37
-rw-r--r--	arch/x86_64/kernel/head64.c		 14
-rw-r--r--	arch/x86_64/kernel/i8259.c		  2
-rw-r--r--	arch/x86_64/kernel/io_apic.c		 80
-rw-r--r--	arch/x86_64/kernel/mce.c		 17
-rw-r--r--	arch/x86_64/kernel/mce_amd.c		538
-rw-r--r--	arch/x86_64/kernel/mpparse.c		 23
-rw-r--r--	arch/x86_64/kernel/pci-gart.c		  8
-rw-r--r--	arch/x86_64/kernel/process.c		 47
-rw-r--r--	arch/x86_64/kernel/reboot.c		  7
-rw-r--r--	arch/x86_64/kernel/setup.c		 89
-rw-r--r--	arch/x86_64/kernel/setup64.c		  2
-rw-r--r--	arch/x86_64/kernel/signal.c		 17
-rw-r--r--	arch/x86_64/kernel/smp.c		  7
-rw-r--r--	arch/x86_64/kernel/smpboot.c		111
-rw-r--r--	arch/x86_64/kernel/sys_x86_64.c		 14
-rw-r--r--	arch/x86_64/kernel/traps.c		 44
-rw-r--r--	arch/x86_64/kernel/vmlinux.lds.S	  2
-rw-r--r--	arch/x86_64/kernel/x8664_ksyms.c	  3

24 files changed, 888 insertions, 193 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 14328cab5d3a..fe4cbd1c4b2f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
 
 obj-$(CONFIG_X86_MCE)		+= mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 962ad4823b6a..c7f4fdd20f05 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 void __init iommu_hole_init(void)
 {
 	int fix, num;
-	u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0;
+	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
 	int valid_agp = 0;
 
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index b6e7715d877f..18691ce4c759 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_X86_MCE_AMD
+void setup_threshold_lvt(unsigned long lvt_off)
+{
+	unsigned int v = 0;
+	unsigned long reg = (lvt_off << 4) + 0x500;
+	v |= THRESHOLD_APIC_VECTOR;
+	apic_write(reg, v);
+}
+#endif /* CONFIG_X86_MCE_AMD */
+
 #undef APIC_DIVISOR
 
 /*
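The setup_threshold_lvt() added above is the only APIC-side piece of the new AMD threshold machinery: the 4-bit LVT offset that mce_amd.c reads out of the high half of MC4_MISC selects a register in the AMD extended-LVT block (0x500 plus 0x10 per entry), and only the vector field is written, leaving the delivery-mode and mask bits at zero. A minimal userspace sketch of the address arithmetic follows; the vector number and offset are stand-in values, not the kernel's definitions.

#include <stdio.h>

int main(void)
{
        unsigned int vector = 0xf9;     /* stand-in for THRESHOLD_APIC_VECTOR */
        unsigned long lvt_off = 1;      /* as read from MC4_MISC's LVT-offset field */

        unsigned long reg = (lvt_off << 4) + 0x500;     /* entries are 0x10 apart */
        unsigned int v = vector;        /* all remaining fields left at zero */

        printf("apic_write(0x%lx, 0x%x)\n", reg, v);    /* -> 0x510, 0xf9 */
        return 0;
}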
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index ab3f87aaff70..17579a1a174b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -23,8 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
-
-extern char _end[];
+#include <asm/sections.h>
 
 /*
  * PFN of last memory page.
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7937971d1853..9ff42041bb6b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -612,6 +612,9 @@ retint_kernel:
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 
+ENTRY(threshold_interrupt)
+	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
 #ifdef CONFIG_SMP
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index b92e5f45ed46..15290968e49d 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
+	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl	%eax, %cr3
 
 	/* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
 	movq	%rax, %cr3
 
 	/* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
 ENTRY(stext)
 ENTRY(_stext)
 
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
 	.org 0x1000
 ENTRY(init_level4_pgt)
-	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000000a007 + __PHYSICAL_START
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
 
 	.org 0x2000
 ENTRY(level3_ident_pgt)
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
 	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000000a007 + __PHYSICAL_START
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+
 	.data
 
 	.align 16
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cf6ab147a2a5..b675c5add01e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -19,14 +19,15 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized
    yet. */
 static void __init clear_bss(void)
 {
-	extern char __bss_start[], __bss_end[];
 	memset(__bss_start, 0,
-	       (unsigned long) __bss_end - (unsigned long) __bss_start);
+	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
 	boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	char *s;
@@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+
+	/*
+	 * switch to init_level4_pgt from boot_level4_pgt
+	 */
+	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
+	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+
 	pda_init(0);
 	copy_bootdata(real_mode_data);
 #ifdef CONFIG_SMP
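Taken together, the head.S and head64.c hunks split early paging in two: the CPU boots on boot_level4_pgt (which may live in .init.data when CPU hotplug is off), and x86_64_start_kernel() copies it into the permanent, initially zero-filled init_level4_pgt before reloading CR3. A small sketch of that handover pattern, with userspace stand-ins for the kernel types (the CR3 reload itself is of course kernel-only):

#include <string.h>

#define PTRS_PER_PGD 512
typedef unsigned long pgd_t;            /* stand-in for the kernel's pgd_t */

pgd_t boot_level4_pgt[PTRS_PER_PGD];    /* built at compile time, discardable */
pgd_t init_level4_pgt[PTRS_PER_PGD];    /* permanent table, zero until boot */

static void switch_to_permanent_pgt(void)
{
        /* Copy the live entries, then point the MMU at the permanent table;
           in the kernel this is the memcpy + "movq %0,%%cr3" pair above. */
        memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD * sizeof(pgd_t));
        /* load_cr3(__pa_symbol(init_level4_pgt)); -- kernel-only step */
}

int main(void)
{
        boot_level4_pgt[0] = 0x2007;    /* pretend head.S populated this */
        switch_to_permanent_pgt();
        return init_level4_pgt[0] == 0x2007 ? 0 : 1;
}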
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index a9368d4c4aba..6e5101ad3d1a 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -492,6 +492,7 @@ void invalidate_interrupt5(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
+void threshold_interrupt(void);
 void i8254_timer_resume(void);
 
 static void setup_timer_hardware(void)
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 	int pin;						\
 	struct irq_pin_list *entry = irq_2_pin + irq;		\
 								\
+	BUG_ON(irq >= NR_IRQS);					\
 	for (;;) {						\
 		unsigned int reg;				\
 		pin = entry->pin;				\
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 }
 #endif
 
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	BUG_ON(irq >= NR_IRQS);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
 		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
+			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
 	entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
+	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 }
 
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+	int i, tries, vector;
+
+	BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+	if (platform_legacy_irq(gsi))
+		return gsi;
+
+	if (gsi_2_irq[gsi] != 0xFF)
+		return (int)gsi_2_irq[gsi];
+
+	tries = NR_IRQS;
+try_again:
+	vector = assign_irq_vector(gsi);
+
+	/*
+	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+	 * use of vector and if found, return that IRQ.  However, we never want
+	 * to share legacy IRQs, which usually have a different trigger mode
+	 * than PCI.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		if (IO_APIC_VECTOR(i) == vector)
+			break;
+	if (platform_legacy_irq(i)) {
+		if (--tries >= 0) {
+			IO_APIC_VECTOR(i) = 0;
+			goto try_again;
+		}
+		panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+	}
+	if (i < NR_IRQS) {
+		gsi_2_irq[gsi] = i;
+		printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+				gsi, vector, i);
+		return i;
+	}
+
+	i = next_irq++;
+	BUG_ON(i >= NR_IRQS);
+	gsi_2_irq[gsi] = i;
+	IO_APIC_VECTOR(i) = vector;
+	printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+			gsi, vector, i);
+	return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			while (i < apic)
 				irq += nr_ioapic_registers[i++];
 			irq += pin;
+			irq = gsi_irq_sharing(irq);
 			break;
 		}
 		default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			break;
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 	return irq;
 }
 
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 
-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (IO_APIC_VECTOR(irq) > 0)
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 		return IO_APIC_VECTOR(irq);
 next:
 	current_vector += 8;
@@ -699,9 +765,8 @@ next:
 		goto next;
 
 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		offset++;
-		if (!(offset%8))
-			return -ENOSPC;
+		/* If we run out of vectors on large boxen, must share them. */
+		offset = (offset + 1) % 8;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}
 
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	entry.polarity = active_high_low;
 	entry.mask = 1;					/* Disabled (masked) */
 
+	irq = gsi_irq_sharing(irq);
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
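The comment block on gsi_irq_sharing() carries the main idea of this io_apic.c change: ACPI GSI numbers can be sparse and far larger than NR_IRQS, so they are compacted into sequential IRQ numbers starting at 16, with repeat lookups served from the gsi_2_irq table. A self-contained sketch of just that compaction step follows; the vector-sharing scan and the legacy-IRQ exclusion are omitted, and the array sizes are illustrative.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define NR_IRQS         224
#define NR_IRQ_VECTORS  1024            /* must cover the largest GSI seen */

static unsigned char gsi_2_irq[NR_IRQ_VECTORS];
static int next_irq = 16;               /* IRQs 0-15 stay 1:1 for legacy */

static int gsi_to_irq(int gsi)
{
        assert(gsi < NR_IRQ_VECTORS);
        if (gsi < 16)
                return gsi;             /* legacy IRQs keep their numbers */
        if (gsi_2_irq[gsi] != 0xFF)
                return gsi_2_irq[gsi];  /* already compacted */
        assert(next_irq < NR_IRQS);
        gsi_2_irq[gsi] = next_irq;
        return next_irq++;
}

int main(void)
{
        memset(gsi_2_irq, 0xFF, sizeof(gsi_2_irq));
        printf("%d %d %d\n", gsi_to_irq(800), gsi_to_irq(40), gsi_to_irq(800));
        /* prints "16 17 16": sparse GSIs 800 and 40 compact to 16 and 17 */
        return 0;
}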
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 69541db5ff2c..183dc6105429 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
 static unsigned long console_logged;
 static int notify_user;
 static int rip_msr;
-static int mce_bootlog;
+static int mce_bootlog = 1;
 
 /*
  * Lockless MCE logging infrastructure.
@@ -347,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 		/* disable GART TBL walk error reporting, which trips off
 		   incorrectly with the IOMMU & 3ware & Cerberus. */
 		clear_bit(10, &bank[4]);
+		/* Lots of broken BIOS around that don't clear them
+		   by default and leave crap in there. Don't log. */
+		mce_bootlog = 0;
 	}
+
 }
 
 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
@@ -356,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
 		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
 	default:
 		break;
 	}
@@ -495,16 +502,16 @@ static int __init mcheck_disable(char *str)
 /* mce=off disables machine check. Note you can reenable it later
    using sysfs.
    mce=TOLERANCELEVEL (number, see above)
-   mce=bootlog Log MCEs from before booting. Disabled by default to work
-   around buggy BIOS that leave bogus MCEs.  */
+   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+   mce=nobootlog Don't log MCEs from before booting. */
 static int __init mcheck_enable(char *str)
 {
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
 		mce_dont_init = 1;
-	else if (!strcmp(str, "bootlog"))
-		mce_bootlog = 1;
+	else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
+		mce_bootlog = str[0] == 'b';
 	else if (isdigit(str[0]))
 		get_option(&str, &tolerant);
 	else
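The rewritten option parsing above leans on the strings "bootlog" and "nobootlog" differing only in their first character, so a single branch handles both: mce_bootlog = str[0] == 'b' enables logging for the former and disables it for the latter (the new compiled-in default is on, and the AMD quirk turns it back off). A quick standalone check of that logic, assuming the "mce" prefix has already been stripped as in the kernel's __setup handling:

#include <stdio.h>
#include <string.h>

static int mce_bootlog = 1;     /* new default: log boot-time MCEs */

static void mcheck_enable(const char *str)
{
        if (*str == '=')
                str++;
        if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
                mce_bootlog = str[0] == 'b';    /* 'b' -> 1, 'n' -> 0 */
}

int main(void)
{
        mcheck_enable("=nobootlog");
        printf("after mce=nobootlog: %d\n", mce_bootlog);   /* 0 */
        mcheck_enable("=bootlog");
        printf("after mce=bootlog:   %d\n", mce_bootlog);   /* 1 */
        return 0;
}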
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
new file mode 100644
index 000000000000..1f76175ace02
--- /dev/null
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -0,0 +1,538 @@
+/*
+ *  (c) 2005 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Written by Jacob Shin - AMD, Inc.
+ *
+ *  Support : jacob.shin@amd.com
+ *
+ *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
+ *  MC4_MISC0 exists per physical processor.
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kobject.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+#include <asm/apic.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+#include <asm/percpu.h>
+
+#define PFX "mce_threshold: "
+#define VERSION "version 1.00.9"
+#define NR_BANKS 5
+#define THRESHOLD_MAX 0xFFF
+#define INT_TYPE_APIC 0x00020000
+#define MASK_VALID_HI 0x80000000
+#define MASK_LVTOFF_HI 0x00F00000
+#define MASK_COUNT_EN_HI 0x00080000
+#define MASK_INT_TYPE_HI 0x00060000
+#define MASK_OVERFLOW_HI 0x00010000
+#define MASK_ERR_COUNT_HI 0x00000FFF
+#define MASK_OVERFLOW 0x0001000000000000L
+
+struct threshold_bank {
+	unsigned int cpu;
+	u8 bank;
+	u8 interrupt_enable;
+	u16 threshold_limit;
+	struct kobject kobj;
+};
+
+static struct threshold_bank threshold_defaults = {
+	.interrupt_enable = 0,
+	.threshold_limit = THRESHOLD_MAX,
+};
+
+#ifdef CONFIG_SMP
+static unsigned char shared_bank[NR_BANKS] = {
+	0, 0, 0, 0, 1
+};
+#endif
+
+static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
+
+/*
+ * CPU Initialization
+ */
+
+/* must be called with correct cpu affinity */
+static void threshold_restart_bank(struct threshold_bank *b,
+				   int reset, u16 old_limit)
+{
+	u32 mci_misc_hi, mci_misc_lo;
+
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+
+	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		reset = 1;	/* limit cannot be lower than err count */
+
+	if (reset) {		/* reset err count and overflow bit */
+		mci_misc_hi =
+		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+		    (THRESHOLD_MAX - b->threshold_limit);
+	} else if (old_limit) {	/* change limit w/o reset */
+		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+		    (old_limit - b->threshold_limit);
+		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+		    (new_count & THRESHOLD_MAX);
+	}
+
+	b->interrupt_enable ?
+	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+
+	mci_misc_hi |= MASK_COUNT_EN_HI;
+	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+}
+
+void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
+{
+	int bank;
+	u32 mci_misc_lo, mci_misc_hi;
+	unsigned int cpu = smp_processor_id();
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+
+		/* !valid, !counter present, bios locked */
+		if (!(mci_misc_hi & MASK_VALID_HI) ||
+		    !(mci_misc_hi & MASK_VALID_HI >> 1) ||
+		    (mci_misc_hi & MASK_VALID_HI >> 2))
+			continue;
+
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+#ifdef CONFIG_SMP
+		if (shared_bank[bank] && cpu_core_id[cpu])
+			continue;
+#endif
+
+		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
+		threshold_defaults.cpu = cpu;
+		threshold_defaults.bank = bank;
+		threshold_restart_bank(&threshold_defaults, 0, 0);
+	}
+}
+
+/*
+ * APIC Interrupt Handler
+ */
+
+/*
+ * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
+ * the interrupt goes off when error_count reaches threshold_limit.
+ * the handler will simply log mcelog w/ software defined bank number.
+ */
+asmlinkage void mce_threshold_interrupt(void)
+{
+	int bank;
+	struct mce m;
+
+	ack_APIC_irq();
+	irq_enter();
+
+	memset(&m, 0, sizeof(m));
+	rdtscll(m.tsc);
+	m.cpu = smp_processor_id();
+
+	/* assume first bank caused it */
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		m.bank = MCE_THRESHOLD_BASE + bank;
+		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+
+		if (m.misc & MASK_OVERFLOW) {
+			mce_log(&m);
+			goto out;
+		}
+	}
+out:
+	irq_exit();
+}
+
+/*
+ * Sysfs Interface
+ */
+
+static struct sysdev_class threshold_sysclass = {
+	set_kset_name("threshold"),
+};
+
+static DEFINE_PER_CPU(struct sys_device, device_threshold);
+
+struct threshold_attr {
+	struct attribute attr;
+	ssize_t(*show) (struct threshold_bank *, char *);
+	ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+};
+
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
+static cpumask_t affinity_set(unsigned int cpu)
+{
+	cpumask_t oldmask = current->cpus_allowed;
+	cpumask_t newmask = CPU_MASK_NONE;
+	cpu_set(cpu, newmask);
+	set_cpus_allowed(current, newmask);
+	return oldmask;
+}
+
+static void affinity_restore(cpumask_t oldmask)
+{
+	set_cpus_allowed(current, oldmask);
+}
+
+#define SHOW_FIELDS(name) \
+        static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
+        { \
+                return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+        }
+SHOW_FIELDS(interrupt_enable)
+SHOW_FIELDS(threshold_limit)
+
+static ssize_t store_interrupt_enable(struct threshold_bank *b,
+				      const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	b->interrupt_enable = !!new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, 0);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t store_threshold_limit(struct threshold_bank *b,
+				     const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	u16 old;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	if (new > THRESHOLD_MAX)
+		new = THRESHOLD_MAX;
+	if (new < 1)
+		new = 1;
+	old = b->threshold_limit;
+	b->threshold_limit = new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, old);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+{
+	u32 high, low;
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+	affinity_restore(oldmask);
+	return sprintf(buf, "%x\n",
+		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+}
+
+static ssize_t store_error_count(struct threshold_bank *b,
+				 const char *buf, size_t count)
+{
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 1, 0);
+	affinity_restore(oldmask);
+	return 1;
+}
+
+#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
+        .attr = {.name = __stringify(_name), .mode = _mode }, \
+        .show = _show, \
+        .store = _store, \
+};
+
+#define ATTR_FIELDS(name) \
+        static struct threshold_attr name = \
+        THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+
+ATTR_FIELDS(interrupt_enable);
+ATTR_FIELDS(threshold_limit);
+ATTR_FIELDS(error_count);
+
+static struct attribute *default_attrs[] = {
+	&interrupt_enable.attr,
+	&threshold_limit.attr,
+	&error_count.attr,
+	NULL
+};
+
+#define to_bank(k) container_of(k,struct threshold_bank,kobj)
+#define to_attr(a) container_of(a,struct threshold_attr,attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->show ? a->show(b, buf) : -EIO;
+	return ret;
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->store ? a->store(b, buf, count) : -EIO;
+	return ret;
+}
+
+static struct sysfs_ops threshold_ops = {
+	.show = show,
+	.store = store,
+};
+
+static struct kobj_type threshold_ktype = {
+	.sysfs_ops = &threshold_ops,
+	.default_attrs = default_attrs,
+};
+
+/* symlinks sibling shared banks to first core.  first core owns dir/files. */
+static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+{
+	int err = 0;
+	struct threshold_bank *b = 0;
+
+#ifdef CONFIG_SMP
+	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
+		char name[16];
+		unsigned lcpu = first_cpu(cpu_core_map[cpu]);
+		if (cpu_core_id[lcpu])
+			goto out;	/* first core not up yet */
+
+		b = per_cpu(threshold_banks, lcpu)[bank];
+		if (!b)
+			goto out;
+		sprintf(name, "bank%i", bank);
+		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+					&b->kobj, name);
+		if (err)
+			goto out;
+		per_cpu(threshold_banks, cpu)[bank] = b;
+		goto out;
+	}
+#endif
+
+	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+	if (!b) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memset(b, 0, sizeof(struct threshold_bank));
+
+	b->cpu = cpu;
+	b->bank = bank;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
+	kobject_set_name(&b->kobj, "bank%i", bank);
+	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
+	b->kobj.ktype = &threshold_ktype;
+
+	err = kobject_register(&b->kobj);
+	if (err) {
+		kfree(b);
+		goto out;
+	}
+	per_cpu(threshold_banks, cpu)[bank] = b;
+out:
+	return err;
+}
+
+/* create dir/files for all valid threshold banks */
+static __cpuinit int threshold_create_device(unsigned int cpu)
+{
+	int bank;
+	int err = 0;
+
+	per_cpu(device_threshold, cpu).id = cpu;
+	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
+	err = sysdev_register(&per_cpu(device_threshold, cpu));
+	if (err)
+		goto out;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		err = threshold_create_bank(cpu, bank);
+		if (err)
+			goto out;
+	}
+out:
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * let's be hotplug friendly.
+ * in case of multiple core processors, the first core always takes ownership
+ * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
+ */
+
+/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
+{
+	struct threshold_bank *b;
+	char name[16];
+
+	b = per_cpu(threshold_banks, cpu)[bank];
+	if (!b)
+		return;
+	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
+		sprintf(name, "bank%i", bank);
+		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
+		per_cpu(threshold_banks, cpu)[bank] = 0;
+	} else {
+		kobject_unregister(&b->kobj);
+		kfree(per_cpu(threshold_banks, cpu)[bank]);
+	}
+}
+
+static __cpuinit void threshold_remove_device(unsigned int cpu)
+{
+	int bank;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		threshold_remove_bank(cpu, bank);
+	}
+	sysdev_unregister(&per_cpu(device_threshold, cpu));
+}
+
+/* link all existing siblings when first core comes up */
+static __cpuinit int threshold_create_symlinks(unsigned int cpu)
+{
+	int bank, err = 0;
+	unsigned int lcpu = 0;
+
+	if (cpu_core_id[cpu])
+		return 0;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			err = threshold_create_bank(lcpu, bank);
+		}
+	}
+	return err;
+}
+
+/* remove all symlinks before first core dies. */
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+	int bank;
+	unsigned int lcpu = 0;
+	if (cpu_core_id[cpu])
+		return;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			threshold_remove_bank(lcpu, bank);
+		}
+	}
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+static __cpuinit void threshold_create_symlinks(unsigned int cpu)
+{
+}
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+}
+static void threshold_remove_device(unsigned int cpu)
+{
+}
+#endif
+
+/* get notified when a cpu comes on/off */
+static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
+					    unsigned long action, void *hcpu)
+{
+	/* cpu was unsigned int to begin with */
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (cpu >= NR_CPUS)
+		goto out;
+
+	switch (action) {
+	case CPU_ONLINE:
+		threshold_create_device(cpu);
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		threshold_remove_symlinks(cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DEAD:
+		threshold_remove_device(cpu);
+		break;
+	default:
+		break;
+	}
+out:
+	return NOTIFY_OK;
+}
+
+static struct notifier_block threshold_cpu_notifier = {
+	.notifier_call = threshold_cpu_callback,
+};
+
+static __init int threshold_init_device(void)
+{
+	int err;
+	int lcpu = 0;
+
+	err = sysdev_class_register(&threshold_sysclass);
+	if (err)
+		goto out;
+
+	/* to hit CPUs online before the notifier is up */
+	for_each_online_cpu(lcpu) {
+		err = threshold_create_device(lcpu);
+		if (err)
+			goto out;
+	}
+	register_cpu_notifier(&threshold_cpu_notifier);
+
+out:
+	return err;
+}
+
+device_initcall(threshold_init_device);
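One detail of the new driver worth spelling out is the counter arithmetic shared by threshold_restart_bank() and show_error_count(): the hardware counter interrupts on overflow at THRESHOLD_MAX (0xFFF), so a limit of N is implemented by seeding the counter with THRESHOLD_MAX - N, and the sysfs error_count is recovered as (counter & 0xFFF) - (THRESHOLD_MAX - limit). A worked example of the round trip, pure arithmetic with no MSR access:

#include <assert.h>
#include <stdio.h>

#define THRESHOLD_MAX 0xFFF

int main(void)
{
        unsigned limit = 10;                            /* threshold_limit from sysfs */
        unsigned counter = THRESHOLD_MAX - limit;       /* seed written on reset */

        counter += 7;                                   /* 7 errors observed since */
        assert((counter & 0xFFF) - (THRESHOLD_MAX - limit) == 7);

        /* per the handler comment, the interrupt goes off once error_count
           reaches threshold_limit, i.e. after 3 more errors here */
        printf("error_count = %u, headroom = %u\n",
               (counter & 0xFFF) - (THRESHOLD_MAX - limit), limit - 7);
        return 0;
}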
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index f16d38d09daf..1105250bf02c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
  * Various Linux-internal data structures created from the
  * MP-table.
  */
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
 /* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-	int ver, cpu;
+	int cpu;
+	unsigned char ver;
 	static int found_bsp=0;
 
-	if (!(m->mpc_cpuflag & CPU_ENABLED))
+	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+		disabled_cpus++;
 		return;
+	}
 
 	printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
 		m->mpc_apicid,
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	}
 
 	cpu = num_processors++;
 
-	if (m->mpc_apicid > MAX_APICS) {
+#if MAX_APICS < 255
+	if ((int)m->mpc_apicid > MAX_APICS) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
 			m->mpc_apicid, MAX_APICS);
 		return;
 	}
+#endif
 	ver = m->mpc_apicver;
 
 	physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
 			m->mpc_irqtype, m->mpc_irqflag & 3,
 			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
 			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+	if (++mp_irq_entries >= MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
 }
 
@@ -549,7 +556,7 @@ void __init get_smp_config (void)
 	 * Read the physical hardware table.  Anything here will
 	 * override the defaults.
 	 */
-	if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+	if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
 		smp_found_config = 0;
 		printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
 		printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 88be97c96987..2e28e855ec3c 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	   uses the normal dma_mask for alloc_coherent. */
 	dma_mask &= *dev->dma_mask;
 
+	/* Why <=? Even when the mask is smaller than 4GB it is often larger
+	   than 16MB and in this case we have a chance of finding fitting memory
+	   in the next higher zone first. If not retry with true GFP_DMA. -AK */
+	if (dma_mask <= 0xffffffff)
+		gfp |= GFP_DMA32;
+
 again:
 	memory = dma_alloc_pages(dev, gfp, get_order(size));
 	if (memory == NULL)
@@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	}
 
 	if (!(gfp & GFP_DMA)) {
-		gfp |= GFP_DMA;
+		gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
 		goto again;
 	}
 	return NULL;
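The allocation strategy added here is two-step: any mask that fits in 32 bits first tries GFP_DMA32 (below 4GB, usually far roomier than the 16MB GFP_DMA zone), and only on failure swaps that bit for true GFP_DMA, which is why the retry path uses gfp = (gfp & ~GFP_DMA32) | GFP_DMA rather than just OR-ing GFP_DMA in. A compact sketch of the zone choice, with stand-in flag values rather than the real <linux/gfp.h> ones:

#include <stdio.h>

#define GFP_DMA         0x01    /* stand-in: < 16MB zone */
#define GFP_DMA32       0x02    /* stand-in: < 4GB zone */

static unsigned pick_zone(unsigned long long dma_mask, unsigned gfp, int retry)
{
        if (dma_mask <= 0xffffffffULL)
                gfp |= GFP_DMA32;       /* likely fits, and more plentiful */
        if (retry)                      /* first attempt failed the mask */
                gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
        return gfp;
}

int main(void)
{
        printf("first try: %#x\n", pick_zone(0xffffffffULL, 0, 0)); /* 0x2 */
        printf("on retry:  %#x\n", pick_zone(0xffffffffULL, 0, 1)); /* 0x1 */
        return 0;
}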
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 59be85d9a4bc..5afd63e8cef7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -144,7 +144,8 @@ void cpu_idle_wait(void)
 	do {
 		ssleep(1);
 		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+			if (cpu_isset(cpu, map) &&
+					!per_cpu(cpu_idle_state, cpu))
 				cpu_clear(cpu, map);
 		}
 		cpus_and(map, map, cpu_online_map);
@@ -275,7 +276,8 @@ void __show_regs(struct pt_regs * regs)
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
 	printk_address(regs->rip);
-	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->rax, regs->rbx, regs->rcx);
 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -427,15 +429,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 	struct pt_regs * childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+	childregs = ((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
 	*childregs = *regs;
 
 	childregs->rax = 0;
 	childregs->rsp = rsp;
-	if (rsp == ~0UL) {
+	if (rsp == ~0UL)
 		childregs->rsp = (unsigned long)childregs;
-	}
 
 	p->thread.rsp = (unsigned long) childregs;
 	p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -457,7 +458,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
 		}
-		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+				IO_BITMAP_BYTES);
 	}
 
 	/*
@@ -494,7 +496,8 @@ out:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  */
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
@@ -565,7 +568,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 	prev->userrsp = read_pda(oldrsp);
 	write_pda(oldrsp, next->userrsp);
 	write_pda(pcurrent, next_p);
-	write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+	write_pda(kernelstack,
+		  (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
 
 	/*
 	 * Now maybe reload the debug registers
@@ -646,7 +650,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
 	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
 }
 
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
 	if (!newsp)
 		newsp = regs->rsp;
@@ -682,7 +688,8 @@ unsigned long get_wchan(struct task_struct *p)
 		return 0;
 	fp = *(u64 *)(p->thread.rsp);
 	do {
-		if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+		if (fp < (unsigned long)stack ||
+		    fp > (unsigned long)stack+THREAD_SIZE)
 			return 0;
 		rip = *(u64 *)(fp+8);
 		if (!in_sched_functions(rip))
@@ -717,8 +724,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		task->thread.gsindex = 0;
 		task->thread.gs = addr;
 		if (doit) {
-		load_gs_index(0);
-		ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
+			load_gs_index(0);
+			ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 		}
 	}
 	put_cpu();
@@ -735,7 +742,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		set_32bit_tls(task, FS_TLS, addr);
 		if (doit) {
 			load_TLS(&task->thread, cpu);
-			asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+			asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 		}
 		task->thread.fsindex = FS_TLS_SEL;
 		task->thread.fs = 0;
@@ -745,8 +752,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		if (doit) {
 			/* set the selector to 0 to not confuse
 			   __switch_to */
-		asm volatile("movl %0,%%fs" :: "r" (0));
-		ret = checking_wrmsrl(MSR_FS_BASE, addr);
+			asm volatile("movl %0,%%fs" :: "r" (0));
+			ret = checking_wrmsrl(MSR_FS_BASE, addr);
 		}
 	}
 	put_cpu();
@@ -755,9 +762,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base;
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
-		} else
+		else
 			base = task->thread.fs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
@@ -766,9 +773,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base;
 		if (task->thread.gsindex == GS_TLS_SEL)
 			base = read_32bit_tls(task, GS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		} else
+		else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 47f95687905f..75235ed2b31b 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c | |||
@@ -77,6 +77,7 @@ static inline void kb_wait(void) | |||
77 | 77 | ||
78 | void machine_shutdown(void) | 78 | void machine_shutdown(void) |
79 | { | 79 | { |
80 | unsigned long flags; | ||
80 | /* Stop the cpus and apics */ | 81 | /* Stop the cpus and apics */ |
81 | #ifdef CONFIG_SMP | 82 | #ifdef CONFIG_SMP |
82 | int reboot_cpu_id; | 83 | int reboot_cpu_id; |
@@ -98,7 +99,7 @@ void machine_shutdown(void) | |||
98 | smp_send_stop(); | 99 | smp_send_stop(); |
99 | #endif | 100 | #endif |
100 | 101 | ||
101 | local_irq_disable(); | 102 | local_irq_save(flags); |
102 | 103 | ||
103 | #ifndef CONFIG_SMP | 104 | #ifndef CONFIG_SMP |
104 | disable_local_APIC(); | 105 | disable_local_APIC(); |
@@ -106,7 +107,7 @@ void machine_shutdown(void) | |||
106 | 107 | ||
107 | disable_IO_APIC(); | 108 | disable_IO_APIC(); |
108 | 109 | ||
109 | local_irq_enable(); | 110 | local_irq_restore(flags); |
110 | } | 111 | } |
111 | 112 | ||
112 | void machine_emergency_restart(void) | 113 | void machine_emergency_restart(void) |
@@ -120,7 +121,7 @@ void machine_emergency_restart(void) | |||
120 | /* Could also try the reset bit in the Hammer NB */ | 121 | /* Could also try the reset bit in the Hammer NB */ |
121 | switch (reboot_type) { | 122 | switch (reboot_type) { |
122 | case BOOT_KBD: | 123 | case BOOT_KBD: |
123 | for (i=0; i<100; i++) { | 124 | for (i=0; i<10; i++) { |
124 | kb_wait(); | 125 | kb_wait(); |
125 | udelay(50); | 126 | udelay(50); |
126 | outb(0xfe,0x64); /* pulse reset low */ | 127 | outb(0xfe,0x64); /* pulse reset low */ |
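The local_irq_save()/local_irq_restore() switch in machine_shutdown() matters when the function is entered with interrupts already disabled: restore puts the IRQ flag back as it was found, while the old enable unconditionally turned interrupts back on. A toy userspace model of the two idioms, with a plain int standing in for the CPU interrupt flag (the kernel macros take the flags variable directly, not a pointer):

#include <stdio.h>

static int irqs_enabled = 1;            /* stand-in for the IF flag */

static void local_irq_disable(void)  { irqs_enabled = 0; }
static void local_irq_enable(void)   { irqs_enabled = 1; }
static void local_irq_save(int *f)   { *f = irqs_enabled; irqs_enabled = 0; }
static void local_irq_restore(int f) { irqs_enabled = f; }

static void shutdown_path(void)
{
        int flags;

        local_irq_save(&flags);
        /* ... disable_local_APIC(), disable_IO_APIC() ... */
        local_irq_restore(flags);       /* back to the caller's state */
}

int main(void)
{
        local_irq_disable();            /* caller already has IRQs off */
        shutdown_path();
        printf("IRQs after shutdown_path: %s\n",
               irqs_enabled ? "on (wrong!)" : "still off (correct)");
        return 0;
}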
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index da0bc3e7bdf5..750e01dcbdf4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <asm/setup.h> | 60 | #include <asm/setup.h> |
61 | #include <asm/mach_apic.h> | 61 | #include <asm/mach_apic.h> |
62 | #include <asm/numa.h> | 62 | #include <asm/numa.h> |
63 | #include <asm/sections.h> | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * Machine setup.. | 66 | * Machine setup.. |
@@ -103,7 +104,6 @@ struct edid_info edid_info; | |||
103 | struct e820map e820; | 104 | struct e820map e820; |
104 | 105 | ||
105 | extern int root_mountflags; | 106 | extern int root_mountflags; |
106 | extern char _text, _etext, _edata, _end; | ||
107 | 107 | ||
108 | char command_line[COMMAND_LINE_SIZE]; | 108 | char command_line[COMMAND_LINE_SIZE]; |
109 | 109 | ||
@@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
412 | { | 412 | { |
413 | unsigned long bootmap_size, bootmap; | 413 | unsigned long bootmap_size, bootmap; |
414 | 414 | ||
415 | memory_present(0, start_pfn, end_pfn); | ||
416 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 415 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
417 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); | 416 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); |
418 | if (bootmap == -1L) | 417 | if (bootmap == -1L) |
@@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p) | |||
571 | 570 | ||
572 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); | 571 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); |
573 | 572 | ||
573 | zap_low_mappings(0); | ||
574 | |||
574 | #ifdef CONFIG_ACPI | 575 | #ifdef CONFIG_ACPI |
575 | /* | 576 | /* |
576 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | 577 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). |
@@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p) | |||
657 | } | 658 | } |
658 | #endif | 659 | #endif |
659 | 660 | ||
660 | sparse_init(); | ||
661 | |||
662 | paging_init(); | 661 | paging_init(); |
663 | 662 | ||
664 | check_ioapic(); | 663 | check_ioapic(); |
@@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
793 | #endif | 792 | #endif |
794 | 793 | ||
795 | bits = 0; | 794 | bits = 0; |
796 | while ((1 << bits) < c->x86_num_cores) | 795 | while ((1 << bits) < c->x86_max_cores) |
797 | bits++; | 796 | bits++; |
798 | 797 | ||
799 | /* Low order bits define the core id (index of core in socket) */ | 798 | /* Low order bits define the core id (index of core in socket) */ |
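The bits loop above computes how many low-order APIC-ID bits index a core within its socket; the remaining high bits identify the package. A standalone sketch of that extraction with made-up values:

#include <stdio.h>

int main(void)
{
        unsigned apicid = 0x13;         /* hypothetical APIC ID */
        int max_cores = 2;              /* dual-core part, say */
        int bits = 0;

        while ((1 << bits) < max_cores)
                bits++;

        printf("core id : %u\n", apicid & ((1u << bits) - 1));
        printf("phys id : %u\n", apicid >> bits);
        return 0;
}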
@@ -823,10 +822,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
823 | if (!node_online(node)) | 822 | if (!node_online(node)) |
824 | node = nearby_node(apicid); | 823 | node = nearby_node(apicid); |
825 | } | 824 | } |
826 | cpu_to_node[cpu] = node; | 825 | numa_set_node(cpu, node); |
827 | 826 | ||
828 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", | 827 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", |
829 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); | 828 | cpu, c->x86_max_cores, node, cpu_core_id[cpu]); |
830 | #endif | 829 | #endif |
831 | #endif | 830 | #endif |
832 | } | 831 | } |
@@ -875,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
875 | display_cacheinfo(c); | 874 | display_cacheinfo(c); |
876 | 875 | ||
877 | if (c->extended_cpuid_level >= 0x80000008) { | 876 | if (c->extended_cpuid_level >= 0x80000008) { |
878 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 877 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
879 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | 878 | if (c->x86_max_cores & (c->x86_max_cores - 1)) |
880 | c->x86_num_cores = 1; | 879 | c->x86_max_cores = 1; |
881 | 880 | ||
882 | amd_detect_cmp(c); | 881 | amd_detect_cmp(c); |
883 | } | 882 | } |
@@ -889,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
889 | { | 888 | { |
890 | #ifdef CONFIG_SMP | 889 | #ifdef CONFIG_SMP |
891 | u32 eax, ebx, ecx, edx; | 890 | u32 eax, ebx, ecx, edx; |
892 | int index_msb, tmp; | 891 | int index_msb, core_bits; |
893 | int cpu = smp_processor_id(); | 892 | int cpu = smp_processor_id(); |
894 | 893 | ||
894 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
895 | |||
896 | c->apicid = phys_pkg_id(0); | ||
897 | |||
895 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 898 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
896 | return; | 899 | return; |
897 | 900 | ||
898 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
899 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 901 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
900 | 902 | ||
901 | if (smp_num_siblings == 1) { | 903 | if (smp_num_siblings == 1) { |
902 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 904 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
903 | } else if (smp_num_siblings > 1) { | 905 | } else if (smp_num_siblings > 1 ) { |
904 | index_msb = 31; | 906 | |
905 | /* | ||
906 | * At this point we only support two siblings per | ||
907 | * processor package. | ||
908 | */ | ||
909 | if (smp_num_siblings > NR_CPUS) { | 907 | if (smp_num_siblings > NR_CPUS) { |
910 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 908 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); |
911 | smp_num_siblings = 1; | 909 | smp_num_siblings = 1; |
912 | return; | 910 | return; |
913 | } | 911 | } |
914 | tmp = smp_num_siblings; | 912 | |
915 | while ((tmp & 0x80000000 ) == 0) { | 913 | index_msb = get_count_order(smp_num_siblings); |
916 | tmp <<=1 ; | ||
917 | index_msb--; | ||
918 | } | ||
919 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
920 | index_msb++; | ||
921 | phys_proc_id[cpu] = phys_pkg_id(index_msb); | 914 | phys_proc_id[cpu] = phys_pkg_id(index_msb); |
922 | 915 | ||
923 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 916 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
924 | phys_proc_id[cpu]); | 917 | phys_proc_id[cpu]); |
925 | 918 | ||
926 | smp_num_siblings = smp_num_siblings / c->x86_num_cores; | 919 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
927 | 920 | ||
928 | tmp = smp_num_siblings; | 921 | index_msb = get_count_order(smp_num_siblings) ; |
929 | index_msb = 31; | ||
930 | while ((tmp & 0x80000000) == 0) { | ||
931 | tmp <<=1 ; | ||
932 | index_msb--; | ||
933 | } | ||
934 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
935 | index_msb++; | ||
936 | 922 | ||
937 | cpu_core_id[cpu] = phys_pkg_id(index_msb); | 923 | core_bits = get_count_order(c->x86_max_cores); |
938 | 924 | ||
939 | if (c->x86_num_cores > 1) | 925 | cpu_core_id[cpu] = phys_pkg_id(index_msb) & |
926 | ((1 << core_bits) - 1); | ||
927 | |||
928 | if (c->x86_max_cores > 1) | ||
940 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 929 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
941 | cpu_core_id[cpu]); | 930 | cpu_core_id[cpu]); |
942 | } | 931 | } |
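get_count_order() replaces the two open-coded MSB scans deleted above; both compute the smallest n with (1 << n) >= x, i.e. ceil(log2(x)). A quick userspace check of the equivalence, where count_order() below is a plain-C stand-in for the kernel helper:

#include <assert.h>
#include <stdio.h>

static int old_style(int x)     /* the removed loop's logic, verbatim */
{
        int index_msb = 31;
        unsigned tmp = x;

        while ((tmp & 0x80000000) == 0) {
                tmp <<= 1;
                index_msb--;
        }
        if (x & (x - 1))        /* not a power of two: round up */
                index_msb++;
        return index_msb;
}

static int count_order(int x)   /* what get_count_order() returns */
{
        int order = 0;

        while ((1 << order) < x)
                order++;
        return order;
}

int main(void)
{
        int x;

        for (x = 1; x <= 64; x++)
                assert(old_style(x) == count_order(x));
        printf("count_order(6) == %d\n", count_order(6));      /* 3 */
        return 0;
}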
@@ -975,7 +964,7 @@ static void srat_detect_node(void) | |||
975 | node = apicid_to_node[hard_smp_processor_id()]; | 964 | node = apicid_to_node[hard_smp_processor_id()]; |
976 | if (node == NUMA_NO_NODE) | 965 | if (node == NUMA_NO_NODE) |
977 | node = 0; | 966 | node = 0; |
978 | cpu_to_node[cpu] = node; | 967 | numa_set_node(cpu, node); |
979 | 968 | ||
980 | if (acpi_numa > 0) | 969 | if (acpi_numa > 0) |
981 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); | 970 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); |
@@ -993,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
993 | unsigned eax = cpuid_eax(0x80000008); | 982 | unsigned eax = cpuid_eax(0x80000008); |
994 | c->x86_virt_bits = (eax >> 8) & 0xff; | 983 | c->x86_virt_bits = (eax >> 8) & 0xff; |
995 | c->x86_phys_bits = eax & 0xff; | 984 | c->x86_phys_bits = eax & 0xff; |
985 | /* CPUID workaround for Intel 0F34 CPU */ | ||
986 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
987 | c->x86 == 0xF && c->x86_model == 0x3 && | ||
988 | c->x86_mask == 0x4) | ||
989 | c->x86_phys_bits = 36; | ||
996 | } | 990 | } |
997 | 991 | ||
998 | if (c->x86 == 15) | 992 | if (c->x86 == 15) |
999 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 993 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
1000 | if (c->x86 >= 15) | 994 | if (c->x86 >= 15) |
1001 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 995 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
1002 | c->x86_num_cores = intel_num_cpu_cores(c); | 996 | c->x86_max_cores = intel_num_cpu_cores(c); |
1003 | 997 | ||
1004 | srat_detect_node(); | 998 | srat_detect_node(); |
1005 | } | 999 | } |
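CPUID leaf 0x80000008 packs the physical address width into EAX bits 7:0 and the virtual width into bits 15:8, which is what init_intel() decodes above (and pins to 36 physical bits on the one misreporting 0F34 stepping). A runnable userspace sketch of the same decode using GCC's <cpuid.h>:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                return 1;               /* leaf not supported */

        printf("physical address bits: %u\n", eax & 0xff);
        printf("virtual address bits : %u\n", (eax >> 8) & 0xff);
        return 0;
}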
@@ -1037,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1037 | c->x86_model_id[0] = '\0'; /* Unset */ | 1031 | c->x86_model_id[0] = '\0'; /* Unset */ |
1038 | c->x86_clflush_size = 64; | 1032 | c->x86_clflush_size = 64; |
1039 | c->x86_cache_alignment = c->x86_clflush_size; | 1033 | c->x86_cache_alignment = c->x86_clflush_size; |
1040 | c->x86_num_cores = 1; | 1034 | c->x86_max_cores = 1; |
1041 | c->extended_cpuid_level = 0; | 1035 | c->extended_cpuid_level = 0; |
1042 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 1036 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
1043 | 1037 | ||
@@ -1060,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1060 | c->x86 = (tfms >> 8) & 0xf; | 1054 | c->x86 = (tfms >> 8) & 0xf; |
1061 | c->x86_model = (tfms >> 4) & 0xf; | 1055 | c->x86_model = (tfms >> 4) & 0xf; |
1062 | c->x86_mask = tfms & 0xf; | 1056 | c->x86_mask = tfms & 0xf; |
1063 | if (c->x86 == 0xf) { | 1057 | if (c->x86 == 0xf) |
1064 | c->x86 += (tfms >> 20) & 0xff; | 1058 | c->x86 += (tfms >> 20) & 0xff; |
1059 | if (c->x86 >= 0x6) | ||
1065 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 1060 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
1066 | } | ||
1067 | if (c->x86_capability[0] & (1<<19)) | 1061 | if (c->x86_capability[0] & (1<<19)) |
1068 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 1062 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
1069 | } else { | 1063 | } else { |
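The reshuffled branch above still applies the extended-family byte only for family 0xF, but now folds the extended-model nibble in for every family >= 6. Worked through on the 0F34 part mentioned earlier (the tfms value is a hypothetical illustration):

#include <stdio.h>

int main(void)
{
        unsigned tfms = 0x00000f34;     /* family 0xF, model 3, stepping 4 */
        unsigned family   = (tfms >> 8) & 0xf;
        unsigned model    = (tfms >> 4) & 0xf;
        unsigned stepping = tfms & 0xf;

        if (family == 0xf)
                family += (tfms >> 20) & 0xff;  /* extended family byte */
        if (family >= 0x6)
                model += ((tfms >> 16) & 0xf) << 4; /* extended model */

        printf("family %#x model %#x stepping %#x\n",
               family, model, stepping);
        return 0;
}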
@@ -1271,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1271 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 1265 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
1272 | 1266 | ||
1273 | #ifdef CONFIG_SMP | 1267 | #ifdef CONFIG_SMP |
1274 | if (smp_num_siblings * c->x86_num_cores > 1) { | 1268 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1275 | int cpu = c - cpu_data; | 1269 | int cpu = c - cpu_data; |
1276 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); | 1270 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); |
1277 | seq_printf(m, "siblings\t: %d\n", | 1271 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); |
1278 | c->x86_num_cores * smp_num_siblings); | ||
1279 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); | 1272 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); |
1280 | seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); | 1273 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1281 | } | 1274 | } |
1282 | #endif | 1275 | #endif |
1283 | 1276 | ||
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 79190891fbc5..06dc354375c3 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -141,7 +141,6 @@ void pda_init(int cpu) | |||
141 | panic("cannot allocate irqstack for cpu %d", cpu); | 141 | panic("cannot allocate irqstack for cpu %d", cpu); |
142 | } | 142 | } |
143 | 143 | ||
144 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
145 | 144 | ||
146 | pda->irqstackptr += IRQSTACKSIZE-64; | 145 | pda->irqstackptr += IRQSTACKSIZE-64; |
147 | } | 146 | } |
@@ -197,6 +196,7 @@ void __cpuinit cpu_init (void) | |||
197 | /* CPU 0 is initialised in head64.c */ | 196 | /* CPU 0 is initialised in head64.c */ |
198 | if (cpu != 0) { | 197 | if (cpu != 0) { |
199 | pda_init(cpu); | 198 | pda_init(cpu); |
199 | zap_low_mappings(cpu); | ||
200 | } else | 200 | } else |
201 | estacks = boot_exception_stacks; | 201 | estacks = boot_exception_stacks; |
202 | 202 | ||
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index d642fbf3da29..5876df116c92 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
110 | COPY(r14); | 110 | COPY(r14); |
111 | COPY(r15); | 111 | COPY(r15); |
112 | 112 | ||
113 | /* Kernel saves and restores only the CS segment register on signals, | ||
114 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
115 | * App's signal handler can save/restore other segments if needed. */ | ||
116 | { | ||
117 | unsigned cs; | ||
118 | err |= __get_user(cs, &sc->cs); | ||
119 | regs->cs = cs | 3; /* Force into user mode */ | ||
120 | } | ||
121 | |||
113 | { | 122 | { |
114 | unsigned int tmpflags; | 123 | unsigned int tmpflags; |
115 | err |= __get_user(tmpflags, &sc->eflags); | 124 | err |= __get_user(tmpflags, &sc->eflags); |
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
187 | { | 196 | { |
188 | int err = 0; | 197 | int err = 0; |
189 | 198 | ||
199 | err |= __put_user(regs->cs, &sc->cs); | ||
190 | err |= __put_user(0, &sc->gs); | 200 | err |= __put_user(0, &sc->gs); |
191 | err |= __put_user(0, &sc->fs); | 201 | err |= __put_user(0, &sc->fs); |
192 | 202 | ||
@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
318 | 328 | ||
319 | regs->rsp = (unsigned long)frame; | 329 | regs->rsp = (unsigned long)frame; |
320 | 330 | ||
331 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
332 | even if the handler happens to be interrupting 32-bit code. */ | ||
333 | regs->cs = __USER_CS; | ||
334 | |||
335 | /* This, by contrast, has nothing to do with segment registers - | ||
336 | see include/asm-x86_64/uaccess.h for details. */ | ||
321 | set_fs(USER_DS); | 337 | set_fs(USER_DS); |
338 | |||
322 | regs->eflags &= ~TF_MASK; | 339 | regs->eflags &= ~TF_MASK; |
323 | if (test_thread_flag(TIF_SINGLESTEP)) | 340 | if (test_thread_flag(TIF_SINGLESTEP)) |
324 | ptrace_notify(SIGTRAP); | 341 | ptrace_notify(SIGTRAP); |
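With CS now saved by setup_sigcontext(), a handler can observe it through the glibc ucontext, where gregs[REG_CSGSFS] packs CS into its low 16 bits on x86-64. A userspace sketch (printf inside a handler is not async-signal-safe; demo only, and 0x33 as __USER_CS assumes a stock x86-64 GDT layout):

#define _GNU_SOURCE
#include <stdio.h>
#include <signal.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *si, void *uc_void)
{
        ucontext_t *uc = uc_void;
        unsigned long csgsfs = uc->uc_mcontext.gregs[REG_CSGSFS];

        (void)sig; (void)si;
        /* Low 16 bits are the CS saved on signal delivery. */
        printf("CS in sigcontext: %#lx\n", csgsfs & 0xffff);
}

int main(void)
{
        struct sigaction sa = { .sa_sigaction = handler,
                                .sa_flags = SA_SIGINFO };

        sigaction(SIGUSR1, &sa, NULL);
        raise(SIGUSR1);
        return 0;
}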
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9db9dda161b4..cfc3d9dccbd9 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -28,8 +28,6 @@ | |||
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/apicdef.h> | 29 | #include <asm/apicdef.h> |
30 | 30 | ||
31 | #define __cpuinit __init | ||
32 | |||
33 | /* | 31 | /* |
34 | * Smarter SMP flushing macros. | 32 | * Smarter SMP flushing macros. |
35 | * c/o Linus Torvalds. | 33 | * c/o Linus Torvalds. |
@@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
452 | 450 | ||
453 | void smp_stop_cpu(void) | 451 | void smp_stop_cpu(void) |
454 | { | 452 | { |
453 | unsigned long flags; | ||
455 | /* | 454 | /* |
456 | * Remove this CPU: | 455 | * Remove this CPU: |
457 | */ | 456 | */ |
458 | cpu_clear(smp_processor_id(), cpu_online_map); | 457 | cpu_clear(smp_processor_id(), cpu_online_map); |
459 | local_irq_disable(); | 458 | local_irq_save(flags); |
460 | disable_local_APIC(); | 459 | disable_local_APIC(); |
461 | local_irq_enable(); | 460 | local_irq_restore(flags); |
462 | } | 461 | } |
463 | 462 | ||
464 | static void smp_really_stop_cpu(void *dummy) | 463 | static void smp_really_stop_cpu(void *dummy) |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index c4e59bbdc187..683c33f7b967 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -64,6 +64,7 @@ | |||
64 | int smp_num_siblings = 1; | 64 | int smp_num_siblings = 1; |
65 | /* Package ID of each logical CPU */ | 65 | /* Package ID of each logical CPU */ |
66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
67 | /* core ID of each logical CPU */ | ||
67 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
68 | 69 | ||
69 | /* Bitmask of currently online CPUs */ | 70 | /* Bitmask of currently online CPUs */ |
@@ -87,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |||
87 | /* Set when the idlers are all forked */ | 88 | /* Set when the idlers are all forked */ |
88 | int smp_threads_ready; | 89 | int smp_threads_ready; |
89 | 90 | ||
91 | /* representing HT siblings of each logical CPU */ | ||
90 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
93 | |||
94 | /* representing HT and core siblings of each logical CPU */ | ||
91 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 95 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
92 | EXPORT_SYMBOL(cpu_core_map); | 96 | EXPORT_SYMBOL(cpu_core_map); |
93 | 97 | ||
@@ -434,30 +438,59 @@ void __cpuinit smp_callin(void) | |||
434 | cpu_set(cpuid, cpu_callin_map); | 438 | cpu_set(cpuid, cpu_callin_map); |
435 | } | 439 | } |
436 | 440 | ||
441 | /* representing cpus for which sibling maps can be computed */ | ||
442 | static cpumask_t cpu_sibling_setup_map; | ||
443 | |||
437 | static inline void set_cpu_sibling_map(int cpu) | 444 | static inline void set_cpu_sibling_map(int cpu) |
438 | { | 445 | { |
439 | int i; | 446 | int i; |
447 | struct cpuinfo_x86 *c = cpu_data; | ||
448 | |||
449 | cpu_set(cpu, cpu_sibling_setup_map); | ||
440 | 450 | ||
441 | if (smp_num_siblings > 1) { | 451 | if (smp_num_siblings > 1) { |
442 | for_each_cpu(i) { | 452 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
443 | if (cpu_core_id[cpu] == cpu_core_id[i]) { | 453 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
454 | cpu_core_id[cpu] == cpu_core_id[i]) { | ||
444 | cpu_set(i, cpu_sibling_map[cpu]); | 455 | cpu_set(i, cpu_sibling_map[cpu]); |
445 | cpu_set(cpu, cpu_sibling_map[i]); | 456 | cpu_set(cpu, cpu_sibling_map[i]); |
457 | cpu_set(i, cpu_core_map[cpu]); | ||
458 | cpu_set(cpu, cpu_core_map[i]); | ||
446 | } | 459 | } |
447 | } | 460 | } |
448 | } else { | 461 | } else { |
449 | cpu_set(cpu, cpu_sibling_map[cpu]); | 462 | cpu_set(cpu, cpu_sibling_map[cpu]); |
450 | } | 463 | } |
451 | 464 | ||
452 | if (current_cpu_data.x86_num_cores > 1) { | 465 | if (current_cpu_data.x86_max_cores == 1) { |
453 | for_each_cpu(i) { | ||
454 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
455 | cpu_set(i, cpu_core_map[cpu]); | ||
456 | cpu_set(cpu, cpu_core_map[i]); | ||
457 | } | ||
458 | } | ||
459 | } else { | ||
460 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 466 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
467 | c[cpu].booted_cores = 1; | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | ||
472 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
473 | cpu_set(i, cpu_core_map[cpu]); | ||
474 | cpu_set(cpu, cpu_core_map[i]); | ||
475 | /* | ||
476 | * Does this new cpu bringup a new core? | ||
477 | */ | ||
478 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | ||
479 | /* | ||
480 | * for each core in package, increment | ||
481 | * the booted_cores for this new cpu | ||
482 | */ | ||
483 | if (first_cpu(cpu_sibling_map[i]) == i) | ||
484 | c[cpu].booted_cores++; | ||
485 | /* | ||
486 | * increment the core count for all | ||
487 | * the other cpus in this package | ||
488 | */ | ||
489 | if (i != cpu) | ||
490 | c[i].booted_cores++; | ||
491 | } else if (i != cpu && !c[cpu].booted_cores) | ||
492 | c[cpu].booted_cores = c[i].booted_cores; | ||
493 | } | ||
461 | } | 494 | } |
462 | } | 495 | } |
463 | 496 | ||
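The booted_cores bookkeeping above counts a core exactly once, when its first thread comes online, and lets threads that arrive later inherit the package-wide count. A toy simulation of that rule on a hypothetical one-package, two-core, two-thread box, mirroring the branch structure of set_cpu_sibling_map():

#include <stdio.h>

#define NCPUS 4

/* hypothetical topology: cpu -> (package, core); HT pairs {0,1}, {2,3} */
static const int pkg[NCPUS]  = { 0, 0, 0, 0 };
static const int core[NCPUS] = { 0, 0, 1, 1 };

static int online[NCPUS], booted_cores[NCPUS];

static int sibling_count(int cpu)       /* online threads in cpu's core */
{
        int i, n = 0;

        for (i = 0; i < NCPUS; i++)
                if (online[i] && pkg[i] == pkg[cpu] && core[i] == core[cpu])
                        n++;
        return n;
}

static int first_sibling(int cpu)       /* lowest online thread in the core */
{
        int i;

        for (i = 0; i < NCPUS; i++)
                if (online[i] && pkg[i] == pkg[cpu] && core[i] == core[cpu])
                        return i;
        return -1;
}

static void bring_up(int cpu)
{
        int i;

        online[cpu] = 1;
        for (i = 0; i < NCPUS; i++) {
                if (!online[i] || pkg[i] != pkg[cpu])
                        continue;
                if (sibling_count(cpu) == 1) {  /* first thread of a core */
                        if (first_sibling(i) == i)
                                booted_cores[cpu]++;    /* count i's core */
                        if (i != cpu)
                                booted_cores[i]++;      /* cpu's new core */
                } else if (i != cpu && !booted_cores[cpu])
                        booted_cores[cpu] = booted_cores[i]; /* inherit */
        }
        printf("cpu %d up: booted_cores = { %d %d %d %d }\n", cpu,
               booted_cores[0], booted_cores[1],
               booted_cores[2], booted_cores[3]);
}

int main(void)
{
        int cpu;

        for (cpu = 0; cpu < NCPUS; cpu++)
                bring_up(cpu);          /* every cpu ends up seeing 2 */
        return 0;
}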
@@ -879,6 +912,9 @@ static __init void disable_smp(void) | |||
879 | } | 912 | } |
880 | 913 | ||
881 | #ifdef CONFIG_HOTPLUG_CPU | 914 | #ifdef CONFIG_HOTPLUG_CPU |
915 | |||
916 | int additional_cpus __initdata = -1; | ||
917 | |||
882 | /* | 918 | /* |
883 | * cpu_possible_map should be static, it cannot change as cpu's | 919 | * cpu_possible_map should be static, it cannot change as cpu's |
884 | * are onlined, or offlined. The reason is per-cpu data-structures | 920 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -887,14 +923,38 @@ static __init void disable_smp(void) | |||
887 | * cpu_present_map on the other hand can change dynamically. | 923 | * cpu_present_map on the other hand can change dynamically. |
888 | * In case when cpu_hotplug is not compiled, then we resort to current | 924 | * In case when cpu_hotplug is not compiled, then we resort to current |
889 | * behaviour, which is cpu_possible == cpu_present. | 925 | * behaviour, which is cpu_possible == cpu_present. |
890 | * If cpu-hotplug is supported, then we need to preallocate for all | ||
891 | * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. | ||
892 | * - Ashok Raj | 926 | * - Ashok Raj |
927 | * | ||
928 | * Three ways to find out the number of additional hotplug CPUs: | ||
929 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | ||
930 | * - otherwise use half of the available CPUs or 2, whatever is more. | ||
931 | * - The user can overwrite it with additional_cpus=NUM | ||
932 | * We do this because additional CPUs waste a lot of memory. | ||
933 | * -AK | ||
893 | */ | 934 | */ |
894 | __init void prefill_possible_map(void) | 935 | __init void prefill_possible_map(void) |
895 | { | 936 | { |
896 | int i; | 937 | int i; |
897 | for (i = 0; i < NR_CPUS; i++) | 938 | int possible; |
939 | |||
940 | if (additional_cpus == -1) { | ||
941 | if (disabled_cpus > 0) { | ||
942 | additional_cpus = disabled_cpus; | ||
943 | } else { | ||
944 | additional_cpus = num_processors / 2; | ||
945 | if (additional_cpus == 0) | ||
946 | additional_cpus = 2; | ||
947 | } | ||
948 | } | ||
949 | possible = num_processors + additional_cpus; | ||
950 | if (possible > NR_CPUS) | ||
951 | possible = NR_CPUS; | ||
952 | |||
953 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | ||
954 | possible, | ||
955 | max_t(int, possible - num_processors, 0)); | ||
956 | |||
957 | for (i = 0; i < possible; i++) | ||
898 | cpu_set(i, cpu_possible_map); | 958 | cpu_set(i, cpu_possible_map); |
899 | } | 959 | } |
900 | #endif | 960 | #endif |
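The sizing policy spelled out in the comment above, as a standalone function; the inputs in main() are made up, the arithmetic is the hunk's:

#include <stdio.h>

#define NR_CPUS 8

static int possible_cpus(int num_processors, int disabled_cpus,
                         int additional_cpus /* -1 = auto */)
{
        int possible;

        if (additional_cpus == -1) {
                if (disabled_cpus > 0) {
                        additional_cpus = disabled_cpus;
                } else {
                        additional_cpus = num_processors / 2;
                        if (additional_cpus == 0)
                                additional_cpus = 2;
                }
        }
        possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;
        return possible;
}

int main(void)
{
        printf("%d\n", possible_cpus(2, 0, -1)); /* 2 + 2/2          -> 3 */
        printf("%d\n", possible_cpus(2, 2, -1)); /* BIOS-disabled     -> 4 */
        printf("%d\n", possible_cpus(6, 0, 4));  /* clamped to NR_CPUS=8 */
        return 0;
}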
@@ -965,6 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
965 | nmi_watchdog_default(); | 1025 | nmi_watchdog_default(); |
966 | current_cpu_data = boot_cpu_data; | 1026 | current_cpu_data = boot_cpu_data; |
967 | current_thread_info()->cpu = 0; /* needed? */ | 1027 | current_thread_info()->cpu = 0; /* needed? */ |
1028 | set_cpu_sibling_map(0); | ||
968 | 1029 | ||
969 | if (smp_sanity_check(max_cpus) < 0) { | 1030 | if (smp_sanity_check(max_cpus) < 0) { |
970 | printk(KERN_INFO "SMP disabled\n"); | 1031 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1008,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void) | |||
1008 | int me = smp_processor_id(); | 1069 | int me = smp_processor_id(); |
1009 | cpu_set(me, cpu_online_map); | 1070 | cpu_set(me, cpu_online_map); |
1010 | cpu_set(me, cpu_callout_map); | 1071 | cpu_set(me, cpu_callout_map); |
1011 | cpu_set(0, cpu_sibling_map[0]); | ||
1012 | cpu_set(0, cpu_core_map[0]); | ||
1013 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1072 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1014 | } | 1073 | } |
1015 | 1074 | ||
@@ -1062,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1062 | */ | 1121 | */ |
1063 | void __init smp_cpus_done(unsigned int max_cpus) | 1122 | void __init smp_cpus_done(unsigned int max_cpus) |
1064 | { | 1123 | { |
1065 | #ifndef CONFIG_HOTPLUG_CPU | ||
1066 | zap_low_mappings(); | ||
1067 | #endif | ||
1068 | smp_cleanup_boot(); | 1124 | smp_cleanup_boot(); |
1069 | 1125 | ||
1070 | #ifdef CONFIG_X86_IO_APIC | 1126 | #ifdef CONFIG_X86_IO_APIC |
@@ -1081,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1081 | static void remove_siblinginfo(int cpu) | 1137 | static void remove_siblinginfo(int cpu) |
1082 | { | 1138 | { |
1083 | int sibling; | 1139 | int sibling; |
1140 | struct cpuinfo_x86 *c = cpu_data; | ||
1084 | 1141 | ||
1142 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
1143 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1144 | /* | ||
1145 | * last thread sibling in this cpu core going down | ||
1146 | */ | ||
1147 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
1148 | c[sibling].booted_cores--; | ||
1149 | } | ||
1150 | |||
1085 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1151 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1086 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1152 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1087 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) | ||
1088 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1089 | cpus_clear(cpu_sibling_map[cpu]); | 1153 | cpus_clear(cpu_sibling_map[cpu]); |
1090 | cpus_clear(cpu_core_map[cpu]); | 1154 | cpus_clear(cpu_core_map[cpu]); |
1091 | phys_proc_id[cpu] = BAD_APICID; | 1155 | phys_proc_id[cpu] = BAD_APICID; |
1092 | cpu_core_id[cpu] = BAD_APICID; | 1156 | cpu_core_id[cpu] = BAD_APICID; |
1157 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1093 | } | 1158 | } |
1094 | 1159 | ||
1095 | void remove_cpu_from_maps(void) | 1160 | void remove_cpu_from_maps(void) |
@@ -1153,6 +1218,12 @@ void __cpu_die(unsigned int cpu) | |||
1153 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1218 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1154 | } | 1219 | } |
1155 | 1220 | ||
1221 | static __init int setup_additional_cpus(char *s) | ||
1222 | { | ||
1223 | return get_option(&s, &additional_cpus); | ||
1224 | } | ||
1225 | __setup("additional_cpus=", setup_additional_cpus); | ||
1226 | |||
1156 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1227 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1157 | 1228 | ||
1158 | int __cpu_disable(void) | 1229 | int __cpu_disable(void) |
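The new __setup() handler feeds the text after "additional_cpus=" to the kernel's get_option() integer parser. A userspace stand-in with strtol playing that role (the option-matching that __setup() does by prefix is spelled out by hand here):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int additional_cpus = -1;

static int setup_additional_cpus(const char *s)
{
        char *end;
        long v = strtol(s, &end, 0);    /* base 0: accepts 0x.../0... too */

        if (end == s)
                return 0;               /* no number found */
        additional_cpus = (int)v;
        return 1;
}

int main(void)
{
        const char *arg = "additional_cpus=4";  /* from the kernel cmdline */
        const char *prefix = "additional_cpus=";

        if (strncmp(arg, prefix, strlen(prefix)) == 0)
                setup_additional_cpus(arg + strlen(prefix));
        printf("additional_cpus = %d\n", additional_cpus);
        return 0;
}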
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index cc7821c68851..6449ea8fe756 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c | |||
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name) | |||
154 | err |= copy_to_user(&name->machine, "i686", 5); | 154 | err |= copy_to_user(&name->machine, "i686", 5); |
155 | return err ? -EFAULT : 0; | 155 | return err ? -EFAULT : 0; |
156 | } | 156 | } |
157 | |||
158 | asmlinkage long sys_time64(long __user * tloc) | ||
159 | { | ||
160 | struct timeval now; | ||
161 | int i; | ||
162 | |||
163 | do_gettimeofday(&now); | ||
164 | i = now.tv_sec; | ||
165 | if (tloc) { | ||
166 | if (put_user(i,tloc)) | ||
167 | i = -EFAULT; | ||
168 | } | ||
169 | return i; | ||
170 | } | ||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b5e09e6b5536..bf337f493189 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
428 | { | 428 | { |
429 | conditional_sti(regs); | 429 | conditional_sti(regs); |
430 | 430 | ||
431 | #ifdef CONFIG_CHECKING | ||
432 | { | ||
433 | unsigned long gs; | ||
434 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
435 | rdmsrl(MSR_GS_BASE, gs); | ||
436 | if (gs != (unsigned long)pda) { | ||
437 | wrmsrl(MSR_GS_BASE, pda); | ||
438 | printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, | ||
439 | regs->rip); | ||
440 | } | ||
441 | } | ||
442 | #endif | ||
443 | |||
444 | if (user_mode(regs)) { | 431 | if (user_mode(regs)) { |
445 | struct task_struct *tsk = current; | 432 | struct task_struct *tsk = current; |
446 | 433 | ||
@@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
513 | { | 500 | { |
514 | conditional_sti(regs); | 501 | conditional_sti(regs); |
515 | 502 | ||
516 | #ifdef CONFIG_CHECKING | ||
517 | { | ||
518 | unsigned long gs; | ||
519 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
520 | rdmsrl(MSR_GS_BASE, gs); | ||
521 | if (gs != (unsigned long)pda) { | ||
522 | wrmsrl(MSR_GS_BASE, pda); | ||
523 | oops_in_progress++; | ||
524 | printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); | ||
525 | oops_in_progress--; | ||
526 | } | ||
527 | } | ||
528 | #endif | ||
529 | |||
530 | if (user_mode(regs)) { | 503 | if (user_mode(regs)) { |
531 | struct task_struct *tsk = current; | 504 | struct task_struct *tsk = current; |
532 | 505 | ||
@@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
665 | struct task_struct *tsk = current; | 638 | struct task_struct *tsk = current; |
666 | siginfo_t info; | 639 | siginfo_t info; |
667 | 640 | ||
668 | #ifdef CONFIG_CHECKING | ||
669 | { | ||
670 | /* RED-PEN interaction with debugger - could destroy gs */ | ||
671 | unsigned long gs; | ||
672 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
673 | rdmsrl(MSR_GS_BASE, gs); | ||
674 | if (gs != (unsigned long)pda) { | ||
675 | wrmsrl(MSR_GS_BASE, pda); | ||
676 | printk("debug handler: wrong gs %lx expected %p\n", gs, pda); | ||
677 | } | ||
678 | } | ||
679 | #endif | ||
680 | |||
681 | get_debugreg(condition, 6); | 641 | get_debugreg(condition, 6); |
682 | 642 | ||
683 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 643 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
@@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | |||
888 | { | 848 | { |
889 | } | 849 | } |
890 | 850 | ||
851 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
852 | { | ||
853 | } | ||
854 | |||
891 | /* | 855 | /* |
892 | * 'math_state_restore()' saves the current math information in the | 856 | * 'math_state_restore()' saves the current math information in the |
893 | * old math state array, and gets the new ones from the current task | 857 | * old math state array, and gets the new ones from the current task |
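The weak mce_threshold_interrupt() stub added above is a link-time default: if an object with a strong definition is linked in (mce_amd.o, when CONFIG_X86_MCE_AMD is set), that one wins; otherwise the empty stub satisfies the interrupt entry point's reference. A minimal userspace demo of the mechanism:

#include <stdio.h>

/* Fallback, analogous to the stub in traps.c. A strong definition of
 * the same symbol in another object file would replace it at link time;
 * this weak copy is used only when no such object is linked. */
void __attribute__((weak)) mce_threshold_interrupt(void)
{
        puts("mce_threshold_interrupt: weak default (no handler linked)");
}

int main(void)
{
        mce_threshold_interrupt();
        return 0;
}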
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 6dd642cad2ef..58b19215b4b3 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -50,7 +50,7 @@ SECTIONS | |||
50 | *(.bss.page_aligned) | 50 | *(.bss.page_aligned) |
51 | *(.bss) | 51 | *(.bss) |
52 | } | 52 | } |
53 | __bss_end = .; | 53 | __bss_stop = .; |
54 | 54 | ||
55 | . = ALIGN(PAGE_SIZE); | 55 | . = ALIGN(PAGE_SIZE); |
56 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 56 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
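__bss_end becomes __bss_stop here to match the name <asm/sections.h> declares, the same header that replaced the open-coded externs in setup.c earlier in this patch. C code consumes such linker-script symbols as arrays whose address is the only meaningful part; a userspace sketch against symbols the default GNU ld script is expected to provide (an assumption about the host toolchain, not kernel code):

#include <stdio.h>

extern char __bss_start[];      /* start of .bss, from the default ldscript */
extern char _end[];             /* first address past .bss */

int main(void)
{
        printf(".bss spans %p .. %p (%ld bytes)\n",
               (void *)__bss_start, (void *)_end,
               (long)(_end - __bss_start));
        return 0;
}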
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index fd99ddd009bc..4a54221e10bc 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page); | |||
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | EXPORT_SYMBOL(cpu_khz); | 205 | EXPORT_SYMBOL(cpu_khz); |
206 | |||
207 | EXPORT_SYMBOL(load_gs_index); | ||
208 | |||