author	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 22:56:02 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 22:56:02 -0500
commit	4060994c3e337b40e0f6fa8ce2cc178e021baf3d (patch)
tree	980297c1747ca89354bc879cc5d17903eacb19e2 /arch
parent	0174f72f848dfe7dc7488799776303c81b181b16 (diff)
parent	d3ee871e63d0a0c70413dc0aa5534b8d6cd6ec37 (diff)
Merge x86-64 update from Andi
Diffstat (limited to 'arch')
-rw-r--r--	arch/i386/kernel/acpi/boot.c	|  17
-rw-r--r--	arch/i386/kernel/cpu/amd.c	|  12
-rw-r--r--	arch/i386/kernel/cpu/common.c	|  40
-rw-r--r--	arch/i386/kernel/cpu/intel.c	|   2
-rw-r--r--	arch/i386/kernel/cpu/intel_cacheinfo.c	|  46
-rw-r--r--	arch/i386/kernel/cpu/mtrr/main.c	|   8
-rw-r--r--	arch/i386/kernel/cpu/proc.c	|   7
-rw-r--r--	arch/i386/kernel/smpboot.c	|  73
-rw-r--r--	arch/i386/kernel/srat.c	|   4
-rw-r--r--	arch/ia64/Kconfig	|   4
-rw-r--r--	arch/x86_64/Kconfig	|  57
-rw-r--r--	arch/x86_64/Kconfig.debug	|   9
-rw-r--r--	arch/x86_64/defconfig	|  98
-rw-r--r--	arch/x86_64/ia32/ia32_aout.c	|   3
-rw-r--r--	arch/x86_64/ia32/ia32_binfmt.c	|   4
-rw-r--r--	arch/x86_64/kernel/Makefile	|   1
-rw-r--r--	arch/x86_64/kernel/aperture.c	|   2
-rw-r--r--	arch/x86_64/kernel/apic.c	|  10
-rw-r--r--	arch/x86_64/kernel/e820.c	|   3
-rw-r--r--	arch/x86_64/kernel/entry.S	|   3
-rw-r--r--	arch/x86_64/kernel/head.S	|  37
-rw-r--r--	arch/x86_64/kernel/head64.c	|  14
-rw-r--r--	arch/x86_64/kernel/i8259.c	|   2
-rw-r--r--	arch/x86_64/kernel/io_apic.c	|  80
-rw-r--r--	arch/x86_64/kernel/mce.c	|  17
-rw-r--r--	arch/x86_64/kernel/mce_amd.c	| 538
-rw-r--r--	arch/x86_64/kernel/mpparse.c	|  23
-rw-r--r--	arch/x86_64/kernel/pci-gart.c	|   8
-rw-r--r--	arch/x86_64/kernel/process.c	|  47
-rw-r--r--	arch/x86_64/kernel/reboot.c	|   7
-rw-r--r--	arch/x86_64/kernel/setup.c	|  89
-rw-r--r--	arch/x86_64/kernel/setup64.c	|   2
-rw-r--r--	arch/x86_64/kernel/signal.c	|  17
-rw-r--r--	arch/x86_64/kernel/smp.c	|   7
-rw-r--r--	arch/x86_64/kernel/smpboot.c	| 111
-rw-r--r--	arch/x86_64/kernel/sys_x86_64.c	|  14
-rw-r--r--	arch/x86_64/kernel/traps.c	|  44
-rw-r--r--	arch/x86_64/kernel/vmlinux.lds.S	|   2
-rw-r--r--	arch/x86_64/kernel/x8664_ksyms.c	|   3
-rw-r--r--	arch/x86_64/lib/clear_page.S	|  38
-rw-r--r--	arch/x86_64/lib/copy_page.S	|  87
-rw-r--r--	arch/x86_64/lib/memcpy.S	|  93
-rw-r--r--	arch/x86_64/lib/memset.S	|  94
-rw-r--r--	arch/x86_64/mm/fault.c	|  19
-rw-r--r--	arch/x86_64/mm/init.c	| 129
-rw-r--r--	arch/x86_64/mm/k8topology.c	|   1
-rw-r--r--	arch/x86_64/mm/numa.c	| 122
-rw-r--r--	arch/x86_64/mm/srat.c	|   6
48 files changed, 1311 insertions, 743 deletions
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index b66c13c0cc0f..f36677241ecd 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -39,17 +39,14 @@
 
 #ifdef CONFIG_X86_64
 
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
 extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
-	return 0;
-}
 
+extern int gsi_irq_sharing(int gsi);
 #include <asm/proto.h>
 
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
 #else				/* X86 */
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void)
 #include <mach_mpparse.h>
 #endif	/* CONFIG_X86_LOCAL_APIC */
 
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
 #endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
@@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 		*irq = IO_APIC_VECTOR(gsi);
 	else
 #endif
-		*irq = gsi;
+		*irq = gsi_irq_sharing(gsi);
 	return 0;
 }
 
@@ -543,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length)
 	 * RSDP signature.
 	 */
 	for (offset = 0; offset < length; offset += 16) {
-		if (strncmp((char *)(start + offset), "RSD PTR ", sig_len))
+		if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
 			continue;
 		return (start + offset);
 	}
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 53a1681cd964..e344ef88cfcd 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);
 
 	if (cpuid_eax(0x80000000) >= 0x80000008) {
-		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-		if (c->x86_num_cores & (c->x86_num_cores - 1))
-			c->x86_num_cores = 1;
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_max_cores & (c->x86_max_cores - 1))
+			c->x86_max_cores = 1;
 	}
 
 #ifdef CONFIG_X86_HT
@@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	 * distingush the cores.  Assumes number of cores is a power
 	 * of two.
 	 */
-	if (c->x86_num_cores > 1) {
+	if (c->x86_max_cores > 1) {
 		int cpu = smp_processor_id();
 		unsigned bits = 0;
-		while ((1 << bits) < c->x86_num_cores)
+		while ((1 << bits) < c->x86_max_cores)
 			bits++;
 		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
 		phys_proc_id[cpu] >>= bits;
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
-		       cpu, c->x86_num_cores, cpu_core_id[cpu]);
+		       cpu, c->x86_max_cores, cpu_core_id[cpu]);
 	}
 #endif
 }
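
For readers following the rename: CPUID leaf 0x80000008 reports the core count in the low byte of ECX as (cores - 1), and the power-of-two check above guards the shift arithmetic that splits the APIC ID into core and package parts. A minimal userspace sketch of the same bit math, with a hard-coded ECX value standing in for the real cpuid instruction (illustrative values only, not taken from this patch):

#include <stdio.h>

int main(void)
{
	unsigned ecx = 0x00000001;        /* assumed CPUID 0x80000008 ECX: NC = 1 -> 2 cores */
	unsigned max_cores = (ecx & 0xff) + 1;

	/* x & (x - 1) is nonzero iff x is not a power of two */
	if (max_cores & (max_cores - 1))
		max_cores = 1;

	/* bits needed to hold the core index within the APIC ID */
	unsigned bits = 0;
	while ((1u << bits) < max_cores)
		bits++;

	unsigned apicid = 0x05;           /* hypothetical initial APIC ID */
	printf("cores=%u core_id=%u pkg_id=%u\n",
	       max_cores, apicid & ((1u << bits) - 1), apicid >> bits);
	return 0;
}
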
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index c145fb30002e..31e344b26bae 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -231,10 +231,10 @@ static void __init early_cpu_detect(void)
 		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
 		c->x86 = (tfms >> 8) & 15;
 		c->x86_model = (tfms >> 4) & 15;
-		if (c->x86 == 0xf) {
+		if (c->x86 == 0xf)
 			c->x86 += (tfms >> 20) & 0xff;
+		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xF) << 4;
-		}
 		c->x86_mask = tfms & 15;
 		if (cap0 & (1<<19))
 			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
@@ -333,7 +333,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
 	c->x86_vendor_id[0] = '\0'; /* Unset */
 	c->x86_model_id[0] = '\0';  /* Unset */
-	c->x86_num_cores = 1;
+	c->x86_max_cores = 1;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 	if (!have_cpuid_p()) {
@@ -443,52 +443,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, tmp;
+	int 	index_msb, core_bits;
 	int 	cpu = smp_processor_id();
 
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
 
-	cpuid(1, &eax, &ebx, &ecx, &edx);
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1 ) {
-		index_msb = 31;
 
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
-		tmp = smp_num_siblings;
-		while ((tmp & 0x80000000 ) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+
+		index_msb = get_count_order(smp_num_siblings);
 		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       phys_proc_id[cpu]);
 
-		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		tmp = smp_num_siblings;
-		index_msb = 31;
-		while ((tmp & 0x80000000) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
+		index_msb = get_count_order(smp_num_siblings) ;
 
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+		core_bits = get_count_order(c->x86_max_cores);
 
-		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+					       ((1 << core_bits) - 1);
 
-		if (c->x86_num_cores > 1)
+		if (c->x86_max_cores > 1)
 			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 			       cpu_core_id[cpu]);
 	}
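
The detect_ht() rewrite replaces two hand-rolled find-most-significant-bit loops with get_count_order(), which returns ceil(log2(n)). A hedged standalone sketch of the resulting APIC-ID decomposition, with the kernel helper restated locally and hypothetical CPUID values in place of the real ones:

#include <stdio.h>

/* ceil(log2(count)), mirroring the kernel's get_count_order() */
static int get_count_order(unsigned int count)
{
	int order = 0;
	while ((1u << order) < count)
		order++;
	return order;
}

int main(void)
{
	unsigned siblings = 4, max_cores = 2;   /* hypothetical: 4 threads, 2 cores per package */
	unsigned apicid = 0x6;                  /* hypothetical initial APIC ID */

	int index_msb = get_count_order(siblings);
	unsigned phys_proc_id = apicid >> index_msb;

	siblings /= max_cores;                  /* threads per core */
	index_msb = get_count_order(siblings);
	int core_bits = get_count_order(max_cores);
	unsigned core_id = (apicid >> index_msb) & ((1u << core_bits) - 1);

	printf("package %u, core %u\n", phys_proc_id, core_id);  /* -> package 1, core 1 */
	return 0;
}
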
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index c28d26fb5f24..5e2da704f0fa 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -158,7 +158,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
 	if ( p )
 		strcpy(c->x86_model_id, p);
 
-	c->x86_num_cores = num_cpu_cores(c);
+	c->x86_max_cores = num_cpu_cores(c);
 
 	detect_ht(c);
 
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 4dc42a189ae5..fbfd374aa336 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -293,29 +293,45 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS];
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
-	struct _cpuid4_info	*this_leaf;
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
-#ifdef CONFIG_X86_HT
-	struct cpuinfo_x86 *c = cpu_data + cpu;
-#endif
+	int index_msb, i;
+	struct cpuinfo_x86 *c = cpu_data;
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 
 	if (num_threads_sharing == 1)
 		cpu_set(cpu, this_leaf->shared_cpu_map);
-#ifdef CONFIG_X86_HT
-	else if (num_threads_sharing == smp_num_siblings)
-		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-	else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
-		this_leaf->shared_cpu_map = cpu_core_map[cpu];
-	else
-		printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
-				"any known set of CPUs\n");
-#endif
+	else {
+		index_msb = get_count_order(num_threads_sharing);
+
+		for_each_online_cpu(i) {
+			if (c[i].apicid >> index_msb ==
+			    c[cpu].apicid >> index_msb) {
+				cpu_set(i, this_leaf->shared_cpu_map);
+				if (i != cpu && cpuid4_info[i]) {
+					sibling_leaf = CPUID4_INFO_IDX(i, index);
+					cpu_set(cpu, sibling_leaf->shared_cpu_map);
+				}
+			}
+		}
+	}
+}
+static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
+	int sibling;
+
+	this_leaf = CPUID4_INFO_IDX(cpu, index);
+	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+	}
 }
 #else
 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
 #endif
 
 static void free_cache_attributes(unsigned int cpu)
@@ -574,8 +590,10 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
 
-	for (i = 0; i < num_cache_leaves; i++)
+	for (i = 0; i < num_cache_leaves; i++) {
+		cache_remove_shared_cpu_map(cpu, i);
 		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+	}
 	kobject_unregister(cache_kobject[cpu]);
 	cpuid4_cache_sysfs_exit(cpu);
 	return;
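
The new sharing logic drops the CONFIG_X86_HT special cases: CPUID leaf 4 reports how many threads share each cache level, and two CPUs share that cache exactly when their APIC IDs agree above get_count_order(num_threads_sharing) bits. A small sketch of the grouping rule with made-up APIC IDs (illustrative only):

#include <stdio.h>

static int get_count_order(unsigned int count)
{
	int order = 0;
	while ((1u << order) < count)
		order++;
	return order;
}

int main(void)
{
	/* hypothetical initial APIC IDs of 4 online CPUs */
	unsigned apicid[4] = { 0x0, 0x1, 0x2, 0x3 };
	unsigned num_threads_sharing = 2;   /* from CPUID.4: EAX[25:14] + 1 */
	int index_msb = get_count_order(num_threads_sharing);

	/* CPUs i and j share this cache iff their APIC IDs match above index_msb */
	for (int i = 0; i < 4; i++)
		for (int j = 0; j < 4; j++)
			if (i != j && apicid[i] >> index_msb == apicid[j] >> index_msb)
				printf("cpu%d shares with cpu%d\n", i, j);
	return 0;
}
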
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index dd4ebd6af7e4..1e9db198c440 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -626,6 +626,14 @@ void __init mtrr_bp_init(void)
 	if (cpuid_eax(0x80000000) >= 0x80000008) {
 		u32 phys_addr;
 		phys_addr = cpuid_eax(0x80000008) & 0xff;
+		/* CPUID workaround for Intel 0F33/0F34 CPU */
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+		    boot_cpu_data.x86 == 0xF &&
+		    boot_cpu_data.x86_model == 0x3 &&
+		    (boot_cpu_data.x86_mask == 0x3 ||
+		     boot_cpu_data.x86_mask == 0x4))
+			phys_addr = 36;
+
 		size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
 		size_and_mask = ~size_or_mask & 0xfff00000;
 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
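
The quirk matters because the masks below are derived from the reported address width. Worked through for the forced 36-bit case (PAGE_SHIFT is 12 on x86; the pre-quirk width is an assumption, not stated in the patch):

#include <stdio.h>

int main(void)
{
	unsigned phys_addr = 36;         /* quirked width on the affected steppings */
	unsigned page_shift = 12;

	/* mask covering the PFN bits the MTRRs can actually address */
	unsigned size_or_mask  = ~((1u << (phys_addr - page_shift)) - 1);
	unsigned size_and_mask = ~size_or_mask & 0xfff00000;

	printf("size_or_mask=%08x size_and_mask=%08x\n",
	       size_or_mask, size_and_mask);   /* -> ff000000 / 00f00000 */
	return 0;
}
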
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 41b871ecf4b3..e7921315ae9d 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (c->x86_cache_size >= 0)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
 #ifdef CONFIG_X86_HT
-	if (c->x86_num_cores * smp_num_siblings > 1) {
+	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
-		seq_printf(m, "siblings\t: %d\n",
-				c->x86_num_cores * smp_num_siblings);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
 		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
-		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 #endif
 
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bc5a9d97466b..d16520da4550 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -72,9 +72,11 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 
+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
 
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
@@ -442,35 +444,60 @@ static void __devinit smp_callin(void)
 
 static int cpucount;
 
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void
 set_cpu_sibling_map(int cpu)
 {
 	int i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (cpu_core_id[cpu] == cpu_core_id[i]) {
+		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+			if (phys_proc_id[cpu] == phys_proc_id[i] &&
+			    cpu_core_id[cpu] == cpu_core_id[i]) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, cpu_core_map[cpu]);
+				cpu_set(cpu, cpu_core_map[i]);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
-	if (current_cpu_data.x86_num_cores > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (phys_proc_id[cpu] == phys_proc_id[i]) {
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
-			}
-		}
-	} else {
+	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		c[cpu].booted_cores = 1;
+		return;
+	}
+
+	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			/*
+			 *  Does this new cpu bringup a new core?
+			 */
+			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+				/*
+				 * for each core in package, increment
+				 * the booted_cores for this new cpu
+				 */
+				if (first_cpu(cpu_sibling_map[i]) == i)
+					c[cpu].booted_cores++;
+				/*
+				 * increment the core count for all
+				 * the other cpus in this package
+				 */
+				if (i != cpu)
+					c[i].booted_cores++;
+			} else if (i != cpu && !c[cpu].booted_cores)
+				c[cpu].booted_cores = c[i].booted_cores;
+		}
 	}
 }
 
@@ -1095,11 +1122,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
 	current_thread_info()->cpu = 0;
 	smp_tune_scheduling();
-	cpus_clear(cpu_sibling_map[0]);
-	cpu_set(0, cpu_sibling_map[0]);
 
-	cpus_clear(cpu_core_map[0]);
-	cpu_set(0, cpu_core_map[0]);
+	set_cpu_sibling_map(0);
 
 	/*
 	 * If we couldn't find an SMP configuration at boot time,
@@ -1278,15 +1302,24 @@ static void
 remove_siblinginfo(int cpu)
 {
 	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;
 
+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+
 	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	for_each_cpu_mask(sibling, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	phys_proc_id[cpu] = BAD_APICID;
 	cpu_core_id[cpu] = BAD_APICID;
+	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
 int __cpu_disable(void)
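
booted_cores, maintained incrementally above and decremented in remove_siblinginfo(), tracks how many cores in a package have at least one thread online. A sketch that recomputes the same invariant from scratch for a hypothetical 1-package, 2-core, 2-thread box (the real code updates it in place instead of rescanning):

#include <stdio.h>

#define NCPUS 4

/* hypothetical topology: cpu -> (package, core) */
static const int pkg[NCPUS]  = { 0, 0, 0, 0 };
static const int core[NCPUS] = { 0, 0, 1, 1 };

int main(void)
{
	int booted_cores[NCPUS] = { 0 };
	int online = 0;                 /* bitmask of cpus brought up so far */

	for (int cpu = 0; cpu < NCPUS; cpu++) {   /* bring cpus up in order */
		online |= 1 << cpu;
		int cores = 0, seen = 0;
		/* count distinct cores among online cpus in this package */
		for (int i = 0; i < NCPUS; i++)
			if ((online & (1 << i)) && pkg[i] == pkg[cpu] &&
			    !(seen & (1 << core[i]))) {
				seen |= 1 << core[i];
				cores++;
			}
		for (int i = 0; i < NCPUS; i++)
			if ((online & (1 << i)) && pkg[i] == pkg[cpu])
				booted_cores[i] = cores;
		printf("after cpu%d: booted_cores=%d\n", cpu, booted_cores[cpu]);
	}
	return 0;
}
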
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index 8de658db8146..52b3ed5d2cb5 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -137,8 +137,8 @@ static void __init parse_memory_affinity_structure (char *sratp)
137 "enabled and removable" : "enabled" ) ); 137 "enabled and removable" : "enabled" ) );
138} 138}
139 139
140#if MAX_NR_ZONES != 3 140#if MAX_NR_ZONES != 4
141#error "MAX_NR_ZONES != 3, chunk_to_zone requires review" 141#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
142#endif 142#endif
143/* Take a chunk of pages from page frame cstart to cend and count the number 143/* Take a chunk of pages from page frame cstart to cend and count the number
144 * of pages in each zone, returned via zones[]. 144 * of pages in each zone, returned via zones[].
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8796e12c56f3..b76ce1fe2e7f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -58,6 +58,10 @@ config IA64_UNCACHED_ALLOCATOR
 	bool
 	select GENERIC_ALLOCATOR
 
+config ZONE_DMA_IS_DMA32
+	bool
+	default y
+
 choice
 	prompt "System type"
 	default IA64_GENERIC
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 4cce2f6f170c..6ece645e4dbe 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -226,22 +226,42 @@ config SCHED_SMT
 
 source "kernel/Kconfig.preempt"
 
-config K8_NUMA
-	bool "K8 NUMA support"
-	select NUMA
+config NUMA
+	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on SMP
 	help
-	  Enable NUMA (Non Unified Memory Architecture) support for
-	  AMD Opteron Multiprocessor systems. The kernel will try to allocate
-	  memory used by a CPU on the local memory controller of the CPU
-	  and add some more NUMA awareness to the kernel.
-	  This code is recommended on all multiprocessor Opteron systems
-	  and normally doesn't hurt on others.
+	  Enable NUMA (Non Uniform Memory Access) support. The kernel
+	  will try to allocate memory used by a CPU on the local memory
+	  controller of the CPU and add some more NUMA awareness to the kernel.
+	  This code is recommended on all multiprocessor Opteron systems.
+	  If the system is EM64T, you should say N unless your system is EM64T
+	  NUMA.
+
+config K8_NUMA
+	bool "Old style AMD Opteron NUMA detection"
+	depends on NUMA
+	default y
+	help
+	  Enable K8 NUMA node topology detection.  You should say Y here if
+	  you have a multi processor AMD K8 system. This uses an old
+	  method to read the NUMA configurtion directly from the builtin
+	  Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+	  instead, which also takes priority if both are compiled in.
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+	bool "ACPI NUMA detection"
+	depends on NUMA
+	select ACPI
+	select ACPI_NUMA
+	default y
+	help
+	  Enable ACPI SRAT based node topology detection.
 
 config NUMA_EMU
-	bool "NUMA emulation support"
-	select NUMA
-	depends on SMP
+	bool "NUMA emulation"
+	depends on NUMA
 	help
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -252,9 +272,6 @@ config ARCH_DISCONTIGMEM_ENABLE
 	depends on NUMA
 	default y
 
-config NUMA
-	bool
-	default n
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -374,6 +391,14 @@ config X86_MCE_INTEL
 	  Additional support for intel specific MCE features such as
 	  the thermal monitor.
 
+config X86_MCE_AMD
+	bool "AMD MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	default y
+	help
+	  Additional support for AMD specific MCE features such as
+	  the DRAM Error Threshold.
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if EMBEDDED
 	default "0x100000"
@@ -502,7 +527,7 @@ config IA32_EMULATION
 	  left.
 
 config IA32_AOUT
-	bool "IA32 a.out support"
+	tristate "IA32 a.out support"
 	depends on IA32_EMULATION
 	help
 	  Support old a.out binaries in the 32bit emulation.
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index d584ecc27ea1..e2c6e64a85ec 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -2,15 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-# !SMP for now because the context switch early causes GPF in segment reloading
-# and the GS base checking does the wrong thing then, causing a hang.
-config CHECKING
-	bool "Additional run-time checks"
-	depends on DEBUG_KERNEL && !SMP
-	help
-	  Enables some internal consistency checks for kernel debugging.
-	  You should normally say N.
-
 config INIT_DEBUG
 	bool "Debug __init statements"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index f8db7e500fbf..5d56542fb68f 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-git11
-# Mon Sep 12 16:16:16 2005
+# Linux kernel version: 2.6.14-git7
+# Sat Nov 5 15:55:50 2005
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -35,7 +35,7 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
+CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -93,10 +93,11 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
+CONFIG_NUMA=y
 CONFIG_K8_NUMA=y
+CONFIG_X86_64_ACPI_NUMA=y
 # CONFIG_NUMA_EMU is not set
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_NUMA=y
 CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -107,9 +108,10 @@ CONFIG_DISCONTIGMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_NEED_MULTIPLE_NODES=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_HAVE_DEC_LOCK=y
 CONFIG_NR_CPUS=32
+CONFIG_HOTPLUG_CPU=y
 CONFIG_HPET_TIMER=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
@@ -117,6 +119,7 @@ CONFIG_GART_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
 CONFIG_PHYSICAL_START=0x100000
 # CONFIG_KEXEC is not set
 CONFIG_SECCOMP=y
@@ -136,11 +139,15 @@ CONFIG_PM=y
 # CONFIG_PM_DEBUG is not set
 CONFIG_SOFTWARE_SUSPEND=y
 CONFIG_PM_STD_PARTITION=""
+CONFIG_SUSPEND_SMP=y
 
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 CONFIG_ACPI=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_SLEEP_PROC_SLEEP=y
 CONFIG_ACPI_AC=y
 CONFIG_ACPI_BATTERY=y
 CONFIG_ACPI_BUTTON=y
@@ -148,6 +155,7 @@ CONFIG_ACPI_BUTTON=y
 CONFIG_ACPI_HOTKEY=m
 CONFIG_ACPI_FAN=y
 CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_NUMA=y
 # CONFIG_ACPI_ASUS is not set
@@ -158,7 +166,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=y
 
 #
 # CPU Frequency scaling
@@ -293,7 +301,6 @@ CONFIG_IPV6=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
-# CONFIG_NETFILTER_NETLINK is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
@@ -312,6 +319,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_DEBUG_DRIVER is not set
 
 #
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
 # Memory Technology Devices (MTD)
 #
 # CONFIG_MTD is not set
@@ -354,6 +366,11 @@ CONFIG_IOSCHED_NOOP=y
 # CONFIG_IOSCHED_AS is not set
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_DEADLINE=y
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -450,6 +467,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
 
 #
 # SCSI low-level drivers
@@ -469,20 +487,24 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
 # CONFIG_SCSI_SATA_AHCI is not set
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_SX4 is not set
 # CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
 # CONFIG_SCSI_SATA_SIS is not set
 # CONFIG_SCSI_SATA_ULI is not set
 CONFIG_SCSI_SATA_VIA=y
 # CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -525,6 +547,7 @@ CONFIG_BLK_DEV_DM=y
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 # CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
 CONFIG_FUSION_MAX_SGE=128
 # CONFIG_FUSION_CTL is not set
 
@@ -564,6 +587,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_HAPPYMEAL is not set
 # CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
 CONFIG_NET_VENDOR_3COM=y
 CONFIG_VORTEX=y
 # CONFIG_TYPHOON is not set
@@ -740,7 +764,43 @@ CONFIG_LEGACY_PTY_COUNT=256
 #
 # Watchdog Cards
 #
-# CONFIG_WATCHDOG is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_ACQUIRE_WDT is not set
+# CONFIG_ADVANTECH_WDT is not set
+# CONFIG_ALIM1535_WDT is not set
+# CONFIG_ALIM7101_WDT is not set
+# CONFIG_SC520_WDT is not set
+# CONFIG_EUROTECH_WDT is not set
+# CONFIG_IB700_WDT is not set
+# CONFIG_IBMASR is not set
+# CONFIG_WAFER_WDT is not set
+# CONFIG_I6300ESB_WDT is not set
+# CONFIG_I8XX_TCO is not set
+# CONFIG_SC1200_WDT is not set
+# CONFIG_60XX_WDT is not set
+# CONFIG_SBC8360_WDT is not set
+# CONFIG_CPU5_WDT is not set
+# CONFIG_W83627HF_WDT is not set
+# CONFIG_W83877F_WDT is not set
+# CONFIG_W83977F_WDT is not set
+# CONFIG_MACHZ_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
 CONFIG_HW_RANDOM=y
 # CONFIG_NVRAM is not set
 CONFIG_RTC=y
@@ -767,6 +827,7 @@ CONFIG_MAX_RAW_DEVS=256
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -783,6 +844,7 @@ CONFIG_MAX_RAW_DEVS=256
 #
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
 #
@@ -886,12 +948,15 @@ CONFIG_USB_UHCI_HCD=y
 # USB Device Class drivers
 #
 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_BLUETOOTH_TTY is not set
 # CONFIG_USB_ACM is not set
 CONFIG_USB_PRINTER=y
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -924,6 +989,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_XPAD is not set
 # CONFIG_USB_ATI_REMOTE is not set
 # CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
 
 #
 # USB Imaging devices
@@ -1005,7 +1071,7 @@ CONFIG_USB_MON=y
 #
 # CONFIG_EDD is not set
 # CONFIG_DELL_RBU is not set
-CONFIG_DCDBAS=m
+# CONFIG_DCDBAS is not set
 
 #
 # File systems
@@ -1037,7 +1103,7 @@ CONFIG_INOTIFY=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 
 #
@@ -1068,7 +1134,7 @@ CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
 CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
+CONFIG_RELAYFS_FS=y
 
 #
 # Miscellaneous filesystems
@@ -1186,7 +1252,9 @@ CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_INFO is not set
 CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_INIT_DEBUG=y
 # CONFIG_IOMMU_DEBUG is not set
 CONFIG_KPROBES=y
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 93c60f4aa47a..3bf58af98936 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -36,9 +36,6 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* probably broken */
 
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
-				unsigned long stack_top, int exec_stack);
-
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index d9161e395978..830feb272eca 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs)
 	me->thread.es = __USER_DS;
 }
 
-int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
+			 int executable_stack)
 {
 	unsigned long stack_base;
 	struct vm_area_struct *mpnt;
@@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec
 
 	return 0;
 }
+EXPORT_SYMBOL(ia32_setup_arg_pages);
 
 static unsigned long
 elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 14328cab5d3a..fe4cbd1c4b2f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
 
 obj-$(CONFIG_X86_MCE)		+= mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 962ad4823b6a..c7f4fdd20f05 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 void __init iommu_hole_init(void)
 {
 	int fix, num;
-	u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0;
+	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
 	int valid_agp = 0;
 
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index b6e7715d877f..18691ce4c759 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_X86_MCE_AMD
+void setup_threshold_lvt(unsigned long lvt_off)
+{
+	unsigned int v = 0;
+	unsigned long reg = (lvt_off << 4) + 0x500;
+	v |= THRESHOLD_APIC_VECTOR;
+	apic_write(reg, v);
+}
+#endif /* CONFIG_X86_MCE_AMD */
+
 #undef APIC_DIVISOR
 
 /*
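
setup_threshold_lvt() programs a local APIC Local Vector Table entry: the LVT registers start at APIC offset 0x500 and are 0x10 apart, so (lvt_off << 4) + 0x500 is the register address, and the low byte of the written value carries the interrupt vector. A sketch of the address arithmetic only, with an assumed vector number standing in for THRESHOLD_APIC_VECTOR:

#include <stdio.h>

int main(void)
{
	unsigned long lvt_off = 2;                     /* hypothetical offset read from an MSR */
	unsigned long reg = (lvt_off << 4) + 0x500;    /* APIC LVT register block base 0x500 */
	unsigned int v = 0xf9;                         /* assumed THRESHOLD_APIC_VECTOR value */

	printf("apic_write(0x%lx, 0x%x)\n", reg, v);   /* -> apic_write(0x520, 0xf9) */
	return 0;
}
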
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index ab3f87aaff70..17579a1a174b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -23,8 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
-
-extern char _end[];
+#include <asm/sections.h>
 
 /*
  * PFN of last memory page.
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7937971d1853..9ff42041bb6b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -612,6 +612,9 @@ retint_kernel:
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 
+ENTRY(threshold_interrupt)
+	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
 #ifdef CONFIG_SMP
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index b92e5f45ed46..15290968e49d 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
+	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl	%eax, %cr3
 
 	/* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
 	movq	%rax, %cr3
 
 	/* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
 ENTRY(stext)
 ENTRY(_stext)
 
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
 .org 0x1000
 ENTRY(init_level4_pgt)
-	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000000a007 + __PHYSICAL_START
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
 
 .org 0x2000
 ENTRY(level3_ident_pgt)
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
 	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000000a007 + __PHYSICAL_START
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+
 	.data
 
 	.align 16
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cf6ab147a2a5..b675c5add01e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -19,14 +19,15 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
 static void __init clear_bss(void)
 {
-	extern char __bss_start[], __bss_end[];
 	memset(__bss_start, 0,
-	       (unsigned long) __bss_end - (unsigned long) __bss_start);
+	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
 	boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	char *s;
@@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+
+	/*
+	 * switch to init_level4_pgt from boot_level4_pgt
+	 */
+	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
+	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+
 	pda_init(0);
 	copy_bootdata(real_mode_data);
 #ifdef CONFIG_SMP
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index a9368d4c4aba..6e5101ad3d1a 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -492,6 +492,7 @@ void invalidate_interrupt5(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
+void threshold_interrupt(void);
 void i8254_timer_resume(void);
 
 static void setup_timer_hardware(void)
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 	int pin;							\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 									\
+	BUG_ON(irq >= NR_IRQS);						\
 	for (;;) {							\
 		unsigned int reg;					\
 		pin = entry->pin;					\
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 }
 #endif
 
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	BUG_ON(irq >= NR_IRQS);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
 		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
+			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
 	entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
+	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 }
 
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+	int i, tries, vector;
+
+	BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+	if (platform_legacy_irq(gsi))
+		return gsi;
+
+	if (gsi_2_irq[gsi] != 0xFF)
+		return (int)gsi_2_irq[gsi];
+
+	tries = NR_IRQS;
+  try_again:
+	vector = assign_irq_vector(gsi);
+
+	/*
+	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+	 * use of vector and if found, return that IRQ.  However, we never want
+	 * to share legacy IRQs, which usually have a different trigger mode
+	 * than PCI.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		if (IO_APIC_VECTOR(i) == vector)
+			break;
+	if (platform_legacy_irq(i)) {
+		if (--tries >= 0) {
+			IO_APIC_VECTOR(i) = 0;
+			goto try_again;
+		}
+		panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+	}
+	if (i < NR_IRQS) {
+		gsi_2_irq[gsi] = i;
+		printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+				gsi, vector, i);
+		return i;
+	}
+
+	i = next_irq++;
+	BUG_ON(i >= NR_IRQS);
+	gsi_2_irq[gsi] = i;
+	IO_APIC_VECTOR(i) = vector;
+	printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+			gsi, vector, i);
+	return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			while (i < apic)
 				irq += nr_ioapic_registers[i++];
 			irq += pin;
+			irq = gsi_irq_sharing(irq);
 			break;
 		}
 		default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			break;
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 	return irq;
 }
 
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 
-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (IO_APIC_VECTOR(irq) > 0)
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 		return IO_APIC_VECTOR(irq);
 next:
 	current_vector += 8;
@@ -699,9 +765,8 @@ next:
699 goto next; 765 goto next;
700 766
701 if (current_vector >= FIRST_SYSTEM_VECTOR) { 767 if (current_vector >= FIRST_SYSTEM_VECTOR) {
702 offset++; 768 /* If we run out of vectors on large boxen, must share them. */
703 if (!(offset%8)) 769 offset = (offset + 1) % 8;
704 return -ENOSPC;
705 current_vector = FIRST_DEVICE_VECTOR + offset; 770 current_vector = FIRST_DEVICE_VECTOR + offset;
706 } 771 }
707 772
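The allocator's stepping can be traced in isolation. A sketch, using illustrative values for the vector constants (the real ones live in asm/hw_irq.h): vectors advance by 8 through the priority levels, the int 0x80 slot is skipped, and exhaustion now wraps the offset instead of returning -ENOSPC.

#include <stdio.h>

/* Illustrative values only, not the real asm/hw_irq.h constants. */
#define FIRST_DEVICE_VECTOR 0x31
#define FIRST_SYSTEM_VECTOR 0xef
#define IA32_SYSCALL_VECTOR 0x80

int main(void)
{
        int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
        int i;

        /* Hand out 40 vectors the way the patched allocator does. */
        for (i = 0; i < 40; i++) {
next:
                current_vector += 8;
                if (current_vector == IA32_SYSCALL_VECTOR)
                        goto next;        /* never hand out int 0x80 */
                if (current_vector >= FIRST_SYSTEM_VECTOR) {
                        /* out of vectors: wrap and start sharing them */
                        offset = (offset + 1) % 8;
                        current_vector = FIRST_DEVICE_VECTOR + offset;
                }
                printf("allocation %d -> vector 0x%02x\n", i, current_vector);
        }
        return 0;
}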
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
1917 entry.polarity = active_high_low; 1982 entry.polarity = active_high_low;
1918 entry.mask = 1; /* Disabled (masked) */ 1983 entry.mask = 1; /* Disabled (masked) */
1919 1984
1985 irq = gsi_irq_sharing(irq);
1920 /* 1986 /*
1921 * IRQs < 16 are already in the irq_2_pin[] map 1987 * IRQs < 16 are already in the irq_2_pin[] map
1922 */ 1988 */
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 69541db5ff2c..183dc6105429 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
37static unsigned long console_logged; 37static unsigned long console_logged;
38static int notify_user; 38static int notify_user;
39static int rip_msr; 39static int rip_msr;
40static int mce_bootlog; 40static int mce_bootlog = 1;
41 41
42/* 42/*
43 * Lockless MCE logging infrastructure. 43 * Lockless MCE logging infrastructure.
@@ -347,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
347 /* disable GART TBL walk error reporting, which trips off 347 /* disable GART TBL walk error reporting, which trips off
348 incorrectly with the IOMMU & 3ware & Cerberus. */ 348 incorrectly with the IOMMU & 3ware & Cerberus. */
349 clear_bit(10, &bank[4]); 349 clear_bit(10, &bank[4]);
 350 /* Many broken BIOSes don't clear these banks by
 351 default and leave stale junk in them. Don't log it. */
352 mce_bootlog = 0;
350 } 353 }
354
351} 355}
352 356
353static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 357static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
@@ -356,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
356 case X86_VENDOR_INTEL: 360 case X86_VENDOR_INTEL:
357 mce_intel_feature_init(c); 361 mce_intel_feature_init(c);
358 break; 362 break;
363 case X86_VENDOR_AMD:
364 mce_amd_feature_init(c);
365 break;
359 default: 366 default:
360 break; 367 break;
361 } 368 }
@@ -495,16 +502,16 @@ static int __init mcheck_disable(char *str)
495/* mce=off disables machine check. Note you can reenable it later 502/* mce=off disables machine check. Note you can reenable it later
496 using sysfs. 503 using sysfs.
497 mce=TOLERANCELEVEL (number, see above) 504 mce=TOLERANCELEVEL (number, see above)
498 mce=bootlog Log MCEs from before booting. Disabled by default to work 505 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
499 around buggy BIOS that leave bogus MCEs. */ 506 mce=nobootlog Don't log MCEs from before booting. */
500static int __init mcheck_enable(char *str) 507static int __init mcheck_enable(char *str)
501{ 508{
502 if (*str == '=') 509 if (*str == '=')
503 str++; 510 str++;
504 if (!strcmp(str, "off")) 511 if (!strcmp(str, "off"))
505 mce_dont_init = 1; 512 mce_dont_init = 1;
506 else if (!strcmp(str, "bootlog")) 513 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
507 mce_bootlog = 1; 514 mce_bootlog = str[0] == 'b';
508 else if (isdigit(str[0])) 515 else if (isdigit(str[0]))
509 get_option(&str, &tolerant); 516 get_option(&str, &tolerant);
510 else 517 else
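The combined strcmp test works because the two accepted strings differ in their first byte: 'b' for bootlog (enable, 1), 'n' for nobootlog (disable, 0). A standalone sketch of the parser, with atoi standing in for the kernel's get_option():

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int mce_dont_init, mce_bootlog = 1, tolerant;

static void mcheck_enable(const char *str)
{
        if (*str == '=')
                str++;
        if (!strcmp(str, "off"))
                mce_dont_init = 1;
        else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
                mce_bootlog = str[0] == 'b';  /* 'b'ootlog -> 1, 'n'obootlog -> 0 */
        else if (isdigit((unsigned char)str[0]))
                tolerant = atoi(str);         /* get_option() in the kernel */
}

int main(void)
{
        mcheck_enable("=nobootlog");
        printf("dont_init=%d bootlog=%d tolerant=%d\n",
               mce_dont_init, mce_bootlog, tolerant);  /* 0 0 0 */
        return 0;
}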
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
new file mode 100644
index 000000000000..1f76175ace02
--- /dev/null
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -0,0 +1,538 @@
1/*
2 * (c) 2005 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
6 *
7 * Written by Jacob Shin - AMD, Inc.
8 *
9 * Support : jacob.shin@amd.com
10 *
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
12 * MC4_MISC0 exists per physical processor.
13 *
14 */
15
16#include <linux/cpu.h>
17#include <linux/errno.h>
18#include <linux/init.h>
19#include <linux/interrupt.h>
20#include <linux/kobject.h>
21#include <linux/notifier.h>
22#include <linux/sched.h>
23#include <linux/smp.h>
24#include <linux/sysdev.h>
25#include <linux/sysfs.h>
26#include <asm/apic.h>
27#include <asm/mce.h>
28#include <asm/msr.h>
29#include <asm/percpu.h>
30
31#define PFX "mce_threshold: "
32#define VERSION "version 1.00.9"
33#define NR_BANKS 5
34#define THRESHOLD_MAX 0xFFF
35#define INT_TYPE_APIC 0x00020000
36#define MASK_VALID_HI 0x80000000
37#define MASK_LVTOFF_HI 0x00F00000
38#define MASK_COUNT_EN_HI 0x00080000
39#define MASK_INT_TYPE_HI 0x00060000
40#define MASK_OVERFLOW_HI 0x00010000
41#define MASK_ERR_COUNT_HI 0x00000FFF
42#define MASK_OVERFLOW 0x0001000000000000L
43
44struct threshold_bank {
45 unsigned int cpu;
46 u8 bank;
47 u8 interrupt_enable;
48 u16 threshold_limit;
49 struct kobject kobj;
50};
51
52static struct threshold_bank threshold_defaults = {
53 .interrupt_enable = 0,
54 .threshold_limit = THRESHOLD_MAX,
55};
56
57#ifdef CONFIG_SMP
58static unsigned char shared_bank[NR_BANKS] = {
59 0, 0, 0, 0, 1
60};
61#endif
62
63static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
64
65/*
66 * CPU Initialization
67 */
68
69/* must be called with correct cpu affinity */
70static void threshold_restart_bank(struct threshold_bank *b,
71 int reset, u16 old_limit)
72{
73 u32 mci_misc_hi, mci_misc_lo;
74
75 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
76
77 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
78 reset = 1; /* limit cannot be lower than err count */
79
80 if (reset) { /* reset err count and overflow bit */
81 mci_misc_hi =
82 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
83 (THRESHOLD_MAX - b->threshold_limit);
84 } else if (old_limit) { /* change limit w/o reset */
85 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
86 (old_limit - b->threshold_limit);
87 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
88 (new_count & THRESHOLD_MAX);
89 }
90
91 b->interrupt_enable ?
92 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
93 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
94
95 mci_misc_hi |= MASK_COUNT_EN_HI;
96 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
97}
98
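The count field is written as a distance from overflow: storing THRESHOLD_MAX - threshold_limit means the 12-bit counter overflows, and the APIC interrupt fires, after exactly threshold_limit further errors; show_error_count() below inverts the same encoding. A quick check of the arithmetic:

#include <stdio.h>

#define THRESHOLD_MAX     0xFFF
#define MASK_ERR_COUNT_HI 0x00000FFF

int main(void)
{
        unsigned int limit = 10;                    /* interrupt every 10 errors */
        unsigned int count = THRESHOLD_MAX - limit; /* value written to the field */
        unsigned int errors = 0;

        while (count < THRESHOLD_MAX) {  /* hardware increments once per error */
                count++;
                errors++;
        }
        printf("overflow after %u errors (limit %u)\n", errors, limit);

        /* show_error_count() recovers the running total the same way: */
        printf("visible error_count = %u\n",
               (count & MASK_ERR_COUNT_HI) - (THRESHOLD_MAX - limit));
        return 0;
}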
99void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
100{
101 int bank;
102 u32 mci_misc_lo, mci_misc_hi;
103 unsigned int cpu = smp_processor_id();
104
105 for (bank = 0; bank < NR_BANKS; ++bank) {
106 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
107
108 /* !valid, !counter present, bios locked */
109 if (!(mci_misc_hi & MASK_VALID_HI) ||
110 !(mci_misc_hi & MASK_VALID_HI >> 1) ||
111 (mci_misc_hi & MASK_VALID_HI >> 2))
112 continue;
113
114 per_cpu(bank_map, cpu) |= (1 << bank);
115
116#ifdef CONFIG_SMP
117 if (shared_bank[bank] && cpu_core_id[cpu])
118 continue;
119#endif
120
121 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
122 threshold_defaults.cpu = cpu;
123 threshold_defaults.bank = bank;
124 threshold_restart_bank(&threshold_defaults, 0, 0);
125 }
126}
127
128/*
129 * APIC Interrupt Handler
130 */
131
132/*
 133 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 134 * The interrupt fires when error_count reaches threshold_limit.
 135 * The handler simply logs an mcelog entry with a software-defined bank number.
136 */
137asmlinkage void mce_threshold_interrupt(void)
138{
139 int bank;
140 struct mce m;
141
142 ack_APIC_irq();
143 irq_enter();
144
145 memset(&m, 0, sizeof(m));
146 rdtscll(m.tsc);
147 m.cpu = smp_processor_id();
148
149 /* assume first bank caused it */
150 for (bank = 0; bank < NR_BANKS; ++bank) {
151 m.bank = MCE_THRESHOLD_BASE + bank;
152 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
153
154 if (m.misc & MASK_OVERFLOW) {
155 mce_log(&m);
156 goto out;
157 }
158 }
159 out:
160 irq_exit();
161}
162
163/*
164 * Sysfs Interface
165 */
166
167static struct sysdev_class threshold_sysclass = {
168 set_kset_name("threshold"),
169};
170
171static DEFINE_PER_CPU(struct sys_device, device_threshold);
172
173struct threshold_attr {
174 struct attribute attr;
175 ssize_t(*show) (struct threshold_bank *, char *);
176 ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
177};
178
179static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
180
181static cpumask_t affinity_set(unsigned int cpu)
182{
183 cpumask_t oldmask = current->cpus_allowed;
184 cpumask_t newmask = CPU_MASK_NONE;
185 cpu_set(cpu, newmask);
186 set_cpus_allowed(current, newmask);
187 return oldmask;
188}
189
190static void affinity_restore(cpumask_t oldmask)
191{
192 set_cpus_allowed(current, oldmask);
193}
194
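affinity_set()/affinity_restore() bracket every MSR access because rdmsr/wrmsr act only on the current CPU: pin to the bank's owning CPU, touch the register, restore the old mask. A userspace analogue of the same pin/touch/restore pattern, with sched_setaffinity() playing the role of set_cpus_allowed() (error handling elided):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

static cpu_set_t affinity_set(int cpu)
{
        cpu_set_t oldmask, newmask;

        sched_getaffinity(0, sizeof(oldmask), &oldmask);
        CPU_ZERO(&newmask);
        CPU_SET(cpu, &newmask);
        sched_setaffinity(0, sizeof(newmask), &newmask);
        return oldmask;
}

static void affinity_restore(cpu_set_t oldmask)
{
        sched_setaffinity(0, sizeof(oldmask), &oldmask);
}

int main(void)
{
        cpu_set_t oldmask = affinity_set(0);

        printf("pinned to CPU %d\n", sched_getcpu());
        /* ... per-CPU work (the MSR access in the patch) goes here ... */
        affinity_restore(oldmask);
        return 0;
}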
195#define SHOW_FIELDS(name) \
196 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
197 { \
198 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
199 }
200SHOW_FIELDS(interrupt_enable)
201SHOW_FIELDS(threshold_limit)
202
203static ssize_t store_interrupt_enable(struct threshold_bank *b,
204 const char *buf, size_t count)
205{
206 char *end;
207 cpumask_t oldmask;
208 unsigned long new = simple_strtoul(buf, &end, 0);
209 if (end == buf)
210 return -EINVAL;
211 b->interrupt_enable = !!new;
212
213 oldmask = affinity_set(b->cpu);
214 threshold_restart_bank(b, 0, 0);
215 affinity_restore(oldmask);
216
217 return end - buf;
218}
219
220static ssize_t store_threshold_limit(struct threshold_bank *b,
221 const char *buf, size_t count)
222{
223 char *end;
224 cpumask_t oldmask;
225 u16 old;
226 unsigned long new = simple_strtoul(buf, &end, 0);
227 if (end == buf)
228 return -EINVAL;
229 if (new > THRESHOLD_MAX)
230 new = THRESHOLD_MAX;
231 if (new < 1)
232 new = 1;
233 old = b->threshold_limit;
234 b->threshold_limit = new;
235
236 oldmask = affinity_set(b->cpu);
237 threshold_restart_bank(b, 0, old);
238 affinity_restore(oldmask);
239
240 return end - buf;
241}
242
243static ssize_t show_error_count(struct threshold_bank *b, char *buf)
244{
245 u32 high, low;
246 cpumask_t oldmask;
247 oldmask = affinity_set(b->cpu);
248 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
249 affinity_restore(oldmask);
250 return sprintf(buf, "%x\n",
251 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
252}
253
254static ssize_t store_error_count(struct threshold_bank *b,
255 const char *buf, size_t count)
256{
257 cpumask_t oldmask;
258 oldmask = affinity_set(b->cpu);
259 threshold_restart_bank(b, 1, 0);
260 affinity_restore(oldmask);
261 return 1;
262}
263
264#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
265 .attr = {.name = __stringify(_name), .mode = _mode }, \
266 .show = _show, \
267 .store = _store, \
268};
269
270#define ATTR_FIELDS(name) \
271 static struct threshold_attr name = \
272 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
273
274ATTR_FIELDS(interrupt_enable);
275ATTR_FIELDS(threshold_limit);
276ATTR_FIELDS(error_count);
277
278static struct attribute *default_attrs[] = {
279 &interrupt_enable.attr,
280 &threshold_limit.attr,
281 &error_count.attr,
282 NULL
283};
284
285#define to_bank(k) container_of(k,struct threshold_bank,kobj)
286#define to_attr(a) container_of(a,struct threshold_attr,attr)
287
288static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
289{
290 struct threshold_bank *b = to_bank(kobj);
291 struct threshold_attr *a = to_attr(attr);
292 ssize_t ret;
293 ret = a->show ? a->show(b, buf) : -EIO;
294 return ret;
295}
296
297static ssize_t store(struct kobject *kobj, struct attribute *attr,
298 const char *buf, size_t count)
299{
300 struct threshold_bank *b = to_bank(kobj);
301 struct threshold_attr *a = to_attr(attr);
302 ssize_t ret;
303 ret = a->store ? a->store(b, buf, count) : -EIO;
304 return ret;
305}
306
307static struct sysfs_ops threshold_ops = {
308 .show = show,
309 .store = store,
310};
311
312static struct kobj_type threshold_ktype = {
313 .sysfs_ops = &threshold_ops,
314 .default_attrs = default_attrs,
315};
316
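The show()/store() pair above is the standard kobject dispatch idiom: sysfs hands back the embedded struct kobject and struct attribute, and container_of() recovers the enclosing threshold_bank and threshold_attr. A self-contained illustration of that pointer recovery (the struct definitions are simplified stand-ins, not the kernel's):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { const char *name; };
struct threshold_bank {
        unsigned int cpu;
        unsigned char bank;
        struct kobject kobj;    /* embedded object, not a pointer */
};

int main(void)
{
        struct threshold_bank b = { .cpu = 2, .bank = 4, .kobj = { "bank4" } };
        struct kobject *k = &b.kobj;    /* all that sysfs passes around */
        struct threshold_bank *back =
                container_of(k, struct threshold_bank, kobj);

        printf("recovered cpu=%u bank=%u from kobject %s\n",
               back->cpu, (unsigned)back->bank, k->name);
        return 0;
}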
317/* symlinks sibling shared banks to first core. first core owns dir/files. */
318static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
319{
320 int err = 0;
321 struct threshold_bank *b = 0;
322
323#ifdef CONFIG_SMP
324 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */
325 char name[16];
326 unsigned lcpu = first_cpu(cpu_core_map[cpu]);
327 if (cpu_core_id[lcpu])
328 goto out; /* first core not up yet */
329
330 b = per_cpu(threshold_banks, lcpu)[bank];
331 if (!b)
332 goto out;
333 sprintf(name, "bank%i", bank);
334 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
335 &b->kobj, name);
336 if (err)
337 goto out;
338 per_cpu(threshold_banks, cpu)[bank] = b;
339 goto out;
340 }
341#endif
342
343 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
344 if (!b) {
345 err = -ENOMEM;
346 goto out;
347 }
348 memset(b, 0, sizeof(struct threshold_bank));
349
350 b->cpu = cpu;
351 b->bank = bank;
352 b->interrupt_enable = 0;
353 b->threshold_limit = THRESHOLD_MAX;
354 kobject_set_name(&b->kobj, "bank%i", bank);
355 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
356 b->kobj.ktype = &threshold_ktype;
357
358 err = kobject_register(&b->kobj);
359 if (err) {
360 kfree(b);
361 goto out;
362 }
363 per_cpu(threshold_banks, cpu)[bank] = b;
364 out:
365 return err;
366}
367
368/* create dir/files for all valid threshold banks */
369static __cpuinit int threshold_create_device(unsigned int cpu)
370{
371 int bank;
372 int err = 0;
373
374 per_cpu(device_threshold, cpu).id = cpu;
375 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
376 err = sysdev_register(&per_cpu(device_threshold, cpu));
377 if (err)
378 goto out;
379
380 for (bank = 0; bank < NR_BANKS; ++bank) {
381 if (!(per_cpu(bank_map, cpu) & 1 << bank))
382 continue;
383 err = threshold_create_bank(cpu, bank);
384 if (err)
385 goto out;
386 }
387 out:
388 return err;
389}
390
391#ifdef CONFIG_HOTPLUG_CPU
392/*
 393 * Let's be hotplug friendly.
 394 * On multi-core processors, the first core always takes ownership of the
 395 * shared sysfs dir/files, and the rest of the cores are symlinked to it.
396 */
397
398/* cpu hotplug call removes all symlinks before first core dies */
399static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
400{
401 struct threshold_bank *b;
402 char name[16];
403
404 b = per_cpu(threshold_banks, cpu)[bank];
405 if (!b)
406 return;
407 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
408 sprintf(name, "bank%i", bank);
409 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
410 per_cpu(threshold_banks, cpu)[bank] = 0;
411 } else {
412 kobject_unregister(&b->kobj);
413 kfree(per_cpu(threshold_banks, cpu)[bank]);
414 }
415}
416
417static __cpuinit void threshold_remove_device(unsigned int cpu)
418{
419 int bank;
420
421 for (bank = 0; bank < NR_BANKS; ++bank) {
422 if (!(per_cpu(bank_map, cpu) & 1 << bank))
423 continue;
424 threshold_remove_bank(cpu, bank);
425 }
426 sysdev_unregister(&per_cpu(device_threshold, cpu));
427}
428
429/* link all existing siblings when first core comes up */
430static __cpuinit int threshold_create_symlinks(unsigned int cpu)
431{
432 int bank, err = 0;
433 unsigned int lcpu = 0;
434
435 if (cpu_core_id[cpu])
436 return 0;
437 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
438 if (lcpu == cpu)
439 continue;
440 for (bank = 0; bank < NR_BANKS; ++bank) {
441 if (!(per_cpu(bank_map, cpu) & 1 << bank))
442 continue;
443 if (!shared_bank[bank])
444 continue;
445 err = threshold_create_bank(lcpu, bank);
446 }
447 }
448 return err;
449}
450
451/* remove all symlinks before first core dies. */
452static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
453{
454 int bank;
455 unsigned int lcpu = 0;
456 if (cpu_core_id[cpu])
457 return;
458 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
459 if (lcpu == cpu)
460 continue;
461 for (bank = 0; bank < NR_BANKS; ++bank) {
462 if (!(per_cpu(bank_map, cpu) & 1 << bank))
463 continue;
464 if (!shared_bank[bank])
465 continue;
466 threshold_remove_bank(lcpu, bank);
467 }
468 }
469}
470#else /* !CONFIG_HOTPLUG_CPU */
471static __cpuinit void threshold_create_symlinks(unsigned int cpu)
472{
473}
474static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
475{
476}
477static void threshold_remove_device(unsigned int cpu)
478{
479}
480#endif
481
482/* get notified when a cpu comes on/off */
483static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
484 unsigned long action, void *hcpu)
485{
486 /* cpu was unsigned int to begin with */
487 unsigned int cpu = (unsigned long)hcpu;
488
489 if (cpu >= NR_CPUS)
490 goto out;
491
492 switch (action) {
493 case CPU_ONLINE:
494 threshold_create_device(cpu);
495 threshold_create_symlinks(cpu);
496 break;
497 case CPU_DOWN_PREPARE:
498 threshold_remove_symlinks(cpu);
499 break;
500 case CPU_DOWN_FAILED:
501 threshold_create_symlinks(cpu);
502 break;
503 case CPU_DEAD:
504 threshold_remove_device(cpu);
505 break;
506 default:
507 break;
508 }
509 out:
510 return NOTIFY_OK;
511}
512
513static struct notifier_block threshold_cpu_notifier = {
514 .notifier_call = threshold_cpu_callback,
515};
516
517static __init int threshold_init_device(void)
518{
519 int err;
520 int lcpu = 0;
521
522 err = sysdev_class_register(&threshold_sysclass);
523 if (err)
524 goto out;
525
526 /* to hit CPUs online before the notifier is up */
527 for_each_online_cpu(lcpu) {
528 err = threshold_create_device(lcpu);
529 if (err)
530 goto out;
531 }
532 register_cpu_notifier(&threshold_cpu_notifier);
533
534 out:
535 return err;
536}
537
538device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index f16d38d09daf..1105250bf02c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
42 * Various Linux-internal data structures created from the 42 * Various Linux-internal data structures created from the
43 * MP-table. 43 * MP-table.
44 */ 44 */
45int apic_version [MAX_APICS]; 45unsigned char apic_version [MAX_APICS];
46unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 46unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
47int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 47int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
48 48
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
65/* Processor that is doing the boot up */ 65/* Processor that is doing the boot up */
66unsigned int boot_cpu_id = -1U; 66unsigned int boot_cpu_id = -1U;
67/* Internal processor count */ 67/* Internal processor count */
68static unsigned int num_processors = 0; 68unsigned int num_processors __initdata = 0;
69
70unsigned disabled_cpus __initdata;
69 71
70/* Bitmask of physically existing CPUs */ 72/* Bitmask of physically existing CPUs */
71physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; 73physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len)
106 108
107static void __init MP_processor_info (struct mpc_config_processor *m) 109static void __init MP_processor_info (struct mpc_config_processor *m)
108{ 110{
109 int ver, cpu; 111 int cpu;
112 unsigned char ver;
110 static int found_bsp=0; 113 static int found_bsp=0;
111 114
112 if (!(m->mpc_cpuflag & CPU_ENABLED)) 115 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
116 disabled_cpus++;
113 return; 117 return;
118 }
114 119
115 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", 120 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
116 m->mpc_apicid, 121 m->mpc_apicid,
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
129 } 134 }
130 135
131 cpu = num_processors++; 136 cpu = num_processors++;
132 137
133 if (m->mpc_apicid > MAX_APICS) { 138#if MAX_APICS < 255
139 if ((int)m->mpc_apicid > MAX_APICS) {
134 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 140 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
135 m->mpc_apicid, MAX_APICS); 141 m->mpc_apicid, MAX_APICS);
136 return; 142 return;
137 } 143 }
144#endif
138 ver = m->mpc_apicver; 145 ver = m->mpc_apicver;
139 146
140 physid_set(m->mpc_apicid, phys_cpu_present_map); 147 physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
218 m->mpc_irqtype, m->mpc_irqflag & 3, 225 m->mpc_irqtype, m->mpc_irqflag & 3,
219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 226 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 227 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
221 if (++mp_irq_entries == MAX_IRQ_SOURCES) 228 if (++mp_irq_entries >= MAX_IRQ_SOURCES)
222 panic("Max # of irq sources exceeded!!\n"); 229 panic("Max # of irq sources exceeded!!\n");
223} 230}
224 231
@@ -549,7 +556,7 @@ void __init get_smp_config (void)
549 * Read the physical hardware table. Anything here will 556 * Read the physical hardware table. Anything here will
550 * override the defaults. 557 * override the defaults.
551 */ 558 */
552 if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { 559 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
553 smp_found_config = 0; 560 smp_found_config = 0;
554 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); 561 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
555 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); 562 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 88be97c96987..2e28e855ec3c 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
220 uses the normal dma_mask for alloc_coherent. */ 220 uses the normal dma_mask for alloc_coherent. */
221 dma_mask &= *dev->dma_mask; 221 dma_mask &= *dev->dma_mask;
222 222
223 /* Why <=? Even when the mask is smaller than 4GB it is often larger
224 than 16MB and in this case we have a chance of finding fitting memory
225 in the next higher zone first. If not retry with true GFP_DMA. -AK */
226 if (dma_mask <= 0xffffffff)
227 gfp |= GFP_DMA32;
228
223 again: 229 again:
224 memory = dma_alloc_pages(dev, gfp, get_order(size)); 230 memory = dma_alloc_pages(dev, gfp, get_order(size));
225 if (memory == NULL) 231 if (memory == NULL)
@@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
245 } 251 }
246 252
247 if (!(gfp & GFP_DMA)) { 253 if (!(gfp & GFP_DMA)) {
248 gfp |= GFP_DMA; 254 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
249 goto again; 255 goto again;
250 } 256 }
251 return NULL; 257 return NULL;
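The retry logic reads more clearly pulled out of context: masks of 4GB or less try GFP_DMA32 first, and only on failure fall back to true GFP_DMA with the DMA32 bit cleared. A sketch with a stand-in allocator that fails everything except GFP_DMA, forcing the fallback path (the flag values are illustrative, not the real gfp.h ones):

#include <stdio.h>

/* Illustrative flag values; the real ones come from linux/gfp.h. */
#define GFP_DMA    0x01u
#define GFP_DMA32  0x04u

/* Stand-in allocator: pretend only true GFP_DMA succeeds. */
static void *dma_alloc_pages(unsigned int gfp)
{
        return (gfp & GFP_DMA) ? (void *)0x1000 : NULL;
}

static void *alloc_coherent(unsigned long long dma_mask)
{
        unsigned int gfp = 0;
        void *memory;

        /* <= 4GB masks: try the DMA32 zone first -- it may satisfy masks
         * above 16MB without burning scarce GFP_DMA memory. */
        if (dma_mask <= 0xffffffffULL)
                gfp |= GFP_DMA32;
again:
        memory = dma_alloc_pages(gfp);
        if (memory == NULL && !(gfp & GFP_DMA)) {
                gfp = (gfp & ~GFP_DMA32) | GFP_DMA;  /* last resort: true DMA zone */
                goto again;
        }
        return memory;
}

int main(void)
{
        printf("got %p\n", alloc_coherent(0xffffffffULL));
        return 0;
}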
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 59be85d9a4bc..5afd63e8cef7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -144,7 +144,8 @@ void cpu_idle_wait(void)
144 do { 144 do {
145 ssleep(1); 145 ssleep(1);
146 for_each_online_cpu(cpu) { 146 for_each_online_cpu(cpu) {
147 if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) 147 if (cpu_isset(cpu, map) &&
148 !per_cpu(cpu_idle_state, cpu))
148 cpu_clear(cpu, map); 149 cpu_clear(cpu, map);
149 } 150 }
150 cpus_and(map, map, cpu_online_map); 151 cpus_and(map, map, cpu_online_map);
@@ -275,7 +276,8 @@ void __show_regs(struct pt_regs * regs)
275 system_utsname.version); 276 system_utsname.version);
276 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 277 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
277 printk_address(regs->rip); 278 printk_address(regs->rip);
278 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); 279 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
280 regs->eflags);
279 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 281 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
280 regs->rax, regs->rbx, regs->rcx); 282 regs->rax, regs->rbx, regs->rcx);
281 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", 283 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -427,15 +429,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
427 struct pt_regs * childregs; 429 struct pt_regs * childregs;
428 struct task_struct *me = current; 430 struct task_struct *me = current;
429 431
430 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; 432 childregs = ((struct pt_regs *)
431 433 (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
432 *childregs = *regs; 434 *childregs = *regs;
433 435
434 childregs->rax = 0; 436 childregs->rax = 0;
435 childregs->rsp = rsp; 437 childregs->rsp = rsp;
436 if (rsp == ~0UL) { 438 if (rsp == ~0UL)
437 childregs->rsp = (unsigned long)childregs; 439 childregs->rsp = (unsigned long)childregs;
438 }
439 440
440 p->thread.rsp = (unsigned long) childregs; 441 p->thread.rsp = (unsigned long) childregs;
441 p->thread.rsp0 = (unsigned long) (childregs+1); 442 p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -457,7 +458,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
457 p->thread.io_bitmap_max = 0; 458 p->thread.io_bitmap_max = 0;
458 return -ENOMEM; 459 return -ENOMEM;
459 } 460 }
460 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); 461 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
462 IO_BITMAP_BYTES);
461 } 463 }
462 464
463 /* 465 /*
@@ -494,7 +496,8 @@ out:
494 * - fold all the options into a flag word and test it with a single test. 496 * - fold all the options into a flag word and test it with a single test.
495 * - could test fs/gs bitsliced 497 * - could test fs/gs bitsliced
496 */ 498 */
497struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 499struct task_struct *
500__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
498{ 501{
499 struct thread_struct *prev = &prev_p->thread, 502 struct thread_struct *prev = &prev_p->thread,
500 *next = &next_p->thread; 503 *next = &next_p->thread;
@@ -565,7 +568,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
565 prev->userrsp = read_pda(oldrsp); 568 prev->userrsp = read_pda(oldrsp);
566 write_pda(oldrsp, next->userrsp); 569 write_pda(oldrsp, next->userrsp);
567 write_pda(pcurrent, next_p); 570 write_pda(pcurrent, next_p);
568 write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); 571 write_pda(kernelstack,
572 (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
569 573
570 /* 574 /*
571 * Now maybe reload the debug registers 575 * Now maybe reload the debug registers
@@ -646,7 +650,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
646 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); 650 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
647} 651}
648 652
649asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 653asmlinkage long
654sys_clone(unsigned long clone_flags, unsigned long newsp,
655 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
650{ 656{
651 if (!newsp) 657 if (!newsp)
652 newsp = regs->rsp; 658 newsp = regs->rsp;
@@ -682,7 +688,8 @@ unsigned long get_wchan(struct task_struct *p)
682 return 0; 688 return 0;
683 fp = *(u64 *)(p->thread.rsp); 689 fp = *(u64 *)(p->thread.rsp);
684 do { 690 do {
685 if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) 691 if (fp < (unsigned long)stack ||
692 fp > (unsigned long)stack+THREAD_SIZE)
686 return 0; 693 return 0;
687 rip = *(u64 *)(fp+8); 694 rip = *(u64 *)(fp+8);
688 if (!in_sched_functions(rip)) 695 if (!in_sched_functions(rip))
@@ -717,8 +724,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
717 task->thread.gsindex = 0; 724 task->thread.gsindex = 0;
718 task->thread.gs = addr; 725 task->thread.gs = addr;
719 if (doit) { 726 if (doit) {
720 load_gs_index(0); 727 load_gs_index(0);
721 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 728 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
722 } 729 }
723 } 730 }
724 put_cpu(); 731 put_cpu();
@@ -735,7 +742,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
735 set_32bit_tls(task, FS_TLS, addr); 742 set_32bit_tls(task, FS_TLS, addr);
736 if (doit) { 743 if (doit) {
737 load_TLS(&task->thread, cpu); 744 load_TLS(&task->thread, cpu);
738 asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); 745 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
739 } 746 }
740 task->thread.fsindex = FS_TLS_SEL; 747 task->thread.fsindex = FS_TLS_SEL;
741 task->thread.fs = 0; 748 task->thread.fs = 0;
@@ -745,8 +752,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
745 if (doit) { 752 if (doit) {
746 /* set the selector to 0 to not confuse 753 /* set the selector to 0 to not confuse
747 __switch_to */ 754 __switch_to */
748 asm volatile("movl %0,%%fs" :: "r" (0)); 755 asm volatile("movl %0,%%fs" :: "r" (0));
749 ret = checking_wrmsrl(MSR_FS_BASE, addr); 756 ret = checking_wrmsrl(MSR_FS_BASE, addr);
750 } 757 }
751 } 758 }
752 put_cpu(); 759 put_cpu();
@@ -755,9 +762,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
755 unsigned long base; 762 unsigned long base;
756 if (task->thread.fsindex == FS_TLS_SEL) 763 if (task->thread.fsindex == FS_TLS_SEL)
757 base = read_32bit_tls(task, FS_TLS); 764 base = read_32bit_tls(task, FS_TLS);
758 else if (doit) { 765 else if (doit)
759 rdmsrl(MSR_FS_BASE, base); 766 rdmsrl(MSR_FS_BASE, base);
760 } else 767 else
761 base = task->thread.fs; 768 base = task->thread.fs;
762 ret = put_user(base, (unsigned long __user *)addr); 769 ret = put_user(base, (unsigned long __user *)addr);
763 break; 770 break;
@@ -766,9 +773,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
766 unsigned long base; 773 unsigned long base;
767 if (task->thread.gsindex == GS_TLS_SEL) 774 if (task->thread.gsindex == GS_TLS_SEL)
768 base = read_32bit_tls(task, GS_TLS); 775 base = read_32bit_tls(task, GS_TLS);
769 else if (doit) { 776 else if (doit)
770 rdmsrl(MSR_KERNEL_GS_BASE, base); 777 rdmsrl(MSR_KERNEL_GS_BASE, base);
771 } else 778 else
772 base = task->thread.gs; 779 base = task->thread.gs;
773 ret = put_user(base, (unsigned long __user *)addr); 780 ret = put_user(base, (unsigned long __user *)addr);
774 break; 781 break;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 47f95687905f..75235ed2b31b 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -77,6 +77,7 @@ static inline void kb_wait(void)
77 77
78void machine_shutdown(void) 78void machine_shutdown(void)
79{ 79{
80 unsigned long flags;
80 /* Stop the cpus and apics */ 81 /* Stop the cpus and apics */
81#ifdef CONFIG_SMP 82#ifdef CONFIG_SMP
82 int reboot_cpu_id; 83 int reboot_cpu_id;
@@ -98,7 +99,7 @@ void machine_shutdown(void)
98 smp_send_stop(); 99 smp_send_stop();
99#endif 100#endif
100 101
101 local_irq_disable(); 102 local_irq_save(flags);
102 103
103#ifndef CONFIG_SMP 104#ifndef CONFIG_SMP
104 disable_local_APIC(); 105 disable_local_APIC();
@@ -106,7 +107,7 @@ void machine_shutdown(void)
106 107
107 disable_IO_APIC(); 108 disable_IO_APIC();
108 109
109 local_irq_enable(); 110 local_irq_restore(flags);
110} 111}
111 112
112void machine_emergency_restart(void) 113void machine_emergency_restart(void)
@@ -120,7 +121,7 @@ void machine_emergency_restart(void)
120 /* Could also try the reset bit in the Hammer NB */ 121 /* Could also try the reset bit in the Hammer NB */
121 switch (reboot_type) { 122 switch (reboot_type) {
122 case BOOT_KBD: 123 case BOOT_KBD:
123 for (i=0; i<100; i++) { 124 for (i=0; i<10; i++) {
124 kb_wait(); 125 kb_wait();
125 udelay(50); 126 udelay(50);
126 outb(0xfe,0x64); /* pulse reset low */ 127 outb(0xfe,0x64); /* pulse reset low */
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index da0bc3e7bdf5..750e01dcbdf4 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -60,6 +60,7 @@
60#include <asm/setup.h> 60#include <asm/setup.h>
61#include <asm/mach_apic.h> 61#include <asm/mach_apic.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/sections.h>
63 64
64/* 65/*
65 * Machine setup.. 66 * Machine setup..
@@ -103,7 +104,6 @@ struct edid_info edid_info;
103struct e820map e820; 104struct e820map e820;
104 105
105extern int root_mountflags; 106extern int root_mountflags;
106extern char _text, _etext, _edata, _end;
107 107
108char command_line[COMMAND_LINE_SIZE]; 108char command_line[COMMAND_LINE_SIZE];
109 109
@@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
412{ 412{
413 unsigned long bootmap_size, bootmap; 413 unsigned long bootmap_size, bootmap;
414 414
415 memory_present(0, start_pfn, end_pfn);
416 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 415 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
417 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); 416 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
418 if (bootmap == -1L) 417 if (bootmap == -1L)
@@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p)
571 570
572 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 571 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
573 572
573 zap_low_mappings(0);
574
574#ifdef CONFIG_ACPI 575#ifdef CONFIG_ACPI
575 /* 576 /*
576 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 577 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p)
657 } 658 }
658#endif 659#endif
659 660
660 sparse_init();
661
662 paging_init(); 661 paging_init();
663 662
664 check_ioapic(); 663 check_ioapic();
@@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
793#endif 792#endif
794 793
795 bits = 0; 794 bits = 0;
796 while ((1 << bits) < c->x86_num_cores) 795 while ((1 << bits) < c->x86_max_cores)
797 bits++; 796 bits++;
798 797
799 /* Low order bits define the core id (index of core in socket) */ 798 /* Low order bits define the core id (index of core in socket) */
@@ -823,10 +822,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
823 if (!node_online(node)) 822 if (!node_online(node))
824 node = nearby_node(apicid); 823 node = nearby_node(apicid);
825 } 824 }
826 cpu_to_node[cpu] = node; 825 numa_set_node(cpu, node);
827 826
828 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 827 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
829 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 828 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
830#endif 829#endif
831#endif 830#endif
832} 831}
@@ -875,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
875 display_cacheinfo(c); 874 display_cacheinfo(c);
876 875
877 if (c->extended_cpuid_level >= 0x80000008) { 876 if (c->extended_cpuid_level >= 0x80000008) {
878 c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 877 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
879 if (c->x86_num_cores & (c->x86_num_cores - 1)) 878 if (c->x86_max_cores & (c->x86_max_cores - 1))
880 c->x86_num_cores = 1; 879 c->x86_max_cores = 1;
881 880
882 amd_detect_cmp(c); 881 amd_detect_cmp(c);
883 } 882 }
@@ -889,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
889{ 888{
890#ifdef CONFIG_SMP 889#ifdef CONFIG_SMP
891 u32 eax, ebx, ecx, edx; 890 u32 eax, ebx, ecx, edx;
892 int index_msb, tmp; 891 int index_msb, core_bits;
893 int cpu = smp_processor_id(); 892 int cpu = smp_processor_id();
894 893
894 cpuid(1, &eax, &ebx, &ecx, &edx);
895
896 c->apicid = phys_pkg_id(0);
897
895 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 898 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
896 return; 899 return;
897 900
898 cpuid(1, &eax, &ebx, &ecx, &edx);
899 smp_num_siblings = (ebx & 0xff0000) >> 16; 901 smp_num_siblings = (ebx & 0xff0000) >> 16;
900 902
901 if (smp_num_siblings == 1) { 903 if (smp_num_siblings == 1) {
902 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 904 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
903 } else if (smp_num_siblings > 1) { 905 } else if (smp_num_siblings > 1 ) {
904 index_msb = 31; 906
905 /*
906 * At this point we only support two siblings per
907 * processor package.
908 */
909 if (smp_num_siblings > NR_CPUS) { 907 if (smp_num_siblings > NR_CPUS) {
910 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 908 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
911 smp_num_siblings = 1; 909 smp_num_siblings = 1;
912 return; 910 return;
913 } 911 }
914 tmp = smp_num_siblings; 912
915 while ((tmp & 0x80000000 ) == 0) { 913 index_msb = get_count_order(smp_num_siblings);
916 tmp <<=1 ;
917 index_msb--;
918 }
919 if (smp_num_siblings & (smp_num_siblings - 1))
920 index_msb++;
921 phys_proc_id[cpu] = phys_pkg_id(index_msb); 914 phys_proc_id[cpu] = phys_pkg_id(index_msb);
922 915
923 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 916 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
924 phys_proc_id[cpu]); 917 phys_proc_id[cpu]);
925 918
926 smp_num_siblings = smp_num_siblings / c->x86_num_cores; 919 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
927 920
928 tmp = smp_num_siblings; 921 index_msb = get_count_order(smp_num_siblings) ;
929 index_msb = 31;
930 while ((tmp & 0x80000000) == 0) {
931 tmp <<=1 ;
932 index_msb--;
933 }
934 if (smp_num_siblings & (smp_num_siblings - 1))
935 index_msb++;
936 922
937 cpu_core_id[cpu] = phys_pkg_id(index_msb); 923 core_bits = get_count_order(c->x86_max_cores);
938 924
939 if (c->x86_num_cores > 1) 925 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
926 ((1 << core_bits) - 1);
927
928 if (c->x86_max_cores > 1)
940 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 929 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
941 cpu_core_id[cpu]); 930 cpu_core_id[cpu]);
942 } 931 }
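get_count_order(n) returns ceil(log2(n)); the rewrite uses it to slice the APIC id into package, core and thread fields instead of the old hand-rolled bit scan. A sketch of the decoding with a hypothetical APIC id layout (all values chosen for illustration):

#include <stdio.h>

/* ceil(log2(n)) for n >= 1 -- mirrors the kernel's get_count_order(). */
static int get_count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned int apicid = 0x5;          /* hypothetical: pkg 1, core 0, thread 1 */
        unsigned int smp_num_siblings = 4;  /* logical CPUs per package */
        unsigned int max_cores = 2;         /* cores per package */

        /* phys_pkg_id(index_msb): everything above the sibling bits */
        int index_msb = get_count_order(smp_num_siblings);
        unsigned int phys_proc_id = apicid >> index_msb;

        /* core id: the bits between the thread bits and the package bits */
        int thread_bits = get_count_order(smp_num_siblings / max_cores);
        int core_bits = get_count_order(max_cores);
        unsigned int core_id = (apicid >> thread_bits) & ((1u << core_bits) - 1);

        printf("apicid 0x%x -> package %u, core %u\n",
               apicid, phys_proc_id, core_id);
        return 0;
}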
@@ -975,7 +964,7 @@ static void srat_detect_node(void)
975 node = apicid_to_node[hard_smp_processor_id()]; 964 node = apicid_to_node[hard_smp_processor_id()];
976 if (node == NUMA_NO_NODE) 965 if (node == NUMA_NO_NODE)
977 node = 0; 966 node = 0;
978 cpu_to_node[cpu] = node; 967 numa_set_node(cpu, node);
979 968
980 if (acpi_numa > 0) 969 if (acpi_numa > 0)
981 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); 970 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
@@ -993,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
993 unsigned eax = cpuid_eax(0x80000008); 982 unsigned eax = cpuid_eax(0x80000008);
994 c->x86_virt_bits = (eax >> 8) & 0xff; 983 c->x86_virt_bits = (eax >> 8) & 0xff;
995 c->x86_phys_bits = eax & 0xff; 984 c->x86_phys_bits = eax & 0xff;
985 /* CPUID workaround for Intel 0F34 CPU */
986 if (c->x86_vendor == X86_VENDOR_INTEL &&
987 c->x86 == 0xF && c->x86_model == 0x3 &&
988 c->x86_mask == 0x4)
989 c->x86_phys_bits = 36;
996 } 990 }
997 991
998 if (c->x86 == 15) 992 if (c->x86 == 15)
999 c->x86_cache_alignment = c->x86_clflush_size * 2; 993 c->x86_cache_alignment = c->x86_clflush_size * 2;
1000 if (c->x86 >= 15) 994 if (c->x86 >= 15)
1001 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 995 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1002 c->x86_num_cores = intel_num_cpu_cores(c); 996 c->x86_max_cores = intel_num_cpu_cores(c);
1003 997
1004 srat_detect_node(); 998 srat_detect_node();
1005} 999}
@@ -1037,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1037 c->x86_model_id[0] = '\0'; /* Unset */ 1031 c->x86_model_id[0] = '\0'; /* Unset */
1038 c->x86_clflush_size = 64; 1032 c->x86_clflush_size = 64;
1039 c->x86_cache_alignment = c->x86_clflush_size; 1033 c->x86_cache_alignment = c->x86_clflush_size;
1040 c->x86_num_cores = 1; 1034 c->x86_max_cores = 1;
1041 c->extended_cpuid_level = 0; 1035 c->extended_cpuid_level = 0;
1042 memset(&c->x86_capability, 0, sizeof c->x86_capability); 1036 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1043 1037
@@ -1060,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1060 c->x86 = (tfms >> 8) & 0xf; 1054 c->x86 = (tfms >> 8) & 0xf;
1061 c->x86_model = (tfms >> 4) & 0xf; 1055 c->x86_model = (tfms >> 4) & 0xf;
1062 c->x86_mask = tfms & 0xf; 1056 c->x86_mask = tfms & 0xf;
1063 if (c->x86 == 0xf) { 1057 if (c->x86 == 0xf)
1064 c->x86 += (tfms >> 20) & 0xff; 1058 c->x86 += (tfms >> 20) & 0xff;
1059 if (c->x86 >= 0x6)
1065 c->x86_model += ((tfms >> 16) & 0xF) << 4; 1060 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1066 }
1067 if (c->x86_capability[0] & (1<<19)) 1061 if (c->x86_capability[0] & (1<<19))
1068 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 1062 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1069 } else { 1063 } else {
@@ -1271,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1271 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 1265 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1272 1266
1273#ifdef CONFIG_SMP 1267#ifdef CONFIG_SMP
1274 if (smp_num_siblings * c->x86_num_cores > 1) { 1268 if (smp_num_siblings * c->x86_max_cores > 1) {
1275 int cpu = c - cpu_data; 1269 int cpu = c - cpu_data;
1276 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1270 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1277 seq_printf(m, "siblings\t: %d\n", 1271 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1278 c->x86_num_cores * smp_num_siblings);
1279 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1272 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1280 seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 1273 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1281 } 1274 }
1282#endif 1275#endif
1283 1276
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 79190891fbc5..06dc354375c3 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -141,7 +141,6 @@ void pda_init(int cpu)
141 panic("cannot allocate irqstack for cpu %d", cpu); 141 panic("cannot allocate irqstack for cpu %d", cpu);
142 } 142 }
143 143
144 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
145 144
146 pda->irqstackptr += IRQSTACKSIZE-64; 145 pda->irqstackptr += IRQSTACKSIZE-64;
147} 146}
@@ -197,6 +196,7 @@ void __cpuinit cpu_init (void)
197 /* CPU 0 is initialised in head64.c */ 196 /* CPU 0 is initialised in head64.c */
198 if (cpu != 0) { 197 if (cpu != 0) {
199 pda_init(cpu); 198 pda_init(cpu);
199 zap_low_mappings(cpu);
200 } else 200 } else
201 estacks = boot_exception_stacks; 201 estacks = boot_exception_stacks;
202 202
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index d642fbf3da29..5876df116c92 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
110 COPY(r14); 110 COPY(r14);
111 COPY(r15); 111 COPY(r15);
112 112
113 /* Kernel saves and restores only the CS segment register on signals,
114 * which is the bare minimum needed to allow mixed 32/64-bit code.
115 * App's signal handler can save/restore other segments if needed. */
116 {
117 unsigned cs;
118 err |= __get_user(cs, &sc->cs);
119 regs->cs = cs | 3; /* Force into user mode */
120 }
121
113 { 122 {
114 unsigned int tmpflags; 123 unsigned int tmpflags;
115 err |= __get_user(tmpflags, &sc->eflags); 124 err |= __get_user(tmpflags, &sc->eflags);
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
187{ 196{
188 int err = 0; 197 int err = 0;
189 198
199 err |= __put_user(regs->cs, &sc->cs);
190 err |= __put_user(0, &sc->gs); 200 err |= __put_user(0, &sc->gs);
191 err |= __put_user(0, &sc->fs); 201 err |= __put_user(0, &sc->fs);
192 202
@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
318 328
319 regs->rsp = (unsigned long)frame; 329 regs->rsp = (unsigned long)frame;
320 330
331 /* Set up the CS register to run signal handlers in 64-bit mode,
332 even if the handler happens to be interrupting 32-bit code. */
333 regs->cs = __USER_CS;
334
335 /* This, by contrast, has nothing to do with segment registers -
336 see include/asm-x86_64/uaccess.h for details. */
321 set_fs(USER_DS); 337 set_fs(USER_DS);
338
322 regs->eflags &= ~TF_MASK; 339 regs->eflags &= ~TF_MASK;
323 if (test_thread_flag(TIF_SINGLESTEP)) 340 if (test_thread_flag(TIF_SINGLESTEP))
324 ptrace_notify(SIGTRAP); 341 ptrace_notify(SIGTRAP);
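OR-ing 3 into the restored CS pins its RPL to ring 3, so a handler cannot smuggle a ring-0 selector back through sigreturn. The bit arithmetic, concretely (the selector value is hypothetical):

#include <stdio.h>

int main(void)
{
        /* Selector layout: index[15:3] TI[2] RPL[1:0]. OR-ing 3 forces the
         * requested privilege level to ring 3 whatever userspace stored. */
        unsigned int forged = 0x10;     /* hypothetical ring-0 kernel selector */
        unsigned int restored = forged | 3;

        printf("0x%02x -> 0x%02x (RPL %u)\n", forged, restored, restored & 3);
        return 0;
}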
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 9db9dda161b4..cfc3d9dccbd9 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -28,8 +28,6 @@
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <asm/apicdef.h> 29#include <asm/apicdef.h>
30 30
31#define __cpuinit __init
32
33/* 31/*
34 * Smarter SMP flushing macros. 32 * Smarter SMP flushing macros.
35 * c/o Linus Torvalds. 33 * c/o Linus Torvalds.
@@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
452 450
453void smp_stop_cpu(void) 451void smp_stop_cpu(void)
454{ 452{
453 unsigned long flags;
455 /* 454 /*
456 * Remove this CPU: 455 * Remove this CPU:
457 */ 456 */
458 cpu_clear(smp_processor_id(), cpu_online_map); 457 cpu_clear(smp_processor_id(), cpu_online_map);
459 local_irq_disable(); 458 local_irq_save(flags);
460 disable_local_APIC(); 459 disable_local_APIC();
461 local_irq_enable(); 460 local_irq_restore(flags);
462} 461}
463 462
464static void smp_really_stop_cpu(void *dummy) 463static void smp_really_stop_cpu(void *dummy)
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index c4e59bbdc187..683c33f7b967 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -64,6 +64,7 @@
64int smp_num_siblings = 1; 64int smp_num_siblings = 1;
65/* Package ID of each logical CPU */ 65/* Package ID of each logical CPU */
66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
67/* core ID of each logical CPU */
67u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 68u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68 69
69/* Bitmask of currently online CPUs */ 70/* Bitmask of currently online CPUs */
@@ -87,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
87/* Set when the idlers are all forked */ 88/* Set when the idlers are all forked */
88int smp_threads_ready; 89int smp_threads_ready;
89 90
91/* representing HT siblings of each logical CPU */
90cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 92cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
93
94/* representing HT and core siblings of each logical CPU */
91cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 95cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
92EXPORT_SYMBOL(cpu_core_map); 96EXPORT_SYMBOL(cpu_core_map);
93 97
@@ -434,30 +438,59 @@ void __cpuinit smp_callin(void)
434 cpu_set(cpuid, cpu_callin_map); 438 cpu_set(cpuid, cpu_callin_map);
435} 439}
436 440
441/* representing cpus for which sibling maps can be computed */
442static cpumask_t cpu_sibling_setup_map;
443
437static inline void set_cpu_sibling_map(int cpu) 444static inline void set_cpu_sibling_map(int cpu)
438{ 445{
439 int i; 446 int i;
447 struct cpuinfo_x86 *c = cpu_data;
448
449 cpu_set(cpu, cpu_sibling_setup_map);
440 450
441 if (smp_num_siblings > 1) { 451 if (smp_num_siblings > 1) {
442 for_each_cpu(i) { 452 for_each_cpu_mask(i, cpu_sibling_setup_map) {
443 if (cpu_core_id[cpu] == cpu_core_id[i]) { 453 if (phys_proc_id[cpu] == phys_proc_id[i] &&
454 cpu_core_id[cpu] == cpu_core_id[i]) {
444 cpu_set(i, cpu_sibling_map[cpu]); 455 cpu_set(i, cpu_sibling_map[cpu]);
445 cpu_set(cpu, cpu_sibling_map[i]); 456 cpu_set(cpu, cpu_sibling_map[i]);
457 cpu_set(i, cpu_core_map[cpu]);
458 cpu_set(cpu, cpu_core_map[i]);
446 } 459 }
447 } 460 }
448 } else { 461 } else {
449 cpu_set(cpu, cpu_sibling_map[cpu]); 462 cpu_set(cpu, cpu_sibling_map[cpu]);
450 } 463 }
451 464
452 if (current_cpu_data.x86_num_cores > 1) { 465 if (current_cpu_data.x86_max_cores == 1) {
453 for_each_cpu(i) {
454 if (phys_proc_id[cpu] == phys_proc_id[i]) {
455 cpu_set(i, cpu_core_map[cpu]);
456 cpu_set(cpu, cpu_core_map[i]);
457 }
458 }
459 } else {
460 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 466 cpu_core_map[cpu] = cpu_sibling_map[cpu];
467 c[cpu].booted_cores = 1;
468 return;
469 }
470
471 for_each_cpu_mask(i, cpu_sibling_setup_map) {
472 if (phys_proc_id[cpu] == phys_proc_id[i]) {
473 cpu_set(i, cpu_core_map[cpu]);
474 cpu_set(cpu, cpu_core_map[i]);
475 /*
476 * Does this new cpu bringup a new core?
477 */
478 if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
479 /*
480 * for each core in package, increment
481 * the booted_cores for this new cpu
482 */
483 if (first_cpu(cpu_sibling_map[i]) == i)
484 c[cpu].booted_cores++;
485 /*
486 * increment the core count for all
487 * the other cpus in this package
488 */
489 if (i != cpu)
490 c[i].booted_cores++;
491 } else if (i != cpu && !c[cpu].booted_cores)
492 c[cpu].booted_cores = c[i].booted_cores;
493 }
461 } 494 }
462} 495}
463 496
@@ -879,6 +912,9 @@ static __init void disable_smp(void)
879} 912}
880 913
881#ifdef CONFIG_HOTPLUG_CPU 914#ifdef CONFIG_HOTPLUG_CPU
915
916int additional_cpus __initdata = -1;
917
882/* 918/*
883 * cpu_possible_map should be static, it cannot change as cpu's 919 * cpu_possible_map should be static, it cannot change as cpu's
884 * are onlined, or offlined. The reason is per-cpu data-structures 920 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -887,14 +923,38 @@ static __init void disable_smp(void)
887 * cpu_present_map on the other hand can change dynamically. 923 * cpu_present_map on the other hand can change dynamically.
888 * In case when cpu_hotplug is not compiled, then we resort to current 924 * In case when cpu_hotplug is not compiled, then we resort to current
889 * behaviour, which is cpu_possible == cpu_present. 925 * behaviour, which is cpu_possible == cpu_present.
890 * If cpu-hotplug is supported, then we need to preallocate for all
891 * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
892 * - Ashok Raj 926 * - Ashok Raj
927 *
928 * Three ways to find out the number of additional hotplug CPUs:
 929 * - If the BIOS specified disabled CPUs in ACPI/mptables, use that.
 930 * - Otherwise use half of the available CPUs, or 2, whichever is more.
 931 * - The user can override it with additional_cpus=NUM.
932 * We do this because additional CPUs waste a lot of memory.
933 * -AK
893 */ 934 */
894__init void prefill_possible_map(void) 935__init void prefill_possible_map(void)
895{ 936{
896 int i; 937 int i;
897 for (i = 0; i < NR_CPUS; i++) 938 int possible;
939
940 if (additional_cpus == -1) {
941 if (disabled_cpus > 0) {
942 additional_cpus = disabled_cpus;
943 } else {
944 additional_cpus = num_processors / 2;
945 if (additional_cpus == 0)
946 additional_cpus = 2;
947 }
948 }
949 possible = num_processors + additional_cpus;
950 if (possible > NR_CPUS)
951 possible = NR_CPUS;
952
953 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
954 possible,
955 max_t(int, possible - num_processors, 0));
956
957 for (i = 0; i < possible; i++)
898 cpu_set(i, cpu_possible_map); 958 cpu_set(i, cpu_possible_map);
899} 959}
900#endif 960#endif
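The heuristic is easy to exercise once lifted out of the kernel; the three cases in the comment above correspond to the three calls below (the NR_CPUS value is chosen for illustration):

#include <stdio.h>

#define NR_CPUS 32

static int possible_cpus(int num_processors, int disabled_cpus,
                         int additional_cpus)
{
        int possible;

        if (additional_cpus == -1) {    /* no additional_cpus= override */
                if (disabled_cpus > 0)
                        additional_cpus = disabled_cpus;
                else {
                        additional_cpus = num_processors / 2;
                        if (additional_cpus == 0)
                                additional_cpus = 2;
                }
        }
        possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;
        return possible;
}

int main(void)
{
        printf("%d\n", possible_cpus(8, 0, -1));  /* 12: half again as many  */
        printf("%d\n", possible_cpus(2, 6, -1));  /* 8: BIOS-disabled CPUs win */
        printf("%d\n", possible_cpus(4, 0, 1));   /* 5: explicit additional_cpus=1 */
        return 0;
}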
@@ -965,6 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
965 nmi_watchdog_default(); 1025 nmi_watchdog_default();
966 current_cpu_data = boot_cpu_data; 1026 current_cpu_data = boot_cpu_data;
967 current_thread_info()->cpu = 0; /* needed? */ 1027 current_thread_info()->cpu = 0; /* needed? */
1028 set_cpu_sibling_map(0);
968 1029
969 if (smp_sanity_check(max_cpus) < 0) { 1030 if (smp_sanity_check(max_cpus) < 0) {
970 printk(KERN_INFO "SMP disabled\n"); 1031 printk(KERN_INFO "SMP disabled\n");
@@ -1008,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void)
1008 int me = smp_processor_id(); 1069 int me = smp_processor_id();
1009 cpu_set(me, cpu_online_map); 1070 cpu_set(me, cpu_online_map);
1010 cpu_set(me, cpu_callout_map); 1071 cpu_set(me, cpu_callout_map);
1011 cpu_set(0, cpu_sibling_map[0]);
1012 cpu_set(0, cpu_core_map[0]);
1013 per_cpu(cpu_state, me) = CPU_ONLINE; 1072 per_cpu(cpu_state, me) = CPU_ONLINE;
1014} 1073}
1015 1074
@@ -1062,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
1062 */ 1121 */
1063void __init smp_cpus_done(unsigned int max_cpus) 1122void __init smp_cpus_done(unsigned int max_cpus)
1064{ 1123{
1065#ifndef CONFIG_HOTPLUG_CPU
1066 zap_low_mappings();
1067#endif
1068 smp_cleanup_boot(); 1124 smp_cleanup_boot();
1069 1125
1070#ifdef CONFIG_X86_IO_APIC 1126#ifdef CONFIG_X86_IO_APIC
@@ -1081,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus)
1081static void remove_siblinginfo(int cpu) 1137static void remove_siblinginfo(int cpu)
1082{ 1138{
1083 int sibling; 1139 int sibling;
1140 struct cpuinfo_x86 *c = cpu_data;
1084 1141
1142 for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
1143 cpu_clear(cpu, cpu_core_map[sibling]);
1144 /*
1145 * last thread sibling in this cpu core going down
1146 */
1147 if (cpus_weight(cpu_sibling_map[cpu]) == 1)
1148 c[sibling].booted_cores--;
1149 }
1150
1085 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 1151 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1086 cpu_clear(cpu, cpu_sibling_map[sibling]); 1152 cpu_clear(cpu, cpu_sibling_map[sibling]);
1087 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1088 cpu_clear(cpu, cpu_core_map[sibling]);
1089 cpus_clear(cpu_sibling_map[cpu]); 1153 cpus_clear(cpu_sibling_map[cpu]);
1090 cpus_clear(cpu_core_map[cpu]); 1154 cpus_clear(cpu_core_map[cpu]);
1091 phys_proc_id[cpu] = BAD_APICID; 1155 phys_proc_id[cpu] = BAD_APICID;
1092 cpu_core_id[cpu] = BAD_APICID; 1156 cpu_core_id[cpu] = BAD_APICID;
1157 cpu_clear(cpu, cpu_sibling_setup_map);
1093} 1158}
1094 1159
1095void remove_cpu_from_maps(void) 1160void remove_cpu_from_maps(void)
@@ -1153,6 +1218,12 @@ void __cpu_die(unsigned int cpu)
1153 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1218 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1154} 1219}
1155 1220
1221static __init int setup_additional_cpus(char *s)
1222{
1223 return get_option(&s, &additional_cpus);
1224}
1225__setup("additional_cpus=", setup_additional_cpus);
1226
1156#else /* ... !CONFIG_HOTPLUG_CPU */ 1227#else /* ... !CONFIG_HOTPLUG_CPU */
1157 1228
1158int __cpu_disable(void) 1229int __cpu_disable(void)
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index cc7821c68851..6449ea8fe756 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
154 err |= copy_to_user(&name->machine, "i686", 5); 154 err |= copy_to_user(&name->machine, "i686", 5);
155 return err ? -EFAULT : 0; 155 return err ? -EFAULT : 0;
156} 156}
157
158asmlinkage long sys_time64(long __user * tloc)
159{
160 struct timeval now;
161 int i;
162
163 do_gettimeofday(&now);
164 i = now.tv_sec;
165 if (tloc) {
166 if (put_user(i,tloc))
167 i = -EFAULT;
168 }
169 return i;
170}
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index b5e09e6b5536..bf337f493189 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
428{ 428{
429 conditional_sti(regs); 429 conditional_sti(regs);
430 430
431#ifdef CONFIG_CHECKING
432 {
433 unsigned long gs;
434 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
435 rdmsrl(MSR_GS_BASE, gs);
436 if (gs != (unsigned long)pda) {
437 wrmsrl(MSR_GS_BASE, pda);
438 printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
439 regs->rip);
440 }
441 }
442#endif
443
444 if (user_mode(regs)) { 431 if (user_mode(regs)) {
445 struct task_struct *tsk = current; 432 struct task_struct *tsk = current;
446 433
@@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
513{ 500{
514 conditional_sti(regs); 501 conditional_sti(regs);
515 502
516#ifdef CONFIG_CHECKING
517 {
518 unsigned long gs;
519 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
520 rdmsrl(MSR_GS_BASE, gs);
521 if (gs != (unsigned long)pda) {
522 wrmsrl(MSR_GS_BASE, pda);
523 oops_in_progress++;
524 printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
525 oops_in_progress--;
526 }
527 }
528#endif
529
530 if (user_mode(regs)) { 503 if (user_mode(regs)) {
531 struct task_struct *tsk = current; 504 struct task_struct *tsk = current;
532 505
@@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
665 struct task_struct *tsk = current; 638 struct task_struct *tsk = current;
666 siginfo_t info; 639 siginfo_t info;
667 640
668#ifdef CONFIG_CHECKING
669 {
670 /* RED-PEN interaction with debugger - could destroy gs */
671 unsigned long gs;
672 struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
673 rdmsrl(MSR_GS_BASE, gs);
674 if (gs != (unsigned long)pda) {
675 wrmsrl(MSR_GS_BASE, pda);
676 printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
677 }
678 }
679#endif
680
681 get_debugreg(condition, 6); 641 get_debugreg(condition, 6);
682 642
683 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 643 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
@@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
888{ 848{
889} 849}
890 850
851asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
852{
853}
854
891/* 855/*
892 * 'math_state_restore()' saves the current math information in the 856 * 'math_state_restore()' saves the current math information in the
893 * old math state array, and gets the new ones from the current task 857 * old math state array, and gets the new ones from the current task
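
The empty mce_threshold_interrupt added here works through weak linkage: it is the fallback the linker uses unless another object file (mce_amd.c in this merge) supplies a strong definition. A small sketch of the pattern, assuming GCC/Clang on ELF; the names are illustrative:

    #include <stdio.h>

    /* Weak default, like the mce_threshold_interrupt stub: used only if
     * no other translation unit provides a strong definition. */
    void __attribute__((weak)) threshold_handler(void)
    {
            puts("default (weak) handler");
    }

    int main(void)
    {
            /* Link another file defining threshold_handler() and the
             * linker silently picks that definition instead of this one. */
            threshold_handler();
            return 0;
    }
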
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 6dd642cad2ef..58b19215b4b3 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -50,7 +50,7 @@ SECTIONS
50 *(.bss.page_aligned) 50 *(.bss.page_aligned)
51 *(.bss) 51 *(.bss)
52 } 52 }
53 __bss_end = .; 53 __bss_stop = .;
54 54
55 . = ALIGN(PAGE_SIZE); 55 . = ALIGN(PAGE_SIZE);
56 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 56 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
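
__bss_stop, like __bss_start, comes from the linker script rather than from any C file; C code consumes such symbols by declaring them extern and using only their addresses. A user-space demonstration, assuming the symbols the default GNU ld script on Linux provides (__bss_start and end):

    #include <stdio.h>

    extern char __bss_start[];  /* provided by the linker, not by C code */
    extern char end[];          /* first address past the BSS */

    int main(void)
    {
            printf("bss: %p..%p (%ld bytes)\n",
                   (void *)__bss_start, (void *)end,
                   (long)(end - __bss_start));
            return 0;
    }
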
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index fd99ddd009bc..4a54221e10bc 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page);
203#endif 203#endif
204 204
205EXPORT_SYMBOL(cpu_khz); 205EXPORT_SYMBOL(cpu_khz);
206
207EXPORT_SYMBOL(load_gs_index);
208
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 30a9da458c15..43d9fa136180 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -5,46 +5,8 @@
5 .globl clear_page 5 .globl clear_page
6 .p2align 4 6 .p2align 4
7clear_page: 7clear_page:
8 xorl %eax,%eax
9 movl $4096/64,%ecx
10 .p2align 4
11.Lloop:
12 decl %ecx
13#define PUT(x) movq %rax,x*8(%rdi)
14 movq %rax,(%rdi)
15 PUT(1)
16 PUT(2)
17 PUT(3)
18 PUT(4)
19 PUT(5)
20 PUT(6)
21 PUT(7)
22 leaq 64(%rdi),%rdi
23 jnz .Lloop
24 nop
25 ret
26clear_page_end:
27
28 /* C-stepping K8s run faster using the string instructions.
29 It is also a lot simpler. Use them when possible. */
30
31#include <asm/cpufeature.h>
32
33 .section .altinstructions,"a"
34 .align 8
35 .quad clear_page
36 .quad clear_page_c
37 .byte X86_FEATURE_K8_C
38 .byte clear_page_end-clear_page
39 .byte clear_page_c_end-clear_page_c
40 .previous
41
42 .section .altinstr_replacement,"ax"
43clear_page_c:
44 movl $4096/8,%ecx 8 movl $4096/8,%ecx
45 xorl %eax,%eax 9 xorl %eax,%eax
46 rep 10 rep
47 stosq 11 stosq
48 ret 12 ret
49clear_page_c_end:
50 .previous
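
clear_page is now unconditionally the rep stosq version; the unrolled loop and the K8 C-stepping .altinstructions plumbing are gone. A user-space sketch of the equivalent store, assuming x86-64 and GCC-style inline asm:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Zero a 4096-byte page with rep stosq: rdi = dest, rcx = qword
     * count, rax = value, exactly as in the new clear_page. */
    static void clear_page_c(void *page)
    {
            uint64_t cnt = 4096 / 8;

            asm volatile("rep stosq"
                         : "+D" (page), "+c" (cnt)
                         : "a" (0ULL)
                         : "memory");
    }

    int main(void)
    {
            uint64_t *p = aligned_alloc(4096, 4096);

            p[0] = ~0ULL;
            clear_page_c(p);
            printf("first qword after clear: %llu\n",
                   (unsigned long long)p[0]);
            free(p);
            return 0;
    }
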
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index dd3aa47b6bf5..621a19769406 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -8,94 +8,7 @@
8 .globl copy_page 8 .globl copy_page
9 .p2align 4 9 .p2align 4
10copy_page: 10copy_page:
11 subq $3*8,%rsp
12 movq %rbx,(%rsp)
13 movq %r12,1*8(%rsp)
14 movq %r13,2*8(%rsp)
15
16 movl $(4096/64)-5,%ecx
17 .p2align 4
18.Loop64:
19 dec %rcx
20
21 movq (%rsi), %rax
22 movq 8 (%rsi), %rbx
23 movq 16 (%rsi), %rdx
24 movq 24 (%rsi), %r8
25 movq 32 (%rsi), %r9
26 movq 40 (%rsi), %r10
27 movq 48 (%rsi), %r11
28 movq 56 (%rsi), %r12
29
30 prefetcht0 5*64(%rsi)
31
32 movq %rax, (%rdi)
33 movq %rbx, 8 (%rdi)
34 movq %rdx, 16 (%rdi)
35 movq %r8, 24 (%rdi)
36 movq %r9, 32 (%rdi)
37 movq %r10, 40 (%rdi)
38 movq %r11, 48 (%rdi)
39 movq %r12, 56 (%rdi)
40
41 leaq 64 (%rsi), %rsi
42 leaq 64 (%rdi), %rdi
43
44 jnz .Loop64
45
46 movl $5,%ecx
47 .p2align 4
48.Loop2:
49 decl %ecx
50
51 movq (%rsi), %rax
52 movq 8 (%rsi), %rbx
53 movq 16 (%rsi), %rdx
54 movq 24 (%rsi), %r8
55 movq 32 (%rsi), %r9
56 movq 40 (%rsi), %r10
57 movq 48 (%rsi), %r11
58 movq 56 (%rsi), %r12
59
60 movq %rax, (%rdi)
61 movq %rbx, 8 (%rdi)
62 movq %rdx, 16 (%rdi)
63 movq %r8, 24 (%rdi)
64 movq %r9, 32 (%rdi)
65 movq %r10, 40 (%rdi)
66 movq %r11, 48 (%rdi)
67 movq %r12, 56 (%rdi)
68
69 leaq 64(%rdi),%rdi
70 leaq 64(%rsi),%rsi
71
72 jnz .Loop2
73
74 movq (%rsp),%rbx
75 movq 1*8(%rsp),%r12
76 movq 2*8(%rsp),%r13
77 addq $3*8,%rsp
78 ret
79
80 /* C-stepping K8s run faster using the string copy instructions.
81 It is also a lot simpler. Use them when possible. */
82
83#include <asm/cpufeature.h>
84
85 .section .altinstructions,"a"
86 .align 8
87 .quad copy_page
88 .quad copy_page_c
89 .byte X86_FEATURE_K8_C
90 .byte copy_page_c_end-copy_page_c
91 .byte copy_page_c_end-copy_page_c
92 .previous
93
94 .section .altinstr_replacement,"ax"
95copy_page_c:
96 movl $4096/8,%ecx 11 movl $4096/8,%ecx
97 rep 12 rep
98 movsq 13 movsq
99 ret 14 ret
100copy_page_c_end:
101 .previous
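
copy_page gets the same simplification: the register-juggling prefetch loop collapses into a single rep movsq. A matching user-space sketch (x86-64, GCC-style inline asm):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Copy one 4096-byte page as 512 qword moves, as in the new
     * copy_page: rdi = dest, rsi = source, rcx = qword count. */
    static void copy_page_c(void *dst, const void *src)
    {
            uint64_t cnt = 4096 / 8;

            asm volatile("rep movsq"
                         : "+D" (dst), "+S" (src), "+c" (cnt)
                         :
                         : "memory");
    }

    int main(void)
    {
            char *a = aligned_alloc(4096, 4096);
            char *b = aligned_alloc(4096, 4096);

            memset(a, 0x42, 4096);
            copy_page_c(b, a);
            printf("pages %s\n", memcmp(a, b, 4096) ? "differ" : "match");
            free(a);
            free(b);
            return 0;
    }
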
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index c6c46494fef5..92dd80544602 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -11,6 +11,8 @@
11 * 11 *
12 * Output: 12 * Output:
13 * rax original destination 13 * rax original destination
14 *
15 * TODO: check best memcpy for PSC
14 */ 16 */
15 17
16 .globl __memcpy 18 .globl __memcpy
@@ -18,95 +20,6 @@
18 .p2align 4 20 .p2align 4
19__memcpy: 21__memcpy:
20memcpy: 22memcpy:
21 pushq %rbx
22 movq %rdi,%rax
23
24 movl %edx,%ecx
25 shrl $6,%ecx
26 jz .Lhandle_tail
27
28 .p2align 4
29.Lloop_64:
30 decl %ecx
31
32 movq (%rsi),%r11
33 movq 8(%rsi),%r8
34
35 movq %r11,(%rdi)
36 movq %r8,1*8(%rdi)
37
38 movq 2*8(%rsi),%r9
39 movq 3*8(%rsi),%r10
40
41 movq %r9,2*8(%rdi)
42 movq %r10,3*8(%rdi)
43
44 movq 4*8(%rsi),%r11
45 movq 5*8(%rsi),%r8
46
47 movq %r11,4*8(%rdi)
48 movq %r8,5*8(%rdi)
49
50 movq 6*8(%rsi),%r9
51 movq 7*8(%rsi),%r10
52
53 movq %r9,6*8(%rdi)
54 movq %r10,7*8(%rdi)
55
56 leaq 64(%rsi),%rsi
57 leaq 64(%rdi),%rdi
58 jnz .Lloop_64
59
60.Lhandle_tail:
61 movl %edx,%ecx
62 andl $63,%ecx
63 shrl $3,%ecx
64 jz .Lhandle_7
65 .p2align 4
66.Lloop_8:
67 decl %ecx
68 movq (%rsi),%r8
69 movq %r8,(%rdi)
70 leaq 8(%rdi),%rdi
71 leaq 8(%rsi),%rsi
72 jnz .Lloop_8
73
74.Lhandle_7:
75 movl %edx,%ecx
76 andl $7,%ecx
77 jz .Lende
78 .p2align 4
79.Lloop_1:
80 movb (%rsi),%r8b
81 movb %r8b,(%rdi)
82 incq %rdi
83 incq %rsi
84 decl %ecx
85 jnz .Lloop_1
86
87.Lende:
88 popq %rbx
89 ret
90.Lfinal:
91
92 /* C-stepping K8s run faster using the string copy instructions.
93 It is also a lot simpler. Use them when possible. */
94
95 .section .altinstructions,"a"
96 .align 8
97 .quad memcpy
98 .quad memcpy_c
99 .byte X86_FEATURE_K8_C
100 .byte .Lfinal-memcpy
101 .byte memcpy_c_end-memcpy_c
102 .previous
103
104 .section .altinstr_replacement,"ax"
105 /* rdi destination
106 * rsi source
107 * rdx count
108 */
109memcpy_c:
110 movq %rdi,%rax 23 movq %rdi,%rax
111 movl %edx,%ecx 24 movl %edx,%ecx
112 shrl $3,%ecx 25 shrl $3,%ecx
@@ -117,5 +30,3 @@ memcpy_c:
117 rep 30 rep
118 movsb 31 movsb
119 ret 32 ret
120memcpy_c_end:
121 .previous
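
What survives of memcpy is the string-instruction body: shrl $3 yields the qword count for rep movsq, andl $7 the 0-7 byte tail for rep movsb. A portable C model of that split, where the small memcpy calls stand in for single qword moves:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void *memcpy_model(void *dst, const void *src, size_t n)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;
            size_t words = n >> 3;          /* shrl $3,%ecx */
            size_t tail  = n & 7;           /* andl $7,%edx */

            while (words--) {               /* rep movsq */
                    uint64_t v;
                    memcpy(&v, s, 8);
                    memcpy(d, &v, 8);
                    d += 8;
                    s += 8;
            }
            while (tail--)                  /* rep movsb */
                    *d++ = *s++;
            return dst;
    }

    int main(void)
    {
            char src[19] = "eighteen chars+nul", dst[19];

            memcpy_model(dst, src, sizeof(src));   /* 2 qwords + 3 bytes */
            printf("%s\n", dst);
            return 0;
    }
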
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 4b4c40638640..2aa48f24ed1e 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -13,98 +13,6 @@
13 .p2align 4 13 .p2align 4
14memset: 14memset:
15__memset: 15__memset:
16 movq %rdi,%r10
17 movq %rdx,%r11
18
19 /* expand byte value */
20 movzbl %sil,%ecx
21 movabs $0x0101010101010101,%rax
22 mul %rcx /* with rax, clobbers rdx */
23
24 /* align dst */
25 movl %edi,%r9d
26 andl $7,%r9d
27 jnz .Lbad_alignment
28.Lafter_bad_alignment:
29
30 movl %r11d,%ecx
31 shrl $6,%ecx
32 jz .Lhandle_tail
33
34 .p2align 4
35.Lloop_64:
36 decl %ecx
37 movq %rax,(%rdi)
38 movq %rax,8(%rdi)
39 movq %rax,16(%rdi)
40 movq %rax,24(%rdi)
41 movq %rax,32(%rdi)
42 movq %rax,40(%rdi)
43 movq %rax,48(%rdi)
44 movq %rax,56(%rdi)
45 leaq 64(%rdi),%rdi
46 jnz .Lloop_64
47
48 /* Handle tail in loops. The loops should be faster than hard
49 to predict jump tables. */
50 .p2align 4
51.Lhandle_tail:
52 movl %r11d,%ecx
53 andl $63&(~7),%ecx
54 jz .Lhandle_7
55 shrl $3,%ecx
56 .p2align 4
57.Lloop_8:
58 decl %ecx
59 movq %rax,(%rdi)
60 leaq 8(%rdi),%rdi
61 jnz .Lloop_8
62
63.Lhandle_7:
64 movl %r11d,%ecx
65 andl $7,%ecx
66 jz .Lende
67 .p2align 4
68.Lloop_1:
69 decl %ecx
70 movb %al,(%rdi)
71 leaq 1(%rdi),%rdi
72 jnz .Lloop_1
73
74.Lende:
75 movq %r10,%rax
76 ret
77
78.Lbad_alignment:
79 cmpq $7,%r11
80 jbe .Lhandle_7
81 movq %rax,(%rdi) /* unaligned store */
82 movq $8,%r8
83 subq %r9,%r8
84 addq %r8,%rdi
85 subq %r8,%r11
86 jmp .Lafter_bad_alignment
87
88 /* C-stepping K8s run faster using the string instructions.
89 It is also a lot simpler. Use them when possible. */
90
91#include <asm/cpufeature.h>
92
93 .section .altinstructions,"a"
94 .align 8
95 .quad memset
96 .quad memset_c
97 .byte X86_FEATURE_K8_C
98 .byte memset_c_end-memset_c
99 .byte memset_c_end-memset_c
100 .previous
101
102 .section .altinstr_replacement,"ax"
103 /* rdi destination
104 * rsi value
105 * rdx count
106 */
107memset_c:
108 movq %rdi,%r9 16 movq %rdi,%r9
109 movl %edx,%r8d 17 movl %edx,%r8d
110 andl $7,%r8d 18 andl $7,%r8d
@@ -121,5 +29,3 @@ memset_c:
121 stosb 29 stosb
122 movq %r9,%rax 30 movq %r9,%rax
123 ret 31 ret
124memset_c_end:
125 .previous
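
memset likewise keeps only the rep stosq/stosb path. The dropped slow path contained one trick worth noting: broadcasting the fill byte into all eight lanes of a qword by multiplying with 0x0101010101010101. A short demonstration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint8_t c = 0x5a;
            /* Multiplying by 0x0101010101010101 copies the byte into
             * every lane, as the removed slow path did in %rax. */
            uint64_t pattern = (uint64_t)c * 0x0101010101010101ULL;

            printf("%016llx\n", (unsigned long long)pattern);  /* 5a5a... */
            return 0;
    }
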
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index b75b872ec154..3a63707a698b 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -308,18 +308,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
308 unsigned long flags; 308 unsigned long flags;
309 siginfo_t info; 309 siginfo_t info;
310 310
311#ifdef CONFIG_CHECKING
312 {
313 unsigned long gs;
314 struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
315 rdmsrl(MSR_GS_BASE, gs);
316 if (gs != (unsigned long)pda) {
317 wrmsrl(MSR_GS_BASE, pda);
318 printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
319 }
320 }
321#endif
322
323 /* get the address */ 311 /* get the address */
324 __asm__("movq %%cr2,%0":"=r" (address)); 312 __asm__("movq %%cr2,%0":"=r" (address));
325 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 313 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
@@ -571,3 +559,10 @@ do_sigbus:
571 force_sig_info(SIGBUS, &info, tsk); 559 force_sig_info(SIGBUS, &info, tsk);
572 return; 560 return;
573} 561}
562
563static int __init enable_pagefaulttrace(char *str)
564{
565 page_fault_trace = 1;
566 return 0;
567}
568__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index e60a1a848de8..286f6a624c3a 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,6 +22,7 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/pci.h>
25 26
26#include <asm/processor.h> 27#include <asm/processor.h>
27#include <asm/system.h> 28#include <asm/system.h>
@@ -36,16 +37,13 @@
36#include <asm/mmu_context.h> 37#include <asm/mmu_context.h>
37#include <asm/proto.h> 38#include <asm/proto.h>
38#include <asm/smp.h> 39#include <asm/smp.h>
40#include <asm/sections.h>
39 41
40#ifndef Dprintk 42#ifndef Dprintk
41#define Dprintk(x...) 43#define Dprintk(x...)
42#endif 44#endif
43 45
44#ifdef CONFIG_GART_IOMMU 46static unsigned long dma_reserve __initdata;
45extern int swiotlb;
46#endif
47
48extern char _stext[];
49 47
50DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 48DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
51 49
@@ -86,9 +84,6 @@ void show_mem(void)
86 84
87/* References to section boundaries */ 85/* References to section boundaries */
88 86
89extern char _text, _etext, _edata, __bss_start, _end[];
90extern char __init_begin, __init_end;
91
92int after_bootmem; 87int after_bootmem;
93 88
94static void *spp_getpage(void) 89static void *spp_getpage(void)
@@ -308,42 +303,81 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
308 table_end<<PAGE_SHIFT); 303 table_end<<PAGE_SHIFT);
309} 304}
310 305
311extern struct x8664_pda cpu_pda[NR_CPUS]; 306void __cpuinit zap_low_mappings(int cpu)
307{
308 if (cpu == 0) {
309 pgd_t *pgd = pgd_offset_k(0UL);
310 pgd_clear(pgd);
311 } else {
312 /*
313 * For APs, zap the low identity mappings by switching cr3
314 * to init_level4_pgt and doing a local flush of all TLB entries
315 */
316 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
317 }
318 __flush_tlb_all();
319}
312 320
313/* Assumes all CPUs still execute in init_mm */ 321/* Compute zone sizes for the DMA and DMA32 zones in a node. */
314void zap_low_mappings(void) 322__init void
323size_zones(unsigned long *z, unsigned long *h,
324 unsigned long start_pfn, unsigned long end_pfn)
315{ 325{
316 pgd_t *pgd = pgd_offset_k(0UL); 326 int i;
317 pgd_clear(pgd); 327 unsigned long w;
318 flush_tlb_all(); 328
329 for (i = 0; i < MAX_NR_ZONES; i++)
330 z[i] = 0;
331
332 if (start_pfn < MAX_DMA_PFN)
333 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
334 if (start_pfn < MAX_DMA32_PFN) {
335 unsigned long dma32_pfn = MAX_DMA32_PFN;
336 if (dma32_pfn > end_pfn)
337 dma32_pfn = end_pfn;
338 z[ZONE_DMA32] = dma32_pfn - start_pfn;
339 }
340 z[ZONE_NORMAL] = end_pfn - start_pfn;
341
342 /* Remove lower zones from higher ones. */
343 w = 0;
344 for (i = 0; i < MAX_NR_ZONES; i++) {
345 if (z[i])
346 z[i] -= w;
347 w += z[i];
348 }
349
350 /* Compute holes */
351 w = 0;
352 for (i = 0; i < MAX_NR_ZONES; i++) {
353 unsigned long s = w;
354 w += z[i];
355 h[i] = e820_hole_size(s, w);
356 }
357
358 /* Add the space needed for mem_map to the holes too. */
359 for (i = 0; i < MAX_NR_ZONES; i++)
360 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
361
362 /* The 16MB DMA zone has the kernel and other misc mappings.
363 Account for them too. */
364 if (h[ZONE_DMA]) {
365 h[ZONE_DMA] += dma_reserve;
366 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
367 printk(KERN_WARNING
368 "Kernel too large and filling up ZONE_DMA?\n");
369 h[ZONE_DMA] = z[ZONE_DMA];
370 }
371 }
319} 372}
320 373
321#ifndef CONFIG_NUMA 374#ifndef CONFIG_NUMA
322void __init paging_init(void) 375void __init paging_init(void)
323{ 376{
324 { 377 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
325 unsigned long zones_size[MAX_NR_ZONES]; 378 size_zones(zones, holes, 0, end_pfn);
326 unsigned long holes[MAX_NR_ZONES]; 379 free_area_init_node(0, NODE_DATA(0), zones,
327 unsigned int max_dma; 380 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
328
329 memset(zones_size, 0, sizeof(zones_size));
330 memset(holes, 0, sizeof(holes));
331
332 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
333
334 if (end_pfn < max_dma) {
335 zones_size[ZONE_DMA] = end_pfn;
336 holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
337 } else {
338 zones_size[ZONE_DMA] = max_dma;
339 holes[ZONE_DMA] = e820_hole_size(0, max_dma);
340 zones_size[ZONE_NORMAL] = end_pfn - max_dma;
341 holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
342 }
343 free_area_init_node(0, NODE_DATA(0), zones_size,
344 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
345 }
346 return;
347} 381}
348#endif 382#endif
349 383
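
size_zones() first records cumulative spans (DMA up to 16MB, DMA32 up to 4GB, NORMAL to end_pfn) and then subtracts each lower zone out of the higher ones, leaving per-zone sizes. A user-space model of that split, with illustrative PFN limits and a 2GB example; the hole and dma_reserve accounting is omitted:

    #include <stdio.h>

    #define MAX_DMA_PFN    (16UL << (20 - 12))  /* 16MB in 4K pages */
    #define MAX_DMA32_PFN  (4UL  << (30 - 12))  /* 4GB  in 4K pages */

    enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

    static void size_zones_model(unsigned long *z,
                                 unsigned long start_pfn,
                                 unsigned long end_pfn)
    {
            unsigned long w = 0;
            int i;

            for (i = 0; i < MAX_NR_ZONES; i++)
                    z[i] = 0;
            if (start_pfn < MAX_DMA_PFN)
                    z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
            if (start_pfn < MAX_DMA32_PFN)
                    z[ZONE_DMA32] = (end_pfn < MAX_DMA32_PFN
                                     ? end_pfn : MAX_DMA32_PFN) - start_pfn;
            z[ZONE_NORMAL] = end_pfn - start_pfn;

            /* Remove lower zones from higher ones. */
            for (i = 0; i < MAX_NR_ZONES; i++) {
                    if (z[i])
                            z[i] -= w;
                    w += z[i];
            }
    }

    int main(void)
    {
            unsigned long z[MAX_NR_ZONES];

            size_zones_model(z, 0, 2UL << (30 - 12));   /* 2GB of RAM */
            printf("DMA %lu, DMA32 %lu, NORMAL %lu pages\n",
                   z[ZONE_DMA], z[ZONE_DMA32], z[ZONE_NORMAL]);
            return 0;
    }
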
@@ -438,19 +472,16 @@ void __init mem_init(void)
438 datasize >> 10, 472 datasize >> 10,
439 initsize >> 10); 473 initsize >> 10);
440 474
475#ifdef CONFIG_SMP
441 /* 476 /*
442 * Subtle. SMP is doing its boot stuff late (because it has to 477 * Sync boot_level4_pgt mappings with the init_level4_pgt
443 * fork idle threads) - but it also needs low mappings for the 478 * except for the low identity mappings which are already zapped
444 * protected-mode entry to work. We zap these entries only after 479 * in init_level4_pgt. This sync-up is essential for AP bringup
445 * the WP-bit has been tested.
446 */ 480 */
447#ifndef CONFIG_SMP 481 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
448 zap_low_mappings();
449#endif 482#endif
450} 483}
451 484
452extern char __initdata_begin[], __initdata_end[];
453
454void free_initmem(void) 485void free_initmem(void)
455{ 486{
456 unsigned long addr; 487 unsigned long addr;
@@ -464,7 +495,7 @@ void free_initmem(void)
464 totalram_pages++; 495 totalram_pages++;
465 } 496 }
466 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); 497 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
467 printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); 498 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
468} 499}
469 500
470#ifdef CONFIG_BLK_DEV_INITRD 501#ifdef CONFIG_BLK_DEV_INITRD
@@ -491,6 +522,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
491#else 522#else
492 reserve_bootmem(phys, len); 523 reserve_bootmem(phys, len);
493#endif 524#endif
525 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
526 dma_reserve += len / PAGE_SIZE;
494} 527}
495 528
496int kern_addr_valid(unsigned long addr) 529int kern_addr_valid(unsigned long addr)
@@ -532,10 +565,6 @@ extern int exception_trace, page_fault_trace;
532static ctl_table debug_table2[] = { 565static ctl_table debug_table2[] = {
533 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, 566 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
534 proc_dointvec }, 567 proc_dointvec },
535#ifdef CONFIG_CHECKING
536 { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
537 proc_dointvec },
538#endif
539 { 0, } 568 { 0, }
540}; 569};
541 570
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index 65417b040c1b..a5663e0bb01c 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
108 limit >>= 16; 108 limit >>= 16;
109 limit <<= 24; 109 limit <<= 24;
110 limit |= (1<<24)-1; 110 limit |= (1<<24)-1;
111 limit++;
111 112
112 if (limit > end_pfn << PAGE_SHIFT) 113 if (limit > end_pfn << PAGE_SHIFT)
113 limit = end_pfn << PAGE_SHIFT; 114 limit = end_pfn << PAGE_SHIFT;
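
The added limit++ turns an inclusive node limit into an exclusive one: the K8 register field supplies 16MB-granular top bits, or-ing in (1<<24)-1 yields the last valid byte, and the increment moves one past the end. Worked through with a hypothetical field value:

    #include <stdio.h>

    int main(void)
    {
            unsigned long limit = 0x0020;   /* hypothetical register field */

            limit <<= 24;                   /* -> 0x20000000 */
            limit |= (1UL << 24) - 1;       /* inclusive end 0x20ffffff */
            limit++;                        /* exclusive end 0x21000000 */
            printf("node ends at %#lx\n", limit);
            return 0;
    }
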
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 214803821001..a828a01739cc 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
38 38
39int numa_off __initdata; 39int numa_off __initdata;
40 40
41int __init compute_hash_shift(struct node *nodes, int numnodes) 41
42/*
43 * Given a shift value, try to populate memnodemap[]
44 * Returns:
45 * 1 if OK
46 * 0 if memnodemap[] too small (shift too small)
47 * -1 if node overlap or lost RAM (shift too big)
48 */
49static int __init populate_memnodemap(
50 const struct node *nodes, int numnodes, int shift)
42{ 51{
43 int i; 52 int i;
44 int shift = 20; 53 int res = -1;
45 unsigned long addr,maxend=0; 54 unsigned long addr, end;
46
47 for (i = 0; i < numnodes; i++)
48 if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
49 maxend = nodes[i].end;
50 55
51 while ((1UL << shift) < (maxend / NODEMAPSIZE)) 56 memset(memnodemap, 0xff, sizeof(memnodemap));
52 shift++;
53
54 printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
55 shift,maxend);
56 memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
57 for (i = 0; i < numnodes; i++) { 57 for (i = 0; i < numnodes; i++) {
58 if (nodes[i].start == nodes[i].end) 58 addr = nodes[i].start;
59 end = nodes[i].end;
60 if (addr >= end)
59 continue; 61 continue;
60 for (addr = nodes[i].start; 62 if ((end >> shift) >= NODEMAPSIZE)
61 addr < nodes[i].end; 63 return 0;
62 addr += (1UL << shift)) { 64 do {
63 if (memnodemap[addr >> shift] != 0xff) { 65 if (memnodemap[addr >> shift] != 0xff)
64 printk(KERN_INFO
65 "Your memory is not aligned you need to rebuild your kernel "
66 "with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
67 shift,addr);
68 return -1; 66 return -1;
69 }
70 memnodemap[addr >> shift] = i; 67 memnodemap[addr >> shift] = i;
71 } 68 addr += (1 << shift);
69 } while (addr < end);
70 res = 1;
72 } 71 }
72 return res;
73}
74
75int __init compute_hash_shift(struct node *nodes, int numnodes)
76{
77 int shift = 20;
78
79 while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
80 shift++;
81
82 printk(KERN_DEBUG "Using %d for the hash shift.\n",
83 shift);
84
85 if (populate_memnodemap(nodes, numnodes, shift) != 1) {
86 printk(KERN_INFO
87 "Your memory is not aligned you need to rebuild your kernel "
88 "with a bigger NODEMAPSIZE shift=%d\n",
89 shift);
90 return -1;
91 }
73 return shift; 92 return shift;
74} 93}
75 94
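
compute_hash_shift() now grows the shift while populate_memnodemap() reports no conflict, settling on the coarsest granularity at which each memnodemap[] entry still belongs to exactly one node. A self-contained user-space model of the search, with an illustrative two-node layout:

    #include <stdio.h>
    #include <string.h>

    #define NODEMAPSIZE 256

    struct node { unsigned long start, end; };
    static unsigned char memnodemap[NODEMAPSIZE];

    /* 1 = OK, 0 = map too small (shift too small), -1 = overlap */
    static int populate(const struct node *nodes, int numnodes, int shift)
    {
            int i, res = -1;

            memset(memnodemap, 0xff, sizeof(memnodemap));
            for (i = 0; i < numnodes; i++) {
                    unsigned long addr = nodes[i].start, end = nodes[i].end;

                    if (addr >= end)
                            continue;
                    if ((end >> shift) >= NODEMAPSIZE)
                            return 0;
                    do {
                            if (memnodemap[addr >> shift] != 0xff)
                                    return -1;
                            memnodemap[addr >> shift] = i;
                            addr += 1UL << shift;
                    } while (addr < end);
                    res = 1;
            }
            return res;
    }

    int main(void)
    {
            struct node nodes[2] = {
                    { 0x00000000, 0x40000000 },   /* node 0: first 1GB */
                    { 0x40000000, 0x80000000 },   /* node 1: second 1GB */
            };
            int shift = 20;

            while (populate(nodes, 2, shift + 1) >= 0)
                    shift++;
            printf("hash shift = %d, valid = %d\n",
                   shift, populate(nodes, 2, shift));
            return 0;
    }
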
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
94 start_pfn = start >> PAGE_SHIFT; 113 start_pfn = start >> PAGE_SHIFT;
95 end_pfn = end >> PAGE_SHIFT; 114 end_pfn = end >> PAGE_SHIFT;
96 115
97 memory_present(nodeid, start_pfn, end_pfn);
98 nodedata_phys = find_e820_area(start, end, pgdat_size); 116 nodedata_phys = find_e820_area(start, end, pgdat_size);
99 if (nodedata_phys == -1L) 117 if (nodedata_phys == -1L)
100 panic("Cannot find memory pgdat in node %d\n", nodeid); 118 panic("Cannot find memory pgdat in node %d\n", nodeid);
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid)
132 unsigned long start_pfn, end_pfn; 150 unsigned long start_pfn, end_pfn;
133 unsigned long zones[MAX_NR_ZONES]; 151 unsigned long zones[MAX_NR_ZONES];
134 unsigned long holes[MAX_NR_ZONES]; 152 unsigned long holes[MAX_NR_ZONES];
135 unsigned long dma_end_pfn;
136 153
137 memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 154 start_pfn = node_start_pfn(nodeid);
138 memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); 155 end_pfn = node_end_pfn(nodeid);
139 156
140 start_pfn = node_start_pfn(nodeid); 157 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
141 end_pfn = node_end_pfn(nodeid); 158 nodeid, start_pfn, end_pfn);
142 159
143 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); 160 size_zones(zones, holes, start_pfn, end_pfn);
144
145 /* All nodes > 0 have a zero length zone DMA */
146 dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
147 if (start_pfn < dma_end_pfn) {
148 zones[ZONE_DMA] = dma_end_pfn - start_pfn;
149 holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
150 zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
151 holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
152
153 } else {
154 zones[ZONE_NORMAL] = end_pfn - start_pfn;
155 holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
156 }
157
158 free_area_init_node(nodeid, NODE_DATA(nodeid), zones, 161 free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
159 start_pfn, holes); 162 start_pfn, holes);
160} 163}
@@ -171,7 +174,7 @@ void __init numa_init_array(void)
171 for (i = 0; i < NR_CPUS; i++) { 174 for (i = 0; i < NR_CPUS; i++) {
172 if (cpu_to_node[i] != NUMA_NO_NODE) 175 if (cpu_to_node[i] != NUMA_NO_NODE)
173 continue; 176 continue;
174 cpu_to_node[i] = rr; 177 numa_set_node(i, rr);
175 rr = next_node(rr, node_online_map); 178 rr = next_node(rr, node_online_map);
176 if (rr == MAX_NUMNODES) 179 if (rr == MAX_NUMNODES)
177 rr = first_node(node_online_map); 180 rr = first_node(node_online_map);
@@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
205 if (i == numa_fake-1) 208 if (i == numa_fake-1)
206 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; 209 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
207 nodes[i].end = nodes[i].start + sz; 210 nodes[i].end = nodes[i].start + sz;
208 if (i != numa_fake-1)
209 nodes[i].end--;
210 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 211 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
211 i, 212 i,
212 nodes[i].start, nodes[i].end, 213 nodes[i].start, nodes[i].end,
@@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
257 nodes_clear(node_online_map); 258 nodes_clear(node_online_map);
258 node_set_online(0); 259 node_set_online(0);
259 for (i = 0; i < NR_CPUS; i++) 260 for (i = 0; i < NR_CPUS; i++)
260 cpu_to_node[i] = 0; 261 numa_set_node(i, 0);
261 node_to_cpumask[0] = cpumask_of_cpu(0); 262 node_to_cpumask[0] = cpumask_of_cpu(0);
262 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 263 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
263} 264}
@@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu)
267 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); 268 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
268} 269}
269 270
271void __cpuinit numa_set_node(int cpu, int node)
272{
273 cpu_pda[cpu].nodenumber = node;
274 cpu_to_node[cpu] = node;
275}
276
270unsigned long __init numa_free_all_bootmem(void) 277unsigned long __init numa_free_all_bootmem(void)
271{ 278{
272 int i; 279 int i;
@@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void)
277 return pages; 284 return pages;
278} 285}
279 286
287#ifdef CONFIG_SPARSEMEM
288static void __init arch_sparse_init(void)
289{
290 int i;
291
292 for_each_online_node(i)
293 memory_present(i, node_start_pfn(i), node_end_pfn(i));
294
295 sparse_init();
296}
297#else
298#define arch_sparse_init() do {} while (0)
299#endif
300
280void __init paging_init(void) 301void __init paging_init(void)
281{ 302{
282 int i; 303 int i;
304
305 arch_sparse_init();
306
283 for_each_online_node(i) { 307 for_each_online_node(i) {
284 setup_node_zones(i); 308 setup_node_zones(i);
285 } 309 }
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 4b2e844c15a7..33340bd1e328 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -71,8 +71,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
71 nd->start = nd->end; 71 nd->start = nd->end;
72 } 72 }
73 if (nd->end > end) { 73 if (nd->end > end) {
74 if (!(end & 0xfff))
75 end--;
76 nd->end = end; 74 nd->end = end;
77 if (nd->start > nd->end) 75 if (nd->start > nd->end)
78 nd->start = nd->end; 76 nd->start = nd->end;
@@ -166,8 +164,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
166 if (nd->end < end) 164 if (nd->end < end)
167 nd->end = end; 165 nd->end = end;
168 } 166 }
169 if (!(nd->end & 0xfff))
170 nd->end--;
171 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 167 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
172 nd->start, nd->end); 168 nd->start, nd->end);
173} 169}
@@ -203,7 +199,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
203 if (cpu_to_node[i] == NUMA_NO_NODE) 199 if (cpu_to_node[i] == NUMA_NO_NODE)
204 continue; 200 continue;
205 if (!node_isset(cpu_to_node[i], nodes_parsed)) 201 if (!node_isset(cpu_to_node[i], nodes_parsed))
206 cpu_to_node[i] = NUMA_NO_NODE; 202 numa_set_node(i, NUMA_NO_NODE);
207 } 203 }
208 numa_init_array(); 204 numa_init_array();
209 return 0; 205 return 0;