aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig43
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/defconfig27
-rw-r--r--arch/x86_64/ia32/ia32_signal.c5
-rw-r--r--arch/x86_64/kernel/apic.c104
-rw-r--r--arch/x86_64/kernel/early-quirks.c18
-rw-r--r--arch/x86_64/kernel/entry.S36
-rw-r--r--arch/x86_64/kernel/genapic.c9
-rw-r--r--arch/x86_64/kernel/head64.c6
-rw-r--r--arch/x86_64/kernel/i387.c7
-rw-r--r--arch/x86_64/kernel/i8259.c3
-rw-r--r--arch/x86_64/kernel/io_apic.c258
-rw-r--r--arch/x86_64/kernel/irq.c2
-rw-r--r--arch/x86_64/kernel/mce.c1
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c29
-rw-r--r--arch/x86_64/kernel/pci-calgary.c218
-rw-r--r--arch/x86_64/kernel/pci-dma.c5
-rw-r--r--arch/x86_64/kernel/pci-gart.c3
-rw-r--r--arch/x86_64/kernel/process.c45
-rw-r--r--arch/x86_64/kernel/setup.c21
-rw-r--r--arch/x86_64/kernel/smp.c1
-rw-r--r--arch/x86_64/kernel/smpboot.c8
-rw-r--r--arch/x86_64/kernel/traps.c55
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86_64/kernel/vsyscall.c1
-rw-r--r--arch/x86_64/lib/csum-partial.c4
-rw-r--r--arch/x86_64/lib/delay.c4
-rw-r--r--arch/x86_64/mm/fault.c10
-rw-r--r--arch/x86_64/mm/init.c7
-rw-r--r--arch/x86_64/mm/pageattr.c58
31 files changed, 647 insertions, 356 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 010d2265f1cf..bfbb9bcae123 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -122,7 +122,7 @@ endchoice
122 122
123choice 123choice
124 prompt "Processor family" 124 prompt "Processor family"
125 default MK8 125 default GENERIC_CPU
126 126
127config MK8 127config MK8
128 bool "AMD-Opteron/Athlon64" 128 bool "AMD-Opteron/Athlon64"
@@ -130,16 +130,31 @@ config MK8
130 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. 130 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
131 131
132config MPSC 132config MPSC
133 bool "Intel EM64T" 133 bool "Intel P4 / older Netburst based Xeon"
134 help 134 help
135 Optimize for Intel Pentium 4 and Xeon CPUs with Intel 135 Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
136 Extended Memory 64 Technology(EM64T). For details see 136 with Intel Extended Memory 64 Technology(EM64T). For details see
137 <http://www.intel.com/technology/64bitextensions/>. 137 <http://www.intel.com/technology/64bitextensions/>.
138 Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the
139 Netburst core and shouldn't use this option. You can distingush them
140 using the cpu family field
141 in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one
142 (this rule only applies to system that support EM64T)
143
144config MCORE2
145 bool "Intel Core2 / newer Xeon"
146 help
147 Optimize for Intel Core2 and newer Xeons (51xx)
148 You can distingush the newer Xeons from the older ones using
149 the cpu family field in /proc/cpuinfo. 15 is a older Xeon
150 (use CONFIG_MPSC then), 6 is a newer one. This rule only
151 applies to CPUs that support EM64T.
138 152
139config GENERIC_CPU 153config GENERIC_CPU
140 bool "Generic-x86-64" 154 bool "Generic-x86-64"
141 help 155 help
142 Generic x86-64 CPU. 156 Generic x86-64 CPU.
157 Run equally well on all x86-64 CPUs.
143 158
144endchoice 159endchoice
145 160
@@ -149,12 +164,12 @@ endchoice
149config X86_L1_CACHE_BYTES 164config X86_L1_CACHE_BYTES
150 int 165 int
151 default "128" if GENERIC_CPU || MPSC 166 default "128" if GENERIC_CPU || MPSC
152 default "64" if MK8 167 default "64" if MK8 || MCORE2
153 168
154config X86_L1_CACHE_SHIFT 169config X86_L1_CACHE_SHIFT
155 int 170 int
156 default "7" if GENERIC_CPU || MPSC 171 default "7" if GENERIC_CPU || MPSC
157 default "6" if MK8 172 default "6" if MK8 || MCORE2
158 173
159config X86_INTERNODE_CACHE_BYTES 174config X86_INTERNODE_CACHE_BYTES
160 int 175 int
@@ -344,11 +359,6 @@ config ARCH_DISCONTIGMEM_ENABLE
344 depends on NUMA 359 depends on NUMA
345 default y 360 default y
346 361
347
348config ARCH_DISCONTIGMEM_ENABLE
349 def_bool y
350 depends on NUMA
351
352config ARCH_DISCONTIGMEM_DEFAULT 362config ARCH_DISCONTIGMEM_DEFAULT
353 def_bool y 363 def_bool y
354 depends on NUMA 364 depends on NUMA
@@ -455,6 +465,17 @@ config CALGARY_IOMMU
455 Normally the kernel will make the right choice by itself. 465 Normally the kernel will make the right choice by itself.
456 If unsure, say Y. 466 If unsure, say Y.
457 467
468config CALGARY_IOMMU_ENABLED_BY_DEFAULT
469 bool "Should Calgary be enabled by default?"
470 default y
471 depends on CALGARY_IOMMU
472 help
473 Should Calgary be enabled by default? if you choose 'y', Calgary
474 will be used (if it exists). If you choose 'n', Calgary will not be
475 used even if it exists. If you choose 'n' and would like to use
476 Calgary anyway, pass 'iommu=calgary' on the kernel command line.
477 If unsure, say Y.
478
458# need this always selected by IOMMU for the VIA workaround 479# need this always selected by IOMMU for the VIA workaround
459config SWIOTLB 480config SWIOTLB
460 bool 481 bool
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 6e38d4daeed7..b471b8550d03 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -30,6 +30,10 @@ cflags-y :=
30cflags-kernel-y := 30cflags-kernel-y :=
31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) 31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) 32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
33# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
34# will eventually. Use -mtune=generic as fallback
35cflags-$(CONFIG_MCORE2) += \
36 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
33cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) 37cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
34 38
35cflags-y += -m64 39cflags-y += -m64
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 0f5d44e86be5..96f226cfb339 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.19-rc2-git4 3# Linux kernel version: 2.6.19-git7
4# Sat Oct 21 03:38:52 2006 4# Wed Dec 6 23:50:47 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -47,13 +47,14 @@ CONFIG_POSIX_MQUEUE=y
47CONFIG_IKCONFIG=y 47CONFIG_IKCONFIG=y
48CONFIG_IKCONFIG_PROC=y 48CONFIG_IKCONFIG_PROC=y
49# CONFIG_CPUSETS is not set 49# CONFIG_CPUSETS is not set
50CONFIG_SYSFS_DEPRECATED=y
50# CONFIG_RELAY is not set 51# CONFIG_RELAY is not set
51CONFIG_INITRAMFS_SOURCE="" 52CONFIG_INITRAMFS_SOURCE=""
52CONFIG_CC_OPTIMIZE_FOR_SIZE=y 53CONFIG_CC_OPTIMIZE_FOR_SIZE=y
53CONFIG_SYSCTL=y 54CONFIG_SYSCTL=y
54# CONFIG_EMBEDDED is not set 55# CONFIG_EMBEDDED is not set
55CONFIG_UID16=y 56CONFIG_UID16=y
56# CONFIG_SYSCTL_SYSCALL is not set 57CONFIG_SYSCTL_SYSCALL=y
57CONFIG_KALLSYMS=y 58CONFIG_KALLSYMS=y
58CONFIG_KALLSYMS_ALL=y 59CONFIG_KALLSYMS_ALL=y
59# CONFIG_KALLSYMS_EXTRA_PASS is not set 60# CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -87,9 +88,7 @@ CONFIG_STOP_MACHINE=y
87# Block layer 88# Block layer
88# 89#
89CONFIG_BLOCK=y 90CONFIG_BLOCK=y
90CONFIG_LBD=y
91# CONFIG_BLK_DEV_IO_TRACE is not set 91# CONFIG_BLK_DEV_IO_TRACE is not set
92# CONFIG_LSF is not set
93 92
94# 93#
95# IO Schedulers 94# IO Schedulers
@@ -111,10 +110,11 @@ CONFIG_X86_PC=y
111# CONFIG_X86_VSMP is not set 110# CONFIG_X86_VSMP is not set
112# CONFIG_MK8 is not set 111# CONFIG_MK8 is not set
113# CONFIG_MPSC is not set 112# CONFIG_MPSC is not set
114CONFIG_GENERIC_CPU=y 113CONFIG_MCORE2=y
115CONFIG_X86_L1_CACHE_BYTES=128 114# CONFIG_GENERIC_CPU is not set
116CONFIG_X86_L1_CACHE_SHIFT=7 115CONFIG_X86_L1_CACHE_BYTES=64
117CONFIG_X86_INTERNODE_CACHE_BYTES=128 116CONFIG_X86_L1_CACHE_SHIFT=6
117CONFIG_X86_INTERNODE_CACHE_BYTES=64
118CONFIG_X86_TSC=y 118CONFIG_X86_TSC=y
119CONFIG_X86_GOOD_APIC=y 119CONFIG_X86_GOOD_APIC=y
120# CONFIG_MICROCODE is not set 120# CONFIG_MICROCODE is not set
@@ -322,6 +322,7 @@ CONFIG_INET_TCP_DIAG=y
322# CONFIG_TCP_CONG_ADVANCED is not set 322# CONFIG_TCP_CONG_ADVANCED is not set
323CONFIG_TCP_CONG_CUBIC=y 323CONFIG_TCP_CONG_CUBIC=y
324CONFIG_DEFAULT_TCP_CONG="cubic" 324CONFIG_DEFAULT_TCP_CONG="cubic"
325# CONFIG_TCP_MD5SIG is not set
325CONFIG_IPV6=y 326CONFIG_IPV6=y
326# CONFIG_IPV6_PRIVACY is not set 327# CONFIG_IPV6_PRIVACY is not set
327# CONFIG_IPV6_ROUTER_PREF is not set 328# CONFIG_IPV6_ROUTER_PREF is not set
@@ -624,6 +625,7 @@ CONFIG_SATA_INTEL_COMBINED=y
624# CONFIG_PATA_IT821X is not set 625# CONFIG_PATA_IT821X is not set
625# CONFIG_PATA_JMICRON is not set 626# CONFIG_PATA_JMICRON is not set
626# CONFIG_PATA_TRIFLEX is not set 627# CONFIG_PATA_TRIFLEX is not set
628# CONFIG_PATA_MARVELL is not set
627# CONFIG_PATA_MPIIX is not set 629# CONFIG_PATA_MPIIX is not set
628# CONFIG_PATA_OLDPIIX is not set 630# CONFIG_PATA_OLDPIIX is not set
629# CONFIG_PATA_NETCELL is not set 631# CONFIG_PATA_NETCELL is not set
@@ -795,6 +797,7 @@ CONFIG_BNX2=y
795CONFIG_S2IO=m 797CONFIG_S2IO=m
796# CONFIG_S2IO_NAPI is not set 798# CONFIG_S2IO_NAPI is not set
797# CONFIG_MYRI10GE is not set 799# CONFIG_MYRI10GE is not set
800# CONFIG_NETXEN_NIC is not set
798 801
799# 802#
800# Token Ring devices 803# Token Ring devices
@@ -927,10 +930,6 @@ CONFIG_RTC=y
927# CONFIG_DTLK is not set 930# CONFIG_DTLK is not set
928# CONFIG_R3964 is not set 931# CONFIG_R3964 is not set
929# CONFIG_APPLICOM is not set 932# CONFIG_APPLICOM is not set
930
931#
932# Ftape, the floppy tape device driver
933#
934CONFIG_AGP=y 933CONFIG_AGP=y
935CONFIG_AGP_AMD64=y 934CONFIG_AGP_AMD64=y
936CONFIG_AGP_INTEL=y 935CONFIG_AGP_INTEL=y
@@ -1135,6 +1134,7 @@ CONFIG_USB_DEVICEFS=y
1135# CONFIG_USB_BANDWIDTH is not set 1134# CONFIG_USB_BANDWIDTH is not set
1136# CONFIG_USB_DYNAMIC_MINORS is not set 1135# CONFIG_USB_DYNAMIC_MINORS is not set
1137# CONFIG_USB_SUSPEND is not set 1136# CONFIG_USB_SUSPEND is not set
1137# CONFIG_USB_MULTITHREAD_PROBE is not set
1138# CONFIG_USB_OTG is not set 1138# CONFIG_USB_OTG is not set
1139 1139
1140# 1140#
@@ -1212,6 +1212,7 @@ CONFIG_USB_HIDINPUT=y
1212# CONFIG_USB_KAWETH is not set 1212# CONFIG_USB_KAWETH is not set
1213# CONFIG_USB_PEGASUS is not set 1213# CONFIG_USB_PEGASUS is not set
1214# CONFIG_USB_RTL8150 is not set 1214# CONFIG_USB_RTL8150 is not set
1215# CONFIG_USB_USBNET_MII is not set
1215# CONFIG_USB_USBNET is not set 1216# CONFIG_USB_USBNET is not set
1216CONFIG_USB_MON=y 1217CONFIG_USB_MON=y
1217 1218
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 0e0a266d976f..ff499ef2a1ba 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -584,6 +584,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
584 regs->rdx = (unsigned long) &frame->info; 584 regs->rdx = (unsigned long) &frame->info;
585 regs->rcx = (unsigned long) &frame->uc; 585 regs->rcx = (unsigned long) &frame->uc;
586 586
587 /* Make -mregparm=3 work */
588 regs->rax = sig;
589 regs->rdx = (unsigned long) &frame->info;
590 regs->rcx = (unsigned long) &frame->uc;
591
587 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 592 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
588 asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 593 asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
589 594
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed942b2..124b2d27b4ac 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
25#include <linux/kernel_stat.h> 25#include <linux/kernel_stat.h>
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/ioport.h>
28 29
29#include <asm/atomic.h> 30#include <asm/atomic.h>
30#include <asm/smp.h> 31#include <asm/smp.h>
@@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata;
45 46
46int disable_apic_timer __initdata; 47int disable_apic_timer __initdata;
47 48
49static struct resource *ioapic_resources;
50static struct resource lapic_resource = {
51 .name = "Local APIC",
52 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
53};
54
48/* 55/*
49 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as 56 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
50 * IPIs in place of local APIC timers 57 * IPIs in place of local APIC timers
@@ -133,7 +140,6 @@ void clear_local_APIC(void)
133 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 140 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
134 if (maxlvt >= 4) 141 if (maxlvt >= 4)
135 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 142 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
136 v = GET_APIC_VERSION(apic_read(APIC_LVR));
137 apic_write(APIC_ESR, 0); 143 apic_write(APIC_ESR, 0);
138 apic_read(APIC_ESR); 144 apic_read(APIC_ESR);
139} 145}
@@ -452,23 +458,30 @@ static struct {
452static int lapic_suspend(struct sys_device *dev, pm_message_t state) 458static int lapic_suspend(struct sys_device *dev, pm_message_t state)
453{ 459{
454 unsigned long flags; 460 unsigned long flags;
461 int maxlvt;
455 462
456 if (!apic_pm_state.active) 463 if (!apic_pm_state.active)
457 return 0; 464 return 0;
458 465
466 maxlvt = get_maxlvt();
467
459 apic_pm_state.apic_id = apic_read(APIC_ID); 468 apic_pm_state.apic_id = apic_read(APIC_ID);
460 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 469 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
461 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 470 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
462 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 471 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
463 apic_pm_state.apic_spiv = apic_read(APIC_SPIV); 472 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
464 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); 473 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
465 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); 474 if (maxlvt >= 4)
475 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
466 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); 476 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
467 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); 477 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
468 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 478 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
469 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 479 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
470 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 480 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
471 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 481#ifdef CONFIG_X86_MCE_INTEL
482 if (maxlvt >= 5)
483 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
484#endif
472 local_irq_save(flags); 485 local_irq_save(flags);
473 disable_local_APIC(); 486 disable_local_APIC();
474 local_irq_restore(flags); 487 local_irq_restore(flags);
@@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev)
479{ 492{
480 unsigned int l, h; 493 unsigned int l, h;
481 unsigned long flags; 494 unsigned long flags;
495 int maxlvt;
482 496
483 if (!apic_pm_state.active) 497 if (!apic_pm_state.active)
484 return 0; 498 return 0;
485 499
500 maxlvt = get_maxlvt();
501
486 local_irq_save(flags); 502 local_irq_save(flags);
487 rdmsr(MSR_IA32_APICBASE, l, h); 503 rdmsr(MSR_IA32_APICBASE, l, h);
488 l &= ~MSR_IA32_APICBASE_BASE; 504 l &= ~MSR_IA32_APICBASE_BASE;
@@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev)
496 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 512 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
497 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 513 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
498 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 514 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
499 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 515#ifdef CONFIG_X86_MCE_INTEL
500 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); 516 if (maxlvt >= 5)
517 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
518#endif
519 if (maxlvt >= 4)
520 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
501 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); 521 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
502 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); 522 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
503 apic_write(APIC_TMICT, apic_pm_state.apic_tmict); 523 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
@@ -585,6 +605,64 @@ static int __init detect_init_APIC (void)
585 return 0; 605 return 0;
586} 606}
587 607
608#ifdef CONFIG_X86_IO_APIC
609static struct resource * __init ioapic_setup_resources(void)
610{
611#define IOAPIC_RESOURCE_NAME_SIZE 11
612 unsigned long n;
613 struct resource *res;
614 char *mem;
615 int i;
616
617 if (nr_ioapics <= 0)
618 return NULL;
619
620 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
621 n *= nr_ioapics;
622
623 mem = alloc_bootmem(n);
624 res = (void *)mem;
625
626 if (mem != NULL) {
627 memset(mem, 0, n);
628 mem += sizeof(struct resource) * nr_ioapics;
629
630 for (i = 0; i < nr_ioapics; i++) {
631 res[i].name = mem;
632 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
633 sprintf(mem, "IOAPIC %u", i);
634 mem += IOAPIC_RESOURCE_NAME_SIZE;
635 }
636 }
637
638 ioapic_resources = res;
639
640 return res;
641}
642
643static int __init ioapic_insert_resources(void)
644{
645 int i;
646 struct resource *r = ioapic_resources;
647
648 if (!r) {
649 printk("IO APIC resources could be not be allocated.\n");
650 return -1;
651 }
652
653 for (i = 0; i < nr_ioapics; i++) {
654 insert_resource(&iomem_resource, r);
655 r++;
656 }
657
658 return 0;
659}
660
661/* Insert the IO APIC resources after PCI initialization has occured to handle
662 * IO APICS that are mapped in on a BAR in PCI space. */
663late_initcall(ioapic_insert_resources);
664#endif
665
588void __init init_apic_mappings(void) 666void __init init_apic_mappings(void)
589{ 667{
590 unsigned long apic_phys; 668 unsigned long apic_phys;
@@ -604,6 +682,11 @@ void __init init_apic_mappings(void)
604 apic_mapped = 1; 682 apic_mapped = 1;
605 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); 683 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
606 684
685 /* Put local APIC into the resource map. */
686 lapic_resource.start = apic_phys;
687 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
688 insert_resource(&iomem_resource, &lapic_resource);
689
607 /* 690 /*
608 * Fetch the APIC ID of the BSP in case we have a 691 * Fetch the APIC ID of the BSP in case we have a
609 * default configuration (or the MP table is broken). 692 * default configuration (or the MP table is broken).
@@ -613,7 +696,9 @@ void __init init_apic_mappings(void)
613 { 696 {
614 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 697 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
615 int i; 698 int i;
699 struct resource *ioapic_res;
616 700
701 ioapic_res = ioapic_setup_resources();
617 for (i = 0; i < nr_ioapics; i++) { 702 for (i = 0; i < nr_ioapics; i++) {
618 if (smp_found_config) { 703 if (smp_found_config) {
619 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 704 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +710,12 @@ void __init init_apic_mappings(void)
625 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", 710 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
626 __fix_to_virt(idx), ioapic_phys); 711 __fix_to_virt(idx), ioapic_phys);
627 idx++; 712 idx++;
713
714 if (ioapic_res != NULL) {
715 ioapic_res->start = ioapic_phys;
716 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
717 ioapic_res++;
718 }
628 } 719 }
629 } 720 }
630} 721}
@@ -644,10 +735,9 @@ void __init init_apic_mappings(void)
644 735
645static void __setup_APIC_LVTT(unsigned int clocks) 736static void __setup_APIC_LVTT(unsigned int clocks)
646{ 737{
647 unsigned int lvtt_value, tmp_value, ver; 738 unsigned int lvtt_value, tmp_value;
648 int cpu = smp_processor_id(); 739 int cpu = smp_processor_id();
649 740
650 ver = GET_APIC_VERSION(apic_read(APIC_LVR));
651 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; 741 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
652 742
653 if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) 743 if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 68273bff58cc..829698f6d049 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -69,11 +69,18 @@ static void nvidia_bugs(void)
69 69
70static void ati_bugs(void) 70static void ati_bugs(void)
71{ 71{
72 if (timer_over_8254 == 1) { 72}
73 timer_over_8254 = 0; 73
74 printk(KERN_INFO 74static void intel_bugs(void)
75 "ATI board detected. Disabling timer routing over 8254.\n"); 75{
76 } 76 u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
77
78#ifdef CONFIG_SMP
79 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
80 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
81 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
82 quirk_intel_irqbalance();
83#endif
77} 84}
78 85
79struct chipset { 86struct chipset {
@@ -85,6 +92,7 @@ static struct chipset early_qrk[] = {
85 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, 92 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
86 { PCI_VENDOR_ID_VIA, via_bugs }, 93 { PCI_VENDOR_ID_VIA, via_bugs },
87 { PCI_VENDOR_ID_ATI, ati_bugs }, 94 { PCI_VENDOR_ID_ATI, ati_bugs },
95 { PCI_VENDOR_ID_INTEL, intel_bugs},
88 {} 96 {}
89}; 97};
90 98
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7d401b00d822..601d332c4b79 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@ ENTRY(system_call)
230 CFI_REL_OFFSET rip,RIP-ARGOFFSET 230 CFI_REL_OFFSET rip,RIP-ARGOFFSET
231 GET_THREAD_INFO(%rcx) 231 GET_THREAD_INFO(%rcx)
232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
233 CFI_REMEMBER_STATE
234 jnz tracesys 233 jnz tracesys
235 cmpq $__NR_syscall_max,%rax 234 cmpq $__NR_syscall_max,%rax
236 ja badsys 235 ja badsys
@@ -241,7 +240,6 @@ ENTRY(system_call)
241 * Syscall return path ending with SYSRET (fast path) 240 * Syscall return path ending with SYSRET (fast path)
242 * Has incomplete stack frame and undefined top of stack. 241 * Has incomplete stack frame and undefined top of stack.
243 */ 242 */
244 .globl ret_from_sys_call
245ret_from_sys_call: 243ret_from_sys_call:
246 movl $_TIF_ALLWORK_MASK,%edi 244 movl $_TIF_ALLWORK_MASK,%edi
247 /* edi: flagmask */ 245 /* edi: flagmask */
@@ -251,8 +249,8 @@ sysret_check:
251 TRACE_IRQS_OFF 249 TRACE_IRQS_OFF
252 movl threadinfo_flags(%rcx),%edx 250 movl threadinfo_flags(%rcx),%edx
253 andl %edi,%edx 251 andl %edi,%edx
254 CFI_REMEMBER_STATE
255 jnz sysret_careful 252 jnz sysret_careful
253 CFI_REMEMBER_STATE
256 /* 254 /*
257 * sysretq will re-enable interrupts: 255 * sysretq will re-enable interrupts:
258 */ 256 */
@@ -265,10 +263,10 @@ sysret_check:
265 swapgs 263 swapgs
266 sysretq 264 sysretq
267 265
266 CFI_RESTORE_STATE
268 /* Handle reschedules */ 267 /* Handle reschedules */
269 /* edx: work, edi: workmask */ 268 /* edx: work, edi: workmask */
270sysret_careful: 269sysret_careful:
271 CFI_RESTORE_STATE
272 bt $TIF_NEED_RESCHED,%edx 270 bt $TIF_NEED_RESCHED,%edx
273 jnc sysret_signal 271 jnc sysret_signal
274 TRACE_IRQS_ON 272 TRACE_IRQS_ON
@@ -306,7 +304,6 @@ badsys:
306 304
307 /* Do syscall tracing */ 305 /* Do syscall tracing */
308tracesys: 306tracesys:
309 CFI_RESTORE_STATE
310 SAVE_REST 307 SAVE_REST
311 movq $-ENOSYS,RAX(%rsp) 308 movq $-ENOSYS,RAX(%rsp)
312 FIXUP_TOP_OF_STACK %rdi 309 FIXUP_TOP_OF_STACK %rdi
@@ -322,32 +319,13 @@ tracesys:
322 call *sys_call_table(,%rax,8) 319 call *sys_call_table(,%rax,8)
3231: movq %rax,RAX-ARGOFFSET(%rsp) 3201: movq %rax,RAX-ARGOFFSET(%rsp)
324 /* Use IRET because user could have changed frame */ 321 /* Use IRET because user could have changed frame */
325 jmp int_ret_from_sys_call
326 CFI_ENDPROC
327END(system_call)
328 322
329/* 323/*
330 * Syscall return path ending with IRET. 324 * Syscall return path ending with IRET.
331 * Has correct top of stack, but partial stack frame. 325 * Has correct top of stack, but partial stack frame.
332 */ 326 */
333ENTRY(int_ret_from_sys_call) 327 .globl int_ret_from_sys_call
334 CFI_STARTPROC simple 328int_ret_from_sys_call:
335 CFI_SIGNAL_FRAME
336 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
337 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
338 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
339 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
340 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
341 CFI_REL_OFFSET rip,RIP-ARGOFFSET
342 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
343 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
344 CFI_REL_OFFSET rax,RAX-ARGOFFSET
345 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
346 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
347 CFI_REL_OFFSET r8,R8-ARGOFFSET
348 CFI_REL_OFFSET r9,R9-ARGOFFSET
349 CFI_REL_OFFSET r10,R10-ARGOFFSET
350 CFI_REL_OFFSET r11,R11-ARGOFFSET
351 cli 329 cli
352 TRACE_IRQS_OFF 330 TRACE_IRQS_OFF
353 testl $3,CS-ARGOFFSET(%rsp) 331 testl $3,CS-ARGOFFSET(%rsp)
@@ -394,8 +372,6 @@ int_very_careful:
394 popq %rdi 372 popq %rdi
395 CFI_ADJUST_CFA_OFFSET -8 373 CFI_ADJUST_CFA_OFFSET -8
396 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 374 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
397 cli
398 TRACE_IRQS_OFF
399 jmp int_restore_rest 375 jmp int_restore_rest
400 376
401int_signal: 377int_signal:
@@ -411,7 +387,7 @@ int_restore_rest:
411 TRACE_IRQS_OFF 387 TRACE_IRQS_OFF
412 jmp int_with_check 388 jmp int_with_check
413 CFI_ENDPROC 389 CFI_ENDPROC
414END(int_ret_from_sys_call) 390END(system_call)
415 391
416/* 392/*
417 * Certain special system calls that need to save a complete full stack frame. 393 * Certain special system calls that need to save a complete full stack frame.
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75d1866..b007433f96bb 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@ extern struct genapic apic_flat;
33extern struct genapic apic_physflat; 33extern struct genapic apic_physflat;
34 34
35struct genapic *genapic = &apic_flat; 35struct genapic *genapic = &apic_flat;
36 36struct genapic *genapic_force;
37 37
38/* 38/*
39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@ void __init clustered_apic_check(void)
46 u8 cluster_cnt[NUM_APIC_CLUSTERS]; 46 u8 cluster_cnt[NUM_APIC_CLUSTERS];
47 int max_apic = 0; 47 int max_apic = 0;
48 48
49 /* genapic selection can be forced because of certain quirks.
50 */
51 if (genapic_force) {
52 genapic = genapic_force;
53 goto print;
54 }
55
49#if defined(CONFIG_ACPI) 56#if defined(CONFIG_ACPI)
50 /* 57 /*
51 * Some x86_64 machines use physical APIC mode regardless of how many 58 * Some x86_64 machines use physical APIC mode regardless of how many
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3c5b5c..cc230b93cd1c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
57{ 57{
58 int i; 58 int i;
59 59
60 for (i = 0; i < 256; i++) 60 /* clear bss before set_intr_gate with early_idt_handler */
61 clear_bss();
62
63 for (i = 0; i < IDT_ENTRIES; i++)
61 set_intr_gate(i, early_idt_handler); 64 set_intr_gate(i, early_idt_handler);
62 asm volatile("lidt %0" :: "m" (idt_descr)); 65 asm volatile("lidt %0" :: "m" (idt_descr));
63 clear_bss();
64 66
65 early_printk("Kernel alive\n"); 67 early_printk("Kernel alive\n");
66 68
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9bb781d..1d58c13bc6bc 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf)
82 struct task_struct *tsk = current; 82 struct task_struct *tsk = current;
83 int err = 0; 83 int err = 0;
84 84
85 { 85 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
86 extern void bad_user_i387_struct(void); 86 sizeof(tsk->thread.i387.fxsave));
87 if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
88 bad_user_i387_struct();
89 }
90 87
91 if ((unsigned long)buf % 16) 88 if ((unsigned long)buf % 16)
92 printk("save_i387: bad fpstate %p\n",buf); 89 printk("save_i387: bad fpstate %p\n",buf);
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c4ef801b765b..d73c79e821f1 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
78 78
79void (*interrupt[NR_IRQS])(void) = { 79/* for the irq vectors */
80static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
80 IRQLIST_16(0x2), IRQLIST_16(0x3), 81 IRQLIST_16(0x2), IRQLIST_16(0x3),
81 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), 82 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
82 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), 83 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c80081a6ba41..2a1dcd5f69c2 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
55 55
56static int no_timer_check; 56static int no_timer_check;
57 57
58static int disable_timer_pin_1 __initdata;
59
60int timer_over_8254 __initdata = 1;
61
62/* Where if anywhere is the i8259 connect in external int mode */ 58/* Where if anywhere is the i8259 connect in external int mode */
63static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 59static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
64 60
@@ -178,14 +174,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
178 * the interrupt, and we need to make sure the entry is fully populated 174 * the interrupt, and we need to make sure the entry is fully populated
179 * before that happens. 175 * before that happens.
180 */ 176 */
181static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 177static void
178__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
182{ 179{
183 unsigned long flags;
184 union entry_union eu; 180 union entry_union eu;
185 eu.entry = e; 181 eu.entry = e;
186 spin_lock_irqsave(&ioapic_lock, flags);
187 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 182 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
188 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 183 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
184}
185
186static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
187{
188 unsigned long flags;
189 spin_lock_irqsave(&ioapic_lock, flags);
190 __ioapic_write_entry(apic, pin, e);
189 spin_unlock_irqrestore(&ioapic_lock, flags); 191 spin_unlock_irqrestore(&ioapic_lock, flags);
190} 192}
191 193
@@ -348,29 +350,6 @@ static int __init disable_ioapic_setup(char *str)
348} 350}
349early_param("noapic", disable_ioapic_setup); 351early_param("noapic", disable_ioapic_setup);
350 352
351/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
352static int __init disable_timer_pin_setup(char *arg)
353{
354 disable_timer_pin_1 = 1;
355 return 1;
356}
357__setup("disable_timer_pin_1", disable_timer_pin_setup);
358
359static int __init setup_disable_8254_timer(char *s)
360{
361 timer_over_8254 = -1;
362 return 1;
363}
364static int __init setup_enable_8254_timer(char *s)
365{
366 timer_over_8254 = 2;
367 return 1;
368}
369
370__setup("disable_8254_timer", setup_disable_8254_timer);
371__setup("enable_8254_timer", setup_enable_8254_timer);
372
373
374/* 353/*
375 * Find the IRQ entry number of a certain pin. 354 * Find the IRQ entry number of a certain pin.
376 */ 355 */
@@ -750,6 +729,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
750 return vector; 729 return vector;
751} 730}
752 731
732static void __clear_irq_vector(int irq)
733{
734 cpumask_t mask;
735 int cpu, vector;
736
737 BUG_ON(!irq_vector[irq]);
738
739 vector = irq_vector[irq];
740 cpus_and(mask, irq_domain[irq], cpu_online_map);
741 for_each_cpu_mask(cpu, mask)
742 per_cpu(vector_irq, cpu)[vector] = -1;
743
744 irq_vector[irq] = 0;
745 irq_domain[irq] = CPU_MASK_NONE;
746}
747
753void __setup_vector_irq(int cpu) 748void __setup_vector_irq(int cpu)
754{ 749{
755 /* Initialize vector_irq on a new cpu */ 750 /* Initialize vector_irq on a new cpu */
@@ -794,27 +789,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
794 handle_edge_irq, "edge"); 789 handle_edge_irq, "edge");
795 } 790 }
796} 791}
797 792static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
798static void __init setup_IO_APIC_irqs(void)
799{ 793{
800 struct IO_APIC_route_entry entry; 794 struct IO_APIC_route_entry entry;
801 int apic, pin, idx, irq, first_notcon = 1, vector; 795 int vector;
802 unsigned long flags; 796 unsigned long flags;
803 797
804 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
805 798
806 for (apic = 0; apic < nr_ioapics; apic++) { 799 /*
807 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 800 * add it to the IO-APIC irq-routing table:
801 */
802 memset(&entry,0,sizeof(entry));
808 803
809 /* 804 entry.delivery_mode = INT_DELIVERY_MODE;
810 * add it to the IO-APIC irq-routing table: 805 entry.dest_mode = INT_DEST_MODE;
811 */ 806 entry.mask = 0; /* enable IRQ */
812 memset(&entry,0,sizeof(entry)); 807 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
808
809 entry.trigger = irq_trigger(idx);
810 entry.polarity = irq_polarity(idx);
813 811
814 entry.delivery_mode = INT_DELIVERY_MODE; 812 if (irq_trigger(idx)) {
815 entry.dest_mode = INT_DEST_MODE; 813 entry.trigger = 1;
816 entry.mask = 0; /* enable IRQ */ 814 entry.mask = 1;
817 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 815 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
816 }
817
818 if (!apic && !IO_APIC_IRQ(irq))
819 return;
820
821 if (IO_APIC_IRQ(irq)) {
822 cpumask_t mask;
823 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
824 if (vector < 0)
825 return;
826
827 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
828 entry.vector = vector;
829
830 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
831 if (!apic && (irq < 16))
832 disable_8259A_irq(irq);
833 }
834
835 ioapic_write_entry(apic, pin, entry);
836
837 spin_lock_irqsave(&ioapic_lock, flags);
838 set_native_irq_info(irq, TARGET_CPUS);
839 spin_unlock_irqrestore(&ioapic_lock, flags);
840
841}
842
843static void __init setup_IO_APIC_irqs(void)
844{
845 int apic, pin, idx, irq, first_notcon = 1;
846
847 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
848
849 for (apic = 0; apic < nr_ioapics; apic++) {
850 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
818 851
819 idx = find_irq_entry(apic,pin,mp_INT); 852 idx = find_irq_entry(apic,pin,mp_INT);
820 if (idx == -1) { 853 if (idx == -1) {
@@ -826,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void)
826 continue; 859 continue;
827 } 860 }
828 861
829 entry.trigger = irq_trigger(idx);
830 entry.polarity = irq_polarity(idx);
831
832 if (irq_trigger(idx)) {
833 entry.trigger = 1;
834 entry.mask = 1;
835 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
836 }
837
838 irq = pin_2_irq(idx, apic, pin); 862 irq = pin_2_irq(idx, apic, pin);
839 add_pin_to_irq(irq, apic, pin); 863 add_pin_to_irq(irq, apic, pin);
840 864
841 if (!apic && !IO_APIC_IRQ(irq)) 865 setup_IO_APIC_irq(apic, pin, idx, irq);
842 continue;
843
844 if (IO_APIC_IRQ(irq)) {
845 cpumask_t mask;
846 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
847 if (vector < 0)
848 continue;
849
850 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
851 entry.vector = vector;
852
853 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
854 if (!apic && (irq < 16))
855 disable_8259A_irq(irq);
856 }
857 ioapic_write_entry(apic, pin, entry);
858 866
859 spin_lock_irqsave(&ioapic_lock, flags);
860 set_native_irq_info(irq, TARGET_CPUS);
861 spin_unlock_irqrestore(&ioapic_lock, flags);
862 } 867 }
863 } 868 }
864 869
@@ -1563,10 +1568,33 @@ static inline void unlock_ExtINT_logic(void)
1563 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 1568 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1564 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 1569 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1565 * fanatically on his truly buggy board. 1570 * fanatically on his truly buggy board.
1566 *
1567 * FIXME: really need to revamp this for modern platforms only.
1568 */ 1571 */
1569static inline void check_timer(void) 1572
1573static int try_apic_pin(int apic, int pin, char *msg)
1574{
1575 apic_printk(APIC_VERBOSE, KERN_INFO
1576 "..TIMER: trying IO-APIC=%d PIN=%d %s",
1577 apic, pin, msg);
1578
1579 /*
1580 * Ok, does IRQ0 through the IOAPIC work?
1581 */
1582 if (!no_timer_check && timer_irq_works()) {
1583 nmi_watchdog_default();
1584 if (nmi_watchdog == NMI_IO_APIC) {
1585 disable_8259A_irq(0);
1586 setup_nmi();
1587 enable_8259A_irq(0);
1588 }
1589 return 1;
1590 }
1591 clear_IO_APIC_pin(apic, pin);
1592 apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
1593 return 0;
1594}
1595
1596/* The function from hell */
1597static void check_timer(void)
1570{ 1598{
1571 int apic1, pin1, apic2, pin2; 1599 int apic1, pin1, apic2, pin2;
1572 int vector; 1600 int vector;
@@ -1587,61 +1615,43 @@ static inline void check_timer(void)
1587 */ 1615 */
1588 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1616 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1589 init_8259A(1); 1617 init_8259A(1);
1590 if (timer_over_8254 > 0)
1591 enable_8259A_irq(0);
1592 1618
1593 pin1 = find_isa_irq_pin(0, mp_INT); 1619 pin1 = find_isa_irq_pin(0, mp_INT);
1594 apic1 = find_isa_irq_apic(0, mp_INT); 1620 apic1 = find_isa_irq_apic(0, mp_INT);
1595 pin2 = ioapic_i8259.pin; 1621 pin2 = ioapic_i8259.pin;
1596 apic2 = ioapic_i8259.apic; 1622 apic2 = ioapic_i8259.apic;
1597 1623
1598 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1624 /* Do this first, otherwise we get double interrupts on ATI boards */
1599 vector, apic1, pin1, apic2, pin2); 1625 if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled"))
1626 return;
1600 1627
1601 if (pin1 != -1) { 1628 /* Now try again with IRQ0 8259A enabled.
1602 /* 1629 Assumes timer is on IO-APIC 0 ?!? */
1603 * Ok, does IRQ0 through the IOAPIC work? 1630 enable_8259A_irq(0);
1604 */ 1631 unmask_IO_APIC_irq(0);
1605 unmask_IO_APIC_irq(0); 1632 if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
1606 if (!no_timer_check && timer_irq_works()) { 1633 return;
1607 nmi_watchdog_default(); 1634 disable_8259A_irq(0);
1608 if (nmi_watchdog == NMI_IO_APIC) { 1635
1609 disable_8259A_irq(0); 1636 /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
1610 setup_nmi(); 1637 on Nvidia boards */
1611 enable_8259A_irq(0); 1638 if (!(apic1 == 0 && pin1 == 0) &&
1612 } 1639 try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
1613 if (disable_timer_pin_1 > 0) 1640 return;
1614 clear_IO_APIC_pin(0, pin1); 1641 if (!(apic1 == 0 && pin1 == 2) &&
1615 return; 1642 try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
1616 } 1643 return;
1617 clear_IO_APIC_pin(apic1, pin1);
1618 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
1619 "connected to IO-APIC\n");
1620 }
1621 1644
1622 apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " 1645 /* Then try pure 8259A routing on the 8259 as reported by BIOS*/
1623 "through the 8259A ... "); 1646 enable_8259A_irq(0);
1624 if (pin2 != -1) { 1647 if (pin2 != -1) {
1625 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
1626 apic2, pin2);
1627 /*
1628 * legacy devices should be connected to IO APIC #0
1629 */
1630 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 1648 setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1631 if (timer_irq_works()) { 1649 if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS"))
1632 apic_printk(APIC_VERBOSE," works.\n");
1633 nmi_watchdog_default();
1634 if (nmi_watchdog == NMI_IO_APIC) {
1635 setup_nmi();
1636 }
1637 return; 1650 return;
1638 }
1639 /*
1640 * Cleanup, just in case ...
1641 */
1642 clear_IO_APIC_pin(apic2, pin2);
1643 } 1651 }
1644 apic_printk(APIC_VERBOSE," failed.\n"); 1652
1653 /* Tried all possibilities to go through the IO-APIC. Now come the
1654 really cheesy fallbacks. */
1645 1655
1646 if (nmi_watchdog == NMI_IO_APIC) { 1656 if (nmi_watchdog == NMI_IO_APIC) {
1647 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1657 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
@@ -1837,7 +1847,7 @@ void destroy_irq(unsigned int irq)
1837 dynamic_irq_cleanup(irq); 1847 dynamic_irq_cleanup(irq);
1838 1848
1839 spin_lock_irqsave(&vector_lock, flags); 1849 spin_lock_irqsave(&vector_lock, flags);
1840 irq_vector[irq] = 0; 1850 __clear_irq_vector(irq);
1841 spin_unlock_irqrestore(&vector_lock, flags); 1851 spin_unlock_irqrestore(&vector_lock, flags);
1842} 1852}
1843 1853
@@ -2139,7 +2149,15 @@ void __init setup_ioapic_dest(void)
2139 if (irq_entry == -1) 2149 if (irq_entry == -1)
2140 continue; 2150 continue;
2141 irq = pin_2_irq(irq_entry, ioapic, pin); 2151 irq = pin_2_irq(irq_entry, ioapic, pin);
2142 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2152
2153 /* setup_IO_APIC_irqs could fail to get vector for some device
2154 * when you have too many devices, because at that time only boot
2155 * cpu is online.
2156 */
2157 if(!irq_vector[irq])
2158 setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
2159 else
2160 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2143 } 2161 }
2144 2162
2145 } 2163 }
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index e46c55856d40..0c06af6c13bc 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
120 120
121 if (likely(irq < NR_IRQS)) 121 if (likely(irq < NR_IRQS))
122 generic_handle_irq(irq); 122 generic_handle_irq(irq);
123 else 123 else if (printk_ratelimit())
124 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", 124 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
125 __func__, smp_processor_id(), vector); 125 __func__, smp_processor_id(), vector);
126 126
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bc863c464a1f..ac085038af29 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -651,6 +651,7 @@ static void mce_remove_device(unsigned int cpu)
651 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); 651 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
652 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); 652 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
653 sysdev_unregister(&per_cpu(device_mce,cpu)); 653 sysdev_unregister(&per_cpu(device_mce,cpu));
654 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
654} 655}
655 656
656/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 657/* Get notified when a cpu comes on/off. Be hotplug friendly. */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab19fbd4..08072568847d 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
35int smp_found_config; 35int smp_found_config;
36unsigned int __initdata maxcpus = NR_CPUS; 36unsigned int __initdata maxcpus = NR_CPUS;
37 37
38int acpi_found_madt;
39
40/* 38/*
41 * Various Linux-internal data structures created from the 39 * Various Linux-internal data structures created from the
42 * MP-table. 40 * MP-table.
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..27e95e7922c1 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
12 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 12 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
13 */ 13 */
14 14
15#include <linux/nmi.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/interrupt.h> 18#include <linux/interrupt.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysdev.h> 20#include <linux/sysdev.h>
20#include <linux/nmi.h>
21#include <linux/sysctl.h> 21#include <linux/sysctl.h>
22#include <linux/kprobes.h> 22#include <linux/kprobes.h>
23#include <linux/cpumask.h>
23 24
24#include <asm/smp.h> 25#include <asm/smp.h>
25#include <asm/nmi.h> 26#include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
41static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); 42static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
42static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); 43static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
43 44
45static cpumask_t backtrace_mask = CPU_MASK_NONE;
46
44/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 47/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 48 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
46 */ 49 */
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
782{ 785{
783 int sum; 786 int sum;
784 int touched = 0; 787 int touched = 0;
788 int cpu = smp_processor_id();
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 789 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 dummy; 790 u64 dummy;
787 int rc=0; 791 int rc=0;
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
799 touched = 1; 803 touched = 1;
800 } 804 }
801 805
806 if (cpu_isset(cpu, backtrace_mask)) {
807 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
808
809 spin_lock(&lock);
810 printk("NMI backtrace for cpu %d\n", cpu);
811 dump_stack();
812 spin_unlock(&lock);
813 cpu_clear(cpu, backtrace_mask);
814 }
815
802#ifdef CONFIG_X86_MCE 816#ifdef CONFIG_X86_MCE
803 /* Could check oops_in_progress here too, but it's safer 817 /* Could check oops_in_progress here too, but it's safer
804 not too */ 818 not too */
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
931 945
932#endif 946#endif
933 947
948void __trigger_all_cpu_backtrace(void)
949{
950 int i;
951
952 backtrace_mask = cpu_online_map;
953 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
954 for (i = 0; i < 10 * 1000; i++) {
955 if (cpus_empty(backtrace_mask))
956 break;
957 mdelay(1);
958 }
959}
960
934EXPORT_SYMBOL(nmi_active); 961EXPORT_SYMBOL(nmi_active);
935EXPORT_SYMBOL(nmi_watchdog); 962EXPORT_SYMBOL(nmi_watchdog);
936EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 963EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 37a770859e71..3215675ab128 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,13 @@
41#include <asm/pci-direct.h> 41#include <asm/pci-direct.h>
42#include <asm/system.h> 42#include <asm/system.h>
43#include <asm/dma.h> 43#include <asm/dma.h>
44#include <asm/rio.h>
45
46#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
47int use_calgary __read_mostly = 1;
48#else
49int use_calgary __read_mostly = 0;
50#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
44 51
45#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 52#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
46#define PCI_VENDOR_DEVICE_ID_CALGARY \ 53#define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -115,14 +122,35 @@ static const unsigned long phb_offsets[] = {
115 0xB000 /* PHB3 */ 122 0xB000 /* PHB3 */
116}; 123};
117 124
125/* PHB debug registers */
126
127static const unsigned long phb_debug_offsets[] = {
128 0x4000 /* PHB 0 DEBUG */,
129 0x5000 /* PHB 1 DEBUG */,
130 0x6000 /* PHB 2 DEBUG */,
131 0x7000 /* PHB 3 DEBUG */
132};
133
134/*
135 * STUFF register for each debug PHB,
136 * byte 1 = start bus number, byte 2 = end bus number
137 */
138
139#define PHB_DEBUG_STUFF_OFFSET 0x0020
140
118unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; 141unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
119static int translate_empty_slots __read_mostly = 0; 142static int translate_empty_slots __read_mostly = 0;
120static int calgary_detected __read_mostly = 0; 143static int calgary_detected __read_mostly = 0;
121 144
145static struct rio_table_hdr *rio_table_hdr __initdata;
146static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
147static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata;
148
122struct calgary_bus_info { 149struct calgary_bus_info {
123 void *tce_space; 150 void *tce_space;
124 unsigned char translation_disabled; 151 unsigned char translation_disabled;
125 signed char phbid; 152 signed char phbid;
153 void __iomem *bbar;
126}; 154};
127 155
128static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; 156static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -475,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = {
475 .unmap_sg = calgary_unmap_sg, 503 .unmap_sg = calgary_unmap_sg,
476}; 504};
477 505
506static inline void __iomem * busno_to_bbar(unsigned char num)
507{
508 return bus_info[num].bbar;
509}
510
478static inline int busno_to_phbid(unsigned char num) 511static inline int busno_to_phbid(unsigned char num)
479{ 512{
480 return bus_info[num].phbid; 513 return bus_info[num].phbid;
@@ -620,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
620static void __init calgary_reserve_regions(struct pci_dev *dev) 653static void __init calgary_reserve_regions(struct pci_dev *dev)
621{ 654{
622 unsigned int npages; 655 unsigned int npages;
623 void __iomem *bbar;
624 unsigned char busnum;
625 u64 start; 656 u64 start;
626 struct iommu_table *tbl = dev->sysdata; 657 struct iommu_table *tbl = dev->sysdata;
627 658
628 bbar = tbl->bbar;
629 busnum = dev->bus->number;
630
631 /* reserve bad_dma_address in case it's a legal address */ 659 /* reserve bad_dma_address in case it's a legal address */
632 iommu_range_reserve(tbl, bad_dma_address, 1); 660 iommu_range_reserve(tbl, bad_dma_address, 1);
633 661
@@ -740,7 +768,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
740{ 768{
741 u64 val64; 769 u64 val64;
742 void __iomem *target; 770 void __iomem *target;
743 unsigned long phb_shift = -1; 771 unsigned int phb_shift = ~0; /* silence gcc */
744 u64 mask; 772 u64 mask;
745 773
746 switch (busno_to_phbid(busnum)) { 774 switch (busno_to_phbid(busnum)) {
@@ -828,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
828 del_timer_sync(&tbl->watchdog_timer); 856 del_timer_sync(&tbl->watchdog_timer);
829} 857}
830 858
831static inline unsigned int __init locate_register_space(struct pci_dev *dev)
832{
833 int rionodeid;
834 u32 address;
835
836 /*
837 * Each Calgary has four busses. The first four busses (first Calgary)
838 * have RIO node ID 2, then the next four (second Calgary) have RIO
839 * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
840 * We use a gross hack - relying on the dev->bus->number ordering,
841 * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
842 * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
843 * second (id 3), and then it repeats modulo 14.
844 */
845 rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
846 /*
847 * register space address calculation as follows:
848 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
849 * ChassisBase is always zero for x366/x260/x460
850 * RioNodeId is 2 for first Calgary, 3 for second Calgary
851 */
852 address = START_ADDRESS -
853 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
854 (0x100000) * (rionodeid - CHASSIS_BASE);
855 return address;
856}
857
858static void __init calgary_init_one_nontraslated(struct pci_dev *dev) 859static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
859{ 860{
860 pci_dev_get(dev); 861 pci_dev_get(dev);
@@ -864,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
864 865
865static int __init calgary_init_one(struct pci_dev *dev) 866static int __init calgary_init_one(struct pci_dev *dev)
866{ 867{
867 u32 address;
868 void __iomem *bbar; 868 void __iomem *bbar;
869 int ret; 869 int ret;
870 870
871 BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); 871 BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
872 872
873 address = locate_register_space(dev); 873 bbar = busno_to_bbar(dev->bus->number);
874 /* map entire 1MB of Calgary config space */
875 bbar = ioremap_nocache(address, 1024 * 1024);
876 if (!bbar) {
877 ret = -ENODATA;
878 goto done;
879 }
880
881 ret = calgary_setup_tar(dev, bbar); 874 ret = calgary_setup_tar(dev, bbar);
882 if (ret) 875 if (ret)
883 goto iounmap; 876 goto done;
884 877
885 pci_dev_get(dev); 878 pci_dev_get(dev);
886 dev->bus->self = dev; 879 dev->bus->self = dev;
@@ -888,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev)
888 881
889 return 0; 882 return 0;
890 883
891iounmap:
892 iounmap(bbar);
893done: 884done:
894 return ret; 885 return ret;
895} 886}
896 887
888static int __init calgary_locate_bbars(void)
889{
890 int ret;
891 int rioidx, phb, bus;
892 void __iomem *bbar;
893 void __iomem *target;
894 unsigned long offset;
895 u8 start_bus, end_bus;
896 u32 val;
897
898 ret = -ENODATA;
899 for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
900 struct rio_detail *rio = rio_devs[rioidx];
901
902 if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
903 continue;
904
905 /* map entire 1MB of Calgary config space */
906 bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
907 if (!bbar)
908 goto error;
909
910 for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
911 offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
912 target = calgary_reg(bbar, offset);
913
914 val = be32_to_cpu(readl(target));
915 start_bus = (u8)((val & 0x00FF0000) >> 16);
916 end_bus = (u8)((val & 0x0000FF00) >> 8);
917 for (bus = start_bus; bus <= end_bus; bus++) {
918 bus_info[bus].bbar = bbar;
919 bus_info[bus].phbid = phb;
920 }
921 }
922 }
923
924 return 0;
925
926error:
927 /* scan bus_info and iounmap any bbars we previously ioremap'd */
928 for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
929 if (bus_info[bus].bbar)
930 iounmap(bus_info[bus].bbar);
931
932 return ret;
933}
934
897static int __init calgary_init(void) 935static int __init calgary_init(void)
898{ 936{
899 int ret = -ENODEV; 937 int ret;
900 struct pci_dev *dev = NULL; 938 struct pci_dev *dev = NULL;
901 939
940 ret = calgary_locate_bbars();
941 if (ret)
942 return ret;
943
902 do { 944 do {
903 dev = pci_get_device(PCI_VENDOR_ID_IBM, 945 dev = pci_get_device(PCI_VENDOR_ID_IBM,
904 PCI_DEVICE_ID_IBM_CALGARY, 946 PCI_DEVICE_ID_IBM_CALGARY,
@@ -921,7 +963,7 @@ static int __init calgary_init(void)
921 963
922error: 964error:
923 do { 965 do {
924 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, 966 dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
925 PCI_DEVICE_ID_IBM_CALGARY, 967 PCI_DEVICE_ID_IBM_CALGARY,
926 dev); 968 dev);
927 if (!dev) 969 if (!dev)
@@ -962,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram)
962 return ret; 1004 return ret;
963} 1005}
964 1006
1007static int __init build_detail_arrays(void)
1008{
1009 unsigned long ptr;
1010 int i, scal_detail_size, rio_detail_size;
1011
1012 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
1013 printk(KERN_WARNING
1014 "Calgary: MAX_NUMNODES too low! Defined as %d, "
1015 "but system has %d nodes.\n",
1016 MAX_NUMNODES, rio_table_hdr->num_scal_dev);
1017 return -ENODEV;
1018 }
1019
1020 switch (rio_table_hdr->version){
1021 case 2:
1022 scal_detail_size = 11;
1023 rio_detail_size = 13;
1024 break;
1025 case 3:
1026 scal_detail_size = 12;
1027 rio_detail_size = 15;
1028 break;
1029 default:
1030 printk(KERN_WARNING
1031 "Calgary: Invalid Rio Grande Table Version: %d\n",
1032 rio_table_hdr->version);
1033 return -EPROTO;
1034 }
1035
1036 ptr = ((unsigned long)rio_table_hdr) + 3;
1037 for (i = 0; i < rio_table_hdr->num_scal_dev;
1038 i++, ptr += scal_detail_size)
1039 scal_devs[i] = (struct scal_detail *)ptr;
1040
1041 for (i = 0; i < rio_table_hdr->num_rio_dev;
1042 i++, ptr += rio_detail_size)
1043 rio_devs[i] = (struct rio_detail *)ptr;
1044
1045 return 0;
1046}
1047
965void __init detect_calgary(void) 1048void __init detect_calgary(void)
966{ 1049{
967 u32 val; 1050 u32 val;
968 int bus; 1051 int bus;
969 void *tbl; 1052 void *tbl;
970 int calgary_found = 0; 1053 int calgary_found = 0;
971 int phb = -1; 1054 unsigned long ptr;
1055 int offset;
1056 int ret;
972 1057
973 /* 1058 /*
974 * if the user specified iommu=off or iommu=soft or we found 1059 * if the user specified iommu=off or iommu=soft or we found
@@ -977,25 +1062,47 @@ void __init detect_calgary(void)
977 if (swiotlb || no_iommu || iommu_detected) 1062 if (swiotlb || no_iommu || iommu_detected)
978 return; 1063 return;
979 1064
1065 if (!use_calgary)
1066 return;
1067
980 if (!early_pci_allowed()) 1068 if (!early_pci_allowed())
981 return; 1069 return;
982 1070
1071 ptr = (unsigned long)phys_to_virt(get_bios_ebda());
1072
1073 rio_table_hdr = NULL;
1074 offset = 0x180;
1075 while (offset) {
1076 /* The block id is stored in the 2nd word */
1077 if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
1078 /* set the pointer past the offset & block id */
1079 rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
1080 break;
1081 }
1082 /* The next offset is stored in the 1st word. 0 means no more */
1083 offset = *((unsigned short *)(ptr + offset));
1084 }
1085 if (!rio_table_hdr) {
1086 printk(KERN_ERR "Calgary: Unable to locate "
1087 "Rio Grande Table in EBDA - bailing!\n");
1088 return;
1089 }
1090
1091 ret = build_detail_arrays();
1092 if (ret) {
1093 printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret);
1094 return;
1095 }
1096
983 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); 1097 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
984 1098
985 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 1099 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
986 int dev; 1100 int dev;
987 struct calgary_bus_info *info = &bus_info[bus]; 1101 struct calgary_bus_info *info = &bus_info[bus];
988 info->phbid = -1;
989 1102
990 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) 1103 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
991 continue; 1104 continue;
992 1105
993 /*
994 * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3)
995 * it is connected to releative to the clagary chip.
996 */
997 phb = (phb + 1) % PHBS_PER_CALGARY;
998
999 if (info->translation_disabled) 1106 if (info->translation_disabled)
1000 continue; 1107 continue;
1001 1108
@@ -1010,7 +1117,6 @@ void __init detect_calgary(void)
1010 if (!tbl) 1117 if (!tbl)
1011 goto cleanup; 1118 goto cleanup;
1012 info->tce_space = tbl; 1119 info->tce_space = tbl;
1013 info->phbid = phb;
1014 calgary_found = 1; 1120 calgary_found = 1;
1015 break; 1121 break;
1016 } 1122 }
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d857453f8a..683b7a5c1ab3 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@ __init int iommu_setup(char *p)
296 gart_parse_options(p); 296 gart_parse_options(p);
297#endif 297#endif
298 298
299#ifdef CONFIG_CALGARY_IOMMU
300 if (!strncmp(p, "calgary", 7))
301 use_calgary = 1;
302#endif /* CONFIG_CALGARY_IOMMU */
303
299 p += strcspn(p, ","); 304 p += strcspn(p, ",");
300 if (*p == ',') 305 if (*p == ',')
301 ++p; 306 ++p;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8a3303..fc1960f1f243 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@ void __init gart_iommu_init(void)
601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
602 !iommu_aperture || 602 !iommu_aperture ||
603 (no_agp && init_k8_gatt(&info) < 0)) { 603 (no_agp && init_k8_gatt(&info) < 0)) {
604 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
605 if (end_pfn > MAX_DMA32_PFN) { 604 if (end_pfn > MAX_DMA32_PFN) {
606 printk(KERN_ERR "WARNING more than 4GB of memory " 605 printk(KERN_ERR "WARNING more than 4GB of memory "
607 "but IOMMU not available.\n" 606 "but GART IOMMU not available.\n"
608 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 607 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
609 } 608 }
610 return; 609 return;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7451a4c43c16..a418ee4c8c62 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -108,17 +108,15 @@ void exit_idle(void)
108 */ 108 */
109static void default_idle(void) 109static void default_idle(void)
110{ 110{
111 local_irq_enable();
112
113 current_thread_info()->status &= ~TS_POLLING; 111 current_thread_info()->status &= ~TS_POLLING;
114 smp_mb__after_clear_bit(); 112 smp_mb__after_clear_bit();
115 while (!need_resched()) { 113 local_irq_disable();
116 local_irq_disable(); 114 if (!need_resched()) {
117 if (!need_resched()) 115 /* Enables interrupts one instruction before HLT.
118 safe_halt(); 116 x86 special cases this so there is no race. */
119 else 117 safe_halt();
120 local_irq_enable(); 118 } else
121 } 119 local_irq_enable();
122 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
123} 121}
124 122
@@ -130,15 +128,7 @@ static void default_idle(void)
130static void poll_idle (void) 128static void poll_idle (void)
131{ 129{
132 local_irq_enable(); 130 local_irq_enable();
133 131 cpu_relax();
134 asm volatile(
135 "2:"
136 "testl %0,%1;"
137 "rep; nop;"
138 "je 2b;"
139 : :
140 "i" (_TIF_NEED_RESCHED),
141 "m" (current_thread_info()->flags));
142} 132}
143 133
144void cpu_idle_wait(void) 134void cpu_idle_wait(void)
@@ -219,6 +209,12 @@ void cpu_idle (void)
219 idle = default_idle; 209 idle = default_idle;
220 if (cpu_is_offline(smp_processor_id())) 210 if (cpu_is_offline(smp_processor_id()))
221 play_dead(); 211 play_dead();
212 /*
213 * Idle routines should keep interrupts disabled
214 * from here on, until they go to idle.
215 * Otherwise, idle callbacks can misfire.
216 */
217 local_irq_disable();
222 enter_idle(); 218 enter_idle();
223 idle(); 219 idle();
224 /* In many cases the interrupt that ended idle 220 /* In many cases the interrupt that ended idle
@@ -256,9 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
256/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 252/* Default MONITOR/MWAIT with no hints, used for default C1 state */
257static void mwait_idle(void) 253static void mwait_idle(void)
258{ 254{
259 local_irq_enable(); 255 if (!need_resched()) {
260 while (!need_resched()) 256 __monitor((void *)&current_thread_info()->flags, 0, 0);
261 mwait_idle_with_hints(0,0); 257 smp_mb();
258 if (!need_resched())
259 __sti_mwait(0, 0);
260 else
261 local_irq_enable();
262 } else {
263 local_irq_enable();
264 }
262} 265}
263 266
264void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 267void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index f12f266f3e98..af425a8049fb 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -731,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
731 /* Fix cpuid4 emulation for more */ 731 /* Fix cpuid4 emulation for more */
732 num_cache_leaves = 3; 732 num_cache_leaves = 3;
733 733
734 /* When there is only one core no need to synchronize RDTSC */ 734 /* RDTSC can be speculated around */
735 if (num_possible_cpus() == 1) 735 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
736 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
737 else
738 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
739} 736}
740 737
741static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 738static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -834,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
834 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); 831 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
835 } 832 }
836 833
834 if (cpu_has_ds) {
835 unsigned int l1, l2;
836 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
837 if (!(l1 & (1<<11)))
838 set_bit(X86_FEATURE_BTS, c->x86_capability);
839 if (!(l1 & (1<<12)))
840 set_bit(X86_FEATURE_PEBS, c->x86_capability);
841 }
842
837 n = c->extended_cpuid_level; 843 n = c->extended_cpuid_level;
838 if (n >= 0x80000008) { 844 if (n >= 0x80000008) {
839 unsigned eax = cpuid_eax(0x80000008); 845 unsigned eax = cpuid_eax(0x80000008);
@@ -853,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
853 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 859 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
854 if (c->x86 == 6) 860 if (c->x86 == 6)
855 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); 861 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
856 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 862 if (c->x86 == 15)
863 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
864 else
865 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
857 c->x86_max_cores = intel_num_cpu_cores(c); 866 c->x86_max_cores = intel_num_cpu_cores(c);
858 867
859 srat_detect_node(); 868 srat_detect_node();
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 32f4d7e2a060..af1ec4d23cf8 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -389,6 +389,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
389 put_cpu(); 389 put_cpu();
390 return 0; 390 return 0;
391} 391}
392EXPORT_SYMBOL(smp_call_function_single);
392 393
393/* 394/*
394 * this function sends a 'generic call function' IPI to all other CPUs 395 * this function sends a 'generic call function' IPI to all other CPUs
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9800147c4c68..daf19332f0dd 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
63 64
64/* Number of siblings per CPU package */ 65/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 66int smp_num_siblings = 1;
@@ -1169,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu)
1169 1170
1170 while (!cpu_isset(cpu, cpu_online_map)) 1171 while (!cpu_isset(cpu, cpu_online_map))
1171 cpu_relax(); 1172 cpu_relax();
1173
1174 if (num_online_cpus() > 8 && genapic == &apic_flat) {
1175 printk(KERN_WARNING
1176 "flat APIC routing can't be used with > 8 cpus\n");
1177 BUG();
1178 }
1179
1172 err = 0; 1180 err = 0;
1173 1181
1174 return err; 1182 return err;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 0d65b22f229c..a1641ffdffcf 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,9 @@
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h> 32#include <linux/unwind.h>
33#include <linux/uaccess.h>
33 34
34#include <asm/system.h> 35#include <asm/system.h>
35#include <asm/uaccess.h>
36#include <asm/io.h> 36#include <asm/io.h>
37#include <asm/atomic.h> 37#include <asm/atomic.h>
38#include <asm/debugreg.h> 38#include <asm/debugreg.h>
@@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108 preempt_enable_no_resched(); 108 preempt_enable_no_resched();
109} 109}
110 110
111static int kstack_depth_to_print = 12; 111int kstack_depth_to_print = 12;
112#ifdef CONFIG_STACK_UNWIND 112#ifdef CONFIG_STACK_UNWIND
113static int call_trace = 1; 113static int call_trace = 1;
114#else 114#else
@@ -225,16 +225,25 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
225{ 225{
226 struct ops_and_data *oad = (struct ops_and_data *)context; 226 struct ops_and_data *oad = (struct ops_and_data *)context;
227 int n = 0; 227 int n = 0;
228 unsigned long sp = UNW_SP(info);
228 229
230 if (arch_unw_user_mode(info))
231 return -1;
229 while (unwind(info) == 0 && UNW_PC(info)) { 232 while (unwind(info) == 0 && UNW_PC(info)) {
230 n++; 233 n++;
231 oad->ops->address(oad->data, UNW_PC(info)); 234 oad->ops->address(oad->data, UNW_PC(info));
232 if (arch_unw_user_mode(info)) 235 if (arch_unw_user_mode(info))
233 break; 236 break;
237 if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
238 && sp > UNW_SP(info))
239 break;
240 sp = UNW_SP(info);
234 } 241 }
235 return n; 242 return n;
236} 243}
237 244
245#define MSG(txt) ops->warning(data, txt)
246
238/* 247/*
239 * x86-64 can have upto three kernel stacks: 248 * x86-64 can have upto three kernel stacks:
240 * process stack 249 * process stack
@@ -248,11 +257,12 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
248 return p > t && p < t + THREAD_SIZE - 3; 257 return p > t && p < t + THREAD_SIZE - 3;
249} 258}
250 259
251void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, 260void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
261 unsigned long *stack,
252 struct stacktrace_ops *ops, void *data) 262 struct stacktrace_ops *ops, void *data)
253{ 263{
254 const unsigned cpu = smp_processor_id(); 264 const unsigned cpu = get_cpu();
255 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 265 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
256 unsigned used = 0; 266 unsigned used = 0;
257 struct thread_info *tinfo; 267 struct thread_info *tinfo;
258 268
@@ -268,28 +278,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
268 if (unwind_init_frame_info(&info, tsk, regs) == 0) 278 if (unwind_init_frame_info(&info, tsk, regs) == 0)
269 unw_ret = dump_trace_unwind(&info, &oad); 279 unw_ret = dump_trace_unwind(&info, &oad);
270 } else if (tsk == current) 280 } else if (tsk == current)
271 unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); 281 unw_ret = unwind_init_running(&info, dump_trace_unwind,
282 &oad);
272 else { 283 else {
273 if (unwind_init_blocked(&info, tsk) == 0) 284 if (unwind_init_blocked(&info, tsk) == 0)
274 unw_ret = dump_trace_unwind(&info, &oad); 285 unw_ret = dump_trace_unwind(&info, &oad);
275 } 286 }
276 if (unw_ret > 0) { 287 if (unw_ret > 0) {
277 if (call_trace == 1 && !arch_unw_user_mode(&info)) { 288 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
278 ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", 289 ops->warning_symbol(data,
290 "DWARF2 unwinder stuck at %s",
279 UNW_PC(&info)); 291 UNW_PC(&info));
280 if ((long)UNW_SP(&info) < 0) { 292 if ((long)UNW_SP(&info) < 0) {
281 ops->warning(data, "Leftover inexact backtrace:\n"); 293 MSG("Leftover inexact backtrace:");
282 stack = (unsigned long *)UNW_SP(&info); 294 stack = (unsigned long *)UNW_SP(&info);
283 if (!stack) 295 if (!stack)
284 return; 296 goto out;
285 } else 297 } else
286 ops->warning(data, "Full inexact backtrace again:\n"); 298 MSG("Full inexact backtrace again:");
287 } else if (call_trace >= 1) 299 } else if (call_trace >= 1)
288 return; 300 goto out;
289 else 301 else
290 ops->warning(data, "Full inexact backtrace again:\n"); 302 MSG("Full inexact backtrace again:");
291 } else 303 } else
292 ops->warning(data, "Inexact backtrace:\n"); 304 MSG("Inexact backtrace:");
293 } 305 }
294 if (!stack) { 306 if (!stack) {
295 unsigned long dummy; 307 unsigned long dummy;
@@ -297,12 +309,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
297 if (tsk && tsk != current) 309 if (tsk && tsk != current)
298 stack = (unsigned long *)tsk->thread.rsp; 310 stack = (unsigned long *)tsk->thread.rsp;
299 } 311 }
300 /*
301 * Align the stack pointer on word boundary, later loops
302 * rely on that (and corruption / debug info bugs can cause
303 * unaligned values here):
304 */
305 stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
306 312
307 /* 313 /*
308 * Print function call entries within a stack. 'cond' is the 314 * Print function call entries within a stack. 'cond' is the
@@ -312,9 +318,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
312#define HANDLE_STACK(cond) \ 318#define HANDLE_STACK(cond) \
313 do while (cond) { \ 319 do while (cond) { \
314 unsigned long addr = *stack++; \ 320 unsigned long addr = *stack++; \
315 if (oops_in_progress ? \ 321 /* Use unlocked access here because except for NMIs \
316 __kernel_text_address(addr) : \ 322 we should be already protected against module unloads */ \
317 kernel_text_address(addr)) { \ 323 if (__kernel_text_address(addr)) { \
318 /* \ 324 /* \
319 * If the address is either in the text segment of the \ 325 * If the address is either in the text segment of the \
320 * kernel, or in the region which contains vmalloc'ed \ 326 * kernel, or in the region which contains vmalloc'ed \
@@ -380,6 +386,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
380 tinfo = current_thread_info(); 386 tinfo = current_thread_info();
381 HANDLE_STACK (valid_stack_ptr(tinfo, stack)); 387 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
382#undef HANDLE_STACK 388#undef HANDLE_STACK
389out:
390 put_cpu();
383} 391}
384EXPORT_SYMBOL(dump_trace); 392EXPORT_SYMBOL(dump_trace);
385 393
@@ -786,8 +794,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
786{ 794{
787 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 795 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
788 reason); 796 reason);
789 printk(KERN_EMERG "You probably have a hardware problem with your " 797 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
790 "RAM chips\n");
791 798
792 if (panic_on_unrecovered_nmi) 799 if (panic_on_unrecovered_nmi)
793 panic("NMI: Not continuing"); 800 panic("NMI: Not continuing");
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d9534e750d4f..6a1f8f491e5d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -51,15 +51,6 @@ SECTIONS
51 51
52 RODATA 52 RODATA
53 53
54#ifdef CONFIG_STACK_UNWIND
55 . = ALIGN(8);
56 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
57 __start_unwind = .;
58 *(.eh_frame)
59 __end_unwind = .;
60 }
61#endif
62
63 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 54 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
64 /* Data */ 55 /* Data */
65 .data : AT(ADDR(.data) - LOAD_OFFSET) { 56 .data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 3785e4954734..4a673f5397a0 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -289,6 +289,7 @@ static void __init map_vsyscall(void)
289 extern char __vsyscall_0; 289 extern char __vsyscall_0;
290 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 290 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
291 291
292 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
292 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 293 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
293} 294}
294 295
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 06ae630de82b..bc503f506903 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -9,8 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/checksum.h> 10#include <asm/checksum.h>
11 11
12#define __force_inline inline __attribute__((always_inline))
13
14static inline unsigned short from32to16(unsigned a) 12static inline unsigned short from32to16(unsigned a)
15{ 13{
16 unsigned short b = a >> 16; 14 unsigned short b = a >> 16;
@@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a)
33 * Unrolling to an 128 bytes inner loop. 31 * Unrolling to an 128 bytes inner loop.
34 * Using interleaving with more registers to break the carry chains. 32 * Using interleaving with more registers to break the carry chains.
35 */ 33 */
36static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) 34static unsigned do_csum(const unsigned char *buff, unsigned len)
37{ 35{
38 unsigned odd, count; 36 unsigned odd, count;
39 unsigned long result = 0; 37 unsigned long result = 0;
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 50be90975d04..2dbebd308347 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay);
40 40
41inline void __const_udelay(unsigned long xloops) 41inline void __const_udelay(unsigned long xloops)
42{ 42{
43 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); 43 __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
44} 44}
45EXPORT_SYMBOL(__const_udelay); 45EXPORT_SYMBOL(__const_udelay);
46 46
47void __udelay(unsigned long usecs) 47void __udelay(unsigned long usecs)
48{ 48{
49 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ 49 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
50} 50}
51EXPORT_SYMBOL(__udelay); 51EXPORT_SYMBOL(__udelay);
52 52
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 3751b4788e28..a65fc6f1dcaf 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,9 +23,9 @@
23#include <linux/compiler.h> 23#include <linux/compiler.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/kprobes.h> 25#include <linux/kprobes.h>
26#include <linux/uaccess.h>
26 27
27#include <asm/system.h> 28#include <asm/system.h>
28#include <asm/uaccess.h>
29#include <asm/pgalloc.h> 29#include <asm/pgalloc.h>
30#include <asm/smp.h> 30#include <asm/smp.h>
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
96static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, 96static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
97 unsigned long error_code) 97 unsigned long error_code)
98{ 98{
99 unsigned char __user *instr; 99 unsigned char *instr;
100 int scan_more = 1; 100 int scan_more = 1;
101 int prefetch = 0; 101 int prefetch = 0;
102 unsigned char *max_instr; 102 unsigned char *max_instr;
@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
116 unsigned char instr_hi; 116 unsigned char instr_hi;
117 unsigned char instr_lo; 117 unsigned char instr_lo;
118 118
119 if (__get_user(opcode, (char __user *)instr)) 119 if (probe_kernel_address(instr, opcode))
120 break; 120 break;
121 121
122 instr_hi = opcode & 0xf0; 122 instr_hi = opcode & 0xf0;
@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
154 case 0x00: 154 case 0x00:
155 /* Prefetch instruction is 0x0F0D or 0x0F18 */ 155 /* Prefetch instruction is 0x0F0D or 0x0F18 */
156 scan_more = 0; 156 scan_more = 0;
157 if (__get_user(opcode, (char __user *)instr)) 157 if (probe_kernel_address(instr, opcode))
158 break; 158 break;
159 prefetch = (instr_lo == 0xF) && 159 prefetch = (instr_lo == 0xF) &&
160 (opcode == 0x0D || opcode == 0x18); 160 (opcode == 0x0D || opcode == 0x18);
@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
170static int bad_address(void *p) 170static int bad_address(void *p)
171{ 171{
172 unsigned long dummy; 172 unsigned long dummy;
173 return __get_user(dummy, (unsigned long __user *)p); 173 return probe_kernel_address((unsigned long *)p, dummy);
174} 174}
175 175
176void dump_pagetable(unsigned long address) 176void dump_pagetable(unsigned long address)
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4c0c00ef3ca7..2968b90ef8ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void)
730__initcall(x8664_sysctl_init); 730__initcall(x8664_sysctl_init);
731#endif 731#endif
732 732
733/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only 733/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
734 covers the 64bit vsyscall page now. 32bit has a real VMA now and does 734 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
735 not need special handling anymore. */ 735 not need special handling anymore. */
736 736
737static struct vm_area_struct gate_vma = { 737static struct vm_area_struct gate_vma = {
738 .vm_start = VSYSCALL_START, 738 .vm_start = VSYSCALL_START,
739 .vm_end = VSYSCALL_END, 739 .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
740 .vm_page_prot = PAGE_READONLY 740 .vm_page_prot = PAGE_READONLY_EXEC,
741 .vm_flags = VM_READ | VM_EXEC
741}; 742};
742 743
743struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 744struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 3e231d762aaa..ccb91dd996a9 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
61 return base; 61 return base;
62} 62}
63 63
64 64static void cache_flush_page(void *adr)
65static void flush_kernel_map(void *address)
66{ 65{
67 if (0 && address && cpu_has_clflush) { 66 int i;
68 /* is this worth it? */ 67 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
69 int i; 68 asm volatile("clflush (%0)" :: "r" (adr + i));
70 for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
71 asm volatile("clflush (%0)" :: "r" (address + i));
72 } else
73 asm volatile("wbinvd":::"memory");
74 if (address)
75 __flush_tlb_one(address);
76 else
77 __flush_tlb_all();
78} 69}
79 70
71static void flush_kernel_map(void *arg)
72{
73 struct list_head *l = (struct list_head *)arg;
74 struct page *pg;
75
76 /* When clflush is available always use it because it is
77 much cheaper than WBINVD */
78 if (!cpu_has_clflush)
79 asm volatile("wbinvd" ::: "memory");
80 list_for_each_entry(pg, l, lru) {
81 void *adr = page_address(pg);
82 if (cpu_has_clflush)
83 cache_flush_page(adr);
84 __flush_tlb_one(adr);
85 }
86}
80 87
81static inline void flush_map(unsigned long address) 88static inline void flush_map(struct list_head *l)
82{ 89{
83 on_each_cpu(flush_kernel_map, (void *)address, 1, 1); 90 on_each_cpu(flush_kernel_map, l, 1, 1);
84} 91}
85 92
86static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ 93static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
87 94
88static inline void save_page(struct page *fpage) 95static inline void save_page(struct page *fpage)
89{ 96{
90 fpage->lru.next = (struct list_head *)deferred_pages; 97 list_add(&fpage->lru, &deferred_pages);
91 deferred_pages = fpage;
92} 98}
93 99
94/* 100/*
@@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
207 213
208void global_flush_tlb(void) 214void global_flush_tlb(void)
209{ 215{
210 struct page *dpage; 216 struct page *pg, *next;
217 struct list_head l;
211 218
212 down_read(&init_mm.mmap_sem); 219 down_read(&init_mm.mmap_sem);
213 dpage = xchg(&deferred_pages, NULL); 220 list_replace_init(&deferred_pages, &l);
214 up_read(&init_mm.mmap_sem); 221 up_read(&init_mm.mmap_sem);
215 222
216 flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); 223 flush_map(&l);
217 while (dpage) { 224
218 struct page *tmp = dpage; 225 list_for_each_entry_safe(pg, next, &l, lru) {
219 dpage = (struct page *)dpage->lru.next; 226 ClearPagePrivate(pg);
220 ClearPagePrivate(tmp); 227 __free_page(pg);
221 __free_page(tmp);
222 } 228 }
223} 229}
224 230