aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 13:51:09 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 13:51:09 -0400
commit81a07d7588d376c530d006e24d7981304ce96e16 (patch)
tree1608e094c88b9702c86cf2e6f65339aab9ea3f3f /arch/x86_64/kernel
parent8871e73fdbde07d0a41393f7ee30787b65387b36 (diff)
parent8501a2fbe762b21d2504ed3aca3b52be61b5e6e4 (diff)
Merge branch 'x86-64'
* x86-64: (83 commits) [PATCH] x86_64: x86_64 stack usage debugging [PATCH] x86_64: (resend) x86_64 stack overflow debugging [PATCH] x86_64: msi_apic.c build fix [PATCH] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs [PATCH] x86_64: Avoid broadcasting NMI IPIs [PATCH] x86_64: fix apic error on bootup [PATCH] x86_64: enlarge window for stack growth [PATCH] x86_64: Minor string functions optimizations [PATCH] x86_64: Move export symbols to their C functions [PATCH] x86_64: Standardize i386/x86_64 handling of NMI_VECTOR [PATCH] x86_64: Fix modular pc speaker [PATCH] x86_64: remove sys32_ni_syscall() [PATCH] x86_64: Do not use -ffunction-sections for modules [PATCH] x86_64: Add cpu_relax to apic_wait_icr_idle [PATCH] x86_64: adjust kstack_depth_to_print default [PATCH] i386/x86-64: adjust /proc/interrupts column headings [PATCH] x86_64: Fix race in cpu_local_* on preemptible kernels [PATCH] x86_64: Fix fast check in safe_smp_processor_id [PATCH] x86_64: x86_64 setup.c - printing cmp related boottime information [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status ... Manual resolve of trivial conflict in arch/i386/kernel/Makefile
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile8
-rw-r--r--arch/x86_64/kernel/aperture.c26
-rw-r--r--arch/x86_64/kernel/apic.c32
-rw-r--r--arch/x86_64/kernel/crash.c4
-rw-r--r--arch/x86_64/kernel/e820.c2
-rw-r--r--arch/x86_64/kernel/entry.S113
-rw-r--r--arch/x86_64/kernel/genapic_flat.c30
-rw-r--r--arch/x86_64/kernel/head64.c2
-rw-r--r--arch/x86_64/kernel/i8259.c14
-rw-r--r--arch/x86_64/kernel/io_apic.c45
-rw-r--r--arch/x86_64/kernel/irq.c30
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c506
-rw-r--r--arch/x86_64/kernel/module.c38
-rw-r--r--arch/x86_64/kernel/nmi.c89
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1018
-rw-r--r--arch/x86_64/kernel/pci-dma.c55
-rw-r--r--arch/x86_64/kernel/pci-gart.c155
-rw-r--r--arch/x86_64/kernel/pci-nommu.c9
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c16
-rw-r--r--arch/x86_64/kernel/reboot.c1
-rw-r--r--arch/x86_64/kernel/setup.c180
-rw-r--r--arch/x86_64/kernel/setup64.c3
-rw-r--r--arch/x86_64/kernel/signal.c3
-rw-r--r--arch/x86_64/kernel/smp.c10
-rw-r--r--arch/x86_64/kernel/smpboot.c23
-rw-r--r--arch/x86_64/kernel/tce.c202
-rw-r--r--arch/x86_64/kernel/time.c87
-rw-r--r--arch/x86_64/kernel/traps.c83
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S29
-rw-r--r--arch/x86_64/kernel/vsyscall.c4
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c114
34 files changed, 2312 insertions, 741 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 059c88313f4e..aeb9c560be88 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM) += suspend.o
28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o 28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
29obj-$(CONFIG_CPU_FREQ) += cpufreq/ 29obj-$(CONFIG_CPU_FREQ) += cpufreq/
30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
31obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o 31obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
32obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
32obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 33obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 35obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35obj-$(CONFIG_X86_VSMP) += vsmp.o 36obj-$(CONFIG_X86_VSMP) += vsmp.o
37obj-$(CONFIG_K8_NB) += k8.o
36 38
37obj-$(CONFIG_MODULES) += module.o 39obj-$(CONFIG_MODULES) += module.o
38 40
@@ -49,3 +51,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
49quirks-y += ../../i386/kernel/quirks.o 51quirks-y += ../../i386/kernel/quirks.o
50i8237-y += ../../i386/kernel/i8237.o 52i8237-y += ../../i386/kernel/i8237.o
51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 53msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
54alternative-y += ../../i386/kernel/alternative.o
55
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a195ef06ec55 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -8,7 +8,6 @@
8 * because only the bootmem allocator can allocate 32+MB. 8 * because only the bootmem allocator can allocate 32+MB.
9 * 9 *
10 * Copyright 2002 Andi Kleen, SuSE Labs. 10 * Copyright 2002 Andi Kleen, SuSE Labs.
11 * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
12 */ 11 */
13#include <linux/config.h> 12#include <linux/config.h>
14#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -24,6 +23,7 @@
24#include <asm/proto.h> 23#include <asm/proto.h>
25#include <asm/pci-direct.h> 24#include <asm/pci-direct.h>
26#include <asm/dma.h> 25#include <asm/dma.h>
26#include <asm/k8.h>
27 27
28int iommu_aperture; 28int iommu_aperture;
29int iommu_aperture_disabled __initdata = 0; 29int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
37/* This code runs before the PCI subsystem is initialized, so just 37/* This code runs before the PCI subsystem is initialized, so just
38 access the northbridge directly. */ 38 access the northbridge directly. */
39 39
40#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
41
42static u32 __init allocate_aperture(void) 40static u32 __init allocate_aperture(void)
43{ 41{
44 pg_data_t *nd0 = NODE_DATA(0); 42 pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
68 return (u32)__pa(p); 66 return (u32)__pa(p);
69} 67}
70 68
71static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 69static int __init aperture_valid(u64 aper_base, u32 aper_size)
72{ 70{
73 if (!aper_base) 71 if (!aper_base)
74 return 0; 72 return 0;
75 if (aper_size < 64*1024*1024) { 73 if (aper_size < 64*1024*1024) {
76 printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 74 printk("Aperture too small (%d MB)\n", aper_size>>20);
77 return 0; 75 return 0;
78 } 76 }
79 if (aper_base + aper_size >= 0xffffffff) { 77 if (aper_base + aper_size >= 0xffffffff) {
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 78 printk("Aperture beyond 4GB. Ignoring.\n");
81 return 0; 79 return 0;
82 } 80 }
83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 81 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 82 printk("Aperture pointing to e820 RAM. Ignoring.\n");
85 return 0; 83 return 0;
86 } 84 }
87 return 1; 85 return 1;
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
140 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 138 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
141 aper, 32 << *order, apsizereg); 139 aper, 32 << *order, apsizereg);
142 140
143 if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) 141 if (!aperture_valid(aper, (32*1024*1024) << *order))
144 return 0; 142 return 0;
145 return (u32)aper; 143 return (u32)aper;
146} 144}
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
208 206
209 fix = 0; 207 fix = 0;
210 for (num = 24; num < 32; num++) { 208 for (num = 24; num < 32; num++) {
211 char name[30]; 209 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
212 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 210 continue;
213 continue;
214 211
212 iommu_detected = 1;
215 iommu_aperture = 1; 213 iommu_aperture = 1;
216 214
217 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 215 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
222 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 220 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
223 aper_base, aper_size>>20); 221 aper_base, aper_size>>20);
224 222
225 sprintf(name, "northbridge cpu %d", num-24); 223 if (!aperture_valid(aper_base, aper_size)) {
226
227 if (!aperture_valid(name, aper_base, aper_size)) {
228 fix = 1; 224 fix = 1;
229 break; 225 break;
230 } 226 }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
273 269
274 /* Fix up the north bridges */ 270 /* Fix up the north bridges */
275 for (num = 24; num < 32; num++) { 271 for (num = 24; num < 32; num++) {
276 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 272 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
277 continue; 273 continue;
278 274
279 /* Don't enable translation yet. That is done later. 275 /* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 29ef99001e05..b2ead91df218 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -100,7 +100,7 @@ void clear_local_APIC(void)
100 maxlvt = get_maxlvt(); 100 maxlvt = get_maxlvt();
101 101
102 /* 102 /*
103 * Masking an LVT entry on a P6 can trigger a local APIC error 103 * Masking an LVT entry can trigger a local APIC error
104 * if the vector is zero. Mask LVTERR first to prevent this. 104 * if the vector is zero. Mask LVTERR first to prevent this.
105 */ 105 */
106 if (maxlvt >= 3) { 106 if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
851 unsigned long v; 851 unsigned long v;
852 852
853 v = apic_read(APIC_LVTT); 853 v = apic_read(APIC_LVTT);
854 apic_write(APIC_LVTT, v | APIC_LVT_MASKED); 854 /*
855 * When an illegal vector value (0-15) is written to an LVT
856 * entry and delivery mode is Fixed, the APIC may signal an
857 * illegal vector error, with out regard to whether the mask
858 * bit is set or whether an interrupt is actually seen on input.
859 *
860 * Boot sequence might call this function when the LVTT has
861 * '0' vector value. So make sure vector field is set to
862 * valid value.
863 */
864 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
865 apic_write(APIC_LVTT, v);
855 } 866 }
856} 867}
857 868
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
909 return -EINVAL; 920 return -EINVAL;
910} 921}
911 922
912#ifdef CONFIG_X86_MCE_AMD 923void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
913void setup_threshold_lvt(unsigned long lvt_off) 924 unsigned char msg_type, unsigned char mask)
914{ 925{
915 unsigned int v = 0; 926 unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
916 unsigned long reg = (lvt_off << 4) + 0x500; 927 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
917 v |= THRESHOLD_APIC_VECTOR;
918 apic_write(reg, v); 928 apic_write(reg, v);
919} 929}
920#endif /* CONFIG_X86_MCE_AMD */
921 930
922#undef APIC_DIVISOR 931#undef APIC_DIVISOR
923 932
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
983} 992}
984 993
985/* 994/*
986 * oem_force_hpet_timer -- force HPET mode for some boxes. 995 * apic_is_clustered_box() -- Check if we can expect good TSC
987 * 996 *
988 * Thus far, the major user of this is IBM's Summit2 series: 997 * Thus far, the major user of this is IBM's Summit2 series:
989 * 998 *
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
991 * multi-chassis. Use available data to take a good guess. 1000 * multi-chassis. Use available data to take a good guess.
992 * If in doubt, go HPET. 1001 * If in doubt, go HPET.
993 */ 1002 */
994__cpuinit int oem_force_hpet_timer(void) 1003__cpuinit int apic_is_clustered_box(void)
995{ 1004{
996 int i, clusters, zeros; 1005 int i, clusters, zeros;
997 unsigned id; 1006 unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
1022 } 1031 }
1023 1032
1024 /* 1033 /*
1025 * If clusters > 2, then should be multi-chassis. Return 1 for HPET. 1034 * If clusters > 2, then should be multi-chassis.
1026 * Else return 0 to use TSC.
1027 * May have to revisit this when multi-core + hyperthreaded CPUs come 1035 * May have to revisit this when multi-core + hyperthreaded CPUs come
1028 * out, but AFAIK this will work even for them. 1036 * out, but AFAIK this will work even for them.
1029 */ 1037 */
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b729e39..8ca04912b1cc 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
111 atomic_dec(&waiting_for_crash_ipi); 111 atomic_dec(&waiting_for_crash_ipi);
112 /* Assume hlt works */ 112 /* Assume hlt works */
113 for(;;) 113 for(;;)
114 asm("hlt"); 114 halt();
115 115
116 return 1; 116 return 1;
117} 117}
118 118
119static void smp_send_nmi_allbutself(void) 119static void smp_send_nmi_allbutself(void)
120{ 120{
121 send_IPI_allbutself(APIC_DM_NMI); 121 send_IPI_allbutself(NMI_VECTOR);
122} 122}
123 123
124/* 124/*
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 1ef6028f721e..9e94d834624b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -1,7 +1,6 @@
1/* 1/*
2 * Handle the memory map. 2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over. 3 * The functions here do the job until bootmem takes over.
4 * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
5 * 4 *
6 * Getting sanitize_e820_map() in sync with i386 version by applying change: 5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
7 * - Provisions for empty E820 memory regions (reported by certain BIOSes). 6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
621} 620}
622 621
623unsigned long pci_mem_start = 0xaeedbabe; 622unsigned long pci_mem_start = 0xaeedbabe;
623EXPORT_SYMBOL(pci_mem_start);
624 624
625/* 625/*
626 * Search for the biggest gap in the low 32 bits of the e820 626 * Search for the biggest gap in the low 32 bits of the e820
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 586b34c00c48..7290e72b9a34 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -154,6 +154,7 @@ rff_trace:
154 GET_THREAD_INFO(%rcx) 154 GET_THREAD_INFO(%rcx)
155 jmp rff_action 155 jmp rff_action
156 CFI_ENDPROC 156 CFI_ENDPROC
157END(ret_from_fork)
157 158
158/* 159/*
159 * System call entry. Upto 6 arguments in registers are supported. 160 * System call entry. Upto 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
188 189
189ENTRY(system_call) 190ENTRY(system_call)
190 CFI_STARTPROC simple 191 CFI_STARTPROC simple
191 CFI_DEF_CFA rsp,0 192 CFI_DEF_CFA rsp,PDA_STACKOFFSET
192 CFI_REGISTER rip,rcx 193 CFI_REGISTER rip,rcx
193 /*CFI_REGISTER rflags,r11*/ 194 /*CFI_REGISTER rflags,r11*/
194 swapgs 195 swapgs
@@ -285,6 +286,7 @@ tracesys:
285 /* Use IRET because user could have changed frame */ 286 /* Use IRET because user could have changed frame */
286 jmp int_ret_from_sys_call 287 jmp int_ret_from_sys_call
287 CFI_ENDPROC 288 CFI_ENDPROC
289END(system_call)
288 290
289/* 291/*
290 * Syscall return path ending with IRET. 292 * Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
364 cli 366 cli
365 jmp int_with_check 367 jmp int_with_check
366 CFI_ENDPROC 368 CFI_ENDPROC
369END(int_ret_from_sys_call)
367 370
368/* 371/*
369 * Certain special system calls that need to save a complete full stack frame. 372 * Certain special system calls that need to save a complete full stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
375 leaq \func(%rip),%rax 378 leaq \func(%rip),%rax
376 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 379 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
377 jmp ptregscall_common 380 jmp ptregscall_common
381END(\label)
378 .endm 382 .endm
379 383
380 CFI_STARTPROC 384 CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
404 CFI_REL_OFFSET rip, 0 408 CFI_REL_OFFSET rip, 0
405 ret 409 ret
406 CFI_ENDPROC 410 CFI_ENDPROC
411END(ptregscall_common)
407 412
408ENTRY(stub_execve) 413ENTRY(stub_execve)
409 CFI_STARTPROC 414 CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
418 RESTORE_REST 423 RESTORE_REST
419 jmp int_ret_from_sys_call 424 jmp int_ret_from_sys_call
420 CFI_ENDPROC 425 CFI_ENDPROC
426END(stub_execve)
421 427
422/* 428/*
423 * sigreturn is special because it needs to restore all registers on return. 429 * sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
435 RESTORE_REST 441 RESTORE_REST
436 jmp int_ret_from_sys_call 442 jmp int_ret_from_sys_call
437 CFI_ENDPROC 443 CFI_ENDPROC
444END(stub_rt_sigreturn)
438 445
439/* 446/*
440 * initial frame state for interrupts and exceptions 447 * initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
466/* 0(%rsp): interrupt number */ 473/* 0(%rsp): interrupt number */
467 .macro interrupt func 474 .macro interrupt func
468 cld 475 cld
469#ifdef CONFIG_DEBUG_INFO
470 SAVE_ALL
471 movq %rsp,%rdi
472 /*
473 * Setup a stack frame pointer. This allows gdb to trace
474 * back to the original stack.
475 */
476 movq %rsp,%rbp
477 CFI_DEF_CFA_REGISTER rbp
478#else
479 SAVE_ARGS 476 SAVE_ARGS
480 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 477 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
481#endif 478 pushq %rbp
479 CFI_ADJUST_CFA_OFFSET 8
480 CFI_REL_OFFSET rbp, 0
481 movq %rsp,%rbp
482 CFI_DEF_CFA_REGISTER rbp
482 testl $3,CS(%rdi) 483 testl $3,CS(%rdi)
483 je 1f 484 je 1f
484 swapgs 485 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 4861: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 movq %gs:pda_irqstackptr,%rax 487 cmoveq %gs:pda_irqstackptr,%rsp
487 cmoveq %rax,%rsp /*todo This needs CFI annotation! */
488 pushq %rdi # save old stack
489#ifndef CONFIG_DEBUG_INFO
490 CFI_ADJUST_CFA_OFFSET 8
491#endif
492 call \func 488 call \func
493 .endm 489 .endm
494 490
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
497 interrupt do_IRQ 493 interrupt do_IRQ
498 /* 0(%rsp): oldrsp-ARGOFFSET */ 494 /* 0(%rsp): oldrsp-ARGOFFSET */
499ret_from_intr: 495ret_from_intr:
500 popq %rdi
501#ifndef CONFIG_DEBUG_INFO
502 CFI_ADJUST_CFA_OFFSET -8
503#endif
504 cli 496 cli
505 decl %gs:pda_irqcount 497 decl %gs:pda_irqcount
506#ifdef CONFIG_DEBUG_INFO 498 leaveq
507 movq RBP(%rdi),%rbp
508 CFI_DEF_CFA_REGISTER rsp 499 CFI_DEF_CFA_REGISTER rsp
509#endif 500 CFI_ADJUST_CFA_OFFSET -8
510 leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
511exit_intr: 501exit_intr:
512 GET_THREAD_INFO(%rcx) 502 GET_THREAD_INFO(%rcx)
513 testl $3,CS-ARGOFFSET(%rsp) 503 testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
589 call preempt_schedule_irq 579 call preempt_schedule_irq
590 jmp exit_intr 580 jmp exit_intr
591#endif 581#endif
582
592 CFI_ENDPROC 583 CFI_ENDPROC
584END(common_interrupt)
593 585
594/* 586/*
595 * APIC interrupts. 587 * APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
605 597
606ENTRY(thermal_interrupt) 598ENTRY(thermal_interrupt)
607 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 599 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
600END(thermal_interrupt)
608 601
609ENTRY(threshold_interrupt) 602ENTRY(threshold_interrupt)
610 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 603 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
604END(threshold_interrupt)
611 605
612#ifdef CONFIG_SMP 606#ifdef CONFIG_SMP
613ENTRY(reschedule_interrupt) 607ENTRY(reschedule_interrupt)
614 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 608 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
609END(reschedule_interrupt)
615 610
616 .macro INVALIDATE_ENTRY num 611 .macro INVALIDATE_ENTRY num
617ENTRY(invalidate_interrupt\num) 612ENTRY(invalidate_interrupt\num)
618 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 613 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
614END(invalidate_interrupt\num)
619 .endm 615 .endm
620 616
621 INVALIDATE_ENTRY 0 617 INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
629 625
630ENTRY(call_function_interrupt) 626ENTRY(call_function_interrupt)
631 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 627 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
628END(call_function_interrupt)
632#endif 629#endif
633 630
634#ifdef CONFIG_X86_LOCAL_APIC 631#ifdef CONFIG_X86_LOCAL_APIC
635ENTRY(apic_timer_interrupt) 632ENTRY(apic_timer_interrupt)
636 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 633 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
634END(apic_timer_interrupt)
637 635
638ENTRY(error_interrupt) 636ENTRY(error_interrupt)
639 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 637 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
638END(error_interrupt)
640 639
641ENTRY(spurious_interrupt) 640ENTRY(spurious_interrupt)
642 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 641 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
642END(spurious_interrupt)
643#endif 643#endif
644 644
645/* 645/*
@@ -777,6 +777,7 @@ error_kernelspace:
777 cmpq $gs_change,RIP(%rsp) 777 cmpq $gs_change,RIP(%rsp)
778 je error_swapgs 778 je error_swapgs
779 jmp error_sti 779 jmp error_sti
780END(error_entry)
780 781
781 /* Reload gs selector with exception handling */ 782 /* Reload gs selector with exception handling */
782 /* edi: new selector */ 783 /* edi: new selector */
@@ -794,6 +795,7 @@ gs_change:
794 CFI_ADJUST_CFA_OFFSET -8 795 CFI_ADJUST_CFA_OFFSET -8
795 ret 796 ret
796 CFI_ENDPROC 797 CFI_ENDPROC
798ENDPROC(load_gs_index)
797 799
798 .section __ex_table,"a" 800 .section __ex_table,"a"
799 .align 8 801 .align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
847 UNFAKE_STACK_FRAME 849 UNFAKE_STACK_FRAME
848 ret 850 ret
849 CFI_ENDPROC 851 CFI_ENDPROC
850 852ENDPROC(kernel_thread)
851 853
852child_rip: 854child_rip:
853 /* 855 /*
@@ -860,6 +862,7 @@ child_rip:
860 # exit 862 # exit
861 xorl %edi, %edi 863 xorl %edi, %edi
862 call do_exit 864 call do_exit
865ENDPROC(child_rip)
863 866
864/* 867/*
865 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 868 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
889 UNFAKE_STACK_FRAME 892 UNFAKE_STACK_FRAME
890 ret 893 ret
891 CFI_ENDPROC 894 CFI_ENDPROC
895ENDPROC(execve)
892 896
893KPROBE_ENTRY(page_fault) 897KPROBE_ENTRY(page_fault)
894 errorentry do_page_fault 898 errorentry do_page_fault
899END(page_fault)
895 .previous .text 900 .previous .text
896 901
897ENTRY(coprocessor_error) 902ENTRY(coprocessor_error)
898 zeroentry do_coprocessor_error 903 zeroentry do_coprocessor_error
904END(coprocessor_error)
899 905
900ENTRY(simd_coprocessor_error) 906ENTRY(simd_coprocessor_error)
901 zeroentry do_simd_coprocessor_error 907 zeroentry do_simd_coprocessor_error
908END(simd_coprocessor_error)
902 909
903ENTRY(device_not_available) 910ENTRY(device_not_available)
904 zeroentry math_state_restore 911 zeroentry math_state_restore
912END(device_not_available)
905 913
906 /* runs on exception stack */ 914 /* runs on exception stack */
907KPROBE_ENTRY(debug) 915KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
911 paranoidentry do_debug, DEBUG_STACK 919 paranoidentry do_debug, DEBUG_STACK
912 jmp paranoid_exit 920 jmp paranoid_exit
913 CFI_ENDPROC 921 CFI_ENDPROC
922END(debug)
914 .previous .text 923 .previous .text
915 924
916 /* runs on exception stack */ 925 /* runs on exception stack */
@@ -961,6 +970,7 @@ paranoid_schedule:
961 cli 970 cli
962 jmp paranoid_userspace 971 jmp paranoid_userspace
963 CFI_ENDPROC 972 CFI_ENDPROC
973END(nmi)
964 .previous .text 974 .previous .text
965 975
966KPROBE_ENTRY(int3) 976KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
970 paranoidentry do_int3, DEBUG_STACK 980 paranoidentry do_int3, DEBUG_STACK
971 jmp paranoid_exit 981 jmp paranoid_exit
972 CFI_ENDPROC 982 CFI_ENDPROC
983END(int3)
973 .previous .text 984 .previous .text
974 985
975ENTRY(overflow) 986ENTRY(overflow)
976 zeroentry do_overflow 987 zeroentry do_overflow
988END(overflow)
977 989
978ENTRY(bounds) 990ENTRY(bounds)
979 zeroentry do_bounds 991 zeroentry do_bounds
992END(bounds)
980 993
981ENTRY(invalid_op) 994ENTRY(invalid_op)
982 zeroentry do_invalid_op 995 zeroentry do_invalid_op
996END(invalid_op)
983 997
984ENTRY(coprocessor_segment_overrun) 998ENTRY(coprocessor_segment_overrun)
985 zeroentry do_coprocessor_segment_overrun 999 zeroentry do_coprocessor_segment_overrun
1000END(coprocessor_segment_overrun)
986 1001
987ENTRY(reserved) 1002ENTRY(reserved)
988 zeroentry do_reserved 1003 zeroentry do_reserved
1004END(reserved)
989 1005
990 /* runs on exception stack */ 1006 /* runs on exception stack */
991ENTRY(double_fault) 1007ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
993 paranoidentry do_double_fault 1009 paranoidentry do_double_fault
994 jmp paranoid_exit 1010 jmp paranoid_exit
995 CFI_ENDPROC 1011 CFI_ENDPROC
1012END(double_fault)
996 1013
997ENTRY(invalid_TSS) 1014ENTRY(invalid_TSS)
998 errorentry do_invalid_TSS 1015 errorentry do_invalid_TSS
1016END(invalid_TSS)
999 1017
1000ENTRY(segment_not_present) 1018ENTRY(segment_not_present)
1001 errorentry do_segment_not_present 1019 errorentry do_segment_not_present
1020END(segment_not_present)
1002 1021
1003 /* runs on exception stack */ 1022 /* runs on exception stack */
1004ENTRY(stack_segment) 1023ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
1006 paranoidentry do_stack_segment 1025 paranoidentry do_stack_segment
1007 jmp paranoid_exit 1026 jmp paranoid_exit
1008 CFI_ENDPROC 1027 CFI_ENDPROC
1028END(stack_segment)
1009 1029
1010KPROBE_ENTRY(general_protection) 1030KPROBE_ENTRY(general_protection)
1011 errorentry do_general_protection 1031 errorentry do_general_protection
1032END(general_protection)
1012 .previous .text 1033 .previous .text
1013 1034
1014ENTRY(alignment_check) 1035ENTRY(alignment_check)
1015 errorentry do_alignment_check 1036 errorentry do_alignment_check
1037END(alignment_check)
1016 1038
1017ENTRY(divide_error) 1039ENTRY(divide_error)
1018 zeroentry do_divide_error 1040 zeroentry do_divide_error
1041END(divide_error)
1019 1042
1020ENTRY(spurious_interrupt_bug) 1043ENTRY(spurious_interrupt_bug)
1021 zeroentry do_spurious_interrupt_bug 1044 zeroentry do_spurious_interrupt_bug
1045END(spurious_interrupt_bug)
1022 1046
1023#ifdef CONFIG_X86_MCE 1047#ifdef CONFIG_X86_MCE
1024 /* runs on exception stack */ 1048 /* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
1029 paranoidentry do_machine_check 1053 paranoidentry do_machine_check
1030 jmp paranoid_exit 1054 jmp paranoid_exit
1031 CFI_ENDPROC 1055 CFI_ENDPROC
1056END(machine_check)
1032#endif 1057#endif
1033 1058
1034ENTRY(call_softirq) 1059ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
1046 decl %gs:pda_irqcount 1071 decl %gs:pda_irqcount
1047 ret 1072 ret
1048 CFI_ENDPROC 1073 CFI_ENDPROC
1074ENDPROC(call_softirq)
1075
1076#ifdef CONFIG_STACK_UNWIND
1077ENTRY(arch_unwind_init_running)
1078 CFI_STARTPROC
1079 movq %r15, R15(%rdi)
1080 movq %r14, R14(%rdi)
1081 xchgq %rsi, %rdx
1082 movq %r13, R13(%rdi)
1083 movq %r12, R12(%rdi)
1084 xorl %eax, %eax
1085 movq %rbp, RBP(%rdi)
1086 movq %rbx, RBX(%rdi)
1087 movq (%rsp), %rcx
1088 movq %rax, R11(%rdi)
1089 movq %rax, R10(%rdi)
1090 movq %rax, R9(%rdi)
1091 movq %rax, R8(%rdi)
1092 movq %rax, RAX(%rdi)
1093 movq %rax, RCX(%rdi)
1094 movq %rax, RDX(%rdi)
1095 movq %rax, RSI(%rdi)
1096 movq %rax, RDI(%rdi)
1097 movq %rax, ORIG_RAX(%rdi)
1098 movq %rcx, RIP(%rdi)
1099 leaq 8(%rsp), %rcx
1100 movq $__KERNEL_CS, CS(%rdi)
1101 movq %rax, EFLAGS(%rdi)
1102 movq %rcx, RSP(%rdi)
1103 movq $__KERNEL_DS, SS(%rdi)
1104 jmpq *%rdx
1105 CFI_ENDPROC
1106ENDPROC(arch_unwind_init_running)
1107#endif
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 1a2ab825be98..21c7066e236a 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
78 78
79static void flat_send_IPI_allbutself(int vector) 79static void flat_send_IPI_allbutself(int vector)
80{ 80{
81#ifndef CONFIG_HOTPLUG_CPU 81#ifdef CONFIG_HOTPLUG_CPU
82 if (((num_online_cpus()) - 1) >= 1) 82 int hotplug = 1;
83 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
84#else 83#else
85 cpumask_t allbutme = cpu_online_map; 84 int hotplug = 0;
85#endif
86 if (hotplug || vector == NMI_VECTOR) {
87 cpumask_t allbutme = cpu_online_map;
86 88
87 cpu_clear(smp_processor_id(), allbutme); 89 cpu_clear(smp_processor_id(), allbutme);
88 90
89 if (!cpus_empty(allbutme)) 91 if (!cpus_empty(allbutme))
90 flat_send_IPI_mask(allbutme, vector); 92 flat_send_IPI_mask(allbutme, vector);
91#endif 93 } else if (num_online_cpus() > 1) {
94 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
95 }
92} 96}
93 97
94static void flat_send_IPI_all(int vector) 98static void flat_send_IPI_all(int vector)
95{ 99{
96 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 100 if (vector == NMI_VECTOR)
101 flat_send_IPI_mask(cpu_online_map, vector);
102 else
103 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
97} 104}
98 105
99static int flat_apic_id_registered(void) 106static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
108 115
109static unsigned int phys_pkg_id(int index_msb) 116static unsigned int phys_pkg_id(int index_msb)
110{ 117{
111 u32 ebx; 118 return hard_smp_processor_id() >> index_msb;
112
113 ebx = cpuid_ebx(1);
114 return ((ebx >> 24) & 0xFF) >> index_msb;
115} 119}
116 120
117struct genapic apic_flat = { 121struct genapic apic_flat = {
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cea20a66c150..e6a71c9556d9 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -2,8 +2,6 @@
2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code 2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 *
6 * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
7 */ 5 */
8 6
9#include <linux/init.h> 7#include <linux/init.h>
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 5ecd34ab8c2b..9b1a4e147321 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -44,11 +44,11 @@
44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
45 BI(x,c) BI(x,d) BI(x,e) BI(x,f) 45 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
46 46
47#define BUILD_14_IRQS(x) \ 47#define BUILD_15_IRQS(x) \
48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ 48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ 49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
51 BI(x,c) BI(x,d) 51 BI(x,c) BI(x,d) BI(x,e)
52 52
53/* 53/*
54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) 73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
74 74
75#ifdef CONFIG_PCI_MSI 75#ifdef CONFIG_PCI_MSI
76 BUILD_14_IRQS(0xe) 76 BUILD_15_IRQS(0xe)
77#endif 77#endif
78 78
79#endif 79#endif
80 80
81#undef BUILD_16_IRQS 81#undef BUILD_16_IRQS
82#undef BUILD_14_IRQS 82#undef BUILD_15_IRQS
83#undef BI 83#undef BI
84 84
85 85
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
94 94
95#define IRQLIST_14(x) \ 95#define IRQLIST_15(x) \
96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ 96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ 97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
99 IRQ(x,c), IRQ(x,d) 99 IRQ(x,c), IRQ(x,d), IRQ(x,e)
100 100
101void (*interrupt[NR_IRQS])(void) = { 101void (*interrupt[NR_IRQS])(void) = {
102 IRQLIST_16(0x0), 102 IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
108 IRQLIST_16(0xc), IRQLIST_16(0xd) 108 IRQLIST_16(0xc), IRQLIST_16(0xd)
109 109
110#ifdef CONFIG_PCI_MSI 110#ifdef CONFIG_PCI_MSI
111 , IRQLIST_14(0xe) 111 , IRQLIST_15(0xe)
112#endif 112#endif
113 113
114#endif 114#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9cc7031b7151..c768d8a036d0 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -41,6 +41,7 @@
41#include <asm/mach_apic.h> 41#include <asm/mach_apic.h>
42#include <asm/acpi.h> 42#include <asm/acpi.h>
43#include <asm/dma.h> 43#include <asm/dma.h>
44#include <asm/nmi.h>
44 45
45#define __apicdebuginit __init 46#define __apicdebuginit __init
46 47
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
56static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
57 58
58static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock);
59 61
60/* 62/*
61 * # of IRQ routing registers 63 * # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
317 vendor &= 0xffff; 319 vendor &= 0xffff;
318 switch (vendor) { 320 switch (vendor) {
319 case PCI_VENDOR_ID_VIA: 321 case PCI_VENDOR_ID_VIA:
320#ifdef CONFIG_GART_IOMMU 322#ifdef CONFIG_IOMMU
321 if ((end_pfn > MAX_DMA32_PFN || 323 if ((end_pfn > MAX_DMA32_PFN ||
322 force_iommu) && 324 force_iommu) &&
323 !iommu_aperture_allowed) { 325 !iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
834int assign_irq_vector(int irq) 836int assign_irq_vector(int irq)
835{ 837{
836 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 838 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
839 unsigned long flags;
840 int vector;
837 841
838 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); 842 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
839 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 843
844 spin_lock_irqsave(&vector_lock, flags);
845
846 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
847 spin_unlock_irqrestore(&vector_lock, flags);
840 return IO_APIC_VECTOR(irq); 848 return IO_APIC_VECTOR(irq);
849 }
841next: 850next:
842 current_vector += 8; 851 current_vector += 8;
843 if (current_vector == IA32_SYSCALL_VECTOR) 852 if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
849 current_vector = FIRST_DEVICE_VECTOR + offset; 858 current_vector = FIRST_DEVICE_VECTOR + offset;
850 } 859 }
851 860
852 vector_irq[current_vector] = irq; 861 vector = current_vector;
862 vector_irq[vector] = irq;
853 if (irq != AUTO_ASSIGN) 863 if (irq != AUTO_ASSIGN)
854 IO_APIC_VECTOR(irq) = current_vector; 864 IO_APIC_VECTOR(irq) = vector;
865
866 spin_unlock_irqrestore(&vector_lock, flags);
855 867
856 return current_vector; 868 return vector;
857} 869}
858 870
859extern void (*interrupt[NR_IRQS])(void); 871extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
866 878
867static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 879static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
868{ 880{
869 if (use_pci_vector() && !platform_legacy_irq(irq)) { 881 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
870 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 882
871 trigger == IOAPIC_LEVEL) 883 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
872 irq_desc[vector].handler = &ioapic_level_type; 884 trigger == IOAPIC_LEVEL)
873 else 885 irq_desc[idx].handler = &ioapic_level_type;
874 irq_desc[vector].handler = &ioapic_edge_type; 886 else
875 set_intr_gate(vector, interrupt[vector]); 887 irq_desc[idx].handler = &ioapic_edge_type;
876 } else { 888 set_intr_gate(vector, interrupt[idx]);
877 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
878 trigger == IOAPIC_LEVEL)
879 irq_desc[irq].handler = &ioapic_level_type;
880 else
881 irq_desc[irq].handler = &ioapic_edge_type;
882 set_intr_gate(vector, interrupt[irq]);
883 }
884} 889}
885 890
886static void __init setup_IO_APIC_irqs(void) 891static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index d8bd0b345b1e..59518d4d4358 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
26#endif 26#endif
27#endif 27#endif
28 28
29#ifdef CONFIG_DEBUG_STACKOVERFLOW
30/*
31 * Probabilistic stack overflow check:
32 *
33 * Only check the stack in process context, because everything else
34 * runs on the big interrupt stacks. Checking reliably is too expensive,
35 * so we just check from interrupts.
36 */
37static inline void stack_overflow_check(struct pt_regs *regs)
38{
39 u64 curbase = (u64) current->thread_info;
40 static unsigned long warned = -60*HZ;
41
42 if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
43 regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
44 time_after(jiffies, warned + 60*HZ)) {
45 printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
46 current->comm, curbase, regs->rsp);
47 show_stack(NULL,NULL);
48 warned = jiffies;
49 }
50}
51#endif
52
29/* 53/*
30 * Generic, controller-independent functions: 54 * Generic, controller-independent functions:
31 */ 55 */
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
39 if (i == 0) { 63 if (i == 0) {
40 seq_printf(p, " "); 64 seq_printf(p, " ");
41 for_each_online_cpu(j) 65 for_each_online_cpu(j)
42 seq_printf(p, "CPU%d ",j); 66 seq_printf(p, "CPU%-8d",j);
43 seq_putc(p, '\n'); 67 seq_putc(p, '\n');
44 } 68 }
45 69
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
96 120
97 exit_idle(); 121 exit_idle();
98 irq_enter(); 122 irq_enter();
99 123#ifdef CONFIG_DEBUG_STACKOVERFLOW
124 stack_overflow_check(regs);
125#endif
100 __do_IRQ(irq, regs); 126 __do_IRQ(irq, regs);
101 irq_exit(); 127 irq_exit();
102 128
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
1/*
2 * Shared support code for AMD K8 northbridges and derivates.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */
5#include <linux/gfp.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <asm/k8.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15
16static u32 *flush_words;
17
18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
21 {}
22};
23EXPORT_SYMBOL(k8_nb_ids);
24
25struct pci_dev **k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges);
27
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
29{
30 do {
31 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
32 if (!dev)
33 break;
34 } while (!pci_match_id(&k8_nb_ids[0], dev));
35 return dev;
36}
37
38int cache_k8_northbridges(void)
39{
40 int i;
41 struct pci_dev *dev;
42 if (num_k8_northbridges)
43 return 0;
44
45 num_k8_northbridges = 0;
46 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++;
49
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
51 GFP_KERNEL);
52 if (!k8_northbridges)
53 return -ENOMEM;
54
55 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
56 if (!flush_words) {
57 kfree(k8_northbridges);
58 return -ENOMEM;
59 }
60
61 dev = NULL;
62 i = 0;
63 while ((dev = next_k8_northbridge(dev)) != NULL) {
64 k8_northbridges[i++] = dev;
65 pci_read_config_dword(dev, 0x9c, &flush_words[i]);
66 }
67 k8_northbridges[i] = NULL;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(cache_k8_northbridges);
71
72/* Ignores subdevice/subvendor but as far as I can figure out
73 they're useless anyways */
74int __init early_is_k8_nb(u32 device)
75{
76 struct pci_device_id *id;
77 u32 vendor = device & 0xffff;
78 device >>= 16;
79 for (id = k8_nb_ids; id->vendor; id++)
80 if (vendor == id->vendor && device == id->device)
81 return 1;
82 return 0;
83}
84
85void k8_flush_garts(void)
86{
87 int flushed, i;
88 unsigned long flags;
89 static DEFINE_SPINLOCK(gart_lock);
90
91 /* Avoid races between AGP and IOMMU. In theory it's not needed
92 but I'm not sure if the hardware won't lose flush requests
93 when another is pending. This whole thing is so expensive anyways
94 that it doesn't matter to serialize more. -AK */
95 spin_lock_irqsave(&gart_lock, flags);
96 flushed = 0;
97 for (i = 0; i < num_k8_northbridges; i++) {
98 pci_write_config_dword(k8_northbridges[i], 0x9c,
99 flush_words[i]|1);
100 flushed++;
101 }
102 for (i = 0; i < num_k8_northbridges; i++) {
103 u32 w;
104 /* Make sure the hardware actually executed the flush*/
105 for (;;) {
106 pci_read_config_dword(k8_northbridges[i],
107 0x9c, &w);
108 if (!(w & 1))
109 break;
110 cpu_relax();
111 }
112 }
113 spin_unlock_irqrestore(&gart_lock, flags);
114 if (!flushed)
115 printk("nothing to flush?\n");
116}
117EXPORT_SYMBOL_GPL(k8_flush_garts);
118
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index c69fc43cee7b..acd5816b1a6f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
562 set_kset_name("machinecheck"), 562 set_kset_name("machinecheck"),
563}; 563};
564 564
565static DEFINE_PER_CPU(struct sys_device, device_mce); 565DEFINE_PER_CPU(struct sys_device, device_mce);
566 566
567/* Why are there no generic functions for this? */ 567/* Why are there no generic functions for this? */
568#define ACCESSOR(name, var, start) \ 568#define ACCESSOR(name, var, start) \
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d13b241ad094..335200aa2737 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * (c) 2005 Advanced Micro Devices, Inc. 2 * (c) 2005, 2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
8 * 8 *
9 * Support : jacob.shin@amd.com 9 * Support : jacob.shin@amd.com
10 * 10 *
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. 11 * April 2006
12 * MC4_MISC0 exists per physical processor. 12 * - added support for AMD Family 0x10 processors
13 * 13 *
14 * All MC4_MISCi registers are shared between multi-cores
14 */ 15 */
15 16
16#include <linux/cpu.h> 17#include <linux/cpu.h>
@@ -29,32 +30,45 @@
29#include <asm/percpu.h> 30#include <asm/percpu.h>
30#include <asm/idle.h> 31#include <asm/idle.h>
31 32
32#define PFX "mce_threshold: " 33#define PFX "mce_threshold: "
33#define VERSION "version 1.00.9" 34#define VERSION "version 1.1.1"
34#define NR_BANKS 5 35#define NR_BANKS 6
35#define THRESHOLD_MAX 0xFFF 36#define NR_BLOCKS 9
36#define INT_TYPE_APIC 0x00020000 37#define THRESHOLD_MAX 0xFFF
37#define MASK_VALID_HI 0x80000000 38#define INT_TYPE_APIC 0x00020000
38#define MASK_LVTOFF_HI 0x00F00000 39#define MASK_VALID_HI 0x80000000
39#define MASK_COUNT_EN_HI 0x00080000 40#define MASK_LVTOFF_HI 0x00F00000
40#define MASK_INT_TYPE_HI 0x00060000 41#define MASK_COUNT_EN_HI 0x00080000
41#define MASK_OVERFLOW_HI 0x00010000 42#define MASK_INT_TYPE_HI 0x00060000
43#define MASK_OVERFLOW_HI 0x00010000
42#define MASK_ERR_COUNT_HI 0x00000FFF 44#define MASK_ERR_COUNT_HI 0x00000FFF
43#define MASK_OVERFLOW 0x0001000000000000L 45#define MASK_BLKPTR_LO 0xFF000000
46#define MCG_XBLK_ADDR 0xC0000400
44 47
45struct threshold_bank { 48struct threshold_block {
49 unsigned int block;
50 unsigned int bank;
46 unsigned int cpu; 51 unsigned int cpu;
47 u8 bank; 52 u32 address;
48 u8 interrupt_enable; 53 u16 interrupt_enable;
49 u16 threshold_limit; 54 u16 threshold_limit;
50 struct kobject kobj; 55 struct kobject kobj;
56 struct list_head miscj;
51}; 57};
52 58
53static struct threshold_bank threshold_defaults = { 59/* defaults used early on boot */
60static struct threshold_block threshold_defaults = {
54 .interrupt_enable = 0, 61 .interrupt_enable = 0,
55 .threshold_limit = THRESHOLD_MAX, 62 .threshold_limit = THRESHOLD_MAX,
56}; 63};
57 64
65struct threshold_bank {
66 struct kobject kobj;
67 struct threshold_block *blocks;
68 cpumask_t cpus;
69};
70static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
71
58#ifdef CONFIG_SMP 72#ifdef CONFIG_SMP
59static unsigned char shared_bank[NR_BANKS] = { 73static unsigned char shared_bank[NR_BANKS] = {
60 0, 0, 0, 0, 1 74 0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
68 */ 82 */
69 83
70/* must be called with correct cpu affinity */ 84/* must be called with correct cpu affinity */
71static void threshold_restart_bank(struct threshold_bank *b, 85static void threshold_restart_bank(struct threshold_block *b,
72 int reset, u16 old_limit) 86 int reset, u16 old_limit)
73{ 87{
74 u32 mci_misc_hi, mci_misc_lo; 88 u32 mci_misc_hi, mci_misc_lo;
75 89
76 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 90 rdmsr(b->address, mci_misc_lo, mci_misc_hi);
77 91
78 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 92 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
79 reset = 1; /* limit cannot be lower than err count */ 93 reset = 1; /* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
94 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 108 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
95 109
96 mci_misc_hi |= MASK_COUNT_EN_HI; 110 mci_misc_hi |= MASK_COUNT_EN_HI;
97 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 111 wrmsr(b->address, mci_misc_lo, mci_misc_hi);
98} 112}
99 113
114/* cpu init entry point, called from mce.c with preempt off */
100void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 115void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
101{ 116{
102 int bank; 117 unsigned int bank, block;
103 u32 mci_misc_lo, mci_misc_hi;
104 unsigned int cpu = smp_processor_id(); 118 unsigned int cpu = smp_processor_id();
119 u32 low = 0, high = 0, address = 0;
105 120
106 for (bank = 0; bank < NR_BANKS; ++bank) { 121 for (bank = 0; bank < NR_BANKS; ++bank) {
107 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); 122 for (block = 0; block < NR_BLOCKS; ++block) {
123 if (block == 0)
124 address = MSR_IA32_MC0_MISC + bank * 4;
125 else if (block == 1)
126 address = MCG_XBLK_ADDR
127 + ((low & MASK_BLKPTR_LO) >> 21);
128 else
129 ++address;
130
131 if (rdmsr_safe(address, &low, &high))
132 continue;
108 133
109 /* !valid, !counter present, bios locked */ 134 if (!(high & MASK_VALID_HI)) {
110 if (!(mci_misc_hi & MASK_VALID_HI) || 135 if (block)
111 !(mci_misc_hi & MASK_VALID_HI >> 1) || 136 continue;
112 (mci_misc_hi & MASK_VALID_HI >> 2)) 137 else
113 continue; 138 break;
139 }
114 140
115 per_cpu(bank_map, cpu) |= (1 << bank); 141 if (!(high & MASK_VALID_HI >> 1) ||
142 (high & MASK_VALID_HI >> 2))
143 continue;
116 144
145 if (!block)
146 per_cpu(bank_map, cpu) |= (1 << bank);
117#ifdef CONFIG_SMP 147#ifdef CONFIG_SMP
118 if (shared_bank[bank] && cpu_core_id[cpu]) 148 if (shared_bank[bank] && c->cpu_core_id)
119 continue; 149 break;
120#endif 150#endif
151 high &= ~MASK_LVTOFF_HI;
152 high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
153 wrmsr(address, low, high);
121 154
122 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); 155 setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
123 threshold_defaults.cpu = cpu; 156 THRESHOLD_APIC_VECTOR,
124 threshold_defaults.bank = bank; 157 K8_APIC_EXT_INT_MSG_FIX, 0);
125 threshold_restart_bank(&threshold_defaults, 0, 0); 158
159 threshold_defaults.address = address;
160 threshold_restart_bank(&threshold_defaults, 0, 0);
161 }
126 } 162 }
127} 163}
128 164
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
137 */ 173 */
138asmlinkage void mce_threshold_interrupt(void) 174asmlinkage void mce_threshold_interrupt(void)
139{ 175{
140 int bank; 176 unsigned int bank, block;
141 struct mce m; 177 struct mce m;
178 u32 low = 0, high = 0, address = 0;
142 179
143 ack_APIC_irq(); 180 ack_APIC_irq();
144 exit_idle(); 181 exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
150 187
151 /* assume first bank caused it */ 188 /* assume first bank caused it */
152 for (bank = 0; bank < NR_BANKS; ++bank) { 189 for (bank = 0; bank < NR_BANKS; ++bank) {
153 m.bank = MCE_THRESHOLD_BASE + bank; 190 for (block = 0; block < NR_BLOCKS; ++block) {
154 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); 191 if (block == 0)
192 address = MSR_IA32_MC0_MISC + bank * 4;
193 else if (block == 1)
194 address = MCG_XBLK_ADDR
195 + ((low & MASK_BLKPTR_LO) >> 21);
196 else
197 ++address;
198
199 if (rdmsr_safe(address, &low, &high))
200 continue;
155 201
156 if (m.misc & MASK_OVERFLOW) { 202 if (!(high & MASK_VALID_HI)) {
157 mce_log(&m); 203 if (block)
158 goto out; 204 continue;
205 else
206 break;
207 }
208
209 if (!(high & MASK_VALID_HI >> 1) ||
210 (high & MASK_VALID_HI >> 2))
211 continue;
212
213 if (high & MASK_OVERFLOW_HI) {
214 rdmsrl(address, m.misc);
215 rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
216 m.status);
217 m.bank = K8_MCE_THRESHOLD_BASE
218 + bank * NR_BLOCKS
219 + block;
220 mce_log(&m);
221 goto out;
222 }
159 } 223 }
160 } 224 }
161 out: 225out:
162 irq_exit(); 226 irq_exit();
163} 227}
164 228
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
166 * Sysfs Interface 230 * Sysfs Interface
167 */ 231 */
168 232
169static struct sysdev_class threshold_sysclass = {
170 set_kset_name("threshold"),
171};
172
173static DEFINE_PER_CPU(struct sys_device, device_threshold);
174
175struct threshold_attr { 233struct threshold_attr {
176 struct attribute attr; 234 struct attribute attr;
177 ssize_t(*show) (struct threshold_bank *, char *); 235 ssize_t(*show) (struct threshold_block *, char *);
178 ssize_t(*store) (struct threshold_bank *, const char *, size_t count); 236 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
179}; 237};
180 238
181static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
182
183static cpumask_t affinity_set(unsigned int cpu) 239static cpumask_t affinity_set(unsigned int cpu)
184{ 240{
185 cpumask_t oldmask = current->cpus_allowed; 241 cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
194 set_cpus_allowed(current, oldmask); 250 set_cpus_allowed(current, oldmask);
195} 251}
196 252
197#define SHOW_FIELDS(name) \ 253#define SHOW_FIELDS(name) \
198 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ 254static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
199 { \ 255{ \
200 return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 256 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
201 } 257}
202SHOW_FIELDS(interrupt_enable) 258SHOW_FIELDS(interrupt_enable)
203SHOW_FIELDS(threshold_limit) 259SHOW_FIELDS(threshold_limit)
204 260
205static ssize_t store_interrupt_enable(struct threshold_bank *b, 261static ssize_t store_interrupt_enable(struct threshold_block *b,
206 const char *buf, size_t count) 262 const char *buf, size_t count)
207{ 263{
208 char *end; 264 char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
219 return end - buf; 275 return end - buf;
220} 276}
221 277
222static ssize_t store_threshold_limit(struct threshold_bank *b, 278static ssize_t store_threshold_limit(struct threshold_block *b,
223 const char *buf, size_t count) 279 const char *buf, size_t count)
224{ 280{
225 char *end; 281 char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
242 return end - buf; 298 return end - buf;
243} 299}
244 300
245static ssize_t show_error_count(struct threshold_bank *b, char *buf) 301static ssize_t show_error_count(struct threshold_block *b, char *buf)
246{ 302{
247 u32 high, low; 303 u32 high, low;
248 cpumask_t oldmask; 304 cpumask_t oldmask;
249 oldmask = affinity_set(b->cpu); 305 oldmask = affinity_set(b->cpu);
250 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ 306 rdmsr(b->address, low, high);
251 affinity_restore(oldmask); 307 affinity_restore(oldmask);
252 return sprintf(buf, "%x\n", 308 return sprintf(buf, "%x\n",
253 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 309 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
254} 310}
255 311
256static ssize_t store_error_count(struct threshold_bank *b, 312static ssize_t store_error_count(struct threshold_block *b,
257 const char *buf, size_t count) 313 const char *buf, size_t count)
258{ 314{
259 cpumask_t oldmask; 315 cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
269 .store = _store, \ 325 .store = _store, \
270}; 326};
271 327
272#define ATTR_FIELDS(name) \ 328#define RW_ATTR(name) \
273 static struct threshold_attr name = \ 329static struct threshold_attr name = \
274 THRESHOLD_ATTR(name, 0644, show_## name, store_## name) 330 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
275 331
276ATTR_FIELDS(interrupt_enable); 332RW_ATTR(interrupt_enable);
277ATTR_FIELDS(threshold_limit); 333RW_ATTR(threshold_limit);
278ATTR_FIELDS(error_count); 334RW_ATTR(error_count);
279 335
280static struct attribute *default_attrs[] = { 336static struct attribute *default_attrs[] = {
281 &interrupt_enable.attr, 337 &interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
284 NULL 340 NULL
285}; 341};
286 342
287#define to_bank(k) container_of(k,struct threshold_bank,kobj) 343#define to_block(k) container_of(k, struct threshold_block, kobj)
288#define to_attr(a) container_of(a,struct threshold_attr,attr) 344#define to_attr(a) container_of(a, struct threshold_attr, attr)
289 345
290static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 346static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
291{ 347{
292 struct threshold_bank *b = to_bank(kobj); 348 struct threshold_block *b = to_block(kobj);
293 struct threshold_attr *a = to_attr(attr); 349 struct threshold_attr *a = to_attr(attr);
294 ssize_t ret; 350 ssize_t ret;
295 ret = a->show ? a->show(b, buf) : -EIO; 351 ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
299static ssize_t store(struct kobject *kobj, struct attribute *attr, 355static ssize_t store(struct kobject *kobj, struct attribute *attr,
300 const char *buf, size_t count) 356 const char *buf, size_t count)
301{ 357{
302 struct threshold_bank *b = to_bank(kobj); 358 struct threshold_block *b = to_block(kobj);
303 struct threshold_attr *a = to_attr(attr); 359 struct threshold_attr *a = to_attr(attr);
304 ssize_t ret; 360 ssize_t ret;
305 ret = a->store ? a->store(b, buf, count) : -EIO; 361 ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
316 .default_attrs = default_attrs, 372 .default_attrs = default_attrs,
317}; 373};
318 374
375static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
376 unsigned int bank,
377 unsigned int block,
378 u32 address)
379{
380 int err;
381 u32 low, high;
382 struct threshold_block *b = NULL;
383
384 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
385 return 0;
386
387 if (rdmsr_safe(address, &low, &high))
388 goto recurse;
389
390 if (!(high & MASK_VALID_HI)) {
391 if (block)
392 goto recurse;
393 else
394 return 0;
395 }
396
397 if (!(high & MASK_VALID_HI >> 1) ||
398 (high & MASK_VALID_HI >> 2))
399 goto recurse;
400
401 b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
402 if (!b)
403 return -ENOMEM;
404 memset(b, 0, sizeof(struct threshold_block));
405
406 b->block = block;
407 b->bank = bank;
408 b->cpu = cpu;
409 b->address = address;
410 b->interrupt_enable = 0;
411 b->threshold_limit = THRESHOLD_MAX;
412
413 INIT_LIST_HEAD(&b->miscj);
414
415 if (per_cpu(threshold_banks, cpu)[bank]->blocks)
416 list_add(&b->miscj,
417 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
418 else
419 per_cpu(threshold_banks, cpu)[bank]->blocks = b;
420
421 kobject_set_name(&b->kobj, "misc%i", block);
422 b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
423 b->kobj.ktype = &threshold_ktype;
424 err = kobject_register(&b->kobj);
425 if (err)
426 goto out_free;
427recurse:
428 if (!block) {
429 address = (low & MASK_BLKPTR_LO) >> 21;
430 if (!address)
431 return 0;
432 address += MCG_XBLK_ADDR;
433 } else
434 ++address;
435
436 err = allocate_threshold_blocks(cpu, bank, ++block, address);
437 if (err)
438 goto out_free;
439
440 return err;
441
442out_free:
443 if (b) {
444 kobject_unregister(&b->kobj);
445 kfree(b);
446 }
447 return err;
448}
449
319/* symlinks sibling shared banks to first core. first core owns dir/files. */ 450/* symlinks sibling shared banks to first core. first core owns dir/files. */
320static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) 451static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
321{ 452{
322 int err = 0; 453 int i, err = 0;
323 struct threshold_bank *b = NULL; 454 struct threshold_bank *b = NULL;
455 cpumask_t oldmask = CPU_MASK_NONE;
456 char name[32];
457
458 sprintf(name, "threshold_bank%i", bank);
324 459
325#ifdef CONFIG_SMP 460#ifdef CONFIG_SMP
326 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ 461 if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
327 char name[16]; 462 i = first_cpu(cpu_core_map[cpu]);
328 unsigned lcpu = first_cpu(cpu_core_map[cpu]); 463
329 if (cpu_core_id[lcpu]) 464 /* first core not up yet */
330 goto out; /* first core not up yet */ 465 if (cpu_data[i].cpu_core_id)
466 goto out;
467
468 /* already linked */
469 if (per_cpu(threshold_banks, cpu)[bank])
470 goto out;
471
472 b = per_cpu(threshold_banks, i)[bank];
331 473
332 b = per_cpu(threshold_banks, lcpu)[bank];
333 if (!b) 474 if (!b)
334 goto out; 475 goto out;
335 sprintf(name, "bank%i", bank); 476
336 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, 477 err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
337 &b->kobj, name); 478 &b->kobj, name);
338 if (err) 479 if (err)
339 goto out; 480 goto out;
481
482 b->cpus = cpu_core_map[cpu];
340 per_cpu(threshold_banks, cpu)[bank] = b; 483 per_cpu(threshold_banks, cpu)[bank] = b;
341 goto out; 484 goto out;
342 } 485 }
343#endif 486#endif
344 487
345 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); 488 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
346 if (!b) { 489 if (!b) {
347 err = -ENOMEM; 490 err = -ENOMEM;
348 goto out; 491 goto out;
349 } 492 }
350 memset(b, 0, sizeof(struct threshold_bank)); 493 memset(b, 0, sizeof(struct threshold_bank));
351 494
352 b->cpu = cpu; 495 kobject_set_name(&b->kobj, "threshold_bank%i", bank);
353 b->bank = bank; 496 b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
354 b->interrupt_enable = 0; 497#ifndef CONFIG_SMP
355 b->threshold_limit = THRESHOLD_MAX; 498 b->cpus = CPU_MASK_ALL;
356 kobject_set_name(&b->kobj, "bank%i", bank); 499#else
357 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; 500 b->cpus = cpu_core_map[cpu];
358 b->kobj.ktype = &threshold_ktype; 501#endif
359
360 err = kobject_register(&b->kobj); 502 err = kobject_register(&b->kobj);
361 if (err) { 503 if (err)
362 kfree(b); 504 goto out_free;
363 goto out; 505
364 }
365 per_cpu(threshold_banks, cpu)[bank] = b; 506 per_cpu(threshold_banks, cpu)[bank] = b;
366 out: 507
508 oldmask = affinity_set(cpu);
509 err = allocate_threshold_blocks(cpu, bank, 0,
510 MSR_IA32_MC0_MISC + bank * 4);
511 affinity_restore(oldmask);
512
513 if (err)
514 goto out_free;
515
516 for_each_cpu_mask(i, b->cpus) {
517 if (i == cpu)
518 continue;
519
520 err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
521 &b->kobj, name);
522 if (err)
523 goto out;
524
525 per_cpu(threshold_banks, i)[bank] = b;
526 }
527
528 goto out;
529
530out_free:
531 per_cpu(threshold_banks, cpu)[bank] = NULL;
532 kfree(b);
533out:
367 return err; 534 return err;
368} 535}
369 536
370/* create dir/files for all valid threshold banks */ 537/* create dir/files for all valid threshold banks */
371static __cpuinit int threshold_create_device(unsigned int cpu) 538static __cpuinit int threshold_create_device(unsigned int cpu)
372{ 539{
373 int bank; 540 unsigned int bank;
374 int err = 0; 541 int err = 0;
375 542
376 per_cpu(device_threshold, cpu).id = cpu;
377 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
378 err = sysdev_register(&per_cpu(device_threshold, cpu));
379 if (err)
380 goto out;
381
382 for (bank = 0; bank < NR_BANKS; ++bank) { 543 for (bank = 0; bank < NR_BANKS; ++bank) {
383 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 544 if (!(per_cpu(bank_map, cpu) & 1 << bank))
384 continue; 545 continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
386 if (err) 547 if (err)
387 goto out; 548 goto out;
388 } 549 }
389 out: 550out:
390 return err; 551 return err;
391} 552}
392 553
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
397 * of shared sysfs dir/files, and rest of the cores will be symlinked to it. 558 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
398 */ 559 */
399 560
400/* cpu hotplug call removes all symlinks before first core dies */ 561static __cpuinit void deallocate_threshold_block(unsigned int cpu,
562 unsigned int bank)
563{
564 struct threshold_block *pos = NULL;
565 struct threshold_block *tmp = NULL;
566 struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
567
568 if (!head)
569 return;
570
571 list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
572 kobject_unregister(&pos->kobj);
573 list_del(&pos->miscj);
574 kfree(pos);
575 }
576
577 kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
578 per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
579}
580
401static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) 581static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
402{ 582{
583 int i = 0;
403 struct threshold_bank *b; 584 struct threshold_bank *b;
404 char name[16]; 585 char name[32];
405 586
406 b = per_cpu(threshold_banks, cpu)[bank]; 587 b = per_cpu(threshold_banks, cpu)[bank];
588
407 if (!b) 589 if (!b)
408 return; 590 return;
409 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { 591
410 sprintf(name, "bank%i", bank); 592 if (!b->blocks)
411 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); 593 goto free_out;
412 per_cpu(threshold_banks, cpu)[bank] = NULL; 594
413 } else { 595 sprintf(name, "threshold_bank%i", bank);
414 kobject_unregister(&b->kobj); 596
415 kfree(per_cpu(threshold_banks, cpu)[bank]); 597 /* sibling symlink */
598 if (shared_bank[bank] && b->blocks->cpu != cpu) {
599 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
600 per_cpu(threshold_banks, i)[bank] = NULL;
601 return;
602 }
603
604 /* remove all sibling symlinks before unregistering */
605 for_each_cpu_mask(i, b->cpus) {
606 if (i == cpu)
607 continue;
608
609 sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
610 per_cpu(threshold_banks, i)[bank] = NULL;
416 } 611 }
612
613 deallocate_threshold_block(cpu, bank);
614
615free_out:
616 kobject_unregister(&b->kobj);
617 kfree(b);
618 per_cpu(threshold_banks, cpu)[bank] = NULL;
417} 619}
418 620
419static __cpuinit void threshold_remove_device(unsigned int cpu) 621static __cpuinit void threshold_remove_device(unsigned int cpu)
420{ 622{
421 int bank; 623 unsigned int bank;
422 624
423 for (bank = 0; bank < NR_BANKS; ++bank) { 625 for (bank = 0; bank < NR_BANKS; ++bank) {
424 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 626 if (!(per_cpu(bank_map, cpu) & 1 << bank))
425 continue; 627 continue;
426 threshold_remove_bank(cpu, bank); 628 threshold_remove_bank(cpu, bank);
427 } 629 }
428 sysdev_unregister(&per_cpu(device_threshold, cpu));
429} 630}
430 631
431/* link all existing siblings when first core comes up */
432static __cpuinit int threshold_create_symlinks(unsigned int cpu)
433{
434 int bank, err = 0;
435 unsigned int lcpu = 0;
436
437 if (cpu_core_id[cpu])
438 return 0;
439 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
440 if (lcpu == cpu)
441 continue;
442 for (bank = 0; bank < NR_BANKS; ++bank) {
443 if (!(per_cpu(bank_map, cpu) & 1 << bank))
444 continue;
445 if (!shared_bank[bank])
446 continue;
447 err = threshold_create_bank(lcpu, bank);
448 }
449 }
450 return err;
451}
452
453/* remove all symlinks before first core dies. */
454static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
455{
456 int bank;
457 unsigned int lcpu = 0;
458 if (cpu_core_id[cpu])
459 return;
460 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
461 if (lcpu == cpu)
462 continue;
463 for (bank = 0; bank < NR_BANKS; ++bank) {
464 if (!(per_cpu(bank_map, cpu) & 1 << bank))
465 continue;
466 if (!shared_bank[bank])
467 continue;
468 threshold_remove_bank(lcpu, bank);
469 }
470 }
471}
472#else /* !CONFIG_HOTPLUG_CPU */ 632#else /* !CONFIG_HOTPLUG_CPU */
473static __cpuinit void threshold_create_symlinks(unsigned int cpu)
474{
475}
476static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
477{
478}
479static void threshold_remove_device(unsigned int cpu) 633static void threshold_remove_device(unsigned int cpu)
480{ 634{
481} 635}
482#endif 636#endif
483 637
484/* get notified when a cpu comes on/off */ 638/* get notified when a cpu comes on/off */
485static int threshold_cpu_callback(struct notifier_block *nfb, 639static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu) 640 unsigned long action, void *hcpu)
487{ 641{
488 /* cpu was unsigned int to begin with */ 642 /* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
494 switch (action) { 648 switch (action) {
495 case CPU_ONLINE: 649 case CPU_ONLINE:
496 threshold_create_device(cpu); 650 threshold_create_device(cpu);
497 threshold_create_symlinks(cpu);
498 break;
499 case CPU_DOWN_PREPARE:
500 threshold_remove_symlinks(cpu);
501 break;
502 case CPU_DOWN_FAILED:
503 threshold_create_symlinks(cpu);
504 break; 651 break;
505 case CPU_DEAD: 652 case CPU_DEAD:
506 threshold_remove_device(cpu); 653 threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
512 return NOTIFY_OK; 659 return NOTIFY_OK;
513} 660}
514 661
515static struct notifier_block threshold_cpu_notifier = { 662static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
516 .notifier_call = threshold_cpu_callback, 663 .notifier_call = threshold_cpu_callback,
517}; 664};
518 665
519static __init int threshold_init_device(void) 666static __init int threshold_init_device(void)
520{ 667{
521 int err; 668 unsigned lcpu = 0;
522 int lcpu = 0;
523
524 err = sysdev_class_register(&threshold_sysclass);
525 if (err)
526 goto out;
527 669
528 /* to hit CPUs online before the notifier is up */ 670 /* to hit CPUs online before the notifier is up */
529 for_each_online_cpu(lcpu) { 671 for_each_online_cpu(lcpu) {
530 err = threshold_create_device(lcpu); 672 int err = threshold_create_device(lcpu);
531 if (err) 673 if (err)
532 goto out; 674 return err;
533 } 675 }
534 register_cpu_notifier(&threshold_cpu_notifier); 676 register_cpu_notifier(&threshold_cpu_notifier);
535 677 return 0;
536 out:
537 return err;
538} 678}
539 679
540device_initcall(threshold_init_device); 680device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index bac195c74bcc..9d0958ff547f 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
145 return -ENOSYS; 145 return -ENOSYS;
146} 146}
147 147
148extern void apply_alternatives(void *start, void *end);
149
150int module_finalize(const Elf_Ehdr *hdr, 148int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 149 const Elf_Shdr *sechdrs,
152 struct module *me) 150 struct module *me)
153{ 151{
154 const Elf_Shdr *s; 152 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
155 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 153 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
156 154
157 /* look for .altinstructions to patch */ 155 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
158 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 156 if (!strcmp(".text", secstrings + s->sh_name))
159 void *seg; 157 text = s;
160 if (strcmp(".altinstructions", secstrings + s->sh_name)) 158 if (!strcmp(".altinstructions", secstrings + s->sh_name))
161 continue; 159 alt = s;
162 seg = (void *)s->sh_addr; 160 if (!strcmp(".smp_locks", secstrings + s->sh_name))
163 apply_alternatives(seg, seg + s->sh_size); 161 locks= s;
164 } 162 }
163
164 if (alt) {
165 /* patch .altinstructions */
166 void *aseg = (void *)alt->sh_addr;
167 apply_alternatives(aseg, aseg + alt->sh_size);
168 }
169 if (locks && text) {
170 void *lseg = (void *)locks->sh_addr;
171 void *tseg = (void *)text->sh_addr;
172 alternatives_smp_module_add(me, me->name,
173 lseg, lseg + locks->sh_size,
174 tseg, tseg + text->sh_size);
175 }
165 return 0; 176 return 0;
166} 177}
167 178
168void module_arch_cleanup(struct module *mod) 179void module_arch_cleanup(struct module *mod)
169{ 180{
181 alternatives_smp_module_del(mod);
170} 182}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e6357fe0ec3..399489c93132 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -15,11 +15,7 @@
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/bootmem.h>
19#include <linux/smp_lock.h>
20#include <linux/interrupt.h> 18#include <linux/interrupt.h>
21#include <linux/mc146818rtc.h>
22#include <linux/kernel_stat.h>
23#include <linux/module.h> 19#include <linux/module.h>
24#include <linux/sysdev.h> 20#include <linux/sysdev.h>
25#include <linux/nmi.h> 21#include <linux/nmi.h>
@@ -27,14 +23,11 @@
27#include <linux/kprobes.h> 23#include <linux/kprobes.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/mtrr.h>
31#include <asm/mpspec.h>
32#include <asm/nmi.h> 26#include <asm/nmi.h>
33#include <asm/msr.h>
34#include <asm/proto.h> 27#include <asm/proto.h>
35#include <asm/kdebug.h> 28#include <asm/kdebug.h>
36#include <asm/local.h>
37#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
38 31
39/* 32/*
40 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 33 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
74#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 67#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
75#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 68#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
76 69
70#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
71#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
72
77#define MSR_P4_MISC_ENABLE 0x1A0 73#define MSR_P4_MISC_ENABLE 0x1A0
78#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 74#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
79#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) 75#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
105 case X86_VENDOR_AMD: 101 case X86_VENDOR_AMD:
106 return boot_cpu_data.x86 == 15; 102 return boot_cpu_data.x86 == 15;
107 case X86_VENDOR_INTEL: 103 case X86_VENDOR_INTEL:
108 return boot_cpu_data.x86 == 15; 104 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
105 return 1;
106 else
107 return (boot_cpu_data.x86 == 15);
109 } 108 }
110 return 0; 109 return 0;
111} 110}
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
211 210
212__setup("nmi_watchdog=", setup_nmi_watchdog); 211__setup("nmi_watchdog=", setup_nmi_watchdog);
213 212
213static void disable_intel_arch_watchdog(void);
214
214static void disable_lapic_nmi_watchdog(void) 215static void disable_lapic_nmi_watchdog(void)
215{ 216{
216 if (nmi_active <= 0) 217 if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
223 if (boot_cpu_data.x86 == 15) { 224 if (boot_cpu_data.x86 == 15) {
224 wrmsr(MSR_P4_IQ_CCCR0, 0, 0); 225 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
225 wrmsr(MSR_P4_CRU_ESCR0, 0, 0); 226 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
227 } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
228 disable_intel_arch_watchdog();
226 } 229 }
227 break; 230 break;
228 } 231 }
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
375 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 378 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
376} 379}
377 380
381static void disable_intel_arch_watchdog(void)
382{
383 unsigned ebx;
384
385 /*
386 * Check whether the Architectural PerfMon supports
387 * Unhalted Core Cycles Event or not.
388 * NOTE: Corresponding bit = 0 in ebp indicates event present.
389 */
390 ebx = cpuid_ebx(10);
391 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
392 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
393}
394
395static int setup_intel_arch_watchdog(void)
396{
397 unsigned int evntsel;
398 unsigned ebx;
399
400 /*
401 * Check whether the Architectural PerfMon supports
402 * Unhalted Core Cycles Event or not.
403 * NOTE: Corresponding bit = 0 in ebp indicates event present.
404 */
405 ebx = cpuid_ebx(10);
406 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
407 return 0;
408
409 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
410
411 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
412 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
413
414 evntsel = ARCH_PERFMON_EVENTSEL_INT
415 | ARCH_PERFMON_EVENTSEL_OS
416 | ARCH_PERFMON_EVENTSEL_USR
417 | ARCH_PERFMON_NMI_EVENT_SEL
418 | ARCH_PERFMON_NMI_EVENT_UMASK;
419
420 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
421 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
422 apic_write(APIC_LVTPC, APIC_DM_NMI);
423 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
424 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
425 return 1;
426}
427
378 428
379static int setup_p4_watchdog(void) 429static int setup_p4_watchdog(void)
380{ 430{
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
428 setup_k7_watchdog(); 478 setup_k7_watchdog();
429 break; 479 break;
430 case X86_VENDOR_INTEL: 480 case X86_VENDOR_INTEL:
431 if (boot_cpu_data.x86 != 15) 481 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
432 return; 482 if (!setup_intel_arch_watchdog())
433 if (!setup_p4_watchdog()) 483 return;
484 } else if (boot_cpu_data.x86 == 15) {
485 if (!setup_p4_watchdog())
486 return;
487 } else {
434 return; 488 return;
489 }
490
435 break; 491 break;
436 492
437 default: 493 default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
516 */ 572 */
517 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 573 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
518 apic_write(APIC_LVTPC, APIC_DM_NMI); 574 apic_write(APIC_LVTPC, APIC_DM_NMI);
519 } 575 } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
576 /*
577 * For Intel based architectural perfmon
578 * - LVTPC is masked on interrupt and must be
579 * unmasked by the LVTPC handler.
580 */
581 apic_write(APIC_LVTPC, APIC_DM_NMI);
582 }
520 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); 583 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
521 } 584 }
522} 585}
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644
index 000000000000..d91cb843f54d
--- /dev/null
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -0,0 +1,1018 @@
1/*
2 * Derived from arch/powerpc/kernel/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/spinlock.h>
29#include <linux/string.h>
30#include <linux/dma-mapping.h>
31#include <linux/init.h>
32#include <linux/bitops.h>
33#include <linux/pci_ids.h>
34#include <linux/pci.h>
35#include <linux/delay.h>
36#include <asm/proto.h>
37#include <asm/calgary.h>
38#include <asm/tce.h>
39#include <asm/pci-direct.h>
40#include <asm/system.h>
41#include <asm/dma.h>
42
43#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
44#define PCI_VENDOR_DEVICE_ID_CALGARY \
45 (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
46
47/* we need these for register space address calculation */
48#define START_ADDRESS 0xfe000000
49#define CHASSIS_BASE 0
50#define ONE_BASED_CHASSIS_NUM 1
51
52/* register offsets inside the host bridge space */
53#define PHB_CSR_OFFSET 0x0110
54#define PHB_PLSSR_OFFSET 0x0120
55#define PHB_CONFIG_RW_OFFSET 0x0160
56#define PHB_IOBASE_BAR_LOW 0x0170
57#define PHB_IOBASE_BAR_HIGH 0x0180
58#define PHB_MEM_1_LOW 0x0190
59#define PHB_MEM_1_HIGH 0x01A0
60#define PHB_IO_ADDR_SIZE 0x01B0
61#define PHB_MEM_1_SIZE 0x01C0
62#define PHB_MEM_ST_OFFSET 0x01D0
63#define PHB_AER_OFFSET 0x0200
64#define PHB_CONFIG_0_HIGH 0x0220
65#define PHB_CONFIG_0_LOW 0x0230
66#define PHB_CONFIG_0_END 0x0240
67#define PHB_MEM_2_LOW 0x02B0
68#define PHB_MEM_2_HIGH 0x02C0
69#define PHB_MEM_2_SIZE_HIGH 0x02D0
70#define PHB_MEM_2_SIZE_LOW 0x02E0
71#define PHB_DOSHOLE_OFFSET 0x08E0
72
73/* PHB_CONFIG_RW */
74#define PHB_TCE_ENABLE 0x20000000
75#define PHB_SLOT_DISABLE 0x1C000000
76#define PHB_DAC_DISABLE 0x01000000
77#define PHB_MEM2_ENABLE 0x00400000
78#define PHB_MCSR_ENABLE 0x00100000
79/* TAR (Table Address Register) */
80#define TAR_SW_BITS 0x0000ffffffff800fUL
81#define TAR_VALID 0x0000000000000008UL
82/* CSR (Channel/DMA Status Register) */
83#define CSR_AGENT_MASK 0xffe0ffff
84
85#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
86#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
87#define PHBS_PER_CALGARY 4
88
89/* register offsets in Calgary's internal register space */
90static const unsigned long tar_offsets[] = {
91 0x0580 /* TAR0 */,
92 0x0588 /* TAR1 */,
93 0x0590 /* TAR2 */,
94 0x0598 /* TAR3 */
95};
96
97static const unsigned long split_queue_offsets[] = {
98 0x4870 /* SPLIT QUEUE 0 */,
99 0x5870 /* SPLIT QUEUE 1 */,
100 0x6870 /* SPLIT QUEUE 2 */,
101 0x7870 /* SPLIT QUEUE 3 */
102};
103
104static const unsigned long phb_offsets[] = {
105 0x8000 /* PHB0 */,
106 0x9000 /* PHB1 */,
107 0xA000 /* PHB2 */,
108 0xB000 /* PHB3 */
109};
110
111void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
112unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
113static int translate_empty_slots __read_mostly = 0;
114static int calgary_detected __read_mostly = 0;
115
116/*
117 * the bitmap of PHBs the user requested that we disable
118 * translation on.
119 */
120static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
121
122static void tce_cache_blast(struct iommu_table *tbl);
123
124/* enable this to stress test the chip's TCE cache */
125#ifdef CONFIG_IOMMU_DEBUG
126static inline void tce_cache_blast_stress(struct iommu_table *tbl)
127{
128 tce_cache_blast(tbl);
129}
130#else
131static inline void tce_cache_blast_stress(struct iommu_table *tbl)
132{
133}
134#endif /* BLAST_TCE_CACHE_ON_UNMAP */
135
136static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
137{
138 unsigned int npages;
139
140 npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
141 npages >>= PAGE_SHIFT;
142
143 return npages;
144}
145
146static inline int translate_phb(struct pci_dev* dev)
147{
148 int disabled = test_bit(dev->bus->number, translation_disabled);
149 return !disabled;
150}
151
152static void iommu_range_reserve(struct iommu_table *tbl,
153 unsigned long start_addr, unsigned int npages)
154{
155 unsigned long index;
156 unsigned long end;
157
158 index = start_addr >> PAGE_SHIFT;
159
160 /* bail out if we're asked to reserve a region we don't cover */
161 if (index >= tbl->it_size)
162 return;
163
164 end = index + npages;
165 if (end > tbl->it_size) /* don't go off the table */
166 end = tbl->it_size;
167
168 while (index < end) {
169 if (test_bit(index, tbl->it_map))
170 printk(KERN_ERR "Calgary: entry already allocated at "
171 "0x%lx tbl %p dma 0x%lx npages %u\n",
172 index, tbl, start_addr, npages);
173 ++index;
174 }
175 set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
176}
177
178static unsigned long iommu_range_alloc(struct iommu_table *tbl,
179 unsigned int npages)
180{
181 unsigned long offset;
182
183 BUG_ON(npages == 0);
184
185 offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
186 tbl->it_size, npages);
187 if (offset == ~0UL) {
188 tce_cache_blast(tbl);
189 offset = find_next_zero_string(tbl->it_map, 0,
190 tbl->it_size, npages);
191 if (offset == ~0UL) {
192 printk(KERN_WARNING "Calgary: IOMMU full.\n");
193 if (panic_on_overflow)
194 panic("Calgary: fix the allocator.\n");
195 else
196 return bad_dma_address;
197 }
198 }
199
200 set_bit_string(tbl->it_map, offset, npages);
201 tbl->it_hint = offset + npages;
202 BUG_ON(tbl->it_hint > tbl->it_size);
203
204 return offset;
205}
206
207static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
208 unsigned int npages, int direction)
209{
210 unsigned long entry, flags;
211 dma_addr_t ret = bad_dma_address;
212
213 spin_lock_irqsave(&tbl->it_lock, flags);
214
215 entry = iommu_range_alloc(tbl, npages);
216
217 if (unlikely(entry == bad_dma_address))
218 goto error;
219
220 /* set the return dma address */
221 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
222
223 /* put the TCEs in the HW table */
224 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
225 direction);
226
227 spin_unlock_irqrestore(&tbl->it_lock, flags);
228
229 return ret;
230
231error:
232 spin_unlock_irqrestore(&tbl->it_lock, flags);
233 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
234 "iommu %p\n", npages, tbl);
235 return bad_dma_address;
236}
237
238static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
239 unsigned int npages)
240{
241 unsigned long entry;
242 unsigned long i;
243
244 entry = dma_addr >> PAGE_SHIFT;
245
246 BUG_ON(entry + npages > tbl->it_size);
247
248 tce_free(tbl, entry, npages);
249
250 for (i = 0; i < npages; ++i) {
251 if (!test_bit(entry + i, tbl->it_map))
252 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
253 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
254 entry + i, tbl, dma_addr, entry, npages);
255 }
256
257 __clear_bit_string(tbl->it_map, entry, npages);
258
259 tce_cache_blast_stress(tbl);
260}
261
262static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
263 unsigned int npages)
264{
265 unsigned long flags;
266
267 spin_lock_irqsave(&tbl->it_lock, flags);
268
269 __iommu_free(tbl, dma_addr, npages);
270
271 spin_unlock_irqrestore(&tbl->it_lock, flags);
272}
273
274static void __calgary_unmap_sg(struct iommu_table *tbl,
275 struct scatterlist *sglist, int nelems, int direction)
276{
277 while (nelems--) {
278 unsigned int npages;
279 dma_addr_t dma = sglist->dma_address;
280 unsigned int dmalen = sglist->dma_length;
281
282 if (dmalen == 0)
283 break;
284
285 npages = num_dma_pages(dma, dmalen);
286 __iommu_free(tbl, dma, npages);
287 sglist++;
288 }
289}
290
291void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
292 int nelems, int direction)
293{
294 unsigned long flags;
295 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
296
297 if (!translate_phb(to_pci_dev(dev)))
298 return;
299
300 spin_lock_irqsave(&tbl->it_lock, flags);
301
302 __calgary_unmap_sg(tbl, sglist, nelems, direction);
303
304 spin_unlock_irqrestore(&tbl->it_lock, flags);
305}
306
307static int calgary_nontranslate_map_sg(struct device* dev,
308 struct scatterlist *sg, int nelems, int direction)
309{
310 int i;
311
312 for (i = 0; i < nelems; i++ ) {
313 struct scatterlist *s = &sg[i];
314 BUG_ON(!s->page);
315 s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
316 s->dma_length = s->length;
317 }
318 return nelems;
319}
320
321int calgary_map_sg(struct device *dev, struct scatterlist *sg,
322 int nelems, int direction)
323{
324 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
325 unsigned long flags;
326 unsigned long vaddr;
327 unsigned int npages;
328 unsigned long entry;
329 int i;
330
331 if (!translate_phb(to_pci_dev(dev)))
332 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
333
334 spin_lock_irqsave(&tbl->it_lock, flags);
335
336 for (i = 0; i < nelems; i++ ) {
337 struct scatterlist *s = &sg[i];
338 BUG_ON(!s->page);
339
340 vaddr = (unsigned long)page_address(s->page) + s->offset;
341 npages = num_dma_pages(vaddr, s->length);
342
343 entry = iommu_range_alloc(tbl, npages);
344 if (entry == bad_dma_address) {
345 /* makes sure unmap knows to stop */
346 s->dma_length = 0;
347 goto error;
348 }
349
350 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
351
352 /* insert into HW table */
353 tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
354 direction);
355
356 s->dma_length = s->length;
357 }
358
359 spin_unlock_irqrestore(&tbl->it_lock, flags);
360
361 return nelems;
362error:
363 __calgary_unmap_sg(tbl, sg, nelems, direction);
364 for (i = 0; i < nelems; i++) {
365 sg[i].dma_address = bad_dma_address;
366 sg[i].dma_length = 0;
367 }
368 spin_unlock_irqrestore(&tbl->it_lock, flags);
369 return 0;
370}
371
372dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
373 size_t size, int direction)
374{
375 dma_addr_t dma_handle = bad_dma_address;
376 unsigned long uaddr;
377 unsigned int npages;
378 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
379
380 uaddr = (unsigned long)vaddr;
381 npages = num_dma_pages(uaddr, size);
382
383 if (translate_phb(to_pci_dev(dev)))
384 dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
385 else
386 dma_handle = virt_to_bus(vaddr);
387
388 return dma_handle;
389}
390
391void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
392 size_t size, int direction)
393{
394 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
395 unsigned int npages;
396
397 if (!translate_phb(to_pci_dev(dev)))
398 return;
399
400 npages = num_dma_pages(dma_handle, size);
401 iommu_free(tbl, dma_handle, npages);
402}
403
404void* calgary_alloc_coherent(struct device *dev, size_t size,
405 dma_addr_t *dma_handle, gfp_t flag)
406{
407 void *ret = NULL;
408 dma_addr_t mapping;
409 unsigned int npages, order;
410 struct iommu_table *tbl;
411
412 tbl = to_pci_dev(dev)->bus->self->sysdata;
413
414 size = PAGE_ALIGN(size); /* size rounded up to full pages */
415 npages = size >> PAGE_SHIFT;
416 order = get_order(size);
417
418 /* alloc enough pages (and possibly more) */
419 ret = (void *)__get_free_pages(flag, order);
420 if (!ret)
421 goto error;
422 memset(ret, 0, size);
423
424 if (translate_phb(to_pci_dev(dev))) {
425 /* set up tces to cover the allocated range */
426 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
427 if (mapping == bad_dma_address)
428 goto free;
429
430 *dma_handle = mapping;
431 } else /* non translated slot */
432 *dma_handle = virt_to_bus(ret);
433
434 return ret;
435
436free:
437 free_pages((unsigned long)ret, get_order(size));
438 ret = NULL;
439error:
440 return ret;
441}
442
443static struct dma_mapping_ops calgary_dma_ops = {
444 .alloc_coherent = calgary_alloc_coherent,
445 .map_single = calgary_map_single,
446 .unmap_single = calgary_unmap_single,
447 .map_sg = calgary_map_sg,
448 .unmap_sg = calgary_unmap_sg,
449};
450
451static inline int busno_to_phbid(unsigned char num)
452{
453 return bus_to_phb(num) % PHBS_PER_CALGARY;
454}
455
456static inline unsigned long split_queue_offset(unsigned char num)
457{
458 size_t idx = busno_to_phbid(num);
459
460 return split_queue_offsets[idx];
461}
462
463static inline unsigned long tar_offset(unsigned char num)
464{
465 size_t idx = busno_to_phbid(num);
466
467 return tar_offsets[idx];
468}
469
470static inline unsigned long phb_offset(unsigned char num)
471{
472 size_t idx = busno_to_phbid(num);
473
474 return phb_offsets[idx];
475}
476
477static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
478{
479 unsigned long target = ((unsigned long)bar) | offset;
480 return (void __iomem*)target;
481}
482
483static void tce_cache_blast(struct iommu_table *tbl)
484{
485 u64 val;
486 u32 aer;
487 int i = 0;
488 void __iomem *bbar = tbl->bbar;
489 void __iomem *target;
490
491 /* disable arbitration on the bus */
492 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
493 aer = readl(target);
494 writel(0, target);
495
496 /* read plssr to ensure it got there */
497 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
498 val = readl(target);
499
500 /* poll split queues until all DMA activity is done */
501 target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
502 do {
503 val = readq(target);
504 i++;
505 } while ((val & 0xff) != 0xff && i < 100);
506 if (i == 100)
507 printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
508 "continuing anyway\n");
509
510 /* invalidate TCE cache */
511 target = calgary_reg(bbar, tar_offset(tbl->it_busno));
512 writeq(tbl->tar_val, target);
513
514 /* enable arbitration */
515 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
516 writel(aer, target);
517 (void)readl(target); /* flush */
518}
519
520static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
521 u64 limit)
522{
523 unsigned int numpages;
524
525 limit = limit | 0xfffff;
526 limit++;
527
528 numpages = ((limit - start) >> PAGE_SHIFT);
529 iommu_range_reserve(dev->sysdata, start, numpages);
530}
531
532static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
533{
534 void __iomem *target;
535 u64 low, high, sizelow;
536 u64 start, limit;
537 struct iommu_table *tbl = dev->sysdata;
538 unsigned char busnum = dev->bus->number;
539 void __iomem *bbar = tbl->bbar;
540
541 /* peripheral MEM_1 region */
542 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
543 low = be32_to_cpu(readl(target));
544 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
545 high = be32_to_cpu(readl(target));
546 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
547 sizelow = be32_to_cpu(readl(target));
548
549 start = (high << 32) | low;
550 limit = sizelow;
551
552 calgary_reserve_mem_region(dev, start, limit);
553}
554
555static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
556{
557 void __iomem *target;
558 u32 val32;
559 u64 low, high, sizelow, sizehigh;
560 u64 start, limit;
561 struct iommu_table *tbl = dev->sysdata;
562 unsigned char busnum = dev->bus->number;
563 void __iomem *bbar = tbl->bbar;
564
565 /* is it enabled? */
566 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
567 val32 = be32_to_cpu(readl(target));
568 if (!(val32 & PHB_MEM2_ENABLE))
569 return;
570
571 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
572 low = be32_to_cpu(readl(target));
573 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
574 high = be32_to_cpu(readl(target));
575 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
576 sizelow = be32_to_cpu(readl(target));
577 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
578 sizehigh = be32_to_cpu(readl(target));
579
580 start = (high << 32) | low;
581 limit = (sizehigh << 32) | sizelow;
582
583 calgary_reserve_mem_region(dev, start, limit);
584}
585
586/*
587 * some regions of the IO address space do not get translated, so we
588 * must not give devices IO addresses in those regions. The regions
589 * are the 640KB-1MB region and the two PCI peripheral memory holes.
590 * Reserve all of them in the IOMMU bitmap to avoid giving them out
591 * later.
592 */
593static void __init calgary_reserve_regions(struct pci_dev *dev)
594{
595 unsigned int npages;
596 void __iomem *bbar;
597 unsigned char busnum;
598 u64 start;
599 struct iommu_table *tbl = dev->sysdata;
600
601 bbar = tbl->bbar;
602 busnum = dev->bus->number;
603
604 /* reserve bad_dma_address in case it's a legal address */
605 iommu_range_reserve(tbl, bad_dma_address, 1);
606
607 /* avoid the BIOS/VGA first 640KB-1MB region */
608 start = (640 * 1024);
609 npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
610 iommu_range_reserve(tbl, start, npages);
611
612 /* reserve the two PCI peripheral memory regions in IO space */
613 calgary_reserve_peripheral_mem_1(dev);
614 calgary_reserve_peripheral_mem_2(dev);
615}
616
617static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
618{
619 u64 val64;
620 u64 table_phys;
621 void __iomem *target;
622 int ret;
623 struct iommu_table *tbl;
624
625 /* build TCE tables for each PHB */
626 ret = build_tce_table(dev, bbar);
627 if (ret)
628 return ret;
629
630 calgary_reserve_regions(dev);
631
632 /* set TARs for each PHB */
633 target = calgary_reg(bbar, tar_offset(dev->bus->number));
634 val64 = be64_to_cpu(readq(target));
635
636 /* zero out all TAR bits under sw control */
637 val64 &= ~TAR_SW_BITS;
638
639 tbl = dev->sysdata;
640 table_phys = (u64)__pa(tbl->it_base);
641 val64 |= table_phys;
642
643 BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
644 val64 |= (u64) specified_table_size;
645
646 tbl->tar_val = cpu_to_be64(val64);
647 writeq(tbl->tar_val, target);
648 readq(target); /* flush */
649
650 return 0;
651}
652
653static void __init calgary_free_tar(struct pci_dev *dev)
654{
655 u64 val64;
656 struct iommu_table *tbl = dev->sysdata;
657 void __iomem *target;
658
659 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
660 val64 = be64_to_cpu(readq(target));
661 val64 &= ~TAR_SW_BITS;
662 writeq(cpu_to_be64(val64), target);
663 readq(target); /* flush */
664
665 kfree(tbl);
666 dev->sysdata = NULL;
667}
668
669static void calgary_watchdog(unsigned long data)
670{
671 struct pci_dev *dev = (struct pci_dev *)data;
672 struct iommu_table *tbl = dev->sysdata;
673 void __iomem *bbar = tbl->bbar;
674 u32 val32;
675 void __iomem *target;
676
677 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
678 val32 = be32_to_cpu(readl(target));
679
680 /* If no error, the agent ID in the CSR is not valid */
681 if (val32 & CSR_AGENT_MASK) {
682 printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
683 "CSR = %#x\n", dev->bus->number, val32);
684 writel(0, target);
685
686 /* Disable bus that caused the error */
687 target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
688 PHB_CONFIG_RW_OFFSET);
689 val32 = be32_to_cpu(readl(target));
690 val32 |= PHB_SLOT_DISABLE;
691 writel(cpu_to_be32(val32), target);
692 readl(target); /* flush */
693 } else {
694 /* Reset the timer */
695 mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
696 }
697}
698
699static void __init calgary_enable_translation(struct pci_dev *dev)
700{
701 u32 val32;
702 unsigned char busnum;
703 void __iomem *target;
704 void __iomem *bbar;
705 struct iommu_table *tbl;
706
707 busnum = dev->bus->number;
708 tbl = dev->sysdata;
709 bbar = tbl->bbar;
710
711 /* enable TCE in PHB Config Register */
712 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
713 val32 = be32_to_cpu(readl(target));
714 val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
715
716 printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
717 printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
718 "bus.\n");
719
720 writel(cpu_to_be32(val32), target);
721 readl(target); /* flush */
722
723 init_timer(&tbl->watchdog_timer);
724 tbl->watchdog_timer.function = &calgary_watchdog;
725 tbl->watchdog_timer.data = (unsigned long)dev;
726 mod_timer(&tbl->watchdog_timer, jiffies);
727}
728
729static void __init calgary_disable_translation(struct pci_dev *dev)
730{
731 u32 val32;
732 unsigned char busnum;
733 void __iomem *target;
734 void __iomem *bbar;
735 struct iommu_table *tbl;
736
737 busnum = dev->bus->number;
738 tbl = dev->sysdata;
739 bbar = tbl->bbar;
740
741 /* disable TCE in PHB Config Register */
742 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
743 val32 = be32_to_cpu(readl(target));
744 val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
745
746 printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
747 writel(cpu_to_be32(val32), target);
748 readl(target); /* flush */
749
750 del_timer_sync(&tbl->watchdog_timer);
751}
752
753static inline unsigned int __init locate_register_space(struct pci_dev *dev)
754{
755 int rionodeid;
756 u32 address;
757
758 rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
759 /*
760 * register space address calculation as follows:
761 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
762 * ChassisBase is always zero for x366/x260/x460
763 * RioNodeId is 2 for first Calgary, 3 for second Calgary
764 */
765 address = START_ADDRESS -
766 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
767 (0x100000) * (rionodeid - CHASSIS_BASE);
768 return address;
769}
770
771static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
772{
773 dev->sysdata = NULL;
774 dev->bus->self = dev;
775
776 return 0;
777}
778
779static int __init calgary_init_one(struct pci_dev *dev)
780{
781 u32 address;
782 void __iomem *bbar;
783 int ret;
784
785 address = locate_register_space(dev);
786 /* map entire 1MB of Calgary config space */
787 bbar = ioremap_nocache(address, 1024 * 1024);
788 if (!bbar) {
789 ret = -ENODATA;
790 goto done;
791 }
792
793 ret = calgary_setup_tar(dev, bbar);
794 if (ret)
795 goto iounmap;
796
797 dev->bus->self = dev;
798 calgary_enable_translation(dev);
799
800 return 0;
801
802iounmap:
803 iounmap(bbar);
804done:
805 return ret;
806}
807
808static int __init calgary_init(void)
809{
810 int i, ret = -ENODEV;
811 struct pci_dev *dev = NULL;
812
813 for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
814 dev = pci_get_device(PCI_VENDOR_ID_IBM,
815 PCI_DEVICE_ID_IBM_CALGARY,
816 dev);
817 if (!dev)
818 break;
819 if (!translate_phb(dev)) {
820 calgary_init_one_nontraslated(dev);
821 continue;
822 }
823 if (!tce_table_kva[i] && !translate_empty_slots) {
824 pci_dev_put(dev);
825 continue;
826 }
827 ret = calgary_init_one(dev);
828 if (ret)
829 goto error;
830 }
831
832 return ret;
833
834error:
835 for (i--; i >= 0; i--) {
836 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
837 PCI_DEVICE_ID_IBM_CALGARY,
838 dev);
839 if (!translate_phb(dev)) {
840 pci_dev_put(dev);
841 continue;
842 }
843 if (!tce_table_kva[i] && !translate_empty_slots)
844 continue;
845 calgary_disable_translation(dev);
846 calgary_free_tar(dev);
847 pci_dev_put(dev);
848 }
849
850 return ret;
851}
852
853static inline int __init determine_tce_table_size(u64 ram)
854{
855 int ret;
856
857 if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
858 return specified_table_size;
859
860 /*
861 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
862 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
863 * larger table size has twice as many entries, so shift the
864 * max ram address by 13 to divide by 8K and then look at the
865 * order of the result to choose between 0-7.
866 */
867 ret = get_order(ram >> 13);
868 if (ret > TCE_TABLE_SIZE_8M)
869 ret = TCE_TABLE_SIZE_8M;
870
871 return ret;
872}
873
874void __init detect_calgary(void)
875{
876 u32 val;
877 int bus, table_idx;
878 void *tbl;
879 int detected = 0;
880
881 /*
882 * if the user specified iommu=off or iommu=soft or we found
883 * another HW IOMMU already, bail out.
884 */
885 if (swiotlb || no_iommu || iommu_detected)
886 return;
887
888 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
889
890 for (bus = 0, table_idx = 0;
891 bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
892 bus++) {
893 BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
894 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
895 continue;
896 if (test_bit(bus, translation_disabled)) {
897 printk(KERN_INFO "Calgary: translation is disabled for "
898 "PHB 0x%x\n", bus);
899 /* skip this phb, don't allocate a tbl for it */
900 tce_table_kva[table_idx] = NULL;
901 table_idx++;
902 continue;
903 }
904 /*
905 * scan the first slot of the PCI bus to see if there
906 * are any devices present
907 */
908 val = read_pci_config(bus, 1, 0, 0);
909 if (val != 0xffffffff || translate_empty_slots) {
910 tbl = alloc_tce_table();
911 if (!tbl)
912 goto cleanup;
913 detected = 1;
914 } else
915 tbl = NULL;
916
917 tce_table_kva[table_idx] = tbl;
918 table_idx++;
919 }
920
921 if (detected) {
922 iommu_detected = 1;
923 calgary_detected = 1;
924 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
925 "TCE table spec is %d.\n", specified_table_size);
926 }
927 return;
928
929cleanup:
930 for (--table_idx; table_idx >= 0; --table_idx)
931 if (tce_table_kva[table_idx])
932 free_tce_table(tce_table_kva[table_idx]);
933}
934
935int __init calgary_iommu_init(void)
936{
937 int ret;
938
939 if (no_iommu || swiotlb)
940 return -ENODEV;
941
942 if (!calgary_detected)
943 return -ENODEV;
944
945 /* ok, we're trying to use Calgary - let's roll */
946 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
947
948 ret = calgary_init();
949 if (ret) {
950 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
951 "falling back to no_iommu\n", ret);
952 if (end_pfn > MAX_DMA32_PFN)
953 printk(KERN_ERR "WARNING more than 4GB of memory, "
954 "32bit PCI may malfunction.\n");
955 return ret;
956 }
957
958 force_iommu = 1;
959 dma_ops = &calgary_dma_ops;
960
961 return 0;
962}
963
964static int __init calgary_parse_options(char *p)
965{
966 unsigned int bridge;
967 size_t len;
968 char* endp;
969
970 while (*p) {
971 if (!strncmp(p, "64k", 3))
972 specified_table_size = TCE_TABLE_SIZE_64K;
973 else if (!strncmp(p, "128k", 4))
974 specified_table_size = TCE_TABLE_SIZE_128K;
975 else if (!strncmp(p, "256k", 4))
976 specified_table_size = TCE_TABLE_SIZE_256K;
977 else if (!strncmp(p, "512k", 4))
978 specified_table_size = TCE_TABLE_SIZE_512K;
979 else if (!strncmp(p, "1M", 2))
980 specified_table_size = TCE_TABLE_SIZE_1M;
981 else if (!strncmp(p, "2M", 2))
982 specified_table_size = TCE_TABLE_SIZE_2M;
983 else if (!strncmp(p, "4M", 2))
984 specified_table_size = TCE_TABLE_SIZE_4M;
985 else if (!strncmp(p, "8M", 2))
986 specified_table_size = TCE_TABLE_SIZE_8M;
987
988 len = strlen("translate_empty_slots");
989 if (!strncmp(p, "translate_empty_slots", len))
990 translate_empty_slots = 1;
991
992 len = strlen("disable");
993 if (!strncmp(p, "disable", len)) {
994 p += len;
995 if (*p == '=')
996 ++p;
997 if (*p == '\0')
998 break;
999 bridge = simple_strtol(p, &endp, 0);
1000 if (p == endp)
1001 break;
1002
1003 if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
1004 printk(KERN_INFO "Calgary: disabling "
1005 "translation for PHB 0x%x\n", bridge);
1006 set_bit(bridge, translation_disabled);
1007 }
1008 }
1009
1010 p = strpbrk(p, ",");
1011 if (!p)
1012 break;
1013
1014 p++; /* skip ',' */
1015 }
1016 return 1;
1017}
1018__setup("calgary=", calgary_parse_options);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index a9275c9557cf..9c44f4f2433d 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h> 11#include <asm/proto.h>
12#include <asm/calgary.h>
12 13
13int iommu_merge __read_mostly = 0; 14int iommu_merge __read_mostly = 0;
14EXPORT_SYMBOL(iommu_merge); 15EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0; 34int force_iommu __read_mostly= 0;
34#endif 35#endif
35 36
37/* Set this to 1 if there is a HW IOMMU in the system */
38int iommu_detected __read_mostly = 0;
39
36/* Dummy device used for NULL arguments (normally ISA). Better would 40/* Dummy device used for NULL arguments (normally ISA). Better would
37 be probably a smaller DMA mask, but this is bug-to-bug compatible 41 be probably a smaller DMA mask, but this is bug-to-bug compatible
38 to i386. */ 42 to i386. */
39struct device fallback_dev = { 43struct device fallback_dev = {
40 .bus_id = "fallback device", 44 .bus_id = "fallback device",
41 .coherent_dma_mask = 0xffffffff, 45 .coherent_dma_mask = DMA_32BIT_MASK,
42 .dma_mask = &fallback_dev.coherent_dma_mask, 46 .dma_mask = &fallback_dev.coherent_dma_mask,
43}; 47};
44 48
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
77 dev = &fallback_dev; 81 dev = &fallback_dev;
78 dma_mask = dev->coherent_dma_mask; 82 dma_mask = dev->coherent_dma_mask;
79 if (dma_mask == 0) 83 if (dma_mask == 0)
80 dma_mask = 0xffffffff; 84 dma_mask = DMA_32BIT_MASK;
81 85
82 /* Don't invoke OOM killer */ 86 /* Don't invoke OOM killer */
83 gfp |= __GFP_NORETRY; 87 gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
90 larger than 16MB and in this case we have a chance of 94 larger than 16MB and in this case we have a chance of
91 finding fitting memory in the next higher zone first. If 95 finding fitting memory in the next higher zone first. If
92 not retry with true GFP_DMA. -AK */ 96 not retry with true GFP_DMA. -AK */
93 if (dma_mask <= 0xffffffff) 97 if (dma_mask <= DMA_32BIT_MASK)
94 gfp |= GFP_DMA32; 98 gfp |= GFP_DMA32;
95 99
96 again: 100 again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
111 115
112 /* Don't use the 16MB ZONE_DMA unless absolutely 116 /* Don't use the 16MB ZONE_DMA unless absolutely
113 needed. It's better to use remapping first. */ 117 needed. It's better to use remapping first. */
114 if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { 118 if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
115 gfp = (gfp & ~GFP_DMA32) | GFP_DMA; 119 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
116 goto again; 120 goto again;
117 } 121 }
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
174 /* Copied from i386. Doesn't make much sense, because it will 178 /* Copied from i386. Doesn't make much sense, because it will
175 only work for pci_alloc_coherent. 179 only work for pci_alloc_coherent.
176 The caller just has to use GFP_DMA in this case. */ 180 The caller just has to use GFP_DMA in this case. */
177 if (mask < 0x00ffffff) 181 if (mask < DMA_24BIT_MASK)
178 return 0; 182 return 0;
179 183
180 /* Tell the device to use SAC when IOMMU force is on. This 184 /* Tell the device to use SAC when IOMMU force is on. This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
189 SAC for these. Assume all masks <= 40 bits are of this 193 SAC for these. Assume all masks <= 40 bits are of this
190 type. Normally this doesn't make any difference, but gives 194 type. Normally this doesn't make any difference, but gives
191 more gentle handling of IOMMU overflow. */ 195 more gentle handling of IOMMU overflow. */
192 if (iommu_sac_force && (mask >= 0xffffffffffULL)) { 196 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
193 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); 197 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
194 return 0; 198 return 0;
195 } 199 }
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
266 swiotlb = 1; 270 swiotlb = 1;
267#endif 271#endif
268 272
269#ifdef CONFIG_GART_IOMMU 273#ifdef CONFIG_IOMMU
270 gart_parse_options(p); 274 gart_parse_options(p);
271#endif 275#endif
272 276
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
276 } 280 }
277 return 1; 281 return 1;
278} 282}
283__setup("iommu=", iommu_setup);
284
285void __init pci_iommu_alloc(void)
286{
287 /*
288 * The order of these functions is important for
289 * fall-back/fail-over reasons
290 */
291#ifdef CONFIG_IOMMU
292 iommu_hole_init();
293#endif
294
295#ifdef CONFIG_CALGARY_IOMMU
296 detect_calgary();
297#endif
298
299#ifdef CONFIG_SWIOTLB
300 pci_swiotlb_init();
301#endif
302}
303
304static int __init pci_iommu_init(void)
305{
306#ifdef CONFIG_CALGARY_IOMMU
307 calgary_iommu_init();
308#endif
309
310#ifdef CONFIG_IOMMU
311 gart_iommu_init();
312#endif
313
314 no_iommu_init();
315 return 0;
316}
317
318/* Must execute after PCI subsystem */
319fs_initcall(pci_iommu_init);
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 82a7c9bfdfa0..4ca674d16b09 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33#include <asm/swiotlb.h> 33#include <asm/swiotlb.h>
34#include <asm/dma.h> 34#include <asm/dma.h>
35#include <asm/k8.h>
35 36
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 37unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 38static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
46 also seen with Qlogic at least). */ 47 also seen with Qlogic at least). */
47int iommu_fullflush = 1; 48int iommu_fullflush = 1;
48 49
49#define MAX_NB 8
50
51/* Allocation bitmap for the remapping area */ 50/* Allocation bitmap for the remapping area */
52static DEFINE_SPINLOCK(iommu_bitmap_lock); 51static DEFINE_SPINLOCK(iommu_bitmap_lock);
53static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ 52static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
63#define to_pages(addr,size) \ 62#define to_pages(addr,size) \
64 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 63 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
65 64
66#define for_all_nb(dev) \
67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69
70static struct pci_dev *northbridges[MAX_NB];
71static u32 northbridge_flush_word[MAX_NB];
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 65#define EMERGENCY_PAGES 32 /* = 128KB */
74 66
75#ifdef CONFIG_AGP 67#ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
93 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); 85 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
94 if (offset == -1) { 86 if (offset == -1) {
95 need_flush = 1; 87 need_flush = 1;
96 offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); 88 offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
97 } 89 }
98 if (offset != -1) { 90 if (offset != -1) {
99 set_bit_string(iommu_gart_bitmap, offset, size); 91 set_bit_string(iommu_gart_bitmap, offset, size);
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
120/* 112/*
121 * Use global flush state to avoid races with multiple flushers. 113 * Use global flush state to avoid races with multiple flushers.
122 */ 114 */
123static void flush_gart(struct device *dev) 115static void flush_gart(void)
124{ 116{
125 unsigned long flags; 117 unsigned long flags;
126 int flushed = 0;
127 int i, max;
128
129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 118 spin_lock_irqsave(&iommu_bitmap_lock, flags);
130 if (need_flush) { 119 if (need_flush) {
131 max = 0; 120 k8_flush_garts();
132 for (i = 0; i < MAX_NB; i++) {
133 if (!northbridges[i])
134 continue;
135 pci_write_config_dword(northbridges[i], 0x9c,
136 northbridge_flush_word[i] | 1);
137 flushed++;
138 max = i;
139 }
140 for (i = 0; i <= max; i++) {
141 u32 w;
142 if (!northbridges[i])
143 continue;
144 /* Make sure the hardware actually executed the flush. */
145 for (;;) {
146 pci_read_config_dword(northbridges[i], 0x9c, &w);
147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
151 }
152 if (!flushed)
153 printk("nothing to flush?\n");
154 need_flush = 0; 121 need_flush = 0;
155 } 122 }
156 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
157} 124}
158 125
159
160
161#ifdef CONFIG_IOMMU_LEAK 126#ifdef CONFIG_IOMMU_LEAK
162 127
163#define SET_LEAK(x) if (iommu_leak_tab) \ 128#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
266 size_t size, int dir) 231 size_t size, int dir)
267{ 232{
268 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 233 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
269 flush_gart(dev); 234 flush_gart();
270 return map; 235 return map;
271} 236}
272 237
@@ -289,6 +254,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
289} 254}
290 255
291/* 256/*
257 * Free a DMA mapping.
258 */
259void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
260 size_t size, int direction)
261{
262 unsigned long iommu_page;
263 int npages;
264 int i;
265
266 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
267 dma_addr >= iommu_bus_base + iommu_size)
268 return;
269 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
270 npages = to_pages(dma_addr, size);
271 for (i = 0; i < npages; i++) {
272 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
273 CLEAR_LEAK(iommu_page + i);
274 }
275 free_iommu(iommu_page, npages);
276}
277
278/*
292 * Wrapper for pci_unmap_single working with scatterlists. 279 * Wrapper for pci_unmap_single working with scatterlists.
293 */ 280 */
294void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 281void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
299 struct scatterlist *s = &sg[i]; 286 struct scatterlist *s = &sg[i];
300 if (!s->dma_length || !s->length) 287 if (!s->dma_length || !s->length)
301 break; 288 break;
302 dma_unmap_single(dev, s->dma_address, s->dma_length, dir); 289 gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
303 } 290 }
304} 291}
305 292
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
329 s->dma_address = addr; 316 s->dma_address = addr;
330 s->dma_length = s->length; 317 s->dma_length = s->length;
331 } 318 }
332 flush_gart(dev); 319 flush_gart();
333 return nents; 320 return nents;
334} 321}
335 322
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
436 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) 423 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
437 goto error; 424 goto error;
438 out++; 425 out++;
439 flush_gart(dev); 426 flush_gart();
440 if (out < nents) 427 if (out < nents)
441 sg[out].dma_length = 0; 428 sg[out].dma_length = 0;
442 return out; 429 return out;
443 430
444error: 431error:
445 flush_gart(NULL); 432 flush_gart();
446 gart_unmap_sg(dev, sg, nents, dir); 433 gart_unmap_sg(dev, sg, nents, dir);
447 /* When it was forced or merged try again in a dumb way */ 434 /* When it was forced or merged try again in a dumb way */
448 if (force_iommu || iommu_merge) { 435 if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
458 return 0; 445 return 0;
459} 446}
460 447
461/*
462 * Free a DMA mapping.
463 */
464void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
465 size_t size, int direction)
466{
467 unsigned long iommu_page;
468 int npages;
469 int i;
470
471 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
472 dma_addr >= iommu_bus_base + iommu_size)
473 return;
474 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
475 npages = to_pages(dma_addr, size);
476 for (i = 0; i < npages; i++) {
477 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
478 CLEAR_LEAK(iommu_page + i);
479 }
480 free_iommu(iommu_page, npages);
481}
482
483static int no_agp; 448static int no_agp;
484 449
485static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 450static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
532 void *gatt; 497 void *gatt;
533 unsigned aper_base, new_aper_base; 498 unsigned aper_base, new_aper_base;
534 unsigned aper_size, gatt_size, new_aper_size; 499 unsigned aper_size, gatt_size, new_aper_size;
535 500 int i;
501
536 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 502 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
537 aper_size = aper_base = info->aper_size = 0; 503 aper_size = aper_base = info->aper_size = 0;
538 for_all_nb(dev) { 504 dev = NULL;
505 for (i = 0; i < num_k8_northbridges; i++) {
506 dev = k8_northbridges[i];
539 new_aper_base = read_aperture(dev, &new_aper_size); 507 new_aper_base = read_aperture(dev, &new_aper_size);
540 if (!new_aper_base) 508 if (!new_aper_base)
541 goto nommu; 509 goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 panic("Cannot allocate GATT table"); 526 panic("Cannot allocate GATT table");
559 memset(gatt, 0, gatt_size); 527 memset(gatt, 0, gatt_size);
560 agp_gatt_table = gatt; 528 agp_gatt_table = gatt;
561 529
562 for_all_nb(dev) { 530 for (i = 0; i < num_k8_northbridges; i++) {
563 u32 ctl; 531 u32 ctl;
564 u32 gatt_reg; 532 u32 gatt_reg;
565 533
534 dev = k8_northbridges[i];
566 gatt_reg = __pa(gatt) >> 12; 535 gatt_reg = __pa(gatt) >> 12;
567 gatt_reg <<= 4; 536 gatt_reg <<= 4;
568 pci_write_config_dword(dev, 0x98, gatt_reg); 537 pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
573 542
574 pci_write_config_dword(dev, 0x90, ctl); 543 pci_write_config_dword(dev, 0x90, ctl);
575 } 544 }
576 flush_gart(NULL); 545 flush_gart();
577 546
578 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 547 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
579 return 0; 548 return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
602 .unmap_sg = gart_unmap_sg, 571 .unmap_sg = gart_unmap_sg,
603}; 572};
604 573
605static int __init pci_iommu_init(void) 574void __init gart_iommu_init(void)
606{ 575{
607 struct agp_kern_info info; 576 struct agp_kern_info info;
608 unsigned long aper_size; 577 unsigned long aper_size;
609 unsigned long iommu_start; 578 unsigned long iommu_start;
610 struct pci_dev *dev;
611 unsigned long scratch; 579 unsigned long scratch;
612 long i; 580 long i;
613 581
582 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
583 printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
584 return;
585 }
586
614#ifndef CONFIG_AGP_AMD64 587#ifndef CONFIG_AGP_AMD64
615 no_agp = 1; 588 no_agp = 1;
616#else 589#else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
622#endif 595#endif
623 596
624 if (swiotlb) 597 if (swiotlb)
625 return -1; 598 return;
599
600 /* Did we detect a different HW IOMMU? */
601 if (iommu_detected && !iommu_aperture)
602 return;
626 603
627 if (no_iommu || 604 if (no_iommu ||
628 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 605 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
634 "but IOMMU not available.\n" 611 "but IOMMU not available.\n"
635 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 612 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
636 } 613 }
637 return -1; 614 return;
638 }
639
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 } 615 }
647 616
648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 617 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
707 for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 676 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
708 iommu_gatt_base[i] = gart_unmapped_entry; 677 iommu_gatt_base[i] = gart_unmapped_entry;
709 678
710 for_all_nb(dev) { 679 flush_gart();
711 u32 flag;
712 int cpu = PCI_SLOT(dev->devfn) - 24;
713 if (cpu >= MAX_NB)
714 continue;
715 northbridges[cpu] = dev;
716 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
717 northbridge_flush_word[cpu] = flag;
718 }
719
720 flush_gart(NULL);
721
722 dma_ops = &gart_dma_ops; 680 dma_ops = &gart_dma_ops;
723
724 return 0;
725} 681}
726 682
727/* Must execute after PCI subsystem */
728fs_initcall(pci_iommu_init);
729
730void gart_parse_options(char *p) 683void gart_parse_options(char *p)
731{ 684{
732 int arg; 685 int arg;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 1f6ecc62061d..c4c3cc36ac5b 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -4,6 +4,8 @@
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/dma-mapping.h>
8
7#include <asm/proto.h> 9#include <asm/proto.h>
8#include <asm/processor.h> 10#include <asm/processor.h>
9#include <asm/dma.h> 11#include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 14check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
13{ 15{
14 if (hwdev && bus + size > *hwdev->dma_mask) { 16 if (hwdev && bus + size > *hwdev->dma_mask) {
15 if (*hwdev->dma_mask >= 0xffffffffULL) 17 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
16 printk(KERN_ERR 18 printk(KERN_ERR
17 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", 19 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
18 name, (long long)bus, size, (long long)*hwdev->dma_mask); 20 name, (long long)bus, size,
21 (long long)*hwdev->dma_mask);
19 return 0; 22 return 0;
20 } 23 }
21 return 1; 24 return 1;
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 990ed67896f2..ebdb77fe2057 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
31void pci_swiotlb_init(void) 31void pci_swiotlb_init(void)
32{ 32{
33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
34 if (!iommu_aperture && !no_iommu && 34 if (!iommu_detected && !no_iommu &&
35 (end_pfn > MAX_DMA32_PFN || force_iommu)) 35 (end_pfn > MAX_DMA32_PFN || force_iommu))
36 swiotlb = 1; 36 swiotlb = 1;
37 if (swiotlb) { 37 if (swiotlb) {
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index fb903e65e079..ca56e19b8b6e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -10,7 +10,6 @@
10 * Andi Kleen. 10 * Andi Kleen.
11 * 11 *
12 * CPU hotplug support - ashok.raj@intel.com 12 * CPU hotplug support - ashok.raj@intel.com
13 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
14 */ 13 */
15 14
16/* 15/*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
64 * Powermanagement idle function, if any.. 63 * Powermanagement idle function, if any..
65 */ 64 */
66void (*pm_idle)(void); 65void (*pm_idle)(void);
66EXPORT_SYMBOL(pm_idle);
67static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 67static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68 68
69static ATOMIC_NOTIFIER_HEAD(idle_notifier); 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
111{ 111{
112 local_irq_enable(); 112 local_irq_enable();
113 113
114 clear_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status &= ~TS_POLLING;
115 smp_mb__after_clear_bit(); 115 smp_mb__after_clear_bit();
116 while (!need_resched()) { 116 while (!need_resched()) {
117 local_irq_disable(); 117 local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
120 else 120 else
121 local_irq_enable(); 121 local_irq_enable();
122 } 122 }
123 set_thread_flag(TIF_POLLING_NRFLAG); 123 current_thread_info()->status |= TS_POLLING;
124} 124}
125 125
126/* 126/*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
203 */ 203 */
204void cpu_idle (void) 204void cpu_idle (void)
205{ 205{
206 set_thread_flag(TIF_POLLING_NRFLAG); 206 current_thread_info()->status |= TS_POLLING;
207
208 /* endless idle loop with no priority at all */ 207 /* endless idle loop with no priority at all */
209 while (1) { 208 while (1) {
210 while (!need_resched()) { 209 while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
335{ 334{
336 printk("CPU %d:", smp_processor_id()); 335 printk("CPU %d:", smp_processor_id());
337 __show_regs(regs); 336 __show_regs(regs);
338 show_trace(&regs->rsp); 337 show_trace(NULL, regs, (void *)(regs + 1));
339} 338}
340 339
341/* 340/*
@@ -365,8 +364,11 @@ void flush_thread(void)
365 struct task_struct *tsk = current; 364 struct task_struct *tsk = current;
366 struct thread_info *t = current_thread_info(); 365 struct thread_info *t = current_thread_info();
367 366
368 if (t->flags & _TIF_ABI_PENDING) 367 if (t->flags & _TIF_ABI_PENDING) {
369 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); 368 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
369 if (t->flags & _TIF_IA32)
370 current_thread_info()->status |= TS_COMPAT;
371 }
370 372
371 tsk->thread.debugreg0 = 0; 373 tsk->thread.debugreg0 = 0;
372 tsk->thread.debugreg1 = 0; 374 tsk->thread.debugreg1 = 0;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 57117b8beb2b..2d6769847456 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -20,6 +20,7 @@
20 * Power off function, if any 20 * Power off function, if any
21 */ 21 */
22void (*pm_power_off)(void); 22void (*pm_power_off)(void);
23EXPORT_SYMBOL(pm_power_off);
23 24
24static long no_idt[3]; 25static long no_idt[3];
25static enum { 26static enum {
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 143c65031539..1129918ede82 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Nov 2001 Dave Jones <davej@suse.de> 6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code. 7 * Forked from i386 setup code.
8 *
9 * $Id$
10 */ 8 */
11 9
12/* 10/*
@@ -65,9 +63,7 @@
65#include <asm/setup.h> 63#include <asm/setup.h>
66#include <asm/mach_apic.h> 64#include <asm/mach_apic.h>
67#include <asm/numa.h> 65#include <asm/numa.h>
68#include <asm/swiotlb.h>
69#include <asm/sections.h> 66#include <asm/sections.h>
70#include <asm/gart-mapping.h>
71#include <asm/dmi.h> 67#include <asm/dmi.h>
72 68
73/* 69/*
@@ -75,6 +71,7 @@
75 */ 71 */
76 72
77struct cpuinfo_x86 boot_cpu_data __read_mostly; 73struct cpuinfo_x86 boot_cpu_data __read_mostly;
74EXPORT_SYMBOL(boot_cpu_data);
78 75
79unsigned long mmu_cr4_features; 76unsigned long mmu_cr4_features;
80 77
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA];
103 * Setup options 100 * Setup options
104 */ 101 */
105struct screen_info screen_info; 102struct screen_info screen_info;
103EXPORT_SYMBOL(screen_info);
106struct sys_desc_table_struct { 104struct sys_desc_table_struct {
107 unsigned short length; 105 unsigned short length;
108 unsigned char table[0]; 106 unsigned char table[0];
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
474} 472}
475#endif 473#endif
476 474
477/* Use inline assembly to define this because the nops are defined
478 as inline assembly strings in the include files and we cannot
479 get them easily into strings. */
480asm("\t.data\nk8nops: "
481 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
482 K8_NOP7 K8_NOP8);
483
484extern unsigned char k8nops[];
485static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
486 NULL,
487 k8nops,
488 k8nops + 1,
489 k8nops + 1 + 2,
490 k8nops + 1 + 2 + 3,
491 k8nops + 1 + 2 + 3 + 4,
492 k8nops + 1 + 2 + 3 + 4 + 5,
493 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
494 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
495};
496
497extern char __vsyscall_0;
498
499/* Replace instructions with better alternatives for this CPU type.
500
501 This runs before SMP is initialized to avoid SMP problems with
502 self modifying code. This implies that assymetric systems where
503 APs have less capabilities than the boot processor are not handled.
504 In this case boot with "noreplacement". */
505void apply_alternatives(void *start, void *end)
506{
507 struct alt_instr *a;
508 int diff, i, k;
509 for (a = start; (void *)a < end; a++) {
510 u8 *instr;
511
512 if (!boot_cpu_has(a->cpuid))
513 continue;
514
515 BUG_ON(a->replacementlen > a->instrlen);
516 instr = a->instr;
517 /* vsyscall code is not mapped yet. resolve it manually. */
518 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
519 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
520 __inline_memcpy(instr, a->replacement, a->replacementlen);
521 diff = a->instrlen - a->replacementlen;
522
523 /* Pad the rest with nops */
524 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
525 k = diff;
526 if (k > ASM_NOP_MAX)
527 k = ASM_NOP_MAX;
528 __inline_memcpy(instr + i, k8_nops[k], k);
529 }
530 }
531}
532
533static int no_replacement __initdata = 0;
534
535void __init alternative_instructions(void)
536{
537 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
538 if (no_replacement)
539 return;
540 apply_alternatives(__alt_instructions, __alt_instructions_end);
541}
542
543static int __init noreplacement_setup(char *s)
544{
545 no_replacement = 1;
546 return 1;
547}
548
549__setup("noreplacement", noreplacement_setup);
550
551#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 475#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
552struct edd edd; 476struct edd edd;
553#ifdef CONFIG_EDD_MODULE 477#ifdef CONFIG_EDD_MODULE
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
780 704
781 e820_setup_gap(); 705 e820_setup_gap();
782 706
783#ifdef CONFIG_GART_IOMMU
784 iommu_hole_init();
785#endif
786
787#ifdef CONFIG_VT 707#ifdef CONFIG_VT
788#if defined(CONFIG_VGA_CONSOLE) 708#if defined(CONFIG_VGA_CONSOLE)
789 conswitchp = &vga_con; 709 conswitchp = &vga_con;
@@ -868,24 +788,32 @@ static int nearby_node(int apicid)
868static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 788static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
869{ 789{
870#ifdef CONFIG_SMP 790#ifdef CONFIG_SMP
871 int cpu = smp_processor_id();
872 unsigned bits; 791 unsigned bits;
873#ifdef CONFIG_NUMA 792#ifdef CONFIG_NUMA
793 int cpu = smp_processor_id();
874 int node = 0; 794 int node = 0;
875 unsigned apicid = hard_smp_processor_id(); 795 unsigned apicid = hard_smp_processor_id();
876#endif 796#endif
797 unsigned ecx = cpuid_ecx(0x80000008);
798
799 c->x86_max_cores = (ecx & 0xff) + 1;
877 800
878 bits = 0; 801 /* CPU telling us the core id bits shift? */
879 while ((1 << bits) < c->x86_max_cores) 802 bits = (ecx >> 12) & 0xF;
880 bits++; 803
804 /* Otherwise recompute */
805 if (bits == 0) {
806 while ((1 << bits) < c->x86_max_cores)
807 bits++;
808 }
881 809
882 /* Low order bits define the core id (index of core in socket) */ 810 /* Low order bits define the core id (index of core in socket) */
883 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); 811 c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
884 /* Convert the APIC ID into the socket ID */ 812 /* Convert the APIC ID into the socket ID */
885 phys_proc_id[cpu] = phys_pkg_id(bits); 813 c->phys_proc_id = phys_pkg_id(bits);
886 814
887#ifdef CONFIG_NUMA 815#ifdef CONFIG_NUMA
888 node = phys_proc_id[cpu]; 816 node = c->phys_proc_id;
889 if (apicid_to_node[apicid] != NUMA_NO_NODE) 817 if (apicid_to_node[apicid] != NUMA_NO_NODE)
890 node = apicid_to_node[apicid]; 818 node = apicid_to_node[apicid];
891 if (!node_online(node)) { 819 if (!node_online(node)) {
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
898 but in the same order as the HT nodeids. 826 but in the same order as the HT nodeids.
899 If that doesn't result in a usable node fall back to the 827 If that doesn't result in a usable node fall back to the
900 path for the previous case. */ 828 path for the previous case. */
901 int ht_nodeid = apicid - (phys_proc_id[0] << bits); 829 int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
902 if (ht_nodeid >= 0 && 830 if (ht_nodeid >= 0 &&
903 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 831 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
904 node = apicid_to_node[ht_nodeid]; 832 node = apicid_to_node[ht_nodeid];
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
908 } 836 }
909 numa_set_node(cpu, node); 837 numa_set_node(cpu, node);
910 838
911 printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", 839 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
912 cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
913#endif 840#endif
914#endif 841#endif
915} 842}
916 843
917static int __init init_amd(struct cpuinfo_x86 *c) 844static void __init init_amd(struct cpuinfo_x86 *c)
918{ 845{
919 int r;
920 unsigned level; 846 unsigned level;
921 847
922#ifdef CONFIG_SMP 848#ifdef CONFIG_SMP
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
949 if (c->x86 >= 6) 875 if (c->x86 >= 6)
950 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); 876 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
951 877
952 r = get_model_name(c); 878 level = get_model_name(c);
953 if (!r) { 879 if (!level) {
954 switch (c->x86) { 880 switch (c->x86) {
955 case 15: 881 case 15:
956 /* Should distinguish Models here, but this is only 882 /* Should distinguish Models here, but this is only
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
965 if (c->x86_power & (1<<8)) 891 if (c->x86_power & (1<<8))
966 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 892 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
967 893
968 if (c->extended_cpuid_level >= 0x80000008) { 894 /* Multi core CPU? */
969 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 895 if (c->extended_cpuid_level >= 0x80000008)
970
971 amd_detect_cmp(c); 896 amd_detect_cmp(c);
972 }
973 897
974 return r; 898 /* Fix cpuid4 emulation for more */
899 num_cache_leaves = 3;
975} 900}
976 901
977static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 902static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
979#ifdef CONFIG_SMP 904#ifdef CONFIG_SMP
980 u32 eax, ebx, ecx, edx; 905 u32 eax, ebx, ecx, edx;
981 int index_msb, core_bits; 906 int index_msb, core_bits;
982 int cpu = smp_processor_id();
983 907
984 cpuid(1, &eax, &ebx, &ecx, &edx); 908 cpuid(1, &eax, &ebx, &ecx, &edx);
985 909
986 910
987 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 911 if (!cpu_has(c, X86_FEATURE_HT))
988 return; 912 return;
913 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
914 goto out;
989 915
990 smp_num_siblings = (ebx & 0xff0000) >> 16; 916 smp_num_siblings = (ebx & 0xff0000) >> 16;
991 917
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1000 } 926 }
1001 927
1002 index_msb = get_count_order(smp_num_siblings); 928 index_msb = get_count_order(smp_num_siblings);
1003 phys_proc_id[cpu] = phys_pkg_id(index_msb); 929 c->phys_proc_id = phys_pkg_id(index_msb);
1004
1005 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
1006 phys_proc_id[cpu]);
1007 930
1008 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 931 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
1009 932
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1011 934
1012 core_bits = get_count_order(c->x86_max_cores); 935 core_bits = get_count_order(c->x86_max_cores);
1013 936
1014 cpu_core_id[cpu] = phys_pkg_id(index_msb) & 937 c->cpu_core_id = phys_pkg_id(index_msb) &
1015 ((1 << core_bits) - 1); 938 ((1 << core_bits) - 1);
1016
1017 if (c->x86_max_cores > 1)
1018 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
1019 cpu_core_id[cpu]);
1020 } 939 }
940out:
941 if ((c->x86_max_cores * smp_num_siblings) > 1) {
942 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
943 printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
944 }
945
1021#endif 946#endif
1022} 947}
1023 948
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1026 */ 951 */
1027static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) 952static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
1028{ 953{
1029 unsigned int eax; 954 unsigned int eax, t;
1030 955
1031 if (c->cpuid_level < 4) 956 if (c->cpuid_level < 4)
1032 return 1; 957 return 1;
1033 958
1034 __asm__("cpuid" 959 cpuid_count(4, 0, &eax, &t, &t, &t);
1035 : "=a" (eax)
1036 : "0" (4), "c" (0)
1037 : "bx", "dx");
1038 960
1039 if (eax & 0x1f) 961 if (eax & 0x1f)
1040 return ((eax >> 26) + 1); 962 return ((eax >> 26) + 1);
@@ -1047,16 +969,17 @@ static void srat_detect_node(void)
1047#ifdef CONFIG_NUMA 969#ifdef CONFIG_NUMA
1048 unsigned node; 970 unsigned node;
1049 int cpu = smp_processor_id(); 971 int cpu = smp_processor_id();
972 int apicid = hard_smp_processor_id();
1050 973
1051 /* Don't do the funky fallback heuristics the AMD version employs 974 /* Don't do the funky fallback heuristics the AMD version employs
1052 for now. */ 975 for now. */
1053 node = apicid_to_node[hard_smp_processor_id()]; 976 node = apicid_to_node[apicid];
1054 if (node == NUMA_NO_NODE) 977 if (node == NUMA_NO_NODE)
1055 node = first_node(node_online_map); 978 node = first_node(node_online_map);
1056 numa_set_node(cpu, node); 979 numa_set_node(cpu, node);
1057 980
1058 if (acpi_numa > 0) 981 if (acpi_numa > 0)
1059 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); 982 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
1060#endif 983#endif
1061} 984}
1062 985
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1066 unsigned n; 989 unsigned n;
1067 990
1068 init_intel_cacheinfo(c); 991 init_intel_cacheinfo(c);
992 if (c->cpuid_level > 9 ) {
993 unsigned eax = cpuid_eax(10);
994 /* Check for version and the number of counters */
995 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
996 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
997 }
998
1069 n = c->extended_cpuid_level; 999 n = c->extended_cpuid_level;
1070 if (n >= 0x80000008) { 1000 if (n >= 0x80000008) {
1071 unsigned eax = cpuid_eax(0x80000008); 1001 unsigned eax = cpuid_eax(0x80000008);
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1157 } 1087 }
1158 1088
1159#ifdef CONFIG_SMP 1089#ifdef CONFIG_SMP
1160 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; 1090 c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
1161#endif 1091#endif
1162} 1092}
1163 1093
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1284 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1214 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1285 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 1215 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1286 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, 1216 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1287 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", 1217 NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
1288 1218
1289 /* Transmeta-defined */ 1219 /* Transmeta-defined */
1290 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 1220 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1295 /* Other (Linux-defined) */ 1225 /* Other (Linux-defined) */
1296 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, 1226 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1297 "constant_tsc", NULL, NULL, 1227 "constant_tsc", NULL, NULL,
1298 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1228 "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1299 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1229 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1300 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1230 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1301 1231
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1365#ifdef CONFIG_SMP 1295#ifdef CONFIG_SMP
1366 if (smp_num_siblings * c->x86_max_cores > 1) { 1296 if (smp_num_siblings * c->x86_max_cores > 1) {
1367 int cpu = c - cpu_data; 1297 int cpu = c - cpu_data;
1368 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1298 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
1369 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); 1299 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1370 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1300 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
1371 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 1301 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1372 } 1302 }
1373#endif 1303#endif
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
1441 .show = show_cpuinfo, 1371 .show = show_cpuinfo,
1442}; 1372};
1443 1373
1444#ifdef CONFIG_INPUT_PCSPKR 1374#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
1445#include <linux/platform_device.h> 1375#include <linux/platform_device.h>
1446static __init int add_pcspkr(void) 1376static __init int add_pcspkr(void)
1447{ 1377{
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 8a691fa6d393..f5934cb4a2b6 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 1995 Linus Torvalds 3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. 4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog. 5 * See setup.c for older changelog.
6 * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
7 */ 6 */
8#include <linux/config.h> 7#include <linux/config.h>
9#include <linux/init.h> 8#include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
31cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 30cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
32 31
33struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; 32struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
33EXPORT_SYMBOL(_cpu_pda);
34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; 34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
35 35
36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); 38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
39 39
40unsigned long __supported_pte_mask __read_mostly = ~0UL; 40unsigned long __supported_pte_mask __read_mostly = ~0UL;
41EXPORT_SYMBOL(__supported_pte_mask);
41static int do_not_nx __cpuinitdata = 0; 42static int do_not_nx __cpuinitdata = 0;
42 43
43/* noexec=on|off 44/* noexec=on|off
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index e5f5ce7909a3..28161170fb0a 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -7,8 +7,6 @@
7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
9 * 2000-2002 x86-64 support by Andi Kleen 9 * 2000-2002 x86-64 support by Andi Kleen
10 *
11 * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
12 */ 10 */
13 11
14#include <linux/sched.h> 12#include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
239 rsp = regs->rsp - 128; 237 rsp = regs->rsp - 128;
240 238
241 /* This is the X/Open sanctioned signal stack switching. */ 239 /* This is the X/Open sanctioned signal stack switching. */
242 /* RED-PEN: redzone on that stack? */
243 if (ka->sa.sa_flags & SA_ONSTACK) { 240 if (ka->sa.sa_flags & SA_ONSTACK) {
244 if (sas_ss_flags(rsp) == 0) 241 if (sas_ss_flags(rsp) == 0)
245 rsp = current->sas_ss_sp + current->sas_ss_size; 242 rsp = current->sas_ss_sp + current->sas_ss_size;
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4a6628b14d99..8188bae9c6d5 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
225 preempt_enable(); 225 preempt_enable();
226} 226}
227EXPORT_SYMBOL(flush_tlb_current_task);
227 228
228void flush_tlb_mm (struct mm_struct * mm) 229void flush_tlb_mm (struct mm_struct * mm)
229{ 230{
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
244 245
245 preempt_enable(); 246 preempt_enable();
246} 247}
248EXPORT_SYMBOL(flush_tlb_mm);
247 249
248void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) 250void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
249{ 251{
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
266 268
267 preempt_enable(); 269 preempt_enable();
268} 270}
271EXPORT_SYMBOL(flush_tlb_page);
269 272
270static void do_flush_tlb_all(void* info) 273static void do_flush_tlb_all(void* info)
271{ 274{
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
443 spin_unlock(&call_lock); 446 spin_unlock(&call_lock);
444 return 0; 447 return 0;
445} 448}
449EXPORT_SYMBOL(smp_call_function);
446 450
447void smp_stop_cpu(void) 451void smp_stop_cpu(void)
448{ 452{
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
460{ 464{
461 smp_stop_cpu(); 465 smp_stop_cpu();
462 for (;;) 466 for (;;)
463 asm("hlt"); 467 halt();
464} 468}
465 469
466void smp_send_stop(void) 470void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
520 524
521int safe_smp_processor_id(void) 525int safe_smp_processor_id(void)
522{ 526{
523 int apicid, i; 527 unsigned apicid, i;
524 528
525 if (disable_apic) 529 if (disable_apic)
526 return 0; 530 return 0;
527 531
528 apicid = hard_smp_processor_id(); 532 apicid = hard_smp_processor_id();
529 if (x86_cpu_to_apicid[apicid] == apicid) 533 if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
530 return apicid; 534 return apicid;
531 535
532 for (i = 0; i < NR_CPUS; ++i) { 536 for (i = 0; i < NR_CPUS; ++i) {
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 71a7222cf9ce..4e9755179ecf 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -63,13 +63,11 @@
63 63
64/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 65int smp_num_siblings = 1;
66/* Package ID of each logical CPU */ 66EXPORT_SYMBOL(smp_num_siblings);
67u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68/* core ID of each logical CPU */
69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
70 67
71/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
72u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
70EXPORT_SYMBOL(cpu_llc_id);
73 71
74/* Bitmask of currently online CPUs */ 72/* Bitmask of currently online CPUs */
75cpumask_t cpu_online_map __read_mostly; 73cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
82 */ 80 */
83cpumask_t cpu_callin_map; 81cpumask_t cpu_callin_map;
84cpumask_t cpu_callout_map; 82cpumask_t cpu_callout_map;
83EXPORT_SYMBOL(cpu_callout_map);
85 84
86cpumask_t cpu_possible_map; 85cpumask_t cpu_possible_map;
87EXPORT_SYMBOL(cpu_possible_map); 86EXPORT_SYMBOL(cpu_possible_map);
88 87
89/* Per CPU bogomips and other parameters */ 88/* Per CPU bogomips and other parameters */
90struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 89struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
90EXPORT_SYMBOL(cpu_data);
91 91
92/* Set when the idlers are all forked */ 92/* Set when the idlers are all forked */
93int smp_threads_ready; 93int smp_threads_ready;
94 94
95/* representing HT siblings of each logical CPU */ 95/* representing HT siblings of each logical CPU */
96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
97EXPORT_SYMBOL(cpu_sibling_map);
97 98
98/* representing HT and core siblings of each logical CPU */ 99/* representing HT and core siblings of each logical CPU */
99cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 100cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
472 473
473 if (smp_num_siblings > 1) { 474 if (smp_num_siblings > 1) {
474 for_each_cpu_mask(i, cpu_sibling_setup_map) { 475 for_each_cpu_mask(i, cpu_sibling_setup_map) {
475 if (phys_proc_id[cpu] == phys_proc_id[i] && 476 if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
476 cpu_core_id[cpu] == cpu_core_id[i]) { 477 c[cpu].cpu_core_id == c[i].cpu_core_id) {
477 cpu_set(i, cpu_sibling_map[cpu]); 478 cpu_set(i, cpu_sibling_map[cpu]);
478 cpu_set(cpu, cpu_sibling_map[i]); 479 cpu_set(cpu, cpu_sibling_map[i]);
479 cpu_set(i, cpu_core_map[cpu]); 480 cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
500 cpu_set(i, c[cpu].llc_shared_map); 501 cpu_set(i, c[cpu].llc_shared_map);
501 cpu_set(cpu, c[i].llc_shared_map); 502 cpu_set(cpu, c[i].llc_shared_map);
502 } 503 }
503 if (phys_proc_id[cpu] == phys_proc_id[i]) { 504 if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
504 cpu_set(i, cpu_core_map[cpu]); 505 cpu_set(i, cpu_core_map[cpu]);
505 cpu_set(cpu, cpu_core_map[i]); 506 cpu_set(cpu, cpu_core_map[i]);
506 /* 507 /*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
797 } 798 }
798 799
799 800
801 alternatives_smp_switch(1);
802
800 c_idle.idle = get_idle_for_cpu(cpu); 803 c_idle.idle = get_idle_for_cpu(cpu);
801 804
802 if (c_idle.idle) { 805 if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
1199 cpu_clear(cpu, cpu_sibling_map[sibling]); 1202 cpu_clear(cpu, cpu_sibling_map[sibling]);
1200 cpus_clear(cpu_sibling_map[cpu]); 1203 cpus_clear(cpu_sibling_map[cpu]);
1201 cpus_clear(cpu_core_map[cpu]); 1204 cpus_clear(cpu_core_map[cpu]);
1202 phys_proc_id[cpu] = BAD_APICID; 1205 c[cpu].phys_proc_id = 0;
1203 cpu_core_id[cpu] = BAD_APICID; 1206 c[cpu].cpu_core_id = 0;
1204 cpu_clear(cpu, cpu_sibling_setup_map); 1207 cpu_clear(cpu, cpu_sibling_setup_map);
1205} 1208}
1206 1209
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
1259 /* They ack this in play_dead by setting CPU_DEAD */ 1262 /* They ack this in play_dead by setting CPU_DEAD */
1260 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1263 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1261 printk ("CPU %d is now offline\n", cpu); 1264 printk ("CPU %d is now offline\n", cpu);
1265 if (1 == num_online_cpus())
1266 alternatives_smp_switch(0);
1262 return; 1267 return;
1263 } 1268 }
1264 msleep(100); 1269 msleep(100);
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644
index 000000000000..8d4c67f61b8e
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
1/*
2 * Derived from arch/powerpc/platforms/pseries/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/types.h>
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/spinlock.h>
27#include <linux/string.h>
28#include <linux/pci.h>
29#include <linux/dma-mapping.h>
30#include <linux/bootmem.h>
31#include <asm/tce.h>
32#include <asm/calgary.h>
33#include <asm/proto.h>
34
35/* flush a tce at 'tceaddr' to main memory */
36static inline void flush_tce(void* tceaddr)
37{
38 /* a single tce can't cross a cache line */
39 if (cpu_has_clflush)
40 asm volatile("clflush (%0)" :: "r" (tceaddr));
41 else
42 asm volatile("wbinvd":::"memory");
43}
44
45void tce_build(struct iommu_table *tbl, unsigned long index,
46 unsigned int npages, unsigned long uaddr, int direction)
47{
48 u64* tp;
49 u64 t;
50 u64 rpn;
51
52 t = (1 << TCE_READ_SHIFT);
53 if (direction != DMA_TO_DEVICE)
54 t |= (1 << TCE_WRITE_SHIFT);
55
56 tp = ((u64*)tbl->it_base) + index;
57
58 while (npages--) {
59 rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
60 t &= ~TCE_RPN_MASK;
61 t |= (rpn << TCE_RPN_SHIFT);
62
63 *tp = cpu_to_be64(t);
64 flush_tce(tp);
65
66 uaddr += PAGE_SIZE;
67 tp++;
68 }
69}
70
71void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
72{
73 u64* tp;
74
75 tp = ((u64*)tbl->it_base) + index;
76
77 while (npages--) {
78 *tp = cpu_to_be64(0);
79 flush_tce(tp);
80 tp++;
81 }
82}
83
84static inline unsigned int table_size_to_number_of_entries(unsigned char size)
85{
86 /*
87 * size is the order of the table, 0-7
88 * smallest table is 8K entries, so shift result by 13 to
89 * multiply by 8K
90 */
91 return (1 << size) << 13;
92}
93
94static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
95{
96 unsigned int bitmapsz;
97 unsigned int tce_table_index;
98 unsigned long bmppages;
99 int ret;
100
101 tbl->it_busno = dev->bus->number;
102
103 /* set the tce table size - measured in entries */
104 tbl->it_size = table_size_to_number_of_entries(specified_table_size);
105
106 tce_table_index = bus_to_phb(tbl->it_busno);
107 tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
108 if (!tbl->it_base) {
109 printk(KERN_ERR "Calgary: iommu_table_setparms: "
110 "no table allocated?!\n");
111 ret = -ENOMEM;
112 goto done;
113 }
114
115 /*
116 * number of bytes needed for the bitmap size in number of
117 * entries; we need one bit per entry
118 */
119 bitmapsz = tbl->it_size / BITS_PER_BYTE;
120 bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
121 if (!bmppages) {
122 printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
123 ret = -ENOMEM;
124 goto done;
125 }
126
127 tbl->it_map = (unsigned long*)bmppages;
128
129 memset(tbl->it_map, 0, bitmapsz);
130
131 tbl->it_hint = 0;
132
133 spin_lock_init(&tbl->it_lock);
134
135 return 0;
136
137done:
138 return ret;
139}
140
141int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
142{
143 struct iommu_table *tbl;
144 int ret;
145
146 if (dev->sysdata) {
147 printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
148 dev, dev->sysdata);
149 BUG();
150 }
151
152 tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
153 if (!tbl) {
154 printk(KERN_ERR "Calgary: error allocating iommu_table\n");
155 ret = -ENOMEM;
156 goto done;
157 }
158
159 ret = tce_table_setparms(dev, tbl);
160 if (ret)
161 goto free_tbl;
162
163 tce_free(tbl, 0, tbl->it_size);
164
165 tbl->bbar = bbar;
166
167 /*
168 * NUMA is already using the bus's sysdata pointer, so we use
169 * the bus's pci_dev's sysdata instead.
170 */
171 dev->sysdata = tbl;
172
173 return 0;
174
175free_tbl:
176 kfree(tbl);
177done:
178 return ret;
179}
180
181void* alloc_tce_table(void)
182{
183 unsigned int size;
184
185 size = table_size_to_number_of_entries(specified_table_size);
186 size *= TCE_ENTRY_SIZE;
187
188 return __alloc_bootmem_low(size, size, 0);
189}
190
191void free_tce_table(void *tbl)
192{
193 unsigned int size;
194
195 if (!tbl)
196 return;
197
198 size = table_size_to_number_of_entries(specified_table_size);
199 size *= TCE_ENTRY_SIZE;
200
201 free_bootmem(__pa(tbl), size);
202}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7392570f975d..ebbee6f59ff5 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -8,7 +8,7 @@
8 * Copyright (c) 1995 Markus Kuhn 8 * Copyright (c) 1995 Markus Kuhn
9 * Copyright (c) 1996 Ingo Molnar 9 * Copyright (c) 1996 Ingo Molnar
10 * Copyright (c) 1998 Andrea Arcangeli 10 * Copyright (c) 1998 Andrea Arcangeli
11 * Copyright (c) 2002 Vojtech Pavlik 11 * Copyright (c) 2002,2006 Vojtech Pavlik
12 * Copyright (c) 2003 Andi Kleen 12 * Copyright (c) 2003 Andi Kleen
13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c 13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
14 */ 14 */
@@ -51,14 +51,21 @@ extern int using_apic_timer;
51static char *time_init_gtod(void); 51static char *time_init_gtod(void);
52 52
53DEFINE_SPINLOCK(rtc_lock); 53DEFINE_SPINLOCK(rtc_lock);
54EXPORT_SYMBOL(rtc_lock);
54DEFINE_SPINLOCK(i8253_lock); 55DEFINE_SPINLOCK(i8253_lock);
55 56
56int nohpet __initdata = 0; 57int nohpet __initdata = 0;
57static int notsc __initdata = 0; 58static int notsc __initdata = 0;
58 59
59#undef HPET_HACK_ENABLE_DANGEROUS 60#define USEC_PER_TICK (USEC_PER_SEC / HZ)
61#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
62#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
63
64#define NS_SCALE 10 /* 2^10, carefully chosen */
65#define US_SCALE 32 /* 2^32, arbitralrily chosen */
60 66
61unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 67unsigned int cpu_khz; /* TSC clocks / usec, not used here */
68EXPORT_SYMBOL(cpu_khz);
62static unsigned long hpet_period; /* fsecs / HPET clock */ 69static unsigned long hpet_period; /* fsecs / HPET clock */
63unsigned long hpet_tick; /* HPET clocks / interrupt */ 70unsigned long hpet_tick; /* HPET clocks / interrupt */
64int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ 71int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
90 t = get_cycles_sync(); 97 t = get_cycles_sync();
91 if (t < vxtime.last_tsc) 98 if (t < vxtime.last_tsc)
92 t = vxtime.last_tsc; /* hack */ 99 t = vxtime.last_tsc; /* hack */
93 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; 100 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
94 return x; 101 return x;
95} 102}
96 103
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void)
98{ 105{
99 /* cap counter read to one tick to avoid inconsistencies */ 106 /* cap counter read to one tick to avoid inconsistencies */
100 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; 107 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
101 return (min(counter,hpet_tick) * vxtime.quot) >> 32; 108 return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
102} 109}
103 110
104unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; 111unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv)
118 seq = read_seqbegin(&xtime_lock); 125 seq = read_seqbegin(&xtime_lock);
119 126
120 sec = xtime.tv_sec; 127 sec = xtime.tv_sec;
121 usec = xtime.tv_nsec / 1000; 128 usec = xtime.tv_nsec / NSEC_PER_USEC;
122 129
123 /* i386 does some correction here to keep the clock 130 /* i386 does some correction here to keep the clock
124 monotonous even when ntpd is fixing drift. 131 monotonous even when ntpd is fixing drift.
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv)
129 in arch/x86_64/kernel/vsyscall.c and export all needed 136 in arch/x86_64/kernel/vsyscall.c and export all needed
130 variables in vmlinux.lds. -AK */ 137 variables in vmlinux.lds. -AK */
131 138
132 t = (jiffies - wall_jiffies) * (1000000L / HZ) + 139 t = (jiffies - wall_jiffies) * USEC_PER_TICK +
133 do_gettimeoffset(); 140 do_gettimeoffset();
134 usec += t; 141 usec += t;
135 142
136 } while (read_seqretry(&xtime_lock, seq)); 143 } while (read_seqretry(&xtime_lock, seq));
137 144
138 tv->tv_sec = sec + usec / 1000000; 145 tv->tv_sec = sec + usec / USEC_PER_SEC;
139 tv->tv_usec = usec % 1000000; 146 tv->tv_usec = usec % USEC_PER_SEC;
140} 147}
141 148
142EXPORT_SYMBOL(do_gettimeofday); 149EXPORT_SYMBOL(do_gettimeofday);
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv)
157 164
158 write_seqlock_irq(&xtime_lock); 165 write_seqlock_irq(&xtime_lock);
159 166
160 nsec -= do_gettimeoffset() * 1000 + 167 nsec -= do_gettimeoffset() * NSEC_PER_USEC +
161 (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); 168 (jiffies - wall_jiffies) * NSEC_PER_TICK;
162 169
163 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); 170 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
164 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); 171 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void)
288 this_offset = hpet_readl(HPET_COUNTER); 295 this_offset = hpet_readl(HPET_COUNTER);
289 } while (read_seqretry(&xtime_lock, seq)); 296 } while (read_seqretry(&xtime_lock, seq));
290 offset = (this_offset - last_offset); 297 offset = (this_offset - last_offset);
291 offset *= (NSEC_PER_SEC/HZ) / hpet_tick; 298 offset *= NSEC_PER_TICK / hpet_tick;
292 } else { 299 } else {
293 do { 300 do {
294 seq = read_seqbegin(&xtime_lock); 301 seq = read_seqbegin(&xtime_lock);
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void)
297 base = monotonic_base; 304 base = monotonic_base;
298 } while (read_seqretry(&xtime_lock, seq)); 305 } while (read_seqretry(&xtime_lock, seq));
299 this_offset = get_cycles_sync(); 306 this_offset = get_cycles_sync();
300 offset = (this_offset - last_offset)*1000 / cpu_khz; 307 /* FIXME: 1000 or 1000000? */
308 offset = (this_offset - last_offset)*1000 / cpu_khz;
301 } 309 }
302 return base + offset; 310 return base + offset;
303} 311}
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs)
382 } 390 }
383 391
384 monotonic_base += 392 monotonic_base +=
385 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 393 (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
386 394
387 vxtime.last = offset; 395 vxtime.last = offset;
388#ifdef CONFIG_X86_PM_TIMER 396#ifdef CONFIG_X86_PM_TIMER
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs)
391#endif 399#endif
392 } else { 400 } else {
393 offset = (((tsc - vxtime.last_tsc) * 401 offset = (((tsc - vxtime.last_tsc) *
394 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 402 vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
395 403
396 if (offset < 0) 404 if (offset < 0)
397 offset = 0; 405 offset = 0;
398 406
399 if (offset > (USEC_PER_SEC / HZ)) { 407 if (offset > USEC_PER_TICK) {
400 lost = offset / (USEC_PER_SEC / HZ); 408 lost = offset / USEC_PER_TICK;
401 offset %= (USEC_PER_SEC / HZ); 409 offset %= USEC_PER_TICK;
402 } 410 }
403 411
404 monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; 412 /* FIXME: 1000 or 1000000? */
413 monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
405 414
406 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; 415 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
407 416
408 if ((((tsc - vxtime.last_tsc) * 417 if ((((tsc - vxtime.last_tsc) *
409 vxtime.tsc_quot) >> 32) < offset) 418 vxtime.tsc_quot) >> US_SCALE) < offset)
410 vxtime.last_tsc = tsc - 419 vxtime.last_tsc = tsc -
411 (((long) offset << 32) / vxtime.tsc_quot) - 1; 420 (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
412 } 421 }
413 422
414 if (lost > 0) { 423 if (lost > 0) {
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
468} 477}
469 478
470static unsigned int cyc2ns_scale __read_mostly; 479static unsigned int cyc2ns_scale __read_mostly;
471#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
472 480
473static inline void set_cyc2ns_scale(unsigned long cpu_khz) 481static inline void set_cyc2ns_scale(unsigned long cpu_khz)
474{ 482{
475 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; 483 cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
476} 484}
477 485
478static inline unsigned long long cycles_2_ns(unsigned long long cyc) 486static inline unsigned long long cycles_2_ns(unsigned long long cyc)
479{ 487{
480 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; 488 return (cyc * cyc2ns_scale) >> NS_SCALE;
481} 489}
482 490
483unsigned long long sched_clock(void) 491unsigned long long sched_clock(void)
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void)
490 Disadvantage is a small drift between CPUs in some configurations, 498 Disadvantage is a small drift between CPUs in some configurations,
491 but that should be tolerable. */ 499 but that should be tolerable. */
492 if (__vxtime.mode == VXTIME_HPET) 500 if (__vxtime.mode == VXTIME_HPET)
493 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32; 501 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
494#endif 502#endif
495 503
496 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, 504 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
633 641
634 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 642 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
635 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 643 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
636 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 644 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
637 } 645 }
638 646
639 set_cyc2ns_scale(cpu_khz_ref); 647 set_cyc2ns_scale(cpu_khz_ref);
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick)
789 if (hpet_use_timer) { 797 if (hpet_use_timer) {
790 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | 798 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
791 HPET_TN_32BIT, HPET_T0_CFG); 799 HPET_TN_32BIT, HPET_T0_CFG);
792 hpet_writel(hpet_tick, HPET_T0_CMP); 800 hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
793 hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ 801 hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
794 cfg |= HPET_CFG_LEGACY; 802 cfg |= HPET_CFG_LEGACY;
795 } 803 }
796/* 804/*
@@ -825,8 +833,7 @@ static int hpet_init(void)
825 if (hpet_period < 100000 || hpet_period > 100000000) 833 if (hpet_period < 100000 || hpet_period > 100000000)
826 return -1; 834 return -1;
827 835
828 hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / 836 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
829 hpet_period;
830 837
831 hpet_use_timer = (id & HPET_ID_LEGSUP); 838 hpet_use_timer = (id & HPET_ID_LEGSUP);
832 839
@@ -890,18 +897,6 @@ void __init time_init(void)
890 char *timename; 897 char *timename;
891 char *gtod; 898 char *gtod;
892 899
893#ifdef HPET_HACK_ENABLE_DANGEROUS
894 if (!vxtime.hpet_address) {
895 printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
896 "manually!\n");
897 outl(0x800038a0, 0xcf8);
898 outl(0xff000001, 0xcfc);
899 outl(0x800038a0, 0xcf8);
900 vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
901 printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
902 "at %#lx.\n", vxtime.hpet_address);
903 }
904#endif
905 if (nohpet) 900 if (nohpet)
906 vxtime.hpet_address = 0; 901 vxtime.hpet_address = 0;
907 902
@@ -912,7 +907,7 @@ void __init time_init(void)
912 -xtime.tv_sec, -xtime.tv_nsec); 907 -xtime.tv_sec, -xtime.tv_nsec);
913 908
914 if (!hpet_init()) 909 if (!hpet_init())
915 vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; 910 vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
916 else 911 else
917 vxtime.hpet_address = 0; 912 vxtime.hpet_address = 0;
918 913
@@ -941,8 +936,8 @@ void __init time_init(void)
941 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); 936 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
942 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 937 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
943 cpu_khz / 1000, cpu_khz % 1000); 938 cpu_khz / 1000, cpu_khz % 1000);
944 vxtime.quot = (1000000L << 32) / vxtime_hz; 939 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
945 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 940 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
946 vxtime.last_tsc = get_cycles_sync(); 941 vxtime.last_tsc = get_cycles_sync();
947 setup_irq(0, &irq0); 942 setup_irq(0, &irq0);
948 943
@@ -956,10 +951,10 @@ void __init time_init(void)
956__cpuinit int unsynchronized_tsc(void) 951__cpuinit int unsynchronized_tsc(void)
957{ 952{
958#ifdef CONFIG_SMP 953#ifdef CONFIG_SMP
959 if (oem_force_hpet_timer()) 954 if (apic_is_clustered_box())
960 return 1; 955 return 1;
961 /* Intel systems are normally all synchronized. Exceptions 956 /* Intel systems are normally all synchronized. Exceptions
962 are handled in the OEM check above. */ 957 are handled in the check above. */
963 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 958 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
964 return 0; 959 return 0;
965#endif 960#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index cea335e8746c..3d11a2fe45b7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -6,8 +6,6 @@
6 * 6 *
7 * Pentium III FXSR, SSE support 7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000 8 * Gareth Hughes <gareth@valinux.com>, May 2000
9 *
10 * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
11 */ 9 */
12 10
13/* 11/*
@@ -31,6 +29,7 @@
31#include <linux/nmi.h> 29#include <linux/nmi.h>
32#include <linux/kprobes.h> 30#include <linux/kprobes.h>
33#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h>
34 33
35#include <asm/system.h> 34#include <asm/system.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -41,7 +40,7 @@
41#include <asm/i387.h> 40#include <asm/i387.h>
42#include <asm/kdebug.h> 41#include <asm/kdebug.h>
43#include <asm/processor.h> 42#include <asm/processor.h>
44 43#include <asm/unwind.h>
45#include <asm/smp.h> 44#include <asm/smp.h>
46#include <asm/pgalloc.h> 45#include <asm/pgalloc.h>
47#include <asm/pda.h> 46#include <asm/pda.h>
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void);
71asmlinkage void spurious_interrupt_bug(void); 70asmlinkage void spurious_interrupt_bug(void);
72 71
73ATOMIC_NOTIFIER_HEAD(die_chain); 72ATOMIC_NOTIFIER_HEAD(die_chain);
73EXPORT_SYMBOL(die_chain);
74 74
75int register_die_notifier(struct notifier_block *nb) 75int register_die_notifier(struct notifier_block *nb)
76{ 76{
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
107 preempt_enable_no_resched(); 107 preempt_enable_no_resched();
108} 108}
109 109
110static int kstack_depth_to_print = 10; 110static int kstack_depth_to_print = 12;
111static int call_trace = 1;
111 112
112#ifdef CONFIG_KALLSYMS 113#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 114#include <linux/kallsyms.h>
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
191 return NULL; 192 return NULL;
192} 193}
193 194
195static int show_trace_unwind(struct unwind_frame_info *info, void *context)
196{
197 int i = 11, n = 0;
198
199 while (unwind(info) == 0 && UNW_PC(info)) {
200 ++n;
201 if (i > 50) {
202 printk("\n ");
203 i = 7;
204 } else
205 i += printk(" ");
206 i += printk_address(UNW_PC(info));
207 if (arch_unw_user_mode(info))
208 break;
209 }
210 printk("\n");
211 return n;
212}
213
194/* 214/*
195 * x86-64 can have upto three kernel stacks: 215 * x86-64 can have upto three kernel stacks:
196 * process stack 216 * process stack
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
198 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 218 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
199 */ 219 */
200 220
201void show_trace(unsigned long *stack) 221void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
202{ 222{
203 const unsigned cpu = safe_smp_processor_id(); 223 const unsigned cpu = safe_smp_processor_id();
204 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 224 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
205 int i; 225 int i = 11;
206 unsigned used = 0; 226 unsigned used = 0;
207 227
208 printk("\nCall Trace:"); 228 printk("\nCall Trace:");
209 229
230 if (!tsk)
231 tsk = current;
232
233 if (call_trace >= 0) {
234 int unw_ret = 0;
235 struct unwind_frame_info info;
236
237 if (regs) {
238 if (unwind_init_frame_info(&info, tsk, regs) == 0)
239 unw_ret = show_trace_unwind(&info, NULL);
240 } else if (tsk == current)
241 unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
242 else {
243 if (unwind_init_blocked(&info, tsk) == 0)
244 unw_ret = show_trace_unwind(&info, NULL);
245 }
246 if (unw_ret > 0) {
247 if (call_trace > 0)
248 return;
249 printk("Legacy call trace:");
250 i = 18;
251 }
252 }
253
210#define HANDLE_STACK(cond) \ 254#define HANDLE_STACK(cond) \
211 do while (cond) { \ 255 do while (cond) { \
212 unsigned long addr = *stack++; \ 256 unsigned long addr = *stack++; \
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack)
229 } \ 273 } \
230 } while (0) 274 } while (0)
231 275
232 for(i = 11; ; ) { 276 for(; ; ) {
233 const char *id; 277 const char *id;
234 unsigned long *estack_end; 278 unsigned long *estack_end;
235 estack_end = in_exception_stack(cpu, (unsigned long)stack, 279 estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack)
264 printk("\n"); 308 printk("\n");
265} 309}
266 310
267void show_stack(struct task_struct *tsk, unsigned long * rsp) 311static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
268{ 312{
269 unsigned long *stack; 313 unsigned long *stack;
270 int i; 314 int i;
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
298 printk("%016lx ", *stack++); 342 printk("%016lx ", *stack++);
299 touch_nmi_watchdog(); 343 touch_nmi_watchdog();
300 } 344 }
301 show_trace((unsigned long *)rsp); 345 show_trace(tsk, regs, rsp);
346}
347
348void show_stack(struct task_struct *tsk, unsigned long * rsp)
349{
350 _show_stack(tsk, NULL, rsp);
302} 351}
303 352
304/* 353/*
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
307void dump_stack(void) 356void dump_stack(void)
308{ 357{
309 unsigned long dummy; 358 unsigned long dummy;
310 show_trace(&dummy); 359 show_trace(NULL, NULL, &dummy);
311} 360}
312 361
313EXPORT_SYMBOL(dump_stack); 362EXPORT_SYMBOL(dump_stack);
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs)
334 if (in_kernel) { 383 if (in_kernel) {
335 384
336 printk("Stack: "); 385 printk("Stack: ");
337 show_stack(NULL, (unsigned long*)rsp); 386 _show_stack(NULL, regs, (unsigned long*)rsp);
338 387
339 printk("\nCode: "); 388 printk("\nCode: ");
340 if (regs->rip < PAGE_OFFSET) 389 if (regs->rip < PAGE_OFFSET)
@@ -383,6 +432,7 @@ void out_of_line_bug(void)
383{ 432{
384 BUG(); 433 BUG();
385} 434}
435EXPORT_SYMBOL(out_of_line_bug);
386#endif 436#endif
387 437
388static DEFINE_SPINLOCK(die_lock); 438static DEFINE_SPINLOCK(die_lock);
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s)
1012} 1062}
1013__setup("kstack=", kstack_setup); 1063__setup("kstack=", kstack_setup);
1014 1064
1065static int __init call_trace_setup(char *s)
1066{
1067 if (strcmp(s, "old") == 0)
1068 call_trace = -1;
1069 else if (strcmp(s, "both") == 0)
1070 call_trace = 0;
1071 else if (strcmp(s, "new") == 0)
1072 call_trace = 1;
1073 return 1;
1074}
1075__setup("call_trace=", call_trace_setup);
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b81f473c4a19..1c6a5f322919 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -45,6 +45,15 @@ SECTIONS
45 45
46 RODATA 46 RODATA
47 47
48#ifdef CONFIG_STACK_UNWIND
49 . = ALIGN(8);
50 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
51 __start_unwind = .;
52 *(.eh_frame)
53 __end_unwind = .;
54 }
55#endif
56
48 /* Data */ 57 /* Data */
49 .data : AT(ADDR(.data) - LOAD_OFFSET) { 58 .data : AT(ADDR(.data) - LOAD_OFFSET) {
50 *(.data) 59 *(.data)
@@ -131,6 +140,26 @@ SECTIONS
131 *(.data.page_aligned) 140 *(.data.page_aligned)
132 } 141 }
133 142
143 /* might get freed after init */
144 . = ALIGN(4096);
145 __smp_alt_begin = .;
146 __smp_alt_instructions = .;
147 .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
148 *(.smp_altinstructions)
149 }
150 __smp_alt_instructions_end = .;
151 . = ALIGN(8);
152 __smp_locks = .;
153 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
154 *(.smp_locks)
155 }
156 __smp_locks_end = .;
157 .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
158 *(.smp_altinstr_replacement)
159 }
160 . = ALIGN(4096);
161 __smp_alt_end = .;
162
134 . = ALIGN(4096); /* Init code and data */ 163 . = ALIGN(4096); /* Init code and data */
135 __init_begin = .; 164 __init_begin = .;
136 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 165 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 9468fb20b0bc..f603037df162 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t)
107 107
108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
109{ 109{
110 if (unlikely(!__sysctl_vsyscall)) 110 if (!__sysctl_vsyscall)
111 return gettimeofday(tv,tz); 111 return gettimeofday(tv,tz);
112 if (tv) 112 if (tv)
113 do_vgettimeofday(tv); 113 do_vgettimeofday(tv);
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
120 * unlikely */ 120 * unlikely */
121time_t __vsyscall(1) vtime(time_t *t) 121time_t __vsyscall(1) vtime(time_t *t)
122{ 122{
123 if (unlikely(!__sysctl_vsyscall)) 123 if (!__sysctl_vsyscall)
124 return time_syscall(t); 124 return time_syscall(t);
125 else if (t) 125 else if (t)
126 *t = __xtime.tv_sec; 126 *t = __xtime.tv_sec;
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 1def21c9f7cd..370952c4ff22 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -1,66 +1,21 @@
1/* Exports for assembly files.
2 All C exports should go in the respective C files. */
3
1#include <linux/config.h> 4#include <linux/config.h>
2#include <linux/module.h> 5#include <linux/module.h>
3#include <linux/smp.h> 6#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/sched.h>
6#include <linux/in6.h>
7#include <linux/interrupt.h>
8#include <linux/smp_lock.h>
9#include <linux/pm.h>
10#include <linux/pci.h>
11#include <linux/apm_bios.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/syscalls.h>
15#include <linux/tty.h>
16 7
17#include <asm/semaphore.h> 8#include <asm/semaphore.h>
18#include <asm/processor.h> 9#include <asm/processor.h>
19#include <asm/i387.h>
20#include <asm/uaccess.h> 10#include <asm/uaccess.h>
21#include <asm/checksum.h>
22#include <asm/io.h>
23#include <asm/delay.h>
24#include <asm/irq.h>
25#include <asm/mmx.h>
26#include <asm/desc.h>
27#include <asm/pgtable.h> 11#include <asm/pgtable.h>
28#include <asm/pgalloc.h>
29#include <asm/nmi.h>
30#include <asm/kdebug.h>
31#include <asm/unistd.h>
32#include <asm/tlbflush.h>
33#include <asm/kdebug.h>
34
35extern spinlock_t rtc_lock;
36 12
37#ifdef CONFIG_SMP
38extern void __write_lock_failed(rwlock_t *rw);
39extern void __read_lock_failed(rwlock_t *rw);
40#endif
41
42/* platform dependent support */
43EXPORT_SYMBOL(boot_cpu_data);
44//EXPORT_SYMBOL(dump_fpu);
45EXPORT_SYMBOL(__ioremap);
46EXPORT_SYMBOL(ioremap_nocache);
47EXPORT_SYMBOL(iounmap);
48EXPORT_SYMBOL(kernel_thread); 13EXPORT_SYMBOL(kernel_thread);
49EXPORT_SYMBOL(pm_idle);
50EXPORT_SYMBOL(pm_power_off);
51 14
52EXPORT_SYMBOL(__down_failed); 15EXPORT_SYMBOL(__down_failed);
53EXPORT_SYMBOL(__down_failed_interruptible); 16EXPORT_SYMBOL(__down_failed_interruptible);
54EXPORT_SYMBOL(__down_failed_trylock); 17EXPORT_SYMBOL(__down_failed_trylock);
55EXPORT_SYMBOL(__up_wakeup); 18EXPORT_SYMBOL(__up_wakeup);
56/* Networking helper routines. */
57EXPORT_SYMBOL(csum_partial_copy_nocheck);
58EXPORT_SYMBOL(ip_compute_csum);
59/* Delay loops */
60EXPORT_SYMBOL(__udelay);
61EXPORT_SYMBOL(__ndelay);
62EXPORT_SYMBOL(__delay);
63EXPORT_SYMBOL(__const_udelay);
64 19
65EXPORT_SYMBOL(__get_user_1); 20EXPORT_SYMBOL(__get_user_1);
66EXPORT_SYMBOL(__get_user_2); 21EXPORT_SYMBOL(__get_user_2);
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2);
71EXPORT_SYMBOL(__put_user_4); 26EXPORT_SYMBOL(__put_user_4);
72EXPORT_SYMBOL(__put_user_8); 27EXPORT_SYMBOL(__put_user_8);
73 28
74EXPORT_SYMBOL(strncpy_from_user);
75EXPORT_SYMBOL(__strncpy_from_user);
76EXPORT_SYMBOL(clear_user);
77EXPORT_SYMBOL(__clear_user);
78EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic);
79EXPORT_SYMBOL(copy_from_user); 30EXPORT_SYMBOL(copy_from_user);
80EXPORT_SYMBOL(copy_to_user); 31EXPORT_SYMBOL(copy_to_user);
81EXPORT_SYMBOL(copy_in_user);
82EXPORT_SYMBOL(strnlen_user);
83
84#ifdef CONFIG_PCI
85EXPORT_SYMBOL(pci_mem_start);
86#endif
87 32
88EXPORT_SYMBOL(copy_page); 33EXPORT_SYMBOL(copy_page);
89EXPORT_SYMBOL(clear_page); 34EXPORT_SYMBOL(clear_page);
90 35
91EXPORT_SYMBOL(_cpu_pda);
92#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
93EXPORT_SYMBOL(cpu_data); 37extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
38extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
94EXPORT_SYMBOL(__write_lock_failed); 39EXPORT_SYMBOL(__write_lock_failed);
95EXPORT_SYMBOL(__read_lock_failed); 40EXPORT_SYMBOL(__read_lock_failed);
96
97EXPORT_SYMBOL(smp_call_function);
98EXPORT_SYMBOL(cpu_callout_map);
99#endif
100
101#ifdef CONFIG_VT
102EXPORT_SYMBOL(screen_info);
103#endif 41#endif
104 42
105EXPORT_SYMBOL(rtc_lock);
106
107EXPORT_SYMBOL_GPL(set_nmi_callback);
108EXPORT_SYMBOL_GPL(unset_nmi_callback);
109
110/* Export string functions. We normally rely on gcc builtin for most of these, 43/* Export string functions. We normally rely on gcc builtin for most of these,
111 but gcc sometimes decides not to inline them. */ 44 but gcc sometimes decides not to inline them. */
112#undef memcpy 45#undef memcpy
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
114#undef memmove 47#undef memmove
115 48
116extern void * memset(void *,int,__kernel_size_t); 49extern void * memset(void *,int,__kernel_size_t);
117extern size_t strlen(const char *);
118extern void * memmove(void * dest,const void *src,size_t count);
119extern void * memcpy(void *,const void *,__kernel_size_t); 50extern void * memcpy(void *,const void *,__kernel_size_t);
120extern void * __memcpy(void *,const void *,__kernel_size_t); 51extern void * __memcpy(void *,const void *,__kernel_size_t);
121 52
122EXPORT_SYMBOL(memset); 53EXPORT_SYMBOL(memset);
123EXPORT_SYMBOL(memmove);
124EXPORT_SYMBOL(memcpy); 54EXPORT_SYMBOL(memcpy);
125EXPORT_SYMBOL(__memcpy); 55EXPORT_SYMBOL(__memcpy);
126 56
127#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
128/* prototypes are wrong, these are assembly with custom calling functions */
129extern void rwsem_down_read_failed_thunk(void);
130extern void rwsem_wake_thunk(void);
131extern void rwsem_downgrade_thunk(void);
132extern void rwsem_down_write_failed_thunk(void);
133EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
134EXPORT_SYMBOL(rwsem_wake_thunk);
135EXPORT_SYMBOL(rwsem_downgrade_thunk);
136EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
137#endif
138
139EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
140
141EXPORT_SYMBOL(die_chain);
142
143#ifdef CONFIG_SMP
144EXPORT_SYMBOL(cpu_sibling_map);
145EXPORT_SYMBOL(smp_num_siblings);
146#endif
147
148#ifdef CONFIG_BUG
149EXPORT_SYMBOL(out_of_line_bug);
150#endif
151
152EXPORT_SYMBOL(init_level4_pgt); 58EXPORT_SYMBOL(init_level4_pgt);
153
154extern unsigned long __supported_pte_mask;
155EXPORT_SYMBOL(__supported_pte_mask);
156
157#ifdef CONFIG_SMP
158EXPORT_SYMBOL(flush_tlb_page);
159#endif
160
161EXPORT_SYMBOL(cpu_khz);
162
163EXPORT_SYMBOL(load_gs_index); 59EXPORT_SYMBOL(load_gs_index);
164 60