diff options
Diffstat (limited to 'arch/x86_64/kernel')
34 files changed, 2312 insertions, 741 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 059c88313f4e..aeb9c560be88 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ |
9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ | 9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ |
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | pci-dma.o pci-nommu.o | 11 | pci-dma.o pci-nommu.o alternative.o |
12 | 12 | ||
13 | obj-$(CONFIG_X86_MCE) += mce.o | 13 | obj-$(CONFIG_X86_MCE) += mce.o |
14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM) += suspend.o | |||
28 | obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o | 28 | obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o |
29 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 29 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
30 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 30 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
31 | obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | 31 | obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o |
32 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o | ||
32 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 33 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
33 | obj-$(CONFIG_KPROBES) += kprobes.o | 34 | obj-$(CONFIG_KPROBES) += kprobes.o |
34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | 35 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o |
35 | obj-$(CONFIG_X86_VSMP) += vsmp.o | 36 | obj-$(CONFIG_X86_VSMP) += vsmp.o |
37 | obj-$(CONFIG_K8_NB) += k8.o | ||
36 | 38 | ||
37 | obj-$(CONFIG_MODULES) += module.o | 39 | obj-$(CONFIG_MODULES) += module.o |
38 | 40 | ||
@@ -49,3 +51,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o | |||
49 | quirks-y += ../../i386/kernel/quirks.o | 51 | quirks-y += ../../i386/kernel/quirks.o |
50 | i8237-y += ../../i386/kernel/i8237.o | 52 | i8237-y += ../../i386/kernel/i8237.o |
51 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o | 53 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o |
54 | alternative-y += ../../i386/kernel/alternative.o | ||
55 | |||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 70b9d21ed675..a195ef06ec55 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -8,7 +8,6 @@ | |||
8 | * because only the bootmem allocator can allocate 32+MB. | 8 | * because only the bootmem allocator can allocate 32+MB. |
9 | * | 9 | * |
10 | * Copyright 2002 Andi Kleen, SuSE Labs. | 10 | * Copyright 2002 Andi Kleen, SuSE Labs. |
11 | * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $ | ||
12 | */ | 11 | */ |
13 | #include <linux/config.h> | 12 | #include <linux/config.h> |
14 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
@@ -24,6 +23,7 @@ | |||
24 | #include <asm/proto.h> | 23 | #include <asm/proto.h> |
25 | #include <asm/pci-direct.h> | 24 | #include <asm/pci-direct.h> |
26 | #include <asm/dma.h> | 25 | #include <asm/dma.h> |
26 | #include <asm/k8.h> | ||
27 | 27 | ||
28 | int iommu_aperture; | 28 | int iommu_aperture; |
29 | int iommu_aperture_disabled __initdata = 0; | 29 | int iommu_aperture_disabled __initdata = 0; |
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1; | |||
37 | /* This code runs before the PCI subsystem is initialized, so just | 37 | /* This code runs before the PCI subsystem is initialized, so just |
38 | access the northbridge directly. */ | 38 | access the northbridge directly. */ |
39 | 39 | ||
40 | #define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16)) | ||
41 | |||
42 | static u32 __init allocate_aperture(void) | 40 | static u32 __init allocate_aperture(void) |
43 | { | 41 | { |
44 | pg_data_t *nd0 = NODE_DATA(0); | 42 | pg_data_t *nd0 = NODE_DATA(0); |
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void) | |||
68 | return (u32)__pa(p); | 66 | return (u32)__pa(p); |
69 | } | 67 | } |
70 | 68 | ||
71 | static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) | 69 | static int __init aperture_valid(u64 aper_base, u32 aper_size) |
72 | { | 70 | { |
73 | if (!aper_base) | 71 | if (!aper_base) |
74 | return 0; | 72 | return 0; |
75 | if (aper_size < 64*1024*1024) { | 73 | if (aper_size < 64*1024*1024) { |
76 | printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); | 74 | printk("Aperture too small (%d MB)\n", aper_size>>20); |
77 | return 0; | 75 | return 0; |
78 | } | 76 | } |
79 | if (aper_base + aper_size >= 0xffffffff) { | 77 | if (aper_base + aper_size >= 0xffffffff) { |
80 | printk("Aperture from %s beyond 4GB. Ignoring.\n",name); | 78 | printk("Aperture beyond 4GB. Ignoring.\n"); |
81 | return 0; | 79 | return 0; |
82 | } | 80 | } |
83 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { | 81 | if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { |
84 | printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); | 82 | printk("Aperture pointing to e820 RAM. Ignoring.\n"); |
85 | return 0; | 83 | return 0; |
86 | } | 84 | } |
87 | return 1; | 85 | return 1; |
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) | |||
140 | printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", | 138 | printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", |
141 | aper, 32 << *order, apsizereg); | 139 | aper, 32 << *order, apsizereg); |
142 | 140 | ||
143 | if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) | 141 | if (!aperture_valid(aper, (32*1024*1024) << *order)) |
144 | return 0; | 142 | return 0; |
145 | return (u32)aper; | 143 | return (u32)aper; |
146 | } | 144 | } |
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void) | |||
208 | 206 | ||
209 | fix = 0; | 207 | fix = 0; |
210 | for (num = 24; num < 32; num++) { | 208 | for (num = 24; num < 32; num++) { |
211 | char name[30]; | 209 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
212 | if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) | 210 | continue; |
213 | continue; | ||
214 | 211 | ||
212 | iommu_detected = 1; | ||
215 | iommu_aperture = 1; | 213 | iommu_aperture = 1; |
216 | 214 | ||
217 | aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; | 215 | aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; |
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void) | |||
222 | printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, | 220 | printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, |
223 | aper_base, aper_size>>20); | 221 | aper_base, aper_size>>20); |
224 | 222 | ||
225 | sprintf(name, "northbridge cpu %d", num-24); | 223 | if (!aperture_valid(aper_base, aper_size)) { |
226 | |||
227 | if (!aperture_valid(name, aper_base, aper_size)) { | ||
228 | fix = 1; | 224 | fix = 1; |
229 | break; | 225 | break; |
230 | } | 226 | } |
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void) | |||
273 | 269 | ||
274 | /* Fix up the north bridges */ | 270 | /* Fix up the north bridges */ |
275 | for (num = 24; num < 32; num++) { | 271 | for (num = 24; num < 32; num++) { |
276 | if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) | 272 | if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) |
277 | continue; | 273 | continue; |
278 | 274 | ||
279 | /* Don't enable translation yet. That is done later. | 275 | /* Don't enable translation yet. That is done later. |
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 29ef99001e05..b2ead91df218 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -100,7 +100,7 @@ void clear_local_APIC(void) | |||
100 | maxlvt = get_maxlvt(); | 100 | maxlvt = get_maxlvt(); |
101 | 101 | ||
102 | /* | 102 | /* |
103 | * Masking an LVT entry on a P6 can trigger a local APIC error | 103 | * Masking an LVT entry can trigger a local APIC error |
104 | * if the vector is zero. Mask LVTERR first to prevent this. | 104 | * if the vector is zero. Mask LVTERR first to prevent this. |
105 | */ | 105 | */ |
106 | if (maxlvt >= 3) { | 106 | if (maxlvt >= 3) { |
@@ -851,7 +851,18 @@ void disable_APIC_timer(void) | |||
851 | unsigned long v; | 851 | unsigned long v; |
852 | 852 | ||
853 | v = apic_read(APIC_LVTT); | 853 | v = apic_read(APIC_LVTT); |
854 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); | 854 | /* |
855 | * When an illegal vector value (0-15) is written to an LVT | ||
856 | * entry and delivery mode is Fixed, the APIC may signal an | ||
857 | * illegal vector error, without regard to whether the mask | ||
858 | * bit is set or whether an interrupt is actually seen on input. | ||
859 | * | ||
860 | * Boot sequence might call this function when the LVTT has | ||
861 | * '0' vector value. So make sure vector field is set to | ||
862 | * valid value. | ||
863 | */ | ||
864 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
865 | apic_write(APIC_LVTT, v); | ||
855 | } | 866 | } |
856 | } | 867 | } |
857 | 868 | ||
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier) | |||
909 | return -EINVAL; | 920 | return -EINVAL; |
910 | } | 921 | } |
911 | 922 | ||
912 | #ifdef CONFIG_X86_MCE_AMD | 923 | void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, |
913 | void setup_threshold_lvt(unsigned long lvt_off) | 924 | unsigned char msg_type, unsigned char mask) |
914 | { | 925 | { |
915 | unsigned int v = 0; | 926 | unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; |
916 | unsigned long reg = (lvt_off << 4) + 0x500; | 927 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; |
917 | v |= THRESHOLD_APIC_VECTOR; | ||
918 | apic_write(reg, v); | 928 | apic_write(reg, v); |
919 | } | 929 | } |
920 | #endif /* CONFIG_X86_MCE_AMD */ | ||
921 | 930 | ||
922 | #undef APIC_DIVISOR | 931 | #undef APIC_DIVISOR |
923 | 932 | ||
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
983 | } | 992 | } |
984 | 993 | ||
985 | /* | 994 | /* |
986 | * oem_force_hpet_timer -- force HPET mode for some boxes. | 995 | * apic_is_clustered_box() -- Check if we can expect good TSC |
987 | * | 996 | * |
988 | * Thus far, the major user of this is IBM's Summit2 series: | 997 | * Thus far, the major user of this is IBM's Summit2 series: |
989 | * | 998 | * |
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
991 | * multi-chassis. Use available data to take a good guess. | 1000 | * multi-chassis. Use available data to take a good guess. |
992 | * If in doubt, go HPET. | 1001 | * If in doubt, go HPET. |
993 | */ | 1002 | */ |
994 | __cpuinit int oem_force_hpet_timer(void) | 1003 | __cpuinit int apic_is_clustered_box(void) |
995 | { | 1004 | { |
996 | int i, clusters, zeros; | 1005 | int i, clusters, zeros; |
997 | unsigned id; | 1006 | unsigned id; |
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void) | |||
1022 | } | 1031 | } |
1023 | 1032 | ||
1024 | /* | 1033 | /* |
1025 | * If clusters > 2, then should be multi-chassis. Return 1 for HPET. | 1034 | * If clusters > 2, then should be multi-chassis. |
1026 | * Else return 0 to use TSC. | ||
1027 | * May have to revisit this when multi-core + hyperthreaded CPUs come | 1035 | * May have to revisit this when multi-core + hyperthreaded CPUs come |
1028 | * out, but AFAIK this will work even for them. | 1036 | * out, but AFAIK this will work even for them. |
1029 | */ | 1037 | */ |
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 4e6c3b729e39..8ca04912b1cc 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c | |||
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) | |||
111 | atomic_dec(&waiting_for_crash_ipi); | 111 | atomic_dec(&waiting_for_crash_ipi); |
112 | /* Assume hlt works */ | 112 | /* Assume hlt works */ |
113 | for(;;) | 113 | for(;;) |
114 | asm("hlt"); | 114 | halt(); |
115 | 115 | ||
116 | return 1; | 116 | return 1; |
117 | } | 117 | } |
118 | 118 | ||
119 | static void smp_send_nmi_allbutself(void) | 119 | static void smp_send_nmi_allbutself(void) |
120 | { | 120 | { |
121 | send_IPI_allbutself(APIC_DM_NMI); | 121 | send_IPI_allbutself(NMI_VECTOR); |
122 | } | 122 | } |
123 | 123 | ||
124 | /* | 124 | /* |
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 1ef6028f721e..9e94d834624b 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -1,7 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Handle the memory map. | 2 | * Handle the memory map. |
3 | * The functions here do the job until bootmem takes over. | 3 | * The functions here do the job until bootmem takes over. |
4 | * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $ | ||
5 | * | 4 | * |
6 | * Getting sanitize_e820_map() in sync with i386 version by applying change: | 5 | * Getting sanitize_e820_map() in sync with i386 version by applying change: |
7 | * - Provisions for empty E820 memory regions (reported by certain BIOSes). | 6 | * - Provisions for empty E820 memory regions (reported by certain BIOSes). |
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from) | |||
621 | } | 620 | } |
622 | 621 | ||
623 | unsigned long pci_mem_start = 0xaeedbabe; | 622 | unsigned long pci_mem_start = 0xaeedbabe; |
623 | EXPORT_SYMBOL(pci_mem_start); | ||
624 | 624 | ||
625 | /* | 625 | /* |
626 | * Search for the biggest gap in the low 32 bits of the e820 | 626 | * Search for the biggest gap in the low 32 bits of the e820 |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 586b34c00c48..7290e72b9a34 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -154,6 +154,7 @@ rff_trace: | |||
154 | GET_THREAD_INFO(%rcx) | 154 | GET_THREAD_INFO(%rcx) |
155 | jmp rff_action | 155 | jmp rff_action |
156 | CFI_ENDPROC | 156 | CFI_ENDPROC |
157 | END(ret_from_fork) | ||
157 | 158 | ||
158 | /* | 159 | /* |
159 | * System call entry. Up to 6 arguments in registers are supported. | 160 | * System call entry. Up to 6 arguments in registers are supported. |
@@ -188,7 +189,7 @@ rff_trace: | |||
188 | 189 | ||
189 | ENTRY(system_call) | 190 | ENTRY(system_call) |
190 | CFI_STARTPROC simple | 191 | CFI_STARTPROC simple |
191 | CFI_DEF_CFA rsp,0 | 192 | CFI_DEF_CFA rsp,PDA_STACKOFFSET |
192 | CFI_REGISTER rip,rcx | 193 | CFI_REGISTER rip,rcx |
193 | /*CFI_REGISTER rflags,r11*/ | 194 | /*CFI_REGISTER rflags,r11*/ |
194 | swapgs | 195 | swapgs |
@@ -285,6 +286,7 @@ tracesys: | |||
285 | /* Use IRET because user could have changed frame */ | 286 | /* Use IRET because user could have changed frame */ |
286 | jmp int_ret_from_sys_call | 287 | jmp int_ret_from_sys_call |
287 | CFI_ENDPROC | 288 | CFI_ENDPROC |
289 | END(system_call) | ||
288 | 290 | ||
289 | /* | 291 | /* |
290 | * Syscall return path ending with IRET. | 292 | * Syscall return path ending with IRET. |
@@ -364,6 +366,7 @@ int_restore_rest: | |||
364 | cli | 366 | cli |
365 | jmp int_with_check | 367 | jmp int_with_check |
366 | CFI_ENDPROC | 368 | CFI_ENDPROC |
369 | END(int_ret_from_sys_call) | ||
367 | 370 | ||
368 | /* | 371 | /* |
369 | * Certain special system calls that need to save a complete full stack frame. | 372 | * Certain special system calls that need to save a complete full stack frame. |
@@ -375,6 +378,7 @@ int_restore_rest: | |||
375 | leaq \func(%rip),%rax | 378 | leaq \func(%rip),%rax |
376 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 379 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ |
377 | jmp ptregscall_common | 380 | jmp ptregscall_common |
381 | END(\label) | ||
378 | .endm | 382 | .endm |
379 | 383 | ||
380 | CFI_STARTPROC | 384 | CFI_STARTPROC |
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common) | |||
404 | CFI_REL_OFFSET rip, 0 | 408 | CFI_REL_OFFSET rip, 0 |
405 | ret | 409 | ret |
406 | CFI_ENDPROC | 410 | CFI_ENDPROC |
411 | END(ptregscall_common) | ||
407 | 412 | ||
408 | ENTRY(stub_execve) | 413 | ENTRY(stub_execve) |
409 | CFI_STARTPROC | 414 | CFI_STARTPROC |
@@ -418,6 +423,7 @@ ENTRY(stub_execve) | |||
418 | RESTORE_REST | 423 | RESTORE_REST |
419 | jmp int_ret_from_sys_call | 424 | jmp int_ret_from_sys_call |
420 | CFI_ENDPROC | 425 | CFI_ENDPROC |
426 | END(stub_execve) | ||
421 | 427 | ||
422 | /* | 428 | /* |
423 | * sigreturn is special because it needs to restore all registers on return. | 429 | * sigreturn is special because it needs to restore all registers on return. |
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn) | |||
435 | RESTORE_REST | 441 | RESTORE_REST |
436 | jmp int_ret_from_sys_call | 442 | jmp int_ret_from_sys_call |
437 | CFI_ENDPROC | 443 | CFI_ENDPROC |
444 | END(stub_rt_sigreturn) | ||
438 | 445 | ||
439 | /* | 446 | /* |
440 | * initial frame state for interrupts and exceptions | 447 | * initial frame state for interrupts and exceptions |
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn) | |||
466 | /* 0(%rsp): interrupt number */ | 473 | /* 0(%rsp): interrupt number */ |
467 | .macro interrupt func | 474 | .macro interrupt func |
468 | cld | 475 | cld |
469 | #ifdef CONFIG_DEBUG_INFO | ||
470 | SAVE_ALL | ||
471 | movq %rsp,%rdi | ||
472 | /* | ||
473 | * Setup a stack frame pointer. This allows gdb to trace | ||
474 | * back to the original stack. | ||
475 | */ | ||
476 | movq %rsp,%rbp | ||
477 | CFI_DEF_CFA_REGISTER rbp | ||
478 | #else | ||
479 | SAVE_ARGS | 476 | SAVE_ARGS |
480 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 477 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler |
481 | #endif | 478 | pushq %rbp |
479 | CFI_ADJUST_CFA_OFFSET 8 | ||
480 | CFI_REL_OFFSET rbp, 0 | ||
481 | movq %rsp,%rbp | ||
482 | CFI_DEF_CFA_REGISTER rbp | ||
482 | testl $3,CS(%rdi) | 483 | testl $3,CS(%rdi) |
483 | je 1f | 484 | je 1f |
484 | swapgs | 485 | swapgs |
485 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count | 486 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count |
486 | movq %gs:pda_irqstackptr,%rax | 487 | cmoveq %gs:pda_irqstackptr,%rsp |
487 | cmoveq %rax,%rsp /*todo This needs CFI annotation! */ | ||
488 | pushq %rdi # save old stack | ||
489 | #ifndef CONFIG_DEBUG_INFO | ||
490 | CFI_ADJUST_CFA_OFFSET 8 | ||
491 | #endif | ||
492 | call \func | 488 | call \func |
493 | .endm | 489 | .endm |
494 | 490 | ||
@@ -497,17 +493,11 @@ ENTRY(common_interrupt) | |||
497 | interrupt do_IRQ | 493 | interrupt do_IRQ |
498 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 494 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
499 | ret_from_intr: | 495 | ret_from_intr: |
500 | popq %rdi | ||
501 | #ifndef CONFIG_DEBUG_INFO | ||
502 | CFI_ADJUST_CFA_OFFSET -8 | ||
503 | #endif | ||
504 | cli | 496 | cli |
505 | decl %gs:pda_irqcount | 497 | decl %gs:pda_irqcount |
506 | #ifdef CONFIG_DEBUG_INFO | 498 | leaveq |
507 | movq RBP(%rdi),%rbp | ||
508 | CFI_DEF_CFA_REGISTER rsp | 499 | CFI_DEF_CFA_REGISTER rsp |
509 | #endif | 500 | CFI_ADJUST_CFA_OFFSET -8 |
510 | leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */ | ||
511 | exit_intr: | 501 | exit_intr: |
512 | GET_THREAD_INFO(%rcx) | 502 | GET_THREAD_INFO(%rcx) |
513 | testl $3,CS-ARGOFFSET(%rsp) | 503 | testl $3,CS-ARGOFFSET(%rsp) |
@@ -589,7 +579,9 @@ retint_kernel: | |||
589 | call preempt_schedule_irq | 579 | call preempt_schedule_irq |
590 | jmp exit_intr | 580 | jmp exit_intr |
591 | #endif | 581 | #endif |
582 | |||
592 | CFI_ENDPROC | 583 | CFI_ENDPROC |
584 | END(common_interrupt) | ||
593 | 585 | ||
594 | /* | 586 | /* |
595 | * APIC interrupts. | 587 | * APIC interrupts. |
@@ -605,17 +597,21 @@ retint_kernel: | |||
605 | 597 | ||
606 | ENTRY(thermal_interrupt) | 598 | ENTRY(thermal_interrupt) |
607 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | 599 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt |
600 | END(thermal_interrupt) | ||
608 | 601 | ||
609 | ENTRY(threshold_interrupt) | 602 | ENTRY(threshold_interrupt) |
610 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | 603 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt |
604 | END(threshold_interrupt) | ||
611 | 605 | ||
612 | #ifdef CONFIG_SMP | 606 | #ifdef CONFIG_SMP |
613 | ENTRY(reschedule_interrupt) | 607 | ENTRY(reschedule_interrupt) |
614 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | 608 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt |
609 | END(reschedule_interrupt) | ||
615 | 610 | ||
616 | .macro INVALIDATE_ENTRY num | 611 | .macro INVALIDATE_ENTRY num |
617 | ENTRY(invalidate_interrupt\num) | 612 | ENTRY(invalidate_interrupt\num) |
618 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | 613 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt |
614 | END(invalidate_interrupt\num) | ||
619 | .endm | 615 | .endm |
620 | 616 | ||
621 | INVALIDATE_ENTRY 0 | 617 | INVALIDATE_ENTRY 0 |
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num) | |||
629 | 625 | ||
630 | ENTRY(call_function_interrupt) | 626 | ENTRY(call_function_interrupt) |
631 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | 627 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt |
628 | END(call_function_interrupt) | ||
632 | #endif | 629 | #endif |
633 | 630 | ||
634 | #ifdef CONFIG_X86_LOCAL_APIC | 631 | #ifdef CONFIG_X86_LOCAL_APIC |
635 | ENTRY(apic_timer_interrupt) | 632 | ENTRY(apic_timer_interrupt) |
636 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 633 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt |
634 | END(apic_timer_interrupt) | ||
637 | 635 | ||
638 | ENTRY(error_interrupt) | 636 | ENTRY(error_interrupt) |
639 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 637 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt |
638 | END(error_interrupt) | ||
640 | 639 | ||
641 | ENTRY(spurious_interrupt) | 640 | ENTRY(spurious_interrupt) |
642 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | 641 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt |
642 | END(spurious_interrupt) | ||
643 | #endif | 643 | #endif |
644 | 644 | ||
645 | /* | 645 | /* |
@@ -777,6 +777,7 @@ error_kernelspace: | |||
777 | cmpq $gs_change,RIP(%rsp) | 777 | cmpq $gs_change,RIP(%rsp) |
778 | je error_swapgs | 778 | je error_swapgs |
779 | jmp error_sti | 779 | jmp error_sti |
780 | END(error_entry) | ||
780 | 781 | ||
781 | /* Reload gs selector with exception handling */ | 782 | /* Reload gs selector with exception handling */ |
782 | /* edi: new selector */ | 783 | /* edi: new selector */ |
@@ -794,6 +795,7 @@ gs_change: | |||
794 | CFI_ADJUST_CFA_OFFSET -8 | 795 | CFI_ADJUST_CFA_OFFSET -8 |
795 | ret | 796 | ret |
796 | CFI_ENDPROC | 797 | CFI_ENDPROC |
798 | ENDPROC(load_gs_index) | ||
797 | 799 | ||
798 | .section __ex_table,"a" | 800 | .section __ex_table,"a" |
799 | .align 8 | 801 | .align 8 |
@@ -847,7 +849,7 @@ ENTRY(kernel_thread) | |||
847 | UNFAKE_STACK_FRAME | 849 | UNFAKE_STACK_FRAME |
848 | ret | 850 | ret |
849 | CFI_ENDPROC | 851 | CFI_ENDPROC |
850 | 852 | ENDPROC(kernel_thread) | |
851 | 853 | ||
852 | child_rip: | 854 | child_rip: |
853 | /* | 855 | /* |
@@ -860,6 +862,7 @@ child_rip: | |||
860 | # exit | 862 | # exit |
861 | xorl %edi, %edi | 863 | xorl %edi, %edi |
862 | call do_exit | 864 | call do_exit |
865 | ENDPROC(child_rip) | ||
863 | 866 | ||
864 | /* | 867 | /* |
865 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 868 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -889,19 +892,24 @@ ENTRY(execve) | |||
889 | UNFAKE_STACK_FRAME | 892 | UNFAKE_STACK_FRAME |
890 | ret | 893 | ret |
891 | CFI_ENDPROC | 894 | CFI_ENDPROC |
895 | ENDPROC(execve) | ||
892 | 896 | ||
893 | KPROBE_ENTRY(page_fault) | 897 | KPROBE_ENTRY(page_fault) |
894 | errorentry do_page_fault | 898 | errorentry do_page_fault |
899 | END(page_fault) | ||
895 | .previous .text | 900 | .previous .text |
896 | 901 | ||
897 | ENTRY(coprocessor_error) | 902 | ENTRY(coprocessor_error) |
898 | zeroentry do_coprocessor_error | 903 | zeroentry do_coprocessor_error |
904 | END(coprocessor_error) | ||
899 | 905 | ||
900 | ENTRY(simd_coprocessor_error) | 906 | ENTRY(simd_coprocessor_error) |
901 | zeroentry do_simd_coprocessor_error | 907 | zeroentry do_simd_coprocessor_error |
908 | END(simd_coprocessor_error) | ||
902 | 909 | ||
903 | ENTRY(device_not_available) | 910 | ENTRY(device_not_available) |
904 | zeroentry math_state_restore | 911 | zeroentry math_state_restore |
912 | END(device_not_available) | ||
905 | 913 | ||
906 | /* runs on exception stack */ | 914 | /* runs on exception stack */ |
907 | KPROBE_ENTRY(debug) | 915 | KPROBE_ENTRY(debug) |
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug) | |||
911 | paranoidentry do_debug, DEBUG_STACK | 919 | paranoidentry do_debug, DEBUG_STACK |
912 | jmp paranoid_exit | 920 | jmp paranoid_exit |
913 | CFI_ENDPROC | 921 | CFI_ENDPROC |
922 | END(debug) | ||
914 | .previous .text | 923 | .previous .text |
915 | 924 | ||
916 | /* runs on exception stack */ | 925 | /* runs on exception stack */ |
@@ -961,6 +970,7 @@ paranoid_schedule: | |||
961 | cli | 970 | cli |
962 | jmp paranoid_userspace | 971 | jmp paranoid_userspace |
963 | CFI_ENDPROC | 972 | CFI_ENDPROC |
973 | END(nmi) | ||
964 | .previous .text | 974 | .previous .text |
965 | 975 | ||
966 | KPROBE_ENTRY(int3) | 976 | KPROBE_ENTRY(int3) |
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3) | |||
970 | paranoidentry do_int3, DEBUG_STACK | 980 | paranoidentry do_int3, DEBUG_STACK |
971 | jmp paranoid_exit | 981 | jmp paranoid_exit |
972 | CFI_ENDPROC | 982 | CFI_ENDPROC |
983 | END(int3) | ||
973 | .previous .text | 984 | .previous .text |
974 | 985 | ||
975 | ENTRY(overflow) | 986 | ENTRY(overflow) |
976 | zeroentry do_overflow | 987 | zeroentry do_overflow |
988 | END(overflow) | ||
977 | 989 | ||
978 | ENTRY(bounds) | 990 | ENTRY(bounds) |
979 | zeroentry do_bounds | 991 | zeroentry do_bounds |
992 | END(bounds) | ||
980 | 993 | ||
981 | ENTRY(invalid_op) | 994 | ENTRY(invalid_op) |
982 | zeroentry do_invalid_op | 995 | zeroentry do_invalid_op |
996 | END(invalid_op) | ||
983 | 997 | ||
984 | ENTRY(coprocessor_segment_overrun) | 998 | ENTRY(coprocessor_segment_overrun) |
985 | zeroentry do_coprocessor_segment_overrun | 999 | zeroentry do_coprocessor_segment_overrun |
1000 | END(coprocessor_segment_overrun) | ||
986 | 1001 | ||
987 | ENTRY(reserved) | 1002 | ENTRY(reserved) |
988 | zeroentry do_reserved | 1003 | zeroentry do_reserved |
1004 | END(reserved) | ||
989 | 1005 | ||
990 | /* runs on exception stack */ | 1006 | /* runs on exception stack */ |
991 | ENTRY(double_fault) | 1007 | ENTRY(double_fault) |
@@ -993,12 +1009,15 @@ ENTRY(double_fault) | |||
993 | paranoidentry do_double_fault | 1009 | paranoidentry do_double_fault |
994 | jmp paranoid_exit | 1010 | jmp paranoid_exit |
995 | CFI_ENDPROC | 1011 | CFI_ENDPROC |
1012 | END(double_fault) | ||
996 | 1013 | ||
997 | ENTRY(invalid_TSS) | 1014 | ENTRY(invalid_TSS) |
998 | errorentry do_invalid_TSS | 1015 | errorentry do_invalid_TSS |
1016 | END(invalid_TSS) | ||
999 | 1017 | ||
1000 | ENTRY(segment_not_present) | 1018 | ENTRY(segment_not_present) |
1001 | errorentry do_segment_not_present | 1019 | errorentry do_segment_not_present |
1020 | END(segment_not_present) | ||
1002 | 1021 | ||
1003 | /* runs on exception stack */ | 1022 | /* runs on exception stack */ |
1004 | ENTRY(stack_segment) | 1023 | ENTRY(stack_segment) |
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment) | |||
1006 | paranoidentry do_stack_segment | 1025 | paranoidentry do_stack_segment |
1007 | jmp paranoid_exit | 1026 | jmp paranoid_exit |
1008 | CFI_ENDPROC | 1027 | CFI_ENDPROC |
1028 | END(stack_segment) | ||
1009 | 1029 | ||
1010 | KPROBE_ENTRY(general_protection) | 1030 | KPROBE_ENTRY(general_protection) |
1011 | errorentry do_general_protection | 1031 | errorentry do_general_protection |
1032 | END(general_protection) | ||
1012 | .previous .text | 1033 | .previous .text |
1013 | 1034 | ||
1014 | ENTRY(alignment_check) | 1035 | ENTRY(alignment_check) |
1015 | errorentry do_alignment_check | 1036 | errorentry do_alignment_check |
1037 | END(alignment_check) | ||
1016 | 1038 | ||
1017 | ENTRY(divide_error) | 1039 | ENTRY(divide_error) |
1018 | zeroentry do_divide_error | 1040 | zeroentry do_divide_error |
1041 | END(divide_error) | ||
1019 | 1042 | ||
1020 | ENTRY(spurious_interrupt_bug) | 1043 | ENTRY(spurious_interrupt_bug) |
1021 | zeroentry do_spurious_interrupt_bug | 1044 | zeroentry do_spurious_interrupt_bug |
1045 | END(spurious_interrupt_bug) | ||
1022 | 1046 | ||
1023 | #ifdef CONFIG_X86_MCE | 1047 | #ifdef CONFIG_X86_MCE |
1024 | /* runs on exception stack */ | 1048 | /* runs on exception stack */ |
@@ -1029,6 +1053,7 @@ ENTRY(machine_check) | |||
1029 | paranoidentry do_machine_check | 1053 | paranoidentry do_machine_check |
1030 | jmp paranoid_exit | 1054 | jmp paranoid_exit |
1031 | CFI_ENDPROC | 1055 | CFI_ENDPROC |
1056 | END(machine_check) | ||
1032 | #endif | 1057 | #endif |
1033 | 1058 | ||
1034 | ENTRY(call_softirq) | 1059 | ENTRY(call_softirq) |
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq) | |||
1046 | decl %gs:pda_irqcount | 1071 | decl %gs:pda_irqcount |
1047 | ret | 1072 | ret |
1048 | CFI_ENDPROC | 1073 | CFI_ENDPROC |
1074 | ENDPROC(call_softirq) | ||
1075 | |||
1076 | #ifdef CONFIG_STACK_UNWIND | ||
1077 | ENTRY(arch_unwind_init_running) | ||
1078 | CFI_STARTPROC | ||
1079 | movq %r15, R15(%rdi) | ||
1080 | movq %r14, R14(%rdi) | ||
1081 | xchgq %rsi, %rdx | ||
1082 | movq %r13, R13(%rdi) | ||
1083 | movq %r12, R12(%rdi) | ||
1084 | xorl %eax, %eax | ||
1085 | movq %rbp, RBP(%rdi) | ||
1086 | movq %rbx, RBX(%rdi) | ||
1087 | movq (%rsp), %rcx | ||
1088 | movq %rax, R11(%rdi) | ||
1089 | movq %rax, R10(%rdi) | ||
1090 | movq %rax, R9(%rdi) | ||
1091 | movq %rax, R8(%rdi) | ||
1092 | movq %rax, RAX(%rdi) | ||
1093 | movq %rax, RCX(%rdi) | ||
1094 | movq %rax, RDX(%rdi) | ||
1095 | movq %rax, RSI(%rdi) | ||
1096 | movq %rax, RDI(%rdi) | ||
1097 | movq %rax, ORIG_RAX(%rdi) | ||
1098 | movq %rcx, RIP(%rdi) | ||
1099 | leaq 8(%rsp), %rcx | ||
1100 | movq $__KERNEL_CS, CS(%rdi) | ||
1101 | movq %rax, EFLAGS(%rdi) | ||
1102 | movq %rcx, RSP(%rdi) | ||
1103 | movq $__KERNEL_DS, SS(%rdi) | ||
1104 | jmpq *%rdx | ||
1105 | CFI_ENDPROC | ||
1106 | ENDPROC(arch_unwind_init_running) | ||
1107 | #endif | ||
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 1a2ab825be98..21c7066e236a 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c | |||
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
78 | 78 | ||
79 | static void flat_send_IPI_allbutself(int vector) | 79 | static void flat_send_IPI_allbutself(int vector) |
80 | { | 80 | { |
81 | #ifndef CONFIG_HOTPLUG_CPU | 81 | #ifdef CONFIG_HOTPLUG_CPU |
82 | if (((num_online_cpus()) - 1) >= 1) | 82 | int hotplug = 1; |
83 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | ||
84 | #else | 83 | #else |
85 | cpumask_t allbutme = cpu_online_map; | 84 | int hotplug = 0; |
85 | #endif | ||
86 | if (hotplug || vector == NMI_VECTOR) { | ||
87 | cpumask_t allbutme = cpu_online_map; | ||
86 | 88 | ||
87 | cpu_clear(smp_processor_id(), allbutme); | 89 | cpu_clear(smp_processor_id(), allbutme); |
88 | 90 | ||
89 | if (!cpus_empty(allbutme)) | 91 | if (!cpus_empty(allbutme)) |
90 | flat_send_IPI_mask(allbutme, vector); | 92 | flat_send_IPI_mask(allbutme, vector); |
91 | #endif | 93 | } else if (num_online_cpus() > 1) { |
94 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | ||
95 | } | ||
92 | } | 96 | } |
93 | 97 | ||
94 | static void flat_send_IPI_all(int vector) | 98 | static void flat_send_IPI_all(int vector) |
95 | { | 99 | { |
96 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 100 | if (vector == NMI_VECTOR) |
101 | flat_send_IPI_mask(cpu_online_map, vector); | ||
102 | else | ||
103 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | ||
97 | } | 104 | } |
98 | 105 | ||
99 | static int flat_apic_id_registered(void) | 106 | static int flat_apic_id_registered(void) |
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
108 | 115 | ||
109 | static unsigned int phys_pkg_id(int index_msb) | 116 | static unsigned int phys_pkg_id(int index_msb) |
110 | { | 117 | { |
111 | u32 ebx; | 118 | return hard_smp_processor_id() >> index_msb; |
112 | |||
113 | ebx = cpuid_ebx(1); | ||
114 | return ((ebx >> 24) & 0xFF) >> index_msb; | ||
115 | } | 119 | } |
116 | 120 | ||
117 | struct genapic apic_flat = { | 121 | struct genapic apic_flat = { |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index cea20a66c150..e6a71c9556d9 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
@@ -2,8 +2,6 @@ | |||
2 | * linux/arch/x86_64/kernel/head64.c -- prepare to run common code | 2 | * linux/arch/x86_64/kernel/head64.c -- prepare to run common code |
3 | * | 3 | * |
4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * | ||
6 | * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $ | ||
7 | */ | 5 | */ |
8 | 6 | ||
9 | #include <linux/init.h> | 7 | #include <linux/init.h> |
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 5ecd34ab8c2b..9b1a4e147321 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c | |||
@@ -44,11 +44,11 @@ | |||
44 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | 44 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ |
45 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | 45 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) |
46 | 46 | ||
47 | #define BUILD_14_IRQS(x) \ | 47 | #define BUILD_15_IRQS(x) \ |
48 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | 48 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ |
49 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | 49 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ |
50 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | 50 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ |
51 | BI(x,c) BI(x,d) | 51 | BI(x,c) BI(x,d) BI(x,e) |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 54 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | |||
73 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) | 73 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) |
74 | 74 | ||
75 | #ifdef CONFIG_PCI_MSI | 75 | #ifdef CONFIG_PCI_MSI |
76 | BUILD_14_IRQS(0xe) | 76 | BUILD_15_IRQS(0xe) |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | #endif | 79 | #endif |
80 | 80 | ||
81 | #undef BUILD_16_IRQS | 81 | #undef BUILD_16_IRQS |
82 | #undef BUILD_14_IRQS | 82 | #undef BUILD_15_IRQS |
83 | #undef BI | 83 | #undef BI |
84 | 84 | ||
85 | 85 | ||
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) | |||
92 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | 92 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ |
93 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | 93 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) |
94 | 94 | ||
95 | #define IRQLIST_14(x) \ | 95 | #define IRQLIST_15(x) \ |
96 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | 96 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ |
97 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | 97 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ |
98 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | 98 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ |
99 | IRQ(x,c), IRQ(x,d) | 99 | IRQ(x,c), IRQ(x,d), IRQ(x,e) |
100 | 100 | ||
101 | void (*interrupt[NR_IRQS])(void) = { | 101 | void (*interrupt[NR_IRQS])(void) = { |
102 | IRQLIST_16(0x0), | 102 | IRQLIST_16(0x0), |
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = { | |||
108 | IRQLIST_16(0xc), IRQLIST_16(0xd) | 108 | IRQLIST_16(0xc), IRQLIST_16(0xd) |
109 | 109 | ||
110 | #ifdef CONFIG_PCI_MSI | 110 | #ifdef CONFIG_PCI_MSI |
111 | , IRQLIST_14(0xe) | 111 | , IRQLIST_15(0xe) |
112 | #endif | 112 | #endif |
113 | 113 | ||
114 | #endif | 114 | #endif |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 9cc7031b7151..c768d8a036d0 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/mach_apic.h> | 41 | #include <asm/mach_apic.h> |
42 | #include <asm/acpi.h> | 42 | #include <asm/acpi.h> |
43 | #include <asm/dma.h> | 43 | #include <asm/dma.h> |
44 | #include <asm/nmi.h> | ||
44 | 45 | ||
45 | #define __apicdebuginit __init | 46 | #define __apicdebuginit __init |
46 | 47 | ||
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0; | |||
56 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
57 | 58 | ||
58 | static DEFINE_SPINLOCK(ioapic_lock); | 59 | static DEFINE_SPINLOCK(ioapic_lock); |
60 | static DEFINE_SPINLOCK(vector_lock); | ||
59 | 61 | ||
60 | /* | 62 | /* |
61 | * # of IRQ routing registers | 63 | * # of IRQ routing registers |
@@ -317,7 +319,7 @@ void __init check_ioapic(void) | |||
317 | vendor &= 0xffff; | 319 | vendor &= 0xffff; |
318 | switch (vendor) { | 320 | switch (vendor) { |
319 | case PCI_VENDOR_ID_VIA: | 321 | case PCI_VENDOR_ID_VIA: |
320 | #ifdef CONFIG_GART_IOMMU | 322 | #ifdef CONFIG_IOMMU |
321 | if ((end_pfn > MAX_DMA32_PFN || | 323 | if ((end_pfn > MAX_DMA32_PFN || |
322 | force_iommu) && | 324 | force_iommu) && |
323 | !iommu_aperture_allowed) { | 325 | !iommu_aperture_allowed) { |
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; | |||
834 | int assign_irq_vector(int irq) | 836 | int assign_irq_vector(int irq) |
835 | { | 837 | { |
836 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; | 838 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; |
839 | unsigned long flags; | ||
840 | int vector; | ||
837 | 841 | ||
838 | BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); | 842 | BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); |
839 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) | 843 | |
844 | spin_lock_irqsave(&vector_lock, flags); | ||
845 | |||
846 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { | ||
847 | spin_unlock_irqrestore(&vector_lock, flags); | ||
840 | return IO_APIC_VECTOR(irq); | 848 | return IO_APIC_VECTOR(irq); |
849 | } | ||
841 | next: | 850 | next: |
842 | current_vector += 8; | 851 | current_vector += 8; |
843 | if (current_vector == IA32_SYSCALL_VECTOR) | 852 | if (current_vector == IA32_SYSCALL_VECTOR) |
@@ -849,11 +858,14 @@ next: | |||
849 | current_vector = FIRST_DEVICE_VECTOR + offset; | 858 | current_vector = FIRST_DEVICE_VECTOR + offset; |
850 | } | 859 | } |
851 | 860 | ||
852 | vector_irq[current_vector] = irq; | 861 | vector = current_vector; |
862 | vector_irq[vector] = irq; | ||
853 | if (irq != AUTO_ASSIGN) | 863 | if (irq != AUTO_ASSIGN) |
854 | IO_APIC_VECTOR(irq) = current_vector; | 864 | IO_APIC_VECTOR(irq) = vector; |
865 | |||
866 | spin_unlock_irqrestore(&vector_lock, flags); | ||
855 | 867 | ||
856 | return current_vector; | 868 | return vector; |
857 | } | 869 | } |
858 | 870 | ||
859 | extern void (*interrupt[NR_IRQS])(void); | 871 | extern void (*interrupt[NR_IRQS])(void); |
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type; | |||
866 | 878 | ||
867 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) | 879 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
868 | { | 880 | { |
869 | if (use_pci_vector() && !platform_legacy_irq(irq)) { | 881 | unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; |
870 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 882 | |
871 | trigger == IOAPIC_LEVEL) | 883 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
872 | irq_desc[vector].handler = &ioapic_level_type; | 884 | trigger == IOAPIC_LEVEL) |
873 | else | 885 | irq_desc[idx].handler = &ioapic_level_type; |
874 | irq_desc[vector].handler = &ioapic_edge_type; | 886 | else |
875 | set_intr_gate(vector, interrupt[vector]); | 887 | irq_desc[idx].handler = &ioapic_edge_type; |
876 | } else { | 888 | set_intr_gate(vector, interrupt[idx]); |
877 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
878 | trigger == IOAPIC_LEVEL) | ||
879 | irq_desc[irq].handler = &ioapic_level_type; | ||
880 | else | ||
881 | irq_desc[irq].handler = &ioapic_edge_type; | ||
882 | set_intr_gate(vector, interrupt[irq]); | ||
883 | } | ||
884 | } | 889 | } |
885 | 890 | ||
886 | static void __init setup_IO_APIC_irqs(void) | 891 | static void __init setup_IO_APIC_irqs(void) |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index d8bd0b345b1e..59518d4d4358 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -26,6 +26,30 @@ atomic_t irq_mis_count; | |||
26 | #endif | 26 | #endif |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
30 | /* | ||
31 | * Probabilistic stack overflow check: | ||
32 | * | ||
33 | * Only check the stack in process context, because everything else | ||
34 | * runs on the big interrupt stacks. Checking reliably is too expensive, | ||
35 | * so we just check from interrupts. | ||
36 | */ | ||
37 | static inline void stack_overflow_check(struct pt_regs *regs) | ||
38 | { | ||
39 | u64 curbase = (u64) current->thread_info; | ||
40 | static unsigned long warned = -60*HZ; | ||
41 | |||
42 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && | ||
43 | regs->rsp < curbase + sizeof(struct thread_info) + 128 && | ||
44 | time_after(jiffies, warned + 60*HZ)) { | ||
45 | printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n", | ||
46 | current->comm, curbase, regs->rsp); | ||
47 | show_stack(NULL,NULL); | ||
48 | warned = jiffies; | ||
49 | } | ||
50 | } | ||
51 | #endif | ||
52 | |||
29 | /* | 53 | /* |
30 | * Generic, controller-independent functions: | 54 | * Generic, controller-independent functions: |
31 | */ | 55 | */ |
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
39 | if (i == 0) { | 63 | if (i == 0) { |
40 | seq_printf(p, " "); | 64 | seq_printf(p, " "); |
41 | for_each_online_cpu(j) | 65 | for_each_online_cpu(j) |
42 | seq_printf(p, "CPU%d ",j); | 66 | seq_printf(p, "CPU%-8d",j); |
43 | seq_putc(p, '\n'); | 67 | seq_putc(p, '\n'); |
44 | } | 68 | } |
45 | 69 | ||
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
96 | 120 | ||
97 | exit_idle(); | 121 | exit_idle(); |
98 | irq_enter(); | 122 | irq_enter(); |
99 | 123 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | |
124 | stack_overflow_check(regs); | ||
125 | #endif | ||
100 | __do_IRQ(irq, regs); | 126 | __do_IRQ(irq, regs); |
101 | irq_exit(); | 127 | irq_exit(); |
102 | 128 | ||
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c new file mode 100644 index 000000000000..6416682d33d0 --- /dev/null +++ b/arch/x86_64/kernel/k8.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * Shared support code for AMD K8 northbridges and derivatives. | ||
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | ||
4 | */ | ||
5 | #include <linux/gfp.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/k8.h> | ||
12 | |||
/* Number of northbridges counted by cache_k8_northbridges(). */
int num_k8_northbridges;
EXPORT_SYMBOL(num_k8_northbridges);

/* Config dword 0x9c values read from the northbridges by cache_k8_northbridges(). */
static u32 *flush_words;

/*
 * PCI vendor/device pairs treated as K8 northbridges.
 * The empty entry terminates the table (early_is_k8_nb() stops on vendor == 0).
 */
struct pci_device_id k8_nb_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
	{}
};
EXPORT_SYMBOL(k8_nb_ids);

/* NULL-terminated array of matching devices, allocated by cache_k8_northbridges(). */
struct pci_dev **k8_northbridges;
EXPORT_SYMBOL(k8_northbridges);
27 | |||
28 | static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) | ||
29 | { | ||
30 | do { | ||
31 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | ||
32 | if (!dev) | ||
33 | break; | ||
34 | } while (!pci_match_id(&k8_nb_ids[0], dev)); | ||
35 | return dev; | ||
36 | } | ||
37 | |||
38 | int cache_k8_northbridges(void) | ||
39 | { | ||
40 | int i; | ||
41 | struct pci_dev *dev; | ||
42 | if (num_k8_northbridges) | ||
43 | return 0; | ||
44 | |||
45 | num_k8_northbridges = 0; | ||
46 | dev = NULL; | ||
47 | while ((dev = next_k8_northbridge(dev)) != NULL) | ||
48 | num_k8_northbridges++; | ||
49 | |||
50 | k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), | ||
51 | GFP_KERNEL); | ||
52 | if (!k8_northbridges) | ||
53 | return -ENOMEM; | ||
54 | |||
55 | flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); | ||
56 | if (!flush_words) { | ||
57 | kfree(k8_northbridges); | ||
58 | return -ENOMEM; | ||
59 | } | ||
60 | |||
61 | dev = NULL; | ||
62 | i = 0; | ||
63 | while ((dev = next_k8_northbridge(dev)) != NULL) { | ||
64 | k8_northbridges[i++] = dev; | ||
65 | pci_read_config_dword(dev, 0x9c, &flush_words[i]); | ||
66 | } | ||
67 | k8_northbridges[i] = NULL; | ||
68 | return 0; | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(cache_k8_northbridges); | ||
71 | |||
72 | /* Ignores subdevice/subvendor but as far as I can figure out | ||
73 | they're useless anyways */ | ||
74 | int __init early_is_k8_nb(u32 device) | ||
75 | { | ||
76 | struct pci_device_id *id; | ||
77 | u32 vendor = device & 0xffff; | ||
78 | device >>= 16; | ||
79 | for (id = k8_nb_ids; id->vendor; id++) | ||
80 | if (vendor == id->vendor && device == id->device) | ||
81 | return 1; | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | void k8_flush_garts(void) | ||
86 | { | ||
87 | int flushed, i; | ||
88 | unsigned long flags; | ||
89 | static DEFINE_SPINLOCK(gart_lock); | ||
90 | |||
91 | /* Avoid races between AGP and IOMMU. In theory it's not needed | ||
92 | but I'm not sure if the hardware won't lose flush requests | ||
93 | when another is pending. This whole thing is so expensive anyways | ||
94 | that it doesn't matter to serialize more. -AK */ | ||
95 | spin_lock_irqsave(&gart_lock, flags); | ||
96 | flushed = 0; | ||
97 | for (i = 0; i < num_k8_northbridges; i++) { | ||
98 | pci_write_config_dword(k8_northbridges[i], 0x9c, | ||
99 | flush_words[i]|1); | ||
100 | flushed++; | ||
101 | } | ||
102 | for (i = 0; i < num_k8_northbridges; i++) { | ||
103 | u32 w; | ||
104 | /* Make sure the hardware actually executed the flush*/ | ||
105 | for (;;) { | ||
106 | pci_read_config_dword(k8_northbridges[i], | ||
107 | 0x9c, &w); | ||
108 | if (!(w & 1)) | ||
109 | break; | ||
110 | cpu_relax(); | ||
111 | } | ||
112 | } | ||
113 | spin_unlock_irqrestore(&gart_lock, flags); | ||
114 | if (!flushed) | ||
115 | printk("nothing to flush?\n"); | ||
116 | } | ||
117 | EXPORT_SYMBOL_GPL(k8_flush_garts); | ||
118 | |||
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index c69fc43cee7b..acd5816b1a6f 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = { | |||
562 | set_kset_name("machinecheck"), | 562 | set_kset_name("machinecheck"), |
563 | }; | 563 | }; |
564 | 564 | ||
565 | static DEFINE_PER_CPU(struct sys_device, device_mce); | 565 | DEFINE_PER_CPU(struct sys_device, device_mce); |
566 | 566 | ||
567 | /* Why are there no generic functions for this? */ | 567 | /* Why are there no generic functions for this? */ |
568 | #define ACCESSOR(name, var, start) \ | 568 | #define ACCESSOR(name, var, start) \ |
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d13b241ad094..335200aa2737 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * (c) 2005 Advanced Micro Devices, Inc. | 2 | * (c) 2005, 2006 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
4 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
@@ -8,9 +8,10 @@ | |||
8 | * | 8 | * |
9 | * Support : jacob.shin@amd.com | 9 | * Support : jacob.shin@amd.com |
10 | * | 10 | * |
11 | * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. | 11 | * April 2006 |
12 | * MC4_MISC0 exists per physical processor. | 12 | * - added support for AMD Family 0x10 processors |
13 | * | 13 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | ||
14 | */ | 15 | */ |
15 | 16 | ||
16 | #include <linux/cpu.h> | 17 | #include <linux/cpu.h> |
@@ -29,32 +30,45 @@ | |||
29 | #include <asm/percpu.h> | 30 | #include <asm/percpu.h> |
30 | #include <asm/idle.h> | 31 | #include <asm/idle.h> |
31 | 32 | ||
32 | #define PFX "mce_threshold: " | 33 | #define PFX "mce_threshold: " |
33 | #define VERSION "version 1.00.9" | 34 | #define VERSION "version 1.1.1" |
34 | #define NR_BANKS 5 | 35 | #define NR_BANKS 6 |
35 | #define THRESHOLD_MAX 0xFFF | 36 | #define NR_BLOCKS 9 |
36 | #define INT_TYPE_APIC 0x00020000 | 37 | #define THRESHOLD_MAX 0xFFF |
37 | #define MASK_VALID_HI 0x80000000 | 38 | #define INT_TYPE_APIC 0x00020000 |
38 | #define MASK_LVTOFF_HI 0x00F00000 | 39 | #define MASK_VALID_HI 0x80000000 |
39 | #define MASK_COUNT_EN_HI 0x00080000 | 40 | #define MASK_LVTOFF_HI 0x00F00000 |
40 | #define MASK_INT_TYPE_HI 0x00060000 | 41 | #define MASK_COUNT_EN_HI 0x00080000 |
41 | #define MASK_OVERFLOW_HI 0x00010000 | 42 | #define MASK_INT_TYPE_HI 0x00060000 |
43 | #define MASK_OVERFLOW_HI 0x00010000 | ||
42 | #define MASK_ERR_COUNT_HI 0x00000FFF | 44 | #define MASK_ERR_COUNT_HI 0x00000FFF |
43 | #define MASK_OVERFLOW 0x0001000000000000L | 45 | #define MASK_BLKPTR_LO 0xFF000000 |
46 | #define MCG_XBLK_ADDR 0xC0000400 | ||
44 | 47 | ||
45 | struct threshold_bank { | 48 | struct threshold_block { |
49 | unsigned int block; | ||
50 | unsigned int bank; | ||
46 | unsigned int cpu; | 51 | unsigned int cpu; |
47 | u8 bank; | 52 | u32 address; |
48 | u8 interrupt_enable; | 53 | u16 interrupt_enable; |
49 | u16 threshold_limit; | 54 | u16 threshold_limit; |
50 | struct kobject kobj; | 55 | struct kobject kobj; |
56 | struct list_head miscj; | ||
51 | }; | 57 | }; |
52 | 58 | ||
53 | static struct threshold_bank threshold_defaults = { | 59 | /* defaults used early on boot */ |
60 | static struct threshold_block threshold_defaults = { | ||
54 | .interrupt_enable = 0, | 61 | .interrupt_enable = 0, |
55 | .threshold_limit = THRESHOLD_MAX, | 62 | .threshold_limit = THRESHOLD_MAX, |
56 | }; | 63 | }; |
57 | 64 | ||
65 | struct threshold_bank { | ||
66 | struct kobject kobj; | ||
67 | struct threshold_block *blocks; | ||
68 | cpumask_t cpus; | ||
69 | }; | ||
70 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | ||
71 | |||
58 | #ifdef CONFIG_SMP | 72 | #ifdef CONFIG_SMP |
59 | static unsigned char shared_bank[NR_BANKS] = { | 73 | static unsigned char shared_bank[NR_BANKS] = { |
60 | 0, 0, 0, 0, 1 | 74 | 0, 0, 0, 0, 1 |
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | |||
68 | */ | 82 | */ |
69 | 83 | ||
70 | /* must be called with correct cpu affinity */ | 84 | /* must be called with correct cpu affinity */ |
71 | static void threshold_restart_bank(struct threshold_bank *b, | 85 | static void threshold_restart_bank(struct threshold_block *b, |
72 | int reset, u16 old_limit) | 86 | int reset, u16 old_limit) |
73 | { | 87 | { |
74 | u32 mci_misc_hi, mci_misc_lo; | 88 | u32 mci_misc_hi, mci_misc_lo; |
75 | 89 | ||
76 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | 90 | rdmsr(b->address, mci_misc_lo, mci_misc_hi); |
77 | 91 | ||
78 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 92 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) |
79 | reset = 1; /* limit cannot be lower than err count */ | 93 | reset = 1; /* limit cannot be lower than err count */ |
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b, | |||
94 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 108 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); |
95 | 109 | ||
96 | mci_misc_hi |= MASK_COUNT_EN_HI; | 110 | mci_misc_hi |= MASK_COUNT_EN_HI; |
97 | wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | 111 | wrmsr(b->address, mci_misc_lo, mci_misc_hi); |
98 | } | 112 | } |
99 | 113 | ||
114 | /* cpu init entry point, called from mce.c with preempt off */ | ||
100 | void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | 115 | void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) |
101 | { | 116 | { |
102 | int bank; | 117 | unsigned int bank, block; |
103 | u32 mci_misc_lo, mci_misc_hi; | ||
104 | unsigned int cpu = smp_processor_id(); | 118 | unsigned int cpu = smp_processor_id(); |
119 | u32 low = 0, high = 0, address = 0; | ||
105 | 120 | ||
106 | for (bank = 0; bank < NR_BANKS; ++bank) { | 121 | for (bank = 0; bank < NR_BANKS; ++bank) { |
107 | rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); | 122 | for (block = 0; block < NR_BLOCKS; ++block) { |
123 | if (block == 0) | ||
124 | address = MSR_IA32_MC0_MISC + bank * 4; | ||
125 | else if (block == 1) | ||
126 | address = MCG_XBLK_ADDR | ||
127 | + ((low & MASK_BLKPTR_LO) >> 21); | ||
128 | else | ||
129 | ++address; | ||
130 | |||
131 | if (rdmsr_safe(address, &low, &high)) | ||
132 | continue; | ||
108 | 133 | ||
109 | /* !valid, !counter present, bios locked */ | 134 | if (!(high & MASK_VALID_HI)) { |
110 | if (!(mci_misc_hi & MASK_VALID_HI) || | 135 | if (block) |
111 | !(mci_misc_hi & MASK_VALID_HI >> 1) || | 136 | continue; |
112 | (mci_misc_hi & MASK_VALID_HI >> 2)) | 137 | else |
113 | continue; | 138 | break; |
139 | } | ||
114 | 140 | ||
115 | per_cpu(bank_map, cpu) |= (1 << bank); | 141 | if (!(high & MASK_VALID_HI >> 1) || |
142 | (high & MASK_VALID_HI >> 2)) | ||
143 | continue; | ||
116 | 144 | ||
145 | if (!block) | ||
146 | per_cpu(bank_map, cpu) |= (1 << bank); | ||
117 | #ifdef CONFIG_SMP | 147 | #ifdef CONFIG_SMP |
118 | if (shared_bank[bank] && cpu_core_id[cpu]) | 148 | if (shared_bank[bank] && c->cpu_core_id) |
119 | continue; | 149 | break; |
120 | #endif | 150 | #endif |
151 | high &= ~MASK_LVTOFF_HI; | ||
152 | high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; | ||
153 | wrmsr(address, low, high); | ||
121 | 154 | ||
122 | setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); | 155 | setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, |
123 | threshold_defaults.cpu = cpu; | 156 | THRESHOLD_APIC_VECTOR, |
124 | threshold_defaults.bank = bank; | 157 | K8_APIC_EXT_INT_MSG_FIX, 0); |
125 | threshold_restart_bank(&threshold_defaults, 0, 0); | 158 | |
159 | threshold_defaults.address = address; | ||
160 | threshold_restart_bank(&threshold_defaults, 0, 0); | ||
161 | } | ||
126 | } | 162 | } |
127 | } | 163 | } |
128 | 164 | ||
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
137 | */ | 173 | */ |
138 | asmlinkage void mce_threshold_interrupt(void) | 174 | asmlinkage void mce_threshold_interrupt(void) |
139 | { | 175 | { |
140 | int bank; | 176 | unsigned int bank, block; |
141 | struct mce m; | 177 | struct mce m; |
178 | u32 low = 0, high = 0, address = 0; | ||
142 | 179 | ||
143 | ack_APIC_irq(); | 180 | ack_APIC_irq(); |
144 | exit_idle(); | 181 | exit_idle(); |
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void) | |||
150 | 187 | ||
151 | /* assume first bank caused it */ | 188 | /* assume first bank caused it */ |
152 | for (bank = 0; bank < NR_BANKS; ++bank) { | 189 | for (bank = 0; bank < NR_BANKS; ++bank) { |
153 | m.bank = MCE_THRESHOLD_BASE + bank; | 190 | for (block = 0; block < NR_BLOCKS; ++block) { |
154 | rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); | 191 | if (block == 0) |
192 | address = MSR_IA32_MC0_MISC + bank * 4; | ||
193 | else if (block == 1) | ||
194 | address = MCG_XBLK_ADDR | ||
195 | + ((low & MASK_BLKPTR_LO) >> 21); | ||
196 | else | ||
197 | ++address; | ||
198 | |||
199 | if (rdmsr_safe(address, &low, &high)) | ||
200 | continue; | ||
155 | 201 | ||
156 | if (m.misc & MASK_OVERFLOW) { | 202 | if (!(high & MASK_VALID_HI)) { |
157 | mce_log(&m); | 203 | if (block) |
158 | goto out; | 204 | continue; |
205 | else | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | if (!(high & MASK_VALID_HI >> 1) || | ||
210 | (high & MASK_VALID_HI >> 2)) | ||
211 | continue; | ||
212 | |||
213 | if (high & MASK_OVERFLOW_HI) { | ||
214 | rdmsrl(address, m.misc); | ||
215 | rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, | ||
216 | m.status); | ||
217 | m.bank = K8_MCE_THRESHOLD_BASE | ||
218 | + bank * NR_BLOCKS | ||
219 | + block; | ||
220 | mce_log(&m); | ||
221 | goto out; | ||
222 | } | ||
159 | } | 223 | } |
160 | } | 224 | } |
161 | out: | 225 | out: |
162 | irq_exit(); | 226 | irq_exit(); |
163 | } | 227 | } |
164 | 228 | ||
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void) | |||
166 | * Sysfs Interface | 230 | * Sysfs Interface |
167 | */ | 231 | */ |
168 | 232 | ||
169 | static struct sysdev_class threshold_sysclass = { | ||
170 | set_kset_name("threshold"), | ||
171 | }; | ||
172 | |||
173 | static DEFINE_PER_CPU(struct sys_device, device_threshold); | ||
174 | |||
175 | struct threshold_attr { | 233 | struct threshold_attr { |
176 | struct attribute attr; | 234 | struct attribute attr; |
177 | ssize_t(*show) (struct threshold_bank *, char *); | 235 | ssize_t(*show) (struct threshold_block *, char *); |
178 | ssize_t(*store) (struct threshold_bank *, const char *, size_t count); | 236 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); |
179 | }; | 237 | }; |
180 | 238 | ||
181 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | ||
182 | |||
183 | static cpumask_t affinity_set(unsigned int cpu) | 239 | static cpumask_t affinity_set(unsigned int cpu) |
184 | { | 240 | { |
185 | cpumask_t oldmask = current->cpus_allowed; | 241 | cpumask_t oldmask = current->cpus_allowed; |
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask) | |||
194 | set_cpus_allowed(current, oldmask); | 250 | set_cpus_allowed(current, oldmask); |
195 | } | 251 | } |
196 | 252 | ||
197 | #define SHOW_FIELDS(name) \ | 253 | #define SHOW_FIELDS(name) \ |
198 | static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ | 254 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ |
199 | { \ | 255 | { \ |
200 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 256 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ |
201 | } | 257 | } |
202 | SHOW_FIELDS(interrupt_enable) | 258 | SHOW_FIELDS(interrupt_enable) |
203 | SHOW_FIELDS(threshold_limit) | 259 | SHOW_FIELDS(threshold_limit) |
204 | 260 | ||
205 | static ssize_t store_interrupt_enable(struct threshold_bank *b, | 261 | static ssize_t store_interrupt_enable(struct threshold_block *b, |
206 | const char *buf, size_t count) | 262 | const char *buf, size_t count) |
207 | { | 263 | { |
208 | char *end; | 264 | char *end; |
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b, | |||
219 | return end - buf; | 275 | return end - buf; |
220 | } | 276 | } |
221 | 277 | ||
222 | static ssize_t store_threshold_limit(struct threshold_bank *b, | 278 | static ssize_t store_threshold_limit(struct threshold_block *b, |
223 | const char *buf, size_t count) | 279 | const char *buf, size_t count) |
224 | { | 280 | { |
225 | char *end; | 281 | char *end; |
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b, | |||
242 | return end - buf; | 298 | return end - buf; |
243 | } | 299 | } |
244 | 300 | ||
245 | static ssize_t show_error_count(struct threshold_bank *b, char *buf) | 301 | static ssize_t show_error_count(struct threshold_block *b, char *buf) |
246 | { | 302 | { |
247 | u32 high, low; | 303 | u32 high, low; |
248 | cpumask_t oldmask; | 304 | cpumask_t oldmask; |
249 | oldmask = affinity_set(b->cpu); | 305 | oldmask = affinity_set(b->cpu); |
250 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ | 306 | rdmsr(b->address, low, high); |
251 | affinity_restore(oldmask); | 307 | affinity_restore(oldmask); |
252 | return sprintf(buf, "%x\n", | 308 | return sprintf(buf, "%x\n", |
253 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | 309 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); |
254 | } | 310 | } |
255 | 311 | ||
256 | static ssize_t store_error_count(struct threshold_bank *b, | 312 | static ssize_t store_error_count(struct threshold_block *b, |
257 | const char *buf, size_t count) | 313 | const char *buf, size_t count) |
258 | { | 314 | { |
259 | cpumask_t oldmask; | 315 | cpumask_t oldmask; |
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b, | |||
269 | .store = _store, \ | 325 | .store = _store, \ |
270 | }; | 326 | }; |
271 | 327 | ||
272 | #define ATTR_FIELDS(name) \ | 328 | #define RW_ATTR(name) \ |
273 | static struct threshold_attr name = \ | 329 | static struct threshold_attr name = \ |
274 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | 330 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) |
275 | 331 | ||
276 | ATTR_FIELDS(interrupt_enable); | 332 | RW_ATTR(interrupt_enable); |
277 | ATTR_FIELDS(threshold_limit); | 333 | RW_ATTR(threshold_limit); |
278 | ATTR_FIELDS(error_count); | 334 | RW_ATTR(error_count); |
279 | 335 | ||
280 | static struct attribute *default_attrs[] = { | 336 | static struct attribute *default_attrs[] = { |
281 | &interrupt_enable.attr, | 337 | &interrupt_enable.attr, |
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = { | |||
284 | NULL | 340 | NULL |
285 | }; | 341 | }; |
286 | 342 | ||
287 | #define to_bank(k) container_of(k,struct threshold_bank,kobj) | 343 | #define to_block(k) container_of(k, struct threshold_block, kobj) |
288 | #define to_attr(a) container_of(a,struct threshold_attr,attr) | 344 | #define to_attr(a) container_of(a, struct threshold_attr, attr) |
289 | 345 | ||
290 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 346 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
291 | { | 347 | { |
292 | struct threshold_bank *b = to_bank(kobj); | 348 | struct threshold_block *b = to_block(kobj); |
293 | struct threshold_attr *a = to_attr(attr); | 349 | struct threshold_attr *a = to_attr(attr); |
294 | ssize_t ret; | 350 | ssize_t ret; |
295 | ret = a->show ? a->show(b, buf) : -EIO; | 351 | ret = a->show ? a->show(b, buf) : -EIO; |
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | |||
299 | static ssize_t store(struct kobject *kobj, struct attribute *attr, | 355 | static ssize_t store(struct kobject *kobj, struct attribute *attr, |
300 | const char *buf, size_t count) | 356 | const char *buf, size_t count) |
301 | { | 357 | { |
302 | struct threshold_bank *b = to_bank(kobj); | 358 | struct threshold_block *b = to_block(kobj); |
303 | struct threshold_attr *a = to_attr(attr); | 359 | struct threshold_attr *a = to_attr(attr); |
304 | ssize_t ret; | 360 | ssize_t ret; |
305 | ret = a->store ? a->store(b, buf, count) : -EIO; | 361 | ret = a->store ? a->store(b, buf, count) : -EIO; |
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = { | |||
316 | .default_attrs = default_attrs, | 372 | .default_attrs = default_attrs, |
317 | }; | 373 | }; |
318 | 374 | ||
375 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | ||
376 | unsigned int bank, | ||
377 | unsigned int block, | ||
378 | u32 address) | ||
379 | { | ||
380 | int err; | ||
381 | u32 low, high; | ||
382 | struct threshold_block *b = NULL; | ||
383 | |||
384 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) | ||
385 | return 0; | ||
386 | |||
387 | if (rdmsr_safe(address, &low, &high)) | ||
388 | goto recurse; | ||
389 | |||
390 | if (!(high & MASK_VALID_HI)) { | ||
391 | if (block) | ||
392 | goto recurse; | ||
393 | else | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | if (!(high & MASK_VALID_HI >> 1) || | ||
398 | (high & MASK_VALID_HI >> 2)) | ||
399 | goto recurse; | ||
400 | |||
401 | b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); | ||
402 | if (!b) | ||
403 | return -ENOMEM; | ||
404 | memset(b, 0, sizeof(struct threshold_block)); | ||
405 | |||
406 | b->block = block; | ||
407 | b->bank = bank; | ||
408 | b->cpu = cpu; | ||
409 | b->address = address; | ||
410 | b->interrupt_enable = 0; | ||
411 | b->threshold_limit = THRESHOLD_MAX; | ||
412 | |||
413 | INIT_LIST_HEAD(&b->miscj); | ||
414 | |||
415 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) | ||
416 | list_add(&b->miscj, | ||
417 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); | ||
418 | else | ||
419 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; | ||
420 | |||
421 | kobject_set_name(&b->kobj, "misc%i", block); | ||
422 | b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; | ||
423 | b->kobj.ktype = &threshold_ktype; | ||
424 | err = kobject_register(&b->kobj); | ||
425 | if (err) | ||
426 | goto out_free; | ||
427 | recurse: | ||
428 | if (!block) { | ||
429 | address = (low & MASK_BLKPTR_LO) >> 21; | ||
430 | if (!address) | ||
431 | return 0; | ||
432 | address += MCG_XBLK_ADDR; | ||
433 | } else | ||
434 | ++address; | ||
435 | |||
436 | err = allocate_threshold_blocks(cpu, bank, ++block, address); | ||
437 | if (err) | ||
438 | goto out_free; | ||
439 | |||
440 | return err; | ||
441 | |||
442 | out_free: | ||
443 | if (b) { | ||
444 | kobject_unregister(&b->kobj); | ||
445 | kfree(b); | ||
446 | } | ||
447 | return err; | ||
448 | } | ||
449 | |||
319 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | 450 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ |
320 | static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) | 451 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
321 | { | 452 | { |
322 | int err = 0; | 453 | int i, err = 0; |
323 | struct threshold_bank *b = NULL; | 454 | struct threshold_bank *b = NULL; |
455 | cpumask_t oldmask = CPU_MASK_NONE; | ||
456 | char name[32]; | ||
457 | |||
458 | sprintf(name, "threshold_bank%i", bank); | ||
324 | 459 | ||
325 | #ifdef CONFIG_SMP | 460 | #ifdef CONFIG_SMP |
326 | if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ | 461 | if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ |
327 | char name[16]; | 462 | i = first_cpu(cpu_core_map[cpu]); |
328 | unsigned lcpu = first_cpu(cpu_core_map[cpu]); | 463 | |
329 | if (cpu_core_id[lcpu]) | 464 | /* first core not up yet */ |
330 | goto out; /* first core not up yet */ | 465 | if (cpu_data[i].cpu_core_id) |
466 | goto out; | ||
467 | |||
468 | /* already linked */ | ||
469 | if (per_cpu(threshold_banks, cpu)[bank]) | ||
470 | goto out; | ||
471 | |||
472 | b = per_cpu(threshold_banks, i)[bank]; | ||
331 | 473 | ||
332 | b = per_cpu(threshold_banks, lcpu)[bank]; | ||
333 | if (!b) | 474 | if (!b) |
334 | goto out; | 475 | goto out; |
335 | sprintf(name, "bank%i", bank); | 476 | |
336 | err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, | 477 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, |
337 | &b->kobj, name); | 478 | &b->kobj, name); |
338 | if (err) | 479 | if (err) |
339 | goto out; | 480 | goto out; |
481 | |||
482 | b->cpus = cpu_core_map[cpu]; | ||
340 | per_cpu(threshold_banks, cpu)[bank] = b; | 483 | per_cpu(threshold_banks, cpu)[bank] = b; |
341 | goto out; | 484 | goto out; |
342 | } | 485 | } |
343 | #endif | 486 | #endif |
344 | 487 | ||
345 | b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); | 488 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); |
346 | if (!b) { | 489 | if (!b) { |
347 | err = -ENOMEM; | 490 | err = -ENOMEM; |
348 | goto out; | 491 | goto out; |
349 | } | 492 | } |
350 | memset(b, 0, sizeof(struct threshold_bank)); | 493 | memset(b, 0, sizeof(struct threshold_bank)); |
351 | 494 | ||
352 | b->cpu = cpu; | 495 | kobject_set_name(&b->kobj, "threshold_bank%i", bank); |
353 | b->bank = bank; | 496 | b->kobj.parent = &per_cpu(device_mce, cpu).kobj; |
354 | b->interrupt_enable = 0; | 497 | #ifndef CONFIG_SMP |
355 | b->threshold_limit = THRESHOLD_MAX; | 498 | b->cpus = CPU_MASK_ALL; |
356 | kobject_set_name(&b->kobj, "bank%i", bank); | 499 | #else |
357 | b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; | 500 | b->cpus = cpu_core_map[cpu]; |
358 | b->kobj.ktype = &threshold_ktype; | 501 | #endif |
359 | |||
360 | err = kobject_register(&b->kobj); | 502 | err = kobject_register(&b->kobj); |
361 | if (err) { | 503 | if (err) |
362 | kfree(b); | 504 | goto out_free; |
363 | goto out; | 505 | |
364 | } | ||
365 | per_cpu(threshold_banks, cpu)[bank] = b; | 506 | per_cpu(threshold_banks, cpu)[bank] = b; |
366 | out: | 507 | |
508 | oldmask = affinity_set(cpu); | ||
509 | err = allocate_threshold_blocks(cpu, bank, 0, | ||
510 | MSR_IA32_MC0_MISC + bank * 4); | ||
511 | affinity_restore(oldmask); | ||
512 | |||
513 | if (err) | ||
514 | goto out_free; | ||
515 | |||
516 | for_each_cpu_mask(i, b->cpus) { | ||
517 | if (i == cpu) | ||
518 | continue; | ||
519 | |||
520 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, | ||
521 | &b->kobj, name); | ||
522 | if (err) | ||
523 | goto out; | ||
524 | |||
525 | per_cpu(threshold_banks, i)[bank] = b; | ||
526 | } | ||
527 | |||
528 | goto out; | ||
529 | |||
530 | out_free: | ||
531 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
532 | kfree(b); | ||
533 | out: | ||
367 | return err; | 534 | return err; |
368 | } | 535 | } |
369 | 536 | ||
370 | /* create dir/files for all valid threshold banks */ | 537 | /* create dir/files for all valid threshold banks */ |
371 | static __cpuinit int threshold_create_device(unsigned int cpu) | 538 | static __cpuinit int threshold_create_device(unsigned int cpu) |
372 | { | 539 | { |
373 | int bank; | 540 | unsigned int bank; |
374 | int err = 0; | 541 | int err = 0; |
375 | 542 | ||
376 | per_cpu(device_threshold, cpu).id = cpu; | ||
377 | per_cpu(device_threshold, cpu).cls = &threshold_sysclass; | ||
378 | err = sysdev_register(&per_cpu(device_threshold, cpu)); | ||
379 | if (err) | ||
380 | goto out; | ||
381 | |||
382 | for (bank = 0; bank < NR_BANKS; ++bank) { | 543 | for (bank = 0; bank < NR_BANKS; ++bank) { |
383 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | 544 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) |
384 | continue; | 545 | continue; |
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
386 | if (err) | 547 | if (err) |
387 | goto out; | 548 | goto out; |
388 | } | 549 | } |
389 | out: | 550 | out: |
390 | return err; | 551 | return err; |
391 | } | 552 | } |
392 | 553 | ||
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
397 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. | 558 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. |
398 | */ | 559 | */ |
399 | 560 | ||
400 | /* cpu hotplug call removes all symlinks before first core dies */ | 561 | static __cpuinit void deallocate_threshold_block(unsigned int cpu, |
562 | unsigned int bank) | ||
563 | { | ||
564 | struct threshold_block *pos = NULL; | ||
565 | struct threshold_block *tmp = NULL; | ||
566 | struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank]; | ||
567 | |||
568 | if (!head) | ||
569 | return; | ||
570 | |||
571 | list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { | ||
572 | kobject_unregister(&pos->kobj); | ||
573 | list_del(&pos->miscj); | ||
574 | kfree(pos); | ||
575 | } | ||
576 | |||
577 | kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); | ||
578 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; | ||
579 | } | ||
580 | |||
401 | static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) | 581 | static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) |
402 | { | 582 | { |
583 | int i = 0; | ||
403 | struct threshold_bank *b; | 584 | struct threshold_bank *b; |
404 | char name[16]; | 585 | char name[32]; |
405 | 586 | ||
406 | b = per_cpu(threshold_banks, cpu)[bank]; | 587 | b = per_cpu(threshold_banks, cpu)[bank]; |
588 | |||
407 | if (!b) | 589 | if (!b) |
408 | return; | 590 | return; |
409 | if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { | 591 | |
410 | sprintf(name, "bank%i", bank); | 592 | if (!b->blocks) |
411 | sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); | 593 | goto free_out; |
412 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 594 | |
413 | } else { | 595 | sprintf(name, "threshold_bank%i", bank); |
414 | kobject_unregister(&b->kobj); | 596 | |
415 | kfree(per_cpu(threshold_banks, cpu)[bank]); | 597 | /* sibling symlink */ |
598 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | ||
599 | sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); | ||
600 | per_cpu(threshold_banks, i)[bank] = NULL; | ||
601 | return; | ||
602 | } | ||
603 | |||
604 | /* remove all sibling symlinks before unregistering */ | ||
605 | for_each_cpu_mask(i, b->cpus) { | ||
606 | if (i == cpu) | ||
607 | continue; | ||
608 | |||
609 | sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); | ||
610 | per_cpu(threshold_banks, i)[bank] = NULL; | ||
416 | } | 611 | } |
612 | |||
613 | deallocate_threshold_block(cpu, bank); | ||
614 | |||
615 | free_out: | ||
616 | kobject_unregister(&b->kobj); | ||
617 | kfree(b); | ||
618 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
417 | } | 619 | } |
418 | 620 | ||
419 | static __cpuinit void threshold_remove_device(unsigned int cpu) | 621 | static __cpuinit void threshold_remove_device(unsigned int cpu) |
420 | { | 622 | { |
421 | int bank; | 623 | unsigned int bank; |
422 | 624 | ||
423 | for (bank = 0; bank < NR_BANKS; ++bank) { | 625 | for (bank = 0; bank < NR_BANKS; ++bank) { |
424 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | 626 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) |
425 | continue; | 627 | continue; |
426 | threshold_remove_bank(cpu, bank); | 628 | threshold_remove_bank(cpu, bank); |
427 | } | 629 | } |
428 | sysdev_unregister(&per_cpu(device_threshold, cpu)); | ||
429 | } | 630 | } |
430 | 631 | ||
431 | /* link all existing siblings when first core comes up */ | ||
432 | static __cpuinit int threshold_create_symlinks(unsigned int cpu) | ||
433 | { | ||
434 | int bank, err = 0; | ||
435 | unsigned int lcpu = 0; | ||
436 | |||
437 | if (cpu_core_id[cpu]) | ||
438 | return 0; | ||
439 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
440 | if (lcpu == cpu) | ||
441 | continue; | ||
442 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
443 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
444 | continue; | ||
445 | if (!shared_bank[bank]) | ||
446 | continue; | ||
447 | err = threshold_create_bank(lcpu, bank); | ||
448 | } | ||
449 | } | ||
450 | return err; | ||
451 | } | ||
452 | |||
453 | /* remove all symlinks before first core dies. */ | ||
454 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
455 | { | ||
456 | int bank; | ||
457 | unsigned int lcpu = 0; | ||
458 | if (cpu_core_id[cpu]) | ||
459 | return; | ||
460 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
461 | if (lcpu == cpu) | ||
462 | continue; | ||
463 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
464 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
465 | continue; | ||
466 | if (!shared_bank[bank]) | ||
467 | continue; | ||
468 | threshold_remove_bank(lcpu, bank); | ||
469 | } | ||
470 | } | ||
471 | } | ||
472 | #else /* !CONFIG_HOTPLUG_CPU */ | 632 | #else /* !CONFIG_HOTPLUG_CPU */ |
473 | static __cpuinit void threshold_create_symlinks(unsigned int cpu) | ||
474 | { | ||
475 | } | ||
476 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
477 | { | ||
478 | } | ||
479 | static void threshold_remove_device(unsigned int cpu) | 633 | static void threshold_remove_device(unsigned int cpu) |
480 | { | 634 | { |
481 | } | 635 | } |
482 | #endif | 636 | #endif |
483 | 637 | ||
484 | /* get notified when a cpu comes on/off */ | 638 | /* get notified when a cpu comes on/off */ |
485 | static int threshold_cpu_callback(struct notifier_block *nfb, | 639 | static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, |
486 | unsigned long action, void *hcpu) | 640 | unsigned long action, void *hcpu) |
487 | { | 641 | { |
488 | /* cpu was unsigned int to begin with */ | 642 | /* cpu was unsigned int to begin with */ |
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb, | |||
494 | switch (action) { | 648 | switch (action) { |
495 | case CPU_ONLINE: | 649 | case CPU_ONLINE: |
496 | threshold_create_device(cpu); | 650 | threshold_create_device(cpu); |
497 | threshold_create_symlinks(cpu); | ||
498 | break; | ||
499 | case CPU_DOWN_PREPARE: | ||
500 | threshold_remove_symlinks(cpu); | ||
501 | break; | ||
502 | case CPU_DOWN_FAILED: | ||
503 | threshold_create_symlinks(cpu); | ||
504 | break; | 651 | break; |
505 | case CPU_DEAD: | 652 | case CPU_DEAD: |
506 | threshold_remove_device(cpu); | 653 | threshold_remove_device(cpu); |
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb, | |||
512 | return NOTIFY_OK; | 659 | return NOTIFY_OK; |
513 | } | 660 | } |
514 | 661 | ||
515 | static struct notifier_block threshold_cpu_notifier = { | 662 | static struct notifier_block threshold_cpu_notifier __cpuinitdata = { |
516 | .notifier_call = threshold_cpu_callback, | 663 | .notifier_call = threshold_cpu_callback, |
517 | }; | 664 | }; |
518 | 665 | ||
519 | static __init int threshold_init_device(void) | 666 | static __init int threshold_init_device(void) |
520 | { | 667 | { |
521 | int err; | 668 | unsigned lcpu = 0; |
522 | int lcpu = 0; | ||
523 | |||
524 | err = sysdev_class_register(&threshold_sysclass); | ||
525 | if (err) | ||
526 | goto out; | ||
527 | 669 | ||
528 | /* to hit CPUs online before the notifier is up */ | 670 | /* to hit CPUs online before the notifier is up */ |
529 | for_each_online_cpu(lcpu) { | 671 | for_each_online_cpu(lcpu) { |
530 | err = threshold_create_device(lcpu); | 672 | int err = threshold_create_device(lcpu); |
531 | if (err) | 673 | if (err) |
532 | goto out; | 674 | return err; |
533 | } | 675 | } |
534 | register_cpu_notifier(&threshold_cpu_notifier); | 676 | register_cpu_notifier(&threshold_cpu_notifier); |
535 | 677 | return 0; | |
536 | out: | ||
537 | return err; | ||
538 | } | 678 | } |
539 | 679 | ||
540 | device_initcall(threshold_init_device); | 680 | device_initcall(threshold_init_device); |
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index bac195c74bcc..9d0958ff547f 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c | |||
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs, | |||
145 | return -ENOSYS; | 145 | return -ENOSYS; |
146 | } | 146 | } |
147 | 147 | ||
148 | extern void apply_alternatives(void *start, void *end); | ||
149 | |||
150 | int module_finalize(const Elf_Ehdr *hdr, | 148 | int module_finalize(const Elf_Ehdr *hdr, |
151 | const Elf_Shdr *sechdrs, | 149 | const Elf_Shdr *sechdrs, |
152 | struct module *me) | 150 | struct module *me) |
153 | { | 151 | { |
154 | const Elf_Shdr *s; | 152 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; |
155 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 153 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
156 | 154 | ||
157 | /* look for .altinstructions to patch */ | 155 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
158 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 156 | if (!strcmp(".text", secstrings + s->sh_name)) |
159 | void *seg; | 157 | text = s; |
160 | if (strcmp(".altinstructions", secstrings + s->sh_name)) | 158 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) |
161 | continue; | 159 | alt = s; |
162 | seg = (void *)s->sh_addr; | 160 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
163 | apply_alternatives(seg, seg + s->sh_size); | 161 | locks= s; |
164 | } | 162 | } |
163 | |||
164 | if (alt) { | ||
165 | /* patch .altinstructions */ | ||
166 | void *aseg = (void *)alt->sh_addr; | ||
167 | apply_alternatives(aseg, aseg + alt->sh_size); | ||
168 | } | ||
169 | if (locks && text) { | ||
170 | void *lseg = (void *)locks->sh_addr; | ||
171 | void *tseg = (void *)text->sh_addr; | ||
172 | alternatives_smp_module_add(me, me->name, | ||
173 | lseg, lseg + locks->sh_size, | ||
174 | tseg, tseg + text->sh_size); | ||
175 | } | ||
165 | return 0; | 176 | return 0; |
166 | } | 177 | } |
167 | 178 | ||
168 | void module_arch_cleanup(struct module *mod) | 179 | void module_arch_cleanup(struct module *mod) |
169 | { | 180 | { |
181 | alternatives_smp_module_del(mod); | ||
170 | } | 182 | } |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 4e6357fe0ec3..399489c93132 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -15,11 +15,7 @@ | |||
15 | #include <linux/config.h> | 15 | #include <linux/config.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
18 | #include <linux/bootmem.h> | ||
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
21 | #include <linux/mc146818rtc.h> | ||
22 | #include <linux/kernel_stat.h> | ||
23 | #include <linux/module.h> | 19 | #include <linux/module.h> |
24 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
25 | #include <linux/nmi.h> | 21 | #include <linux/nmi.h> |
@@ -27,14 +23,11 @@ | |||
27 | #include <linux/kprobes.h> | 23 | #include <linux/kprobes.h> |
28 | 24 | ||
29 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
30 | #include <asm/mtrr.h> | ||
31 | #include <asm/mpspec.h> | ||
32 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
33 | #include <asm/msr.h> | ||
34 | #include <asm/proto.h> | 27 | #include <asm/proto.h> |
35 | #include <asm/kdebug.h> | 28 | #include <asm/kdebug.h> |
36 | #include <asm/local.h> | ||
37 | #include <asm/mce.h> | 29 | #include <asm/mce.h> |
30 | #include <asm/intel_arch_perfmon.h> | ||
38 | 31 | ||
39 | /* | 32 | /* |
40 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 33 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: |
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val; | |||
74 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | 67 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 |
75 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | 68 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING |
76 | 69 | ||
70 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
71 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
72 | |||
77 | #define MSR_P4_MISC_ENABLE 0x1A0 | 73 | #define MSR_P4_MISC_ENABLE 0x1A0 |
78 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 74 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) |
79 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | 75 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) |
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
105 | case X86_VENDOR_AMD: | 101 | case X86_VENDOR_AMD: |
106 | return boot_cpu_data.x86 == 15; | 102 | return boot_cpu_data.x86 == 15; |
107 | case X86_VENDOR_INTEL: | 103 | case X86_VENDOR_INTEL: |
108 | return boot_cpu_data.x86 == 15; | 104 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
105 | return 1; | ||
106 | else | ||
107 | return (boot_cpu_data.x86 == 15); | ||
109 | } | 108 | } |
110 | return 0; | 109 | return 0; |
111 | } | 110 | } |
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str) | |||
211 | 210 | ||
212 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 211 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
213 | 212 | ||
213 | static void disable_intel_arch_watchdog(void); | ||
214 | |||
214 | static void disable_lapic_nmi_watchdog(void) | 215 | static void disable_lapic_nmi_watchdog(void) |
215 | { | 216 | { |
216 | if (nmi_active <= 0) | 217 | if (nmi_active <= 0) |
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void) | |||
223 | if (boot_cpu_data.x86 == 15) { | 224 | if (boot_cpu_data.x86 == 15) { |
224 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 225 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); |
225 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | 226 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); |
227 | } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
228 | disable_intel_arch_watchdog(); | ||
226 | } | 229 | } |
227 | break; | 230 | break; |
228 | } | 231 | } |
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void) | |||
375 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 378 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
376 | } | 379 | } |
377 | 380 | ||
381 | static void disable_intel_arch_watchdog(void) | ||
382 | { | ||
383 | unsigned ebx; | ||
384 | |||
385 | /* | ||
386 | * Check whether the Architectural PerfMon supports | ||
387 | * Unhalted Core Cycles Event or not. | ||
388 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
389 | */ | ||
390 | ebx = cpuid_ebx(10); | ||
391 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
392 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
393 | } | ||
394 | |||
395 | static int setup_intel_arch_watchdog(void) | ||
396 | { | ||
397 | unsigned int evntsel; | ||
398 | unsigned ebx; | ||
399 | |||
400 | /* | ||
401 | * Check whether the Architectural PerfMon supports | ||
402 | * Unhalted Core Cycles Event or not. | ||
403 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
404 | */ | ||
405 | ebx = cpuid_ebx(10); | ||
406 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
407 | return 0; | ||
408 | |||
409 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
410 | |||
411 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | ||
412 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | ||
413 | |||
414 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
415 | | ARCH_PERFMON_EVENTSEL_OS | ||
416 | | ARCH_PERFMON_EVENTSEL_USR | ||
417 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
418 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
419 | |||
420 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
421 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
422 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
423 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
424 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
425 | return 1; | ||
426 | } | ||
427 | |||
378 | 428 | ||
379 | static int setup_p4_watchdog(void) | 429 | static int setup_p4_watchdog(void) |
380 | { | 430 | { |
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void) | |||
428 | setup_k7_watchdog(); | 478 | setup_k7_watchdog(); |
429 | break; | 479 | break; |
430 | case X86_VENDOR_INTEL: | 480 | case X86_VENDOR_INTEL: |
431 | if (boot_cpu_data.x86 != 15) | 481 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
432 | return; | 482 | if (!setup_intel_arch_watchdog()) |
433 | if (!setup_p4_watchdog()) | 483 | return; |
484 | } else if (boot_cpu_data.x86 == 15) { | ||
485 | if (!setup_p4_watchdog()) | ||
486 | return; | ||
487 | } else { | ||
434 | return; | 488 | return; |
489 | } | ||
490 | |||
435 | break; | 491 | break; |
436 | 492 | ||
437 | default: | 493 | default: |
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
516 | */ | 572 | */ |
517 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 573 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); |
518 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 574 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
519 | } | 575 | } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
576 | /* | ||
577 | * For Intel based architectural perfmon | ||
578 | * - LVTPC is masked on interrupt and must be | ||
579 | * unmasked by the LVTPC handler. | ||
580 | */ | ||
581 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
582 | } | ||
520 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 583 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); |
521 | } | 584 | } |
522 | } | 585 | } |
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c new file mode 100644 index 000000000000..d91cb843f54d --- /dev/null +++ b/arch/x86_64/kernel/pci-calgary.c | |||
@@ -0,0 +1,1018 @@ | |||
1 | /* | ||
2 | * Derived from arch/powerpc/kernel/iommu.c | ||
3 | * | ||
4 | * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation | ||
5 | * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include <linux/config.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/dma-mapping.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/bitops.h> | ||
33 | #include <linux/pci_ids.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <asm/proto.h> | ||
37 | #include <asm/calgary.h> | ||
38 | #include <asm/tce.h> | ||
39 | #include <asm/pci-direct.h> | ||
40 | #include <asm/system.h> | ||
41 | #include <asm/dma.h> | ||
42 | |||
43 | #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 | ||
44 | #define PCI_VENDOR_DEVICE_ID_CALGARY \ | ||
45 | (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16) | ||
46 | |||
47 | /* we need these for register space address calculation */ | ||
48 | #define START_ADDRESS 0xfe000000 | ||
49 | #define CHASSIS_BASE 0 | ||
50 | #define ONE_BASED_CHASSIS_NUM 1 | ||
51 | |||
52 | /* register offsets inside the host bridge space */ | ||
53 | #define PHB_CSR_OFFSET 0x0110 | ||
54 | #define PHB_PLSSR_OFFSET 0x0120 | ||
55 | #define PHB_CONFIG_RW_OFFSET 0x0160 | ||
56 | #define PHB_IOBASE_BAR_LOW 0x0170 | ||
57 | #define PHB_IOBASE_BAR_HIGH 0x0180 | ||
58 | #define PHB_MEM_1_LOW 0x0190 | ||
59 | #define PHB_MEM_1_HIGH 0x01A0 | ||
60 | #define PHB_IO_ADDR_SIZE 0x01B0 | ||
61 | #define PHB_MEM_1_SIZE 0x01C0 | ||
62 | #define PHB_MEM_ST_OFFSET 0x01D0 | ||
63 | #define PHB_AER_OFFSET 0x0200 | ||
64 | #define PHB_CONFIG_0_HIGH 0x0220 | ||
65 | #define PHB_CONFIG_0_LOW 0x0230 | ||
66 | #define PHB_CONFIG_0_END 0x0240 | ||
67 | #define PHB_MEM_2_LOW 0x02B0 | ||
68 | #define PHB_MEM_2_HIGH 0x02C0 | ||
69 | #define PHB_MEM_2_SIZE_HIGH 0x02D0 | ||
70 | #define PHB_MEM_2_SIZE_LOW 0x02E0 | ||
71 | #define PHB_DOSHOLE_OFFSET 0x08E0 | ||
72 | |||
73 | /* PHB_CONFIG_RW */ | ||
74 | #define PHB_TCE_ENABLE 0x20000000 | ||
75 | #define PHB_SLOT_DISABLE 0x1C000000 | ||
76 | #define PHB_DAC_DISABLE 0x01000000 | ||
77 | #define PHB_MEM2_ENABLE 0x00400000 | ||
78 | #define PHB_MCSR_ENABLE 0x00100000 | ||
79 | /* TAR (Table Address Register) */ | ||
80 | #define TAR_SW_BITS 0x0000ffffffff800fUL | ||
81 | #define TAR_VALID 0x0000000000000008UL | ||
82 | /* CSR (Channel/DMA Status Register) */ | ||
83 | #define CSR_AGENT_MASK 0xffe0ffff | ||
84 | |||
85 | #define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ | ||
86 | #define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */ | ||
87 | #define PHBS_PER_CALGARY 4 | ||
88 | |||
89 | /* register offsets in Calgary's internal register space */ | ||
90 | static const unsigned long tar_offsets[] = { | ||
91 | 0x0580 /* TAR0 */, | ||
92 | 0x0588 /* TAR1 */, | ||
93 | 0x0590 /* TAR2 */, | ||
94 | 0x0598 /* TAR3 */ | ||
95 | }; | ||
96 | |||
97 | static const unsigned long split_queue_offsets[] = { | ||
98 | 0x4870 /* SPLIT QUEUE 0 */, | ||
99 | 0x5870 /* SPLIT QUEUE 1 */, | ||
100 | 0x6870 /* SPLIT QUEUE 2 */, | ||
101 | 0x7870 /* SPLIT QUEUE 3 */ | ||
102 | }; | ||
103 | |||
104 | static const unsigned long phb_offsets[] = { | ||
105 | 0x8000 /* PHB0 */, | ||
106 | 0x9000 /* PHB1 */, | ||
107 | 0xA000 /* PHB2 */, | ||
108 | 0xB000 /* PHB3 */ | ||
109 | }; | ||
110 | |||
111 | void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES]; | ||
112 | unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; | ||
113 | static int translate_empty_slots __read_mostly = 0; | ||
114 | static int calgary_detected __read_mostly = 0; | ||
115 | |||
116 | /* | ||
117 | * the bitmap of PHBs the user requested that we disable | ||
118 | * translation on. | ||
119 | */ | ||
120 | static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM); | ||
121 | |||
122 | static void tce_cache_blast(struct iommu_table *tbl); | ||
123 | |||
124 | /* enable this to stress test the chip's TCE cache */ | ||
125 | #ifdef CONFIG_IOMMU_DEBUG | ||
126 | static inline void tce_cache_blast_stress(struct iommu_table *tbl) | ||
127 | { | ||
128 | tce_cache_blast(tbl); | ||
129 | } | ||
130 | #else | ||
131 | static inline void tce_cache_blast_stress(struct iommu_table *tbl) | ||
132 | { | ||
133 | } | ||
134 | #endif /* CONFIG_IOMMU_DEBUG */ | ||
135 | |||
136 | static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) | ||
137 | { | ||
138 | unsigned int npages; | ||
139 | |||
140 | npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK); | ||
141 | npages >>= PAGE_SHIFT; | ||
142 | |||
143 | return npages; | ||
144 | } | ||
145 | |||
146 | static inline int translate_phb(struct pci_dev* dev) | ||
147 | { | ||
148 | int disabled = test_bit(dev->bus->number, translation_disabled); | ||
149 | return !disabled; | ||
150 | } | ||
151 | |||
152 | static void iommu_range_reserve(struct iommu_table *tbl, | ||
153 | unsigned long start_addr, unsigned int npages) | ||
154 | { | ||
155 | unsigned long index; | ||
156 | unsigned long end; | ||
157 | |||
158 | index = start_addr >> PAGE_SHIFT; | ||
159 | |||
160 | /* bail out if we're asked to reserve a region we don't cover */ | ||
161 | if (index >= tbl->it_size) | ||
162 | return; | ||
163 | |||
164 | end = index + npages; | ||
165 | if (end > tbl->it_size) /* don't go off the table */ | ||
166 | end = tbl->it_size; | ||
167 | |||
168 | while (index < end) { | ||
169 | if (test_bit(index, tbl->it_map)) | ||
170 | printk(KERN_ERR "Calgary: entry already allocated at " | ||
171 | "0x%lx tbl %p dma 0x%lx npages %u\n", | ||
172 | index, tbl, start_addr, npages); | ||
173 | ++index; | ||
174 | } | ||
175 | set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages); | ||
176 | } | ||
177 | |||
178 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, | ||
179 | unsigned int npages) | ||
180 | { | ||
181 | unsigned long offset; | ||
182 | |||
183 | BUG_ON(npages == 0); | ||
184 | |||
185 | offset = find_next_zero_string(tbl->it_map, tbl->it_hint, | ||
186 | tbl->it_size, npages); | ||
187 | if (offset == ~0UL) { | ||
188 | tce_cache_blast(tbl); | ||
189 | offset = find_next_zero_string(tbl->it_map, 0, | ||
190 | tbl->it_size, npages); | ||
191 | if (offset == ~0UL) { | ||
192 | printk(KERN_WARNING "Calgary: IOMMU full.\n"); | ||
193 | if (panic_on_overflow) | ||
194 | panic("Calgary: fix the allocator.\n"); | ||
195 | else | ||
196 | return bad_dma_address; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | set_bit_string(tbl->it_map, offset, npages); | ||
201 | tbl->it_hint = offset + npages; | ||
202 | BUG_ON(tbl->it_hint > tbl->it_size); | ||
203 | |||
204 | return offset; | ||
205 | } | ||
206 | |||
207 | static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr, | ||
208 | unsigned int npages, int direction) | ||
209 | { | ||
210 | unsigned long entry, flags; | ||
211 | dma_addr_t ret = bad_dma_address; | ||
212 | |||
213 | spin_lock_irqsave(&tbl->it_lock, flags); | ||
214 | |||
215 | entry = iommu_range_alloc(tbl, npages); | ||
216 | |||
217 | if (unlikely(entry == bad_dma_address)) | ||
218 | goto error; | ||
219 | |||
220 | /* set the return dma address */ | ||
221 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); | ||
222 | |||
223 | /* put the TCEs in the HW table */ | ||
224 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, | ||
225 | direction); | ||
226 | |||
227 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
228 | |||
229 | return ret; | ||
230 | |||
231 | error: | ||
232 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
233 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | ||
234 | "iommu %p\n", npages, tbl); | ||
235 | return bad_dma_address; | ||
236 | } | ||
237 | |||
238 | static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | ||
239 | unsigned int npages) | ||
240 | { | ||
241 | unsigned long entry; | ||
242 | unsigned long i; | ||
243 | |||
244 | entry = dma_addr >> PAGE_SHIFT; | ||
245 | |||
246 | BUG_ON(entry + npages > tbl->it_size); | ||
247 | |||
248 | tce_free(tbl, entry, npages); | ||
249 | |||
250 | for (i = 0; i < npages; ++i) { | ||
251 | if (!test_bit(entry + i, tbl->it_map)) | ||
252 | printk(KERN_ERR "Calgary: bit is off at 0x%lx " | ||
253 | "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", | ||
254 | entry + i, tbl, dma_addr, entry, npages); | ||
255 | } | ||
256 | |||
257 | __clear_bit_string(tbl->it_map, entry, npages); | ||
258 | |||
259 | tce_cache_blast_stress(tbl); | ||
260 | } | ||
261 | |||
262 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | ||
263 | unsigned int npages) | ||
264 | { | ||
265 | unsigned long flags; | ||
266 | |||
267 | spin_lock_irqsave(&tbl->it_lock, flags); | ||
268 | |||
269 | __iommu_free(tbl, dma_addr, npages); | ||
270 | |||
271 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
272 | } | ||
273 | |||
274 | static void __calgary_unmap_sg(struct iommu_table *tbl, | ||
275 | struct scatterlist *sglist, int nelems, int direction) | ||
276 | { | ||
277 | while (nelems--) { | ||
278 | unsigned int npages; | ||
279 | dma_addr_t dma = sglist->dma_address; | ||
280 | unsigned int dmalen = sglist->dma_length; | ||
281 | |||
282 | if (dmalen == 0) | ||
283 | break; | ||
284 | |||
285 | npages = num_dma_pages(dma, dmalen); | ||
286 | __iommu_free(tbl, dma, npages); | ||
287 | sglist++; | ||
288 | } | ||
289 | } | ||
290 | |||
291 | void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, | ||
292 | int nelems, int direction) | ||
293 | { | ||
294 | unsigned long flags; | ||
295 | struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; | ||
296 | |||
297 | if (!translate_phb(to_pci_dev(dev))) | ||
298 | return; | ||
299 | |||
300 | spin_lock_irqsave(&tbl->it_lock, flags); | ||
301 | |||
302 | __calgary_unmap_sg(tbl, sglist, nelems, direction); | ||
303 | |||
304 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
305 | } | ||
306 | |||
307 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
308 | struct scatterlist *sg, int nelems, int direction) | ||
309 | { | ||
310 | int i; | ||
311 | |||
312 | for (i = 0; i < nelems; i++ ) { | ||
313 | struct scatterlist *s = &sg[i]; | ||
314 | BUG_ON(!s->page); | ||
315 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | ||
316 | s->dma_length = s->length; | ||
317 | } | ||
318 | return nelems; | ||
319 | } | ||
320 | |||
321 | int calgary_map_sg(struct device *dev, struct scatterlist *sg, | ||
322 | int nelems, int direction) | ||
323 | { | ||
324 | struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; | ||
325 | unsigned long flags; | ||
326 | unsigned long vaddr; | ||
327 | unsigned int npages; | ||
328 | unsigned long entry; | ||
329 | int i; | ||
330 | |||
331 | if (!translate_phb(to_pci_dev(dev))) | ||
332 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
333 | |||
334 | spin_lock_irqsave(&tbl->it_lock, flags); | ||
335 | |||
336 | for (i = 0; i < nelems; i++ ) { | ||
337 | struct scatterlist *s = &sg[i]; | ||
338 | BUG_ON(!s->page); | ||
339 | |||
340 | vaddr = (unsigned long)page_address(s->page) + s->offset; | ||
341 | npages = num_dma_pages(vaddr, s->length); | ||
342 | |||
343 | entry = iommu_range_alloc(tbl, npages); | ||
344 | if (entry == bad_dma_address) { | ||
345 | /* makes sure unmap knows to stop */ | ||
346 | s->dma_length = 0; | ||
347 | goto error; | ||
348 | } | ||
349 | |||
350 | s->dma_address = (entry << PAGE_SHIFT) | s->offset; | ||
351 | |||
352 | /* insert into HW table */ | ||
353 | tce_build(tbl, entry, npages, vaddr & PAGE_MASK, | ||
354 | direction); | ||
355 | |||
356 | s->dma_length = s->length; | ||
357 | } | ||
358 | |||
359 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
360 | |||
361 | return nelems; | ||
362 | error: | ||
363 | __calgary_unmap_sg(tbl, sg, nelems, direction); | ||
364 | for (i = 0; i < nelems; i++) { | ||
365 | sg[i].dma_address = bad_dma_address; | ||
366 | sg[i].dma_length = 0; | ||
367 | } | ||
368 | spin_unlock_irqrestore(&tbl->it_lock, flags); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | dma_addr_t calgary_map_single(struct device *dev, void *vaddr, | ||
373 | size_t size, int direction) | ||
374 | { | ||
375 | dma_addr_t dma_handle = bad_dma_address; | ||
376 | unsigned long uaddr; | ||
377 | unsigned int npages; | ||
378 | struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; | ||
379 | |||
380 | uaddr = (unsigned long)vaddr; | ||
381 | npages = num_dma_pages(uaddr, size); | ||
382 | |||
383 | if (translate_phb(to_pci_dev(dev))) | ||
384 | dma_handle = iommu_alloc(tbl, vaddr, npages, direction); | ||
385 | else | ||
386 | dma_handle = virt_to_bus(vaddr); | ||
387 | |||
388 | return dma_handle; | ||
389 | } | ||
390 | |||
391 | void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | ||
392 | size_t size, int direction) | ||
393 | { | ||
394 | struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata; | ||
395 | unsigned int npages; | ||
396 | |||
397 | if (!translate_phb(to_pci_dev(dev))) | ||
398 | return; | ||
399 | |||
400 | npages = num_dma_pages(dma_handle, size); | ||
401 | iommu_free(tbl, dma_handle, npages); | ||
402 | } | ||
403 | |||
404 | void* calgary_alloc_coherent(struct device *dev, size_t size, | ||
405 | dma_addr_t *dma_handle, gfp_t flag) | ||
406 | { | ||
407 | void *ret = NULL; | ||
408 | dma_addr_t mapping; | ||
409 | unsigned int npages, order; | ||
410 | struct iommu_table *tbl; | ||
411 | |||
412 | tbl = to_pci_dev(dev)->bus->self->sysdata; | ||
413 | |||
414 | size = PAGE_ALIGN(size); /* size rounded up to full pages */ | ||
415 | npages = size >> PAGE_SHIFT; | ||
416 | order = get_order(size); | ||
417 | |||
418 | /* alloc enough pages (and possibly more) */ | ||
419 | ret = (void *)__get_free_pages(flag, order); | ||
420 | if (!ret) | ||
421 | goto error; | ||
422 | memset(ret, 0, size); | ||
423 | |||
424 | if (translate_phb(to_pci_dev(dev))) { | ||
425 | /* set up tces to cover the allocated range */ | ||
426 | mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL); | ||
427 | if (mapping == bad_dma_address) | ||
428 | goto free; | ||
429 | |||
430 | *dma_handle = mapping; | ||
431 | } else /* non translated slot */ | ||
432 | *dma_handle = virt_to_bus(ret); | ||
433 | |||
434 | return ret; | ||
435 | |||
436 | free: | ||
437 | free_pages((unsigned long)ret, get_order(size)); | ||
438 | ret = NULL; | ||
439 | error: | ||
440 | return ret; | ||
441 | } | ||
442 | |||
443 | static struct dma_mapping_ops calgary_dma_ops = { | ||
444 | .alloc_coherent = calgary_alloc_coherent, | ||
445 | .map_single = calgary_map_single, | ||
446 | .unmap_single = calgary_unmap_single, | ||
447 | .map_sg = calgary_map_sg, | ||
448 | .unmap_sg = calgary_unmap_sg, | ||
449 | }; | ||
450 | |||
451 | static inline int busno_to_phbid(unsigned char num) | ||
452 | { | ||
453 | return bus_to_phb(num) % PHBS_PER_CALGARY; | ||
454 | } | ||
455 | |||
456 | static inline unsigned long split_queue_offset(unsigned char num) | ||
457 | { | ||
458 | size_t idx = busno_to_phbid(num); | ||
459 | |||
460 | return split_queue_offsets[idx]; | ||
461 | } | ||
462 | |||
463 | static inline unsigned long tar_offset(unsigned char num) | ||
464 | { | ||
465 | size_t idx = busno_to_phbid(num); | ||
466 | |||
467 | return tar_offsets[idx]; | ||
468 | } | ||
469 | |||
470 | static inline unsigned long phb_offset(unsigned char num) | ||
471 | { | ||
472 | size_t idx = busno_to_phbid(num); | ||
473 | |||
474 | return phb_offsets[idx]; | ||
475 | } | ||
476 | |||
477 | static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset) | ||
478 | { | ||
479 | unsigned long target = ((unsigned long)bar) | offset; | ||
480 | return (void __iomem*)target; | ||
481 | } | ||
482 | |||
483 | static void tce_cache_blast(struct iommu_table *tbl) | ||
484 | { | ||
485 | u64 val; | ||
486 | u32 aer; | ||
487 | int i = 0; | ||
488 | void __iomem *bbar = tbl->bbar; | ||
489 | void __iomem *target; | ||
490 | |||
491 | /* disable arbitration on the bus */ | ||
492 | target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); | ||
493 | aer = readl(target); | ||
494 | writel(0, target); | ||
495 | |||
496 | /* read plssr to ensure it got there */ | ||
497 | target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET); | ||
498 | val = readl(target); | ||
499 | |||
500 | /* poll split queues until all DMA activity is done */ | ||
501 | target = calgary_reg(bbar, split_queue_offset(tbl->it_busno)); | ||
502 | do { | ||
503 | val = readq(target); | ||
504 | i++; | ||
505 | } while ((val & 0xff) != 0xff && i < 100); | ||
506 | if (i == 100) | ||
507 | printk(KERN_WARNING "Calgary: PCI bus not quiesced, " | ||
508 | "continuing anyway\n"); | ||
509 | |||
510 | /* invalidate TCE cache */ | ||
511 | target = calgary_reg(bbar, tar_offset(tbl->it_busno)); | ||
512 | writeq(tbl->tar_val, target); | ||
513 | |||
514 | /* enable arbitration */ | ||
515 | target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET); | ||
516 | writel(aer, target); | ||
517 | (void)readl(target); /* flush */ | ||
518 | } | ||
519 | |||
520 | static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start, | ||
521 | u64 limit) | ||
522 | { | ||
523 | unsigned int numpages; | ||
524 | |||
525 | limit = limit | 0xfffff; | ||
526 | limit++; | ||
527 | |||
528 | numpages = ((limit - start) >> PAGE_SHIFT); | ||
529 | iommu_range_reserve(dev->sysdata, start, numpages); | ||
530 | } | ||
531 | |||
532 | static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev) | ||
533 | { | ||
534 | void __iomem *target; | ||
535 | u64 low, high, sizelow; | ||
536 | u64 start, limit; | ||
537 | struct iommu_table *tbl = dev->sysdata; | ||
538 | unsigned char busnum = dev->bus->number; | ||
539 | void __iomem *bbar = tbl->bbar; | ||
540 | |||
541 | /* peripheral MEM_1 region */ | ||
542 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW); | ||
543 | low = be32_to_cpu(readl(target)); | ||
544 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH); | ||
545 | high = be32_to_cpu(readl(target)); | ||
546 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE); | ||
547 | sizelow = be32_to_cpu(readl(target)); | ||
548 | |||
549 | start = (high << 32) | low; | ||
550 | limit = sizelow; | ||
551 | |||
552 | calgary_reserve_mem_region(dev, start, limit); | ||
553 | } | ||
554 | |||
555 | static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) | ||
556 | { | ||
557 | void __iomem *target; | ||
558 | u32 val32; | ||
559 | u64 low, high, sizelow, sizehigh; | ||
560 | u64 start, limit; | ||
561 | struct iommu_table *tbl = dev->sysdata; | ||
562 | unsigned char busnum = dev->bus->number; | ||
563 | void __iomem *bbar = tbl->bbar; | ||
564 | |||
565 | /* is it enabled? */ | ||
566 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); | ||
567 | val32 = be32_to_cpu(readl(target)); | ||
568 | if (!(val32 & PHB_MEM2_ENABLE)) | ||
569 | return; | ||
570 | |||
571 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW); | ||
572 | low = be32_to_cpu(readl(target)); | ||
573 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH); | ||
574 | high = be32_to_cpu(readl(target)); | ||
575 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW); | ||
576 | sizelow = be32_to_cpu(readl(target)); | ||
577 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH); | ||
578 | sizehigh = be32_to_cpu(readl(target)); | ||
579 | |||
580 | start = (high << 32) | low; | ||
581 | limit = (sizehigh << 32) | sizelow; | ||
582 | |||
583 | calgary_reserve_mem_region(dev, start, limit); | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * some regions of the IO address space do not get translated, so we | ||
588 | * must not give devices IO addresses in those regions. The regions | ||
589 | * are the 640KB-1MB region and the two PCI peripheral memory holes. | ||
590 | * Reserve all of them in the IOMMU bitmap to avoid giving them out | ||
591 | * later. | ||
592 | */ | ||
593 | static void __init calgary_reserve_regions(struct pci_dev *dev) | ||
594 | { | ||
595 | unsigned int npages; | ||
596 | void __iomem *bbar; | ||
597 | unsigned char busnum; | ||
598 | u64 start; | ||
599 | struct iommu_table *tbl = dev->sysdata; | ||
600 | |||
601 | bbar = tbl->bbar; | ||
602 | busnum = dev->bus->number; | ||
603 | |||
604 | /* reserve bad_dma_address in case it's a legal address */ | ||
605 | iommu_range_reserve(tbl, bad_dma_address, 1); | ||
606 | |||
607 | /* avoid the BIOS/VGA first 640KB-1MB region */ | ||
608 | start = (640 * 1024); | ||
609 | npages = ((1024 - 640) * 1024) >> PAGE_SHIFT; | ||
610 | iommu_range_reserve(tbl, start, npages); | ||
611 | |||
612 | /* reserve the two PCI peripheral memory regions in IO space */ | ||
613 | calgary_reserve_peripheral_mem_1(dev); | ||
614 | calgary_reserve_peripheral_mem_2(dev); | ||
615 | } | ||
616 | |||
617 | static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | ||
618 | { | ||
619 | u64 val64; | ||
620 | u64 table_phys; | ||
621 | void __iomem *target; | ||
622 | int ret; | ||
623 | struct iommu_table *tbl; | ||
624 | |||
625 | /* build TCE tables for each PHB */ | ||
626 | ret = build_tce_table(dev, bbar); | ||
627 | if (ret) | ||
628 | return ret; | ||
629 | |||
630 | calgary_reserve_regions(dev); | ||
631 | |||
632 | /* set TARs for each PHB */ | ||
633 | target = calgary_reg(bbar, tar_offset(dev->bus->number)); | ||
634 | val64 = be64_to_cpu(readq(target)); | ||
635 | |||
636 | /* zero out all TAR bits under sw control */ | ||
637 | val64 &= ~TAR_SW_BITS; | ||
638 | |||
639 | tbl = dev->sysdata; | ||
640 | table_phys = (u64)__pa(tbl->it_base); | ||
641 | val64 |= table_phys; | ||
642 | |||
643 | BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M); | ||
644 | val64 |= (u64) specified_table_size; | ||
645 | |||
646 | tbl->tar_val = cpu_to_be64(val64); | ||
647 | writeq(tbl->tar_val, target); | ||
648 | readq(target); /* flush */ | ||
649 | |||
650 | return 0; | ||
651 | } | ||
652 | |||
653 | static void __init calgary_free_tar(struct pci_dev *dev) | ||
654 | { | ||
655 | u64 val64; | ||
656 | struct iommu_table *tbl = dev->sysdata; | ||
657 | void __iomem *target; | ||
658 | |||
659 | target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); | ||
660 | val64 = be64_to_cpu(readq(target)); | ||
661 | val64 &= ~TAR_SW_BITS; | ||
662 | writeq(cpu_to_be64(val64), target); | ||
663 | readq(target); /* flush */ | ||
664 | |||
665 | kfree(tbl); | ||
666 | dev->sysdata = NULL; | ||
667 | } | ||
668 | |||
669 | static void calgary_watchdog(unsigned long data) | ||
670 | { | ||
671 | struct pci_dev *dev = (struct pci_dev *)data; | ||
672 | struct iommu_table *tbl = dev->sysdata; | ||
673 | void __iomem *bbar = tbl->bbar; | ||
674 | u32 val32; | ||
675 | void __iomem *target; | ||
676 | |||
677 | target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET); | ||
678 | val32 = be32_to_cpu(readl(target)); | ||
679 | |||
680 | /* If no error, the agent ID in the CSR is not valid */ | ||
681 | if (val32 & CSR_AGENT_MASK) { | ||
682 | printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, " | ||
683 | "CSR = %#x\n", dev->bus->number, val32); | ||
684 | writel(0, target); | ||
685 | |||
686 | /* Disable bus that caused the error */ | ||
687 | target = calgary_reg(bbar, phb_offset(tbl->it_busno) | | ||
688 | PHB_CONFIG_RW_OFFSET); | ||
689 | val32 = be32_to_cpu(readl(target)); | ||
690 | val32 |= PHB_SLOT_DISABLE; | ||
691 | writel(cpu_to_be32(val32), target); | ||
692 | readl(target); /* flush */ | ||
693 | } else { | ||
694 | /* Reset the timer */ | ||
695 | mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ); | ||
696 | } | ||
697 | } | ||
698 | |||
699 | static void __init calgary_enable_translation(struct pci_dev *dev) | ||
700 | { | ||
701 | u32 val32; | ||
702 | unsigned char busnum; | ||
703 | void __iomem *target; | ||
704 | void __iomem *bbar; | ||
705 | struct iommu_table *tbl; | ||
706 | |||
707 | busnum = dev->bus->number; | ||
708 | tbl = dev->sysdata; | ||
709 | bbar = tbl->bbar; | ||
710 | |||
711 | /* enable TCE in PHB Config Register */ | ||
712 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); | ||
713 | val32 = be32_to_cpu(readl(target)); | ||
714 | val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE; | ||
715 | |||
716 | printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum); | ||
717 | printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this " | ||
718 | "bus.\n"); | ||
719 | |||
720 | writel(cpu_to_be32(val32), target); | ||
721 | readl(target); /* flush */ | ||
722 | |||
723 | init_timer(&tbl->watchdog_timer); | ||
724 | tbl->watchdog_timer.function = &calgary_watchdog; | ||
725 | tbl->watchdog_timer.data = (unsigned long)dev; | ||
726 | mod_timer(&tbl->watchdog_timer, jiffies); | ||
727 | } | ||
728 | |||
729 | static void __init calgary_disable_translation(struct pci_dev *dev) | ||
730 | { | ||
731 | u32 val32; | ||
732 | unsigned char busnum; | ||
733 | void __iomem *target; | ||
734 | void __iomem *bbar; | ||
735 | struct iommu_table *tbl; | ||
736 | |||
737 | busnum = dev->bus->number; | ||
738 | tbl = dev->sysdata; | ||
739 | bbar = tbl->bbar; | ||
740 | |||
741 | /* disable TCE in PHB Config Register */ | ||
742 | target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET); | ||
743 | val32 = be32_to_cpu(readl(target)); | ||
744 | val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE); | ||
745 | |||
746 | printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum); | ||
747 | writel(cpu_to_be32(val32), target); | ||
748 | readl(target); /* flush */ | ||
749 | |||
750 | del_timer_sync(&tbl->watchdog_timer); | ||
751 | } | ||
752 | |||
753 | static inline unsigned int __init locate_register_space(struct pci_dev *dev) | ||
754 | { | ||
755 | int rionodeid; | ||
756 | u32 address; | ||
757 | |||
758 | rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2; | ||
759 | /* | ||
760 | * register space address calculation as follows: | ||
761 | * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) | ||
762 | * ChassisBase is always zero for x366/x260/x460 | ||
763 | * RioNodeId is 2 for first Calgary, 3 for second Calgary | ||
764 | */ | ||
765 | address = START_ADDRESS - | ||
766 | (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) + | ||
767 | (0x100000) * (rionodeid - CHASSIS_BASE); | ||
768 | return address; | ||
769 | } | ||
770 | |||
771 | static int __init calgary_init_one_nontraslated(struct pci_dev *dev) | ||
772 | { | ||
773 | dev->sysdata = NULL; | ||
774 | dev->bus->self = dev; | ||
775 | |||
776 | return 0; | ||
777 | } | ||
778 | |||
779 | static int __init calgary_init_one(struct pci_dev *dev) | ||
780 | { | ||
781 | u32 address; | ||
782 | void __iomem *bbar; | ||
783 | int ret; | ||
784 | |||
785 | address = locate_register_space(dev); | ||
786 | /* map entire 1MB of Calgary config space */ | ||
787 | bbar = ioremap_nocache(address, 1024 * 1024); | ||
788 | if (!bbar) { | ||
789 | ret = -ENODATA; | ||
790 | goto done; | ||
791 | } | ||
792 | |||
793 | ret = calgary_setup_tar(dev, bbar); | ||
794 | if (ret) | ||
795 | goto iounmap; | ||
796 | |||
797 | dev->bus->self = dev; | ||
798 | calgary_enable_translation(dev); | ||
799 | |||
800 | return 0; | ||
801 | |||
802 | iounmap: | ||
803 | iounmap(bbar); | ||
804 | done: | ||
805 | return ret; | ||
806 | } | ||
807 | |||
808 | static int __init calgary_init(void) | ||
809 | { | ||
810 | int i, ret = -ENODEV; | ||
811 | struct pci_dev *dev = NULL; | ||
812 | |||
813 | for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) { | ||
814 | dev = pci_get_device(PCI_VENDOR_ID_IBM, | ||
815 | PCI_DEVICE_ID_IBM_CALGARY, | ||
816 | dev); | ||
817 | if (!dev) | ||
818 | break; | ||
819 | if (!translate_phb(dev)) { | ||
820 | calgary_init_one_nontraslated(dev); | ||
821 | continue; | ||
822 | } | ||
823 | if (!tce_table_kva[i] && !translate_empty_slots) { | ||
824 | pci_dev_put(dev); | ||
825 | continue; | ||
826 | } | ||
827 | ret = calgary_init_one(dev); | ||
828 | if (ret) | ||
829 | goto error; | ||
830 | } | ||
831 | |||
832 | return ret; | ||
833 | |||
834 | error: | ||
835 | for (i--; i >= 0; i--) { | ||
836 | dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, | ||
837 | PCI_DEVICE_ID_IBM_CALGARY, | ||
838 | dev); | ||
839 | if (!translate_phb(dev)) { | ||
840 | pci_dev_put(dev); | ||
841 | continue; | ||
842 | } | ||
843 | if (!tce_table_kva[i] && !translate_empty_slots) | ||
844 | continue; | ||
845 | calgary_disable_translation(dev); | ||
846 | calgary_free_tar(dev); | ||
847 | pci_dev_put(dev); | ||
848 | } | ||
849 | |||
850 | return ret; | ||
851 | } | ||
852 | |||
853 | static inline int __init determine_tce_table_size(u64 ram) | ||
854 | { | ||
855 | int ret; | ||
856 | |||
857 | if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) | ||
858 | return specified_table_size; | ||
859 | |||
860 | /* | ||
861 | * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to | ||
862 | * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each | ||
863 | * larger table size has twice as many entries, so shift the | ||
864 | * max ram address by 13 to divide by 8K and then look at the | ||
865 | * order of the result to choose between 0-7. | ||
866 | */ | ||
867 | ret = get_order(ram >> 13); | ||
868 | if (ret > TCE_TABLE_SIZE_8M) | ||
869 | ret = TCE_TABLE_SIZE_8M; | ||
870 | |||
871 | return ret; | ||
872 | } | ||
873 | |||
874 | void __init detect_calgary(void) | ||
875 | { | ||
876 | u32 val; | ||
877 | int bus, table_idx; | ||
878 | void *tbl; | ||
879 | int detected = 0; | ||
880 | |||
881 | /* | ||
882 | * if the user specified iommu=off or iommu=soft or we found | ||
883 | * another HW IOMMU already, bail out. | ||
884 | */ | ||
885 | if (swiotlb || no_iommu || iommu_detected) | ||
886 | return; | ||
887 | |||
888 | specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); | ||
889 | |||
890 | for (bus = 0, table_idx = 0; | ||
891 | bus <= num_online_nodes() * MAX_PHB_BUS_NUM; | ||
892 | bus++) { | ||
893 | BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM); | ||
894 | if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) | ||
895 | continue; | ||
896 | if (test_bit(bus, translation_disabled)) { | ||
897 | printk(KERN_INFO "Calgary: translation is disabled for " | ||
898 | "PHB 0x%x\n", bus); | ||
899 | /* skip this phb, don't allocate a tbl for it */ | ||
900 | tce_table_kva[table_idx] = NULL; | ||
901 | table_idx++; | ||
902 | continue; | ||
903 | } | ||
904 | /* | ||
905 | * scan the first slot of the PCI bus to see if there | ||
906 | * are any devices present | ||
907 | */ | ||
908 | val = read_pci_config(bus, 1, 0, 0); | ||
909 | if (val != 0xffffffff || translate_empty_slots) { | ||
910 | tbl = alloc_tce_table(); | ||
911 | if (!tbl) | ||
912 | goto cleanup; | ||
913 | detected = 1; | ||
914 | } else | ||
915 | tbl = NULL; | ||
916 | |||
917 | tce_table_kva[table_idx] = tbl; | ||
918 | table_idx++; | ||
919 | } | ||
920 | |||
921 | if (detected) { | ||
922 | iommu_detected = 1; | ||
923 | calgary_detected = 1; | ||
924 | printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. " | ||
925 | "TCE table spec is %d.\n", specified_table_size); | ||
926 | } | ||
927 | return; | ||
928 | |||
929 | cleanup: | ||
930 | for (--table_idx; table_idx >= 0; --table_idx) | ||
931 | if (tce_table_kva[table_idx]) | ||
932 | free_tce_table(tce_table_kva[table_idx]); | ||
933 | } | ||
934 | |||
935 | int __init calgary_iommu_init(void) | ||
936 | { | ||
937 | int ret; | ||
938 | |||
939 | if (no_iommu || swiotlb) | ||
940 | return -ENODEV; | ||
941 | |||
942 | if (!calgary_detected) | ||
943 | return -ENODEV; | ||
944 | |||
945 | /* ok, we're trying to use Calgary - let's roll */ | ||
946 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
947 | |||
948 | ret = calgary_init(); | ||
949 | if (ret) { | ||
950 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
951 | "falling back to no_iommu\n", ret); | ||
952 | if (end_pfn > MAX_DMA32_PFN) | ||
953 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
954 | "32bit PCI may malfunction.\n"); | ||
955 | return ret; | ||
956 | } | ||
957 | |||
958 | force_iommu = 1; | ||
959 | dma_ops = &calgary_dma_ops; | ||
960 | |||
961 | return 0; | ||
962 | } | ||
963 | |||
964 | static int __init calgary_parse_options(char *p) | ||
965 | { | ||
966 | unsigned int bridge; | ||
967 | size_t len; | ||
968 | char* endp; | ||
969 | |||
970 | while (*p) { | ||
971 | if (!strncmp(p, "64k", 3)) | ||
972 | specified_table_size = TCE_TABLE_SIZE_64K; | ||
973 | else if (!strncmp(p, "128k", 4)) | ||
974 | specified_table_size = TCE_TABLE_SIZE_128K; | ||
975 | else if (!strncmp(p, "256k", 4)) | ||
976 | specified_table_size = TCE_TABLE_SIZE_256K; | ||
977 | else if (!strncmp(p, "512k", 4)) | ||
978 | specified_table_size = TCE_TABLE_SIZE_512K; | ||
979 | else if (!strncmp(p, "1M", 2)) | ||
980 | specified_table_size = TCE_TABLE_SIZE_1M; | ||
981 | else if (!strncmp(p, "2M", 2)) | ||
982 | specified_table_size = TCE_TABLE_SIZE_2M; | ||
983 | else if (!strncmp(p, "4M", 2)) | ||
984 | specified_table_size = TCE_TABLE_SIZE_4M; | ||
985 | else if (!strncmp(p, "8M", 2)) | ||
986 | specified_table_size = TCE_TABLE_SIZE_8M; | ||
987 | |||
988 | len = strlen("translate_empty_slots"); | ||
989 | if (!strncmp(p, "translate_empty_slots", len)) | ||
990 | translate_empty_slots = 1; | ||
991 | |||
992 | len = strlen("disable"); | ||
993 | if (!strncmp(p, "disable", len)) { | ||
994 | p += len; | ||
995 | if (*p == '=') | ||
996 | ++p; | ||
997 | if (*p == '\0') | ||
998 | break; | ||
999 | bridge = simple_strtol(p, &endp, 0); | ||
1000 | if (p == endp) | ||
1001 | break; | ||
1002 | |||
1003 | if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) { | ||
1004 | printk(KERN_INFO "Calgary: disabling " | ||
1005 | "translation for PHB 0x%x\n", bridge); | ||
1006 | set_bit(bridge, translation_disabled); | ||
1007 | } | ||
1008 | } | ||
1009 | |||
1010 | p = strpbrk(p, ","); | ||
1011 | if (!p) | ||
1012 | break; | ||
1013 | |||
1014 | p++; /* skip ',' */ | ||
1015 | } | ||
1016 | return 1; | ||
1017 | } | ||
1018 | __setup("calgary=", calgary_parse_options); | ||
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index a9275c9557cf..9c44f4f2433d 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <asm/io.h> | 10 | #include <asm/io.h> |
11 | #include <asm/proto.h> | 11 | #include <asm/proto.h> |
12 | #include <asm/calgary.h> | ||
12 | 13 | ||
13 | int iommu_merge __read_mostly = 0; | 14 | int iommu_merge __read_mostly = 0; |
14 | EXPORT_SYMBOL(iommu_merge); | 15 | EXPORT_SYMBOL(iommu_merge); |
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0; | |||
33 | int force_iommu __read_mostly= 0; | 34 | int force_iommu __read_mostly= 0; |
34 | #endif | 35 | #endif |
35 | 36 | ||
37 | /* Set this to 1 if there is a HW IOMMU in the system */ | ||
38 | int iommu_detected __read_mostly = 0; | ||
39 | |||
36 | /* Dummy device used for NULL arguments (normally ISA). Better would | 40 | /* Dummy device used for NULL arguments (normally ISA). Better would |
37 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 41 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
38 | to i386. */ | 42 | to i386. */ |
39 | struct device fallback_dev = { | 43 | struct device fallback_dev = { |
40 | .bus_id = "fallback device", | 44 | .bus_id = "fallback device", |
41 | .coherent_dma_mask = 0xffffffff, | 45 | .coherent_dma_mask = DMA_32BIT_MASK, |
42 | .dma_mask = &fallback_dev.coherent_dma_mask, | 46 | .dma_mask = &fallback_dev.coherent_dma_mask, |
43 | }; | 47 | }; |
44 | 48 | ||
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
77 | dev = &fallback_dev; | 81 | dev = &fallback_dev; |
78 | dma_mask = dev->coherent_dma_mask; | 82 | dma_mask = dev->coherent_dma_mask; |
79 | if (dma_mask == 0) | 83 | if (dma_mask == 0) |
80 | dma_mask = 0xffffffff; | 84 | dma_mask = DMA_32BIT_MASK; |
81 | 85 | ||
82 | /* Don't invoke OOM killer */ | 86 | /* Don't invoke OOM killer */ |
83 | gfp |= __GFP_NORETRY; | 87 | gfp |= __GFP_NORETRY; |
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
90 | larger than 16MB and in this case we have a chance of | 94 | larger than 16MB and in this case we have a chance of |
91 | finding fitting memory in the next higher zone first. If | 95 | finding fitting memory in the next higher zone first. If |
92 | not retry with true GFP_DMA. -AK */ | 96 | not retry with true GFP_DMA. -AK */ |
93 | if (dma_mask <= 0xffffffff) | 97 | if (dma_mask <= DMA_32BIT_MASK) |
94 | gfp |= GFP_DMA32; | 98 | gfp |= GFP_DMA32; |
95 | 99 | ||
96 | again: | 100 | again: |
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
111 | 115 | ||
112 | /* Don't use the 16MB ZONE_DMA unless absolutely | 116 | /* Don't use the 16MB ZONE_DMA unless absolutely |
113 | needed. It's better to use remapping first. */ | 117 | needed. It's better to use remapping first. */ |
114 | if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { | 118 | if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { |
115 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | 119 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; |
116 | goto again; | 120 | goto again; |
117 | } | 121 | } |
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
174 | /* Copied from i386. Doesn't make much sense, because it will | 178 | /* Copied from i386. Doesn't make much sense, because it will |
175 | only work for pci_alloc_coherent. | 179 | only work for pci_alloc_coherent. |
176 | The caller just has to use GFP_DMA in this case. */ | 180 | The caller just has to use GFP_DMA in this case. */ |
177 | if (mask < 0x00ffffff) | 181 | if (mask < DMA_24BIT_MASK) |
178 | return 0; | 182 | return 0; |
179 | 183 | ||
180 | /* Tell the device to use SAC when IOMMU force is on. This | 184 | /* Tell the device to use SAC when IOMMU force is on. This |
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
189 | SAC for these. Assume all masks <= 40 bits are of this | 193 | SAC for these. Assume all masks <= 40 bits are of this |
190 | type. Normally this doesn't make any difference, but gives | 194 | type. Normally this doesn't make any difference, but gives |
191 | more gentle handling of IOMMU overflow. */ | 195 | more gentle handling of IOMMU overflow. */ |
192 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | 196 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { |
193 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | 197 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); |
194 | return 0; | 198 | return 0; |
195 | } | 199 | } |
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p) | |||
266 | swiotlb = 1; | 270 | swiotlb = 1; |
267 | #endif | 271 | #endif |
268 | 272 | ||
269 | #ifdef CONFIG_GART_IOMMU | 273 | #ifdef CONFIG_IOMMU |
270 | gart_parse_options(p); | 274 | gart_parse_options(p); |
271 | #endif | 275 | #endif |
272 | 276 | ||
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p) | |||
276 | } | 280 | } |
277 | return 1; | 281 | return 1; |
278 | } | 282 | } |
283 | __setup("iommu=", iommu_setup); | ||
284 | |||
285 | void __init pci_iommu_alloc(void) | ||
286 | { | ||
287 | /* | ||
288 | * The order of these functions is important for | ||
289 | * fall-back/fail-over reasons | ||
290 | */ | ||
291 | #ifdef CONFIG_IOMMU | ||
292 | iommu_hole_init(); | ||
293 | #endif | ||
294 | |||
295 | #ifdef CONFIG_CALGARY_IOMMU | ||
296 | detect_calgary(); | ||
297 | #endif | ||
298 | |||
299 | #ifdef CONFIG_SWIOTLB | ||
300 | pci_swiotlb_init(); | ||
301 | #endif | ||
302 | } | ||
303 | |||
304 | static int __init pci_iommu_init(void) | ||
305 | { | ||
306 | #ifdef CONFIG_CALGARY_IOMMU | ||
307 | calgary_iommu_init(); | ||
308 | #endif | ||
309 | |||
310 | #ifdef CONFIG_IOMMU | ||
311 | gart_iommu_init(); | ||
312 | #endif | ||
313 | |||
314 | no_iommu_init(); | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | /* Must execute after PCI subsystem */ | ||
319 | fs_initcall(pci_iommu_init); | ||
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 82a7c9bfdfa0..4ca674d16b09 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/kdebug.h> | 32 | #include <asm/kdebug.h> |
33 | #include <asm/swiotlb.h> | 33 | #include <asm/swiotlb.h> |
34 | #include <asm/dma.h> | 34 | #include <asm/dma.h> |
35 | #include <asm/k8.h> | ||
35 | 36 | ||
36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 37 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
37 | static unsigned long iommu_size; /* size of remapping area bytes */ | 38 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */ | |||
46 | also seen with Qlogic at least). */ | 47 | also seen with Qlogic at least). */ |
47 | int iommu_fullflush = 1; | 48 | int iommu_fullflush = 1; |
48 | 49 | ||
49 | #define MAX_NB 8 | ||
50 | |||
51 | /* Allocation bitmap for the remapping area */ | 50 | /* Allocation bitmap for the remapping area */ |
52 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | 51 | static DEFINE_SPINLOCK(iommu_bitmap_lock); |
53 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ | 52 | static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ |
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry; | |||
63 | #define to_pages(addr,size) \ | 62 | #define to_pages(addr,size) \ |
64 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | 63 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) |
65 | 64 | ||
66 | #define for_all_nb(dev) \ | ||
67 | dev = NULL; \ | ||
68 | while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) | ||
69 | |||
70 | static struct pci_dev *northbridges[MAX_NB]; | ||
71 | static u32 northbridge_flush_word[MAX_NB]; | ||
72 | |||
73 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 65 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
74 | 66 | ||
75 | #ifdef CONFIG_AGP | 67 | #ifdef CONFIG_AGP |
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size) | |||
93 | offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); | 85 | offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); |
94 | if (offset == -1) { | 86 | if (offset == -1) { |
95 | need_flush = 1; | 87 | need_flush = 1; |
96 | offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); | 88 | offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); |
97 | } | 89 | } |
98 | if (offset != -1) { | 90 | if (offset != -1) { |
99 | set_bit_string(iommu_gart_bitmap, offset, size); | 91 | set_bit_string(iommu_gart_bitmap, offset, size); |
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size) | |||
120 | /* | 112 | /* |
121 | * Use global flush state to avoid races with multiple flushers. | 113 | * Use global flush state to avoid races with multiple flushers. |
122 | */ | 114 | */ |
123 | static void flush_gart(struct device *dev) | 115 | static void flush_gart(void) |
124 | { | 116 | { |
125 | unsigned long flags; | 117 | unsigned long flags; |
126 | int flushed = 0; | ||
127 | int i, max; | ||
128 | |||
129 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 118 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
130 | if (need_flush) { | 119 | if (need_flush) { |
131 | max = 0; | 120 | k8_flush_garts(); |
132 | for (i = 0; i < MAX_NB; i++) { | ||
133 | if (!northbridges[i]) | ||
134 | continue; | ||
135 | pci_write_config_dword(northbridges[i], 0x9c, | ||
136 | northbridge_flush_word[i] | 1); | ||
137 | flushed++; | ||
138 | max = i; | ||
139 | } | ||
140 | for (i = 0; i <= max; i++) { | ||
141 | u32 w; | ||
142 | if (!northbridges[i]) | ||
143 | continue; | ||
144 | /* Make sure the hardware actually executed the flush. */ | ||
145 | for (;;) { | ||
146 | pci_read_config_dword(northbridges[i], 0x9c, &w); | ||
147 | if (!(w & 1)) | ||
148 | break; | ||
149 | cpu_relax(); | ||
150 | } | ||
151 | } | ||
152 | if (!flushed) | ||
153 | printk("nothing to flush?\n"); | ||
154 | need_flush = 0; | 121 | need_flush = 0; |
155 | } | 122 | } |
156 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 123 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
157 | } | 124 | } |
158 | 125 | ||
159 | |||
160 | |||
161 | #ifdef CONFIG_IOMMU_LEAK | 126 | #ifdef CONFIG_IOMMU_LEAK |
162 | 127 | ||
163 | #define SET_LEAK(x) if (iommu_leak_tab) \ | 128 | #define SET_LEAK(x) if (iommu_leak_tab) \ |
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf, | |||
266 | size_t size, int dir) | 231 | size_t size, int dir) |
267 | { | 232 | { |
268 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | 233 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); |
269 | flush_gart(dev); | 234 | flush_gart(); |
270 | return map; | 235 | return map; |
271 | } | 236 | } |
272 | 237 | ||
@@ -289,6 +254,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) | |||
289 | } | 254 | } |
290 | 255 | ||
291 | /* | 256 | /* |
257 | * Free a DMA mapping. | ||
258 | */ | ||
259 | void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | ||
260 | size_t size, int direction) | ||
261 | { | ||
262 | unsigned long iommu_page; | ||
263 | int npages; | ||
264 | int i; | ||
265 | |||
266 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | ||
267 | dma_addr >= iommu_bus_base + iommu_size) | ||
268 | return; | ||
269 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | ||
270 | npages = to_pages(dma_addr, size); | ||
271 | for (i = 0; i < npages; i++) { | ||
272 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | ||
273 | CLEAR_LEAK(iommu_page + i); | ||
274 | } | ||
275 | free_iommu(iommu_page, npages); | ||
276 | } | ||
277 | |||
278 | /* | ||
292 | * Wrapper for pci_unmap_single working with scatterlists. | 279 | * Wrapper for pci_unmap_single working with scatterlists. |
293 | */ | 280 | */ |
294 | void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | 281 | void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) |
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di | |||
299 | struct scatterlist *s = &sg[i]; | 286 | struct scatterlist *s = &sg[i]; |
300 | if (!s->dma_length || !s->length) | 287 | if (!s->dma_length || !s->length) |
301 | break; | 288 | break; |
302 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | 289 | gart_unmap_single(dev, s->dma_address, s->dma_length, dir); |
303 | } | 290 | } |
304 | } | 291 | } |
305 | 292 | ||
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
329 | s->dma_address = addr; | 316 | s->dma_address = addr; |
330 | s->dma_length = s->length; | 317 | s->dma_length = s->length; |
331 | } | 318 | } |
332 | flush_gart(dev); | 319 | flush_gart(); |
333 | return nents; | 320 | return nents; |
334 | } | 321 | } |
335 | 322 | ||
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
436 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) | 423 | if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) |
437 | goto error; | 424 | goto error; |
438 | out++; | 425 | out++; |
439 | flush_gart(dev); | 426 | flush_gart(); |
440 | if (out < nents) | 427 | if (out < nents) |
441 | sg[out].dma_length = 0; | 428 | sg[out].dma_length = 0; |
442 | return out; | 429 | return out; |
443 | 430 | ||
444 | error: | 431 | error: |
445 | flush_gart(NULL); | 432 | flush_gart(); |
446 | gart_unmap_sg(dev, sg, nents, dir); | 433 | gart_unmap_sg(dev, sg, nents, dir); |
447 | /* When it was forced or merged try again in a dumb way */ | 434 | /* When it was forced or merged try again in a dumb way */ |
448 | if (force_iommu || iommu_merge) { | 435 | if (force_iommu || iommu_merge) { |
@@ -458,28 +445,6 @@ error: | |||
458 | return 0; | 445 | return 0; |
459 | } | 446 | } |
460 | 447 | ||
461 | /* | ||
462 | * Free a DMA mapping. | ||
463 | */ | ||
464 | void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | ||
465 | size_t size, int direction) | ||
466 | { | ||
467 | unsigned long iommu_page; | ||
468 | int npages; | ||
469 | int i; | ||
470 | |||
471 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | ||
472 | dma_addr >= iommu_bus_base + iommu_size) | ||
473 | return; | ||
474 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | ||
475 | npages = to_pages(dma_addr, size); | ||
476 | for (i = 0; i < npages; i++) { | ||
477 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | ||
478 | CLEAR_LEAK(iommu_page + i); | ||
479 | } | ||
480 | free_iommu(iommu_page, npages); | ||
481 | } | ||
482 | |||
483 | static int no_agp; | 448 | static int no_agp; |
484 | 449 | ||
485 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 450 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
532 | void *gatt; | 497 | void *gatt; |
533 | unsigned aper_base, new_aper_base; | 498 | unsigned aper_base, new_aper_base; |
534 | unsigned aper_size, gatt_size, new_aper_size; | 499 | unsigned aper_size, gatt_size, new_aper_size; |
535 | 500 | int i; | |
501 | |||
536 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 502 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
537 | aper_size = aper_base = info->aper_size = 0; | 503 | aper_size = aper_base = info->aper_size = 0; |
538 | for_all_nb(dev) { | 504 | dev = NULL; |
505 | for (i = 0; i < num_k8_northbridges; i++) { | ||
506 | dev = k8_northbridges[i]; | ||
539 | new_aper_base = read_aperture(dev, &new_aper_size); | 507 | new_aper_base = read_aperture(dev, &new_aper_size); |
540 | if (!new_aper_base) | 508 | if (!new_aper_base) |
541 | goto nommu; | 509 | goto nommu; |
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
558 | panic("Cannot allocate GATT table"); | 526 | panic("Cannot allocate GATT table"); |
559 | memset(gatt, 0, gatt_size); | 527 | memset(gatt, 0, gatt_size); |
560 | agp_gatt_table = gatt; | 528 | agp_gatt_table = gatt; |
561 | 529 | ||
562 | for_all_nb(dev) { | 530 | for (i = 0; i < num_k8_northbridges; i++) { |
563 | u32 ctl; | 531 | u32 ctl; |
564 | u32 gatt_reg; | 532 | u32 gatt_reg; |
565 | 533 | ||
534 | dev = k8_northbridges[i]; | ||
566 | gatt_reg = __pa(gatt) >> 12; | 535 | gatt_reg = __pa(gatt) >> 12; |
567 | gatt_reg <<= 4; | 536 | gatt_reg <<= 4; |
568 | pci_write_config_dword(dev, 0x98, gatt_reg); | 537 | pci_write_config_dword(dev, 0x98, gatt_reg); |
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
573 | 542 | ||
574 | pci_write_config_dword(dev, 0x90, ctl); | 543 | pci_write_config_dword(dev, 0x90, ctl); |
575 | } | 544 | } |
576 | flush_gart(NULL); | 545 | flush_gart(); |
577 | 546 | ||
578 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); | 547 | printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); |
579 | return 0; | 548 | return 0; |
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = { | |||
602 | .unmap_sg = gart_unmap_sg, | 571 | .unmap_sg = gart_unmap_sg, |
603 | }; | 572 | }; |
604 | 573 | ||
605 | static int __init pci_iommu_init(void) | 574 | void __init gart_iommu_init(void) |
606 | { | 575 | { |
607 | struct agp_kern_info info; | 576 | struct agp_kern_info info; |
608 | unsigned long aper_size; | 577 | unsigned long aper_size; |
609 | unsigned long iommu_start; | 578 | unsigned long iommu_start; |
610 | struct pci_dev *dev; | ||
611 | unsigned long scratch; | 579 | unsigned long scratch; |
612 | long i; | 580 | long i; |
613 | 581 | ||
582 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { | ||
583 | printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); | ||
584 | return; | ||
585 | } | ||
586 | |||
614 | #ifndef CONFIG_AGP_AMD64 | 587 | #ifndef CONFIG_AGP_AMD64 |
615 | no_agp = 1; | 588 | no_agp = 1; |
616 | #else | 589 | #else |
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void) | |||
622 | #endif | 595 | #endif |
623 | 596 | ||
624 | if (swiotlb) | 597 | if (swiotlb) |
625 | return -1; | 598 | return; |
599 | |||
600 | /* Did we detect a different HW IOMMU? */ | ||
601 | if (iommu_detected && !iommu_aperture) | ||
602 | return; | ||
626 | 603 | ||
627 | if (no_iommu || | 604 | if (no_iommu || |
628 | (!force_iommu && end_pfn <= MAX_DMA32_PFN) || | 605 | (!force_iommu && end_pfn <= MAX_DMA32_PFN) || |
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void) | |||
634 | "but IOMMU not available.\n" | 611 | "but IOMMU not available.\n" |
635 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | 612 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); |
636 | } | 613 | } |
637 | return -1; | 614 | return; |
638 | } | ||
639 | |||
640 | i = 0; | ||
641 | for_all_nb(dev) | ||
642 | i++; | ||
643 | if (i > MAX_NB) { | ||
644 | printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); | ||
645 | return -1; | ||
646 | } | 615 | } |
647 | 616 | ||
648 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 617 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void) | |||
707 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) | 676 | for (i = EMERGENCY_PAGES; i < iommu_pages; i++) |
708 | iommu_gatt_base[i] = gart_unmapped_entry; | 677 | iommu_gatt_base[i] = gart_unmapped_entry; |
709 | 678 | ||
710 | for_all_nb(dev) { | 679 | flush_gart(); |
711 | u32 flag; | ||
712 | int cpu = PCI_SLOT(dev->devfn) - 24; | ||
713 | if (cpu >= MAX_NB) | ||
714 | continue; | ||
715 | northbridges[cpu] = dev; | ||
716 | pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */ | ||
717 | northbridge_flush_word[cpu] = flag; | ||
718 | } | ||
719 | |||
720 | flush_gart(NULL); | ||
721 | |||
722 | dma_ops = &gart_dma_ops; | 680 | dma_ops = &gart_dma_ops; |
723 | |||
724 | return 0; | ||
725 | } | 681 | } |
726 | 682 | ||
727 | /* Must execute after PCI subsystem */ | ||
728 | fs_initcall(pci_iommu_init); | ||
729 | |||
730 | void gart_parse_options(char *p) | 683 | void gart_parse_options(char *p) |
731 | { | 684 | { |
732 | int arg; | 685 | int arg; |
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 1f6ecc62061d..c4c3cc36ac5b 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/string.h> | 6 | #include <linux/string.h> |
7 | #include <linux/dma-mapping.h> | ||
8 | |||
7 | #include <asm/proto.h> | 9 | #include <asm/proto.h> |
8 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
9 | #include <asm/dma.h> | 11 | #include <asm/dma.h> |
@@ -12,10 +14,11 @@ static int | |||
12 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 14 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
13 | { | 15 | { |
14 | if (hwdev && bus + size > *hwdev->dma_mask) { | 16 | if (hwdev && bus + size > *hwdev->dma_mask) { |
15 | if (*hwdev->dma_mask >= 0xffffffffULL) | 17 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) |
16 | printk(KERN_ERR | 18 | printk(KERN_ERR |
17 | "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", | 19 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
18 | name, (long long)bus, size, (long long)*hwdev->dma_mask); | 20 | name, (long long)bus, size, |
21 | (long long)*hwdev->dma_mask); | ||
19 | return 0; | 22 | return 0; |
20 | } | 23 | } |
21 | return 1; | 24 | return 1; |
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index 990ed67896f2..ebdb77fe2057 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c | |||
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = { | |||
31 | void pci_swiotlb_init(void) | 31 | void pci_swiotlb_init(void) |
32 | { | 32 | { |
33 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 33 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
34 | if (!iommu_aperture && !no_iommu && | 34 | if (!iommu_detected && !no_iommu && |
35 | (end_pfn > MAX_DMA32_PFN || force_iommu)) | 35 | (end_pfn > MAX_DMA32_PFN || force_iommu)) |
36 | swiotlb = 1; | 36 | swiotlb = 1; |
37 | if (swiotlb) { | 37 | if (swiotlb) { |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index fb903e65e079..ca56e19b8b6e 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -10,7 +10,6 @@ | |||
10 | * Andi Kleen. | 10 | * Andi Kleen. |
11 | * | 11 | * |
12 | * CPU hotplug support - ashok.raj@intel.com | 12 | * CPU hotplug support - ashok.raj@intel.com |
13 | * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $ | ||
14 | */ | 13 | */ |
15 | 14 | ||
16 | /* | 15 | /* |
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override); | |||
64 | * Powermanagement idle function, if any.. | 63 | * Powermanagement idle function, if any.. |
65 | */ | 64 | */ |
66 | void (*pm_idle)(void); | 65 | void (*pm_idle)(void); |
66 | EXPORT_SYMBOL(pm_idle); | ||
67 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | 67 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); |
68 | 68 | ||
69 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 69 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
@@ -111,7 +111,7 @@ static void default_idle(void) | |||
111 | { | 111 | { |
112 | local_irq_enable(); | 112 | local_irq_enable(); |
113 | 113 | ||
114 | clear_thread_flag(TIF_POLLING_NRFLAG); | 114 | current_thread_info()->status &= ~TS_POLLING; |
115 | smp_mb__after_clear_bit(); | 115 | smp_mb__after_clear_bit(); |
116 | while (!need_resched()) { | 116 | while (!need_resched()) { |
117 | local_irq_disable(); | 117 | local_irq_disable(); |
@@ -120,7 +120,7 @@ static void default_idle(void) | |||
120 | else | 120 | else |
121 | local_irq_enable(); | 121 | local_irq_enable(); |
122 | } | 122 | } |
123 | set_thread_flag(TIF_POLLING_NRFLAG); | 123 | current_thread_info()->status |= TS_POLLING; |
124 | } | 124 | } |
125 | 125 | ||
126 | /* | 126 | /* |
@@ -203,8 +203,7 @@ static inline void play_dead(void) | |||
203 | */ | 203 | */ |
204 | void cpu_idle (void) | 204 | void cpu_idle (void) |
205 | { | 205 | { |
206 | set_thread_flag(TIF_POLLING_NRFLAG); | 206 | current_thread_info()->status |= TS_POLLING; |
207 | |||
208 | /* endless idle loop with no priority at all */ | 207 | /* endless idle loop with no priority at all */ |
209 | while (1) { | 208 | while (1) { |
210 | while (!need_resched()) { | 209 | while (!need_resched()) { |
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs) | |||
335 | { | 334 | { |
336 | printk("CPU %d:", smp_processor_id()); | 335 | printk("CPU %d:", smp_processor_id()); |
337 | __show_regs(regs); | 336 | __show_regs(regs); |
338 | show_trace(&regs->rsp); | 337 | show_trace(NULL, regs, (void *)(regs + 1)); |
339 | } | 338 | } |
340 | 339 | ||
341 | /* | 340 | /* |
@@ -365,8 +364,11 @@ void flush_thread(void) | |||
365 | struct task_struct *tsk = current; | 364 | struct task_struct *tsk = current; |
366 | struct thread_info *t = current_thread_info(); | 365 | struct thread_info *t = current_thread_info(); |
367 | 366 | ||
368 | if (t->flags & _TIF_ABI_PENDING) | 367 | if (t->flags & _TIF_ABI_PENDING) { |
369 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); | 368 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); |
369 | if (t->flags & _TIF_IA32) | ||
370 | current_thread_info()->status |= TS_COMPAT; | ||
371 | } | ||
370 | 372 | ||
371 | tsk->thread.debugreg0 = 0; | 373 | tsk->thread.debugreg0 = 0; |
372 | tsk->thread.debugreg1 = 0; | 374 | tsk->thread.debugreg1 = 0; |
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 57117b8beb2b..2d6769847456 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c | |||
@@ -20,6 +20,7 @@ | |||
20 | * Power off function, if any | 20 | * Power off function, if any |
21 | */ | 21 | */ |
22 | void (*pm_power_off)(void); | 22 | void (*pm_power_off)(void); |
23 | EXPORT_SYMBOL(pm_power_off); | ||
23 | 24 | ||
24 | static long no_idt[3]; | 25 | static long no_idt[3]; |
25 | static enum { | 26 | static enum { |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 143c65031539..1129918ede82 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -5,8 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Nov 2001 Dave Jones <davej@suse.de> | 6 | * Nov 2001 Dave Jones <davej@suse.de> |
7 | * Forked from i386 setup code. | 7 | * Forked from i386 setup code. |
8 | * | ||
9 | * $Id$ | ||
10 | */ | 8 | */ |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -65,9 +63,7 @@ | |||
65 | #include <asm/setup.h> | 63 | #include <asm/setup.h> |
66 | #include <asm/mach_apic.h> | 64 | #include <asm/mach_apic.h> |
67 | #include <asm/numa.h> | 65 | #include <asm/numa.h> |
68 | #include <asm/swiotlb.h> | ||
69 | #include <asm/sections.h> | 66 | #include <asm/sections.h> |
70 | #include <asm/gart-mapping.h> | ||
71 | #include <asm/dmi.h> | 67 | #include <asm/dmi.h> |
72 | 68 | ||
73 | /* | 69 | /* |
@@ -75,6 +71,7 @@ | |||
75 | */ | 71 | */ |
76 | 72 | ||
77 | struct cpuinfo_x86 boot_cpu_data __read_mostly; | 73 | struct cpuinfo_x86 boot_cpu_data __read_mostly; |
74 | EXPORT_SYMBOL(boot_cpu_data); | ||
78 | 75 | ||
79 | unsigned long mmu_cr4_features; | 76 | unsigned long mmu_cr4_features; |
80 | 77 | ||
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA]; | |||
103 | * Setup options | 100 | * Setup options |
104 | */ | 101 | */ |
105 | struct screen_info screen_info; | 102 | struct screen_info screen_info; |
103 | EXPORT_SYMBOL(screen_info); | ||
106 | struct sys_desc_table_struct { | 104 | struct sys_desc_table_struct { |
107 | unsigned short length; | 105 | unsigned short length; |
108 | unsigned char table[0]; | 106 | unsigned char table[0]; |
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
474 | } | 472 | } |
475 | #endif | 473 | #endif |
476 | 474 | ||
477 | /* Use inline assembly to define this because the nops are defined | ||
478 | as inline assembly strings in the include files and we cannot | ||
479 | get them easily into strings. */ | ||
480 | asm("\t.data\nk8nops: " | ||
481 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
482 | K8_NOP7 K8_NOP8); | ||
483 | |||
484 | extern unsigned char k8nops[]; | ||
485 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | ||
486 | NULL, | ||
487 | k8nops, | ||
488 | k8nops + 1, | ||
489 | k8nops + 1 + 2, | ||
490 | k8nops + 1 + 2 + 3, | ||
491 | k8nops + 1 + 2 + 3 + 4, | ||
492 | k8nops + 1 + 2 + 3 + 4 + 5, | ||
493 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | ||
494 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | ||
495 | }; | ||
496 | |||
497 | extern char __vsyscall_0; | ||
498 | |||
499 | /* Replace instructions with better alternatives for this CPU type. | ||
500 | |||
501 | This runs before SMP is initialized to avoid SMP problems with | ||
502 | self modifying code. This implies that assymetric systems where | ||
503 | APs have less capabilities than the boot processor are not handled. | ||
504 | In this case boot with "noreplacement". */ | ||
505 | void apply_alternatives(void *start, void *end) | ||
506 | { | ||
507 | struct alt_instr *a; | ||
508 | int diff, i, k; | ||
509 | for (a = start; (void *)a < end; a++) { | ||
510 | u8 *instr; | ||
511 | |||
512 | if (!boot_cpu_has(a->cpuid)) | ||
513 | continue; | ||
514 | |||
515 | BUG_ON(a->replacementlen > a->instrlen); | ||
516 | instr = a->instr; | ||
517 | /* vsyscall code is not mapped yet. resolve it manually. */ | ||
518 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) | ||
519 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | ||
520 | __inline_memcpy(instr, a->replacement, a->replacementlen); | ||
521 | diff = a->instrlen - a->replacementlen; | ||
522 | |||
523 | /* Pad the rest with nops */ | ||
524 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { | ||
525 | k = diff; | ||
526 | if (k > ASM_NOP_MAX) | ||
527 | k = ASM_NOP_MAX; | ||
528 | __inline_memcpy(instr + i, k8_nops[k], k); | ||
529 | } | ||
530 | } | ||
531 | } | ||
532 | |||
533 | static int no_replacement __initdata = 0; | ||
534 | |||
535 | void __init alternative_instructions(void) | ||
536 | { | ||
537 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
538 | if (no_replacement) | ||
539 | return; | ||
540 | apply_alternatives(__alt_instructions, __alt_instructions_end); | ||
541 | } | ||
542 | |||
543 | static int __init noreplacement_setup(char *s) | ||
544 | { | ||
545 | no_replacement = 1; | ||
546 | return 1; | ||
547 | } | ||
548 | |||
549 | __setup("noreplacement", noreplacement_setup); | ||
550 | |||
551 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 475 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
552 | struct edd edd; | 476 | struct edd edd; |
553 | #ifdef CONFIG_EDD_MODULE | 477 | #ifdef CONFIG_EDD_MODULE |
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p) | |||
780 | 704 | ||
781 | e820_setup_gap(); | 705 | e820_setup_gap(); |
782 | 706 | ||
783 | #ifdef CONFIG_GART_IOMMU | ||
784 | iommu_hole_init(); | ||
785 | #endif | ||
786 | |||
787 | #ifdef CONFIG_VT | 707 | #ifdef CONFIG_VT |
788 | #if defined(CONFIG_VGA_CONSOLE) | 708 | #if defined(CONFIG_VGA_CONSOLE) |
789 | conswitchp = &vga_con; | 709 | conswitchp = &vga_con; |
@@ -868,24 +788,32 @@ static int nearby_node(int apicid) | |||
868 | static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | 788 | static void __init amd_detect_cmp(struct cpuinfo_x86 *c) |
869 | { | 789 | { |
870 | #ifdef CONFIG_SMP | 790 | #ifdef CONFIG_SMP |
871 | int cpu = smp_processor_id(); | ||
872 | unsigned bits; | 791 | unsigned bits; |
873 | #ifdef CONFIG_NUMA | 792 | #ifdef CONFIG_NUMA |
793 | int cpu = smp_processor_id(); | ||
874 | int node = 0; | 794 | int node = 0; |
875 | unsigned apicid = hard_smp_processor_id(); | 795 | unsigned apicid = hard_smp_processor_id(); |
876 | #endif | 796 | #endif |
797 | unsigned ecx = cpuid_ecx(0x80000008); | ||
798 | |||
799 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
877 | 800 | ||
878 | bits = 0; | 801 | /* CPU telling us the core id bits shift? */ |
879 | while ((1 << bits) < c->x86_max_cores) | 802 | bits = (ecx >> 12) & 0xF; |
880 | bits++; | 803 | |
804 | /* Otherwise recompute */ | ||
805 | if (bits == 0) { | ||
806 | while ((1 << bits) < c->x86_max_cores) | ||
807 | bits++; | ||
808 | } | ||
881 | 809 | ||
882 | /* Low order bits define the core id (index of core in socket) */ | 810 | /* Low order bits define the core id (index of core in socket) */ |
883 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); | 811 | c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); |
884 | /* Convert the APIC ID into the socket ID */ | 812 | /* Convert the APIC ID into the socket ID */ |
885 | phys_proc_id[cpu] = phys_pkg_id(bits); | 813 | c->phys_proc_id = phys_pkg_id(bits); |
886 | 814 | ||
887 | #ifdef CONFIG_NUMA | 815 | #ifdef CONFIG_NUMA |
888 | node = phys_proc_id[cpu]; | 816 | node = c->phys_proc_id; |
889 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | 817 | if (apicid_to_node[apicid] != NUMA_NO_NODE) |
890 | node = apicid_to_node[apicid]; | 818 | node = apicid_to_node[apicid]; |
891 | if (!node_online(node)) { | 819 | if (!node_online(node)) { |
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
898 | but in the same order as the HT nodeids. | 826 | but in the same order as the HT nodeids. |
899 | If that doesn't result in a usable node fall back to the | 827 | If that doesn't result in a usable node fall back to the |
900 | path for the previous case. */ | 828 | path for the previous case. */ |
901 | int ht_nodeid = apicid - (phys_proc_id[0] << bits); | 829 | int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); |
902 | if (ht_nodeid >= 0 && | 830 | if (ht_nodeid >= 0 && |
903 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | 831 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) |
904 | node = apicid_to_node[ht_nodeid]; | 832 | node = apicid_to_node[ht_nodeid]; |
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
908 | } | 836 | } |
909 | numa_set_node(cpu, node); | 837 | numa_set_node(cpu, node); |
910 | 838 | ||
911 | printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", | 839 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); |
912 | cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]); | ||
913 | #endif | 840 | #endif |
914 | #endif | 841 | #endif |
915 | } | 842 | } |
916 | 843 | ||
917 | static int __init init_amd(struct cpuinfo_x86 *c) | 844 | static void __init init_amd(struct cpuinfo_x86 *c) |
918 | { | 845 | { |
919 | int r; | ||
920 | unsigned level; | 846 | unsigned level; |
921 | 847 | ||
922 | #ifdef CONFIG_SMP | 848 | #ifdef CONFIG_SMP |
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
949 | if (c->x86 >= 6) | 875 | if (c->x86 >= 6) |
950 | set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); | 876 | set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); |
951 | 877 | ||
952 | r = get_model_name(c); | 878 | level = get_model_name(c); |
953 | if (!r) { | 879 | if (!level) { |
954 | switch (c->x86) { | 880 | switch (c->x86) { |
955 | case 15: | 881 | case 15: |
956 | /* Should distinguish Models here, but this is only | 882 | /* Should distinguish Models here, but this is only |
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
965 | if (c->x86_power & (1<<8)) | 891 | if (c->x86_power & (1<<8)) |
966 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 892 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
967 | 893 | ||
968 | if (c->extended_cpuid_level >= 0x80000008) { | 894 | /* Multi core CPU? */ |
969 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 895 | if (c->extended_cpuid_level >= 0x80000008) |
970 | |||
971 | amd_detect_cmp(c); | 896 | amd_detect_cmp(c); |
972 | } | ||
973 | 897 | ||
974 | return r; | 898 | /* Fix cpuid4 emulation for more */ |
899 | num_cache_leaves = 3; | ||
975 | } | 900 | } |
976 | 901 | ||
977 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 902 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
979 | #ifdef CONFIG_SMP | 904 | #ifdef CONFIG_SMP |
980 | u32 eax, ebx, ecx, edx; | 905 | u32 eax, ebx, ecx, edx; |
981 | int index_msb, core_bits; | 906 | int index_msb, core_bits; |
982 | int cpu = smp_processor_id(); | ||
983 | 907 | ||
984 | cpuid(1, &eax, &ebx, &ecx, &edx); | 908 | cpuid(1, &eax, &ebx, &ecx, &edx); |
985 | 909 | ||
986 | 910 | ||
987 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 911 | if (!cpu_has(c, X86_FEATURE_HT)) |
988 | return; | 912 | return; |
913 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | ||
914 | goto out; | ||
989 | 915 | ||
990 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 916 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
991 | 917 | ||
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
1000 | } | 926 | } |
1001 | 927 | ||
1002 | index_msb = get_count_order(smp_num_siblings); | 928 | index_msb = get_count_order(smp_num_siblings); |
1003 | phys_proc_id[cpu] = phys_pkg_id(index_msb); | 929 | c->phys_proc_id = phys_pkg_id(index_msb); |
1004 | |||
1005 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
1006 | phys_proc_id[cpu]); | ||
1007 | 930 | ||
1008 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 931 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
1009 | 932 | ||
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
1011 | 934 | ||
1012 | core_bits = get_count_order(c->x86_max_cores); | 935 | core_bits = get_count_order(c->x86_max_cores); |
1013 | 936 | ||
1014 | cpu_core_id[cpu] = phys_pkg_id(index_msb) & | 937 | c->cpu_core_id = phys_pkg_id(index_msb) & |
1015 | ((1 << core_bits) - 1); | 938 | ((1 << core_bits) - 1); |
1016 | |||
1017 | if (c->x86_max_cores > 1) | ||
1018 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
1019 | cpu_core_id[cpu]); | ||
1020 | } | 939 | } |
940 | out: | ||
941 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | ||
942 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); | ||
943 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); | ||
944 | } | ||
945 | |||
1021 | #endif | 946 | #endif |
1022 | } | 947 | } |
1023 | 948 | ||
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
1026 | */ | 951 | */ |
1027 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | 952 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) |
1028 | { | 953 | { |
1029 | unsigned int eax; | 954 | unsigned int eax, t; |
1030 | 955 | ||
1031 | if (c->cpuid_level < 4) | 956 | if (c->cpuid_level < 4) |
1032 | return 1; | 957 | return 1; |
1033 | 958 | ||
1034 | __asm__("cpuid" | 959 | cpuid_count(4, 0, &eax, &t, &t, &t); |
1035 | : "=a" (eax) | ||
1036 | : "0" (4), "c" (0) | ||
1037 | : "bx", "dx"); | ||
1038 | 960 | ||
1039 | if (eax & 0x1f) | 961 | if (eax & 0x1f) |
1040 | return ((eax >> 26) + 1); | 962 | return ((eax >> 26) + 1); |
@@ -1047,16 +969,17 @@ static void srat_detect_node(void) | |||
1047 | #ifdef CONFIG_NUMA | 969 | #ifdef CONFIG_NUMA |
1048 | unsigned node; | 970 | unsigned node; |
1049 | int cpu = smp_processor_id(); | 971 | int cpu = smp_processor_id(); |
972 | int apicid = hard_smp_processor_id(); | ||
1050 | 973 | ||
1051 | /* Don't do the funky fallback heuristics the AMD version employs | 974 | /* Don't do the funky fallback heuristics the AMD version employs |
1052 | for now. */ | 975 | for now. */ |
1053 | node = apicid_to_node[hard_smp_processor_id()]; | 976 | node = apicid_to_node[apicid]; |
1054 | if (node == NUMA_NO_NODE) | 977 | if (node == NUMA_NO_NODE) |
1055 | node = first_node(node_online_map); | 978 | node = first_node(node_online_map); |
1056 | numa_set_node(cpu, node); | 979 | numa_set_node(cpu, node); |
1057 | 980 | ||
1058 | if (acpi_numa > 0) | 981 | if (acpi_numa > 0) |
1059 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); | 982 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); |
1060 | #endif | 983 | #endif |
1061 | } | 984 | } |
1062 | 985 | ||
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
1066 | unsigned n; | 989 | unsigned n; |
1067 | 990 | ||
1068 | init_intel_cacheinfo(c); | 991 | init_intel_cacheinfo(c); |
992 | if (c->cpuid_level > 9 ) { | ||
993 | unsigned eax = cpuid_eax(10); | ||
994 | /* Check for version and the number of counters */ | ||
995 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
996 | set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); | ||
997 | } | ||
998 | |||
1069 | n = c->extended_cpuid_level; | 999 | n = c->extended_cpuid_level; |
1070 | if (n >= 0x80000008) { | 1000 | if (n >= 0x80000008) { |
1071 | unsigned eax = cpuid_eax(0x80000008); | 1001 | unsigned eax = cpuid_eax(0x80000008); |
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1157 | } | 1087 | } |
1158 | 1088 | ||
1159 | #ifdef CONFIG_SMP | 1089 | #ifdef CONFIG_SMP |
1160 | phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; | 1090 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; |
1161 | #endif | 1091 | #endif |
1162 | } | 1092 | } |
1163 | 1093 | ||
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1284 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1214 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1285 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 1215 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
1286 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, | 1216 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, |
1287 | NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", | 1217 | NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", |
1288 | 1218 | ||
1289 | /* Transmeta-defined */ | 1219 | /* Transmeta-defined */ |
1290 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 1220 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1295 | /* Other (Linux-defined) */ | 1225 | /* Other (Linux-defined) */ |
1296 | "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, | 1226 | "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, |
1297 | "constant_tsc", NULL, NULL, | 1227 | "constant_tsc", NULL, NULL, |
1298 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1228 | "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1299 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1229 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1300 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1230 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1301 | 1231 | ||
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1365 | #ifdef CONFIG_SMP | 1295 | #ifdef CONFIG_SMP |
1366 | if (smp_num_siblings * c->x86_max_cores > 1) { | 1296 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1367 | int cpu = c - cpu_data; | 1297 | int cpu = c - cpu_data; |
1368 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); | 1298 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); |
1369 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); | 1299 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); |
1370 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); | 1300 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); |
1371 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | 1301 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1372 | } | 1302 | } |
1373 | #endif | 1303 | #endif |
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = { | |||
1441 | .show = show_cpuinfo, | 1371 | .show = show_cpuinfo, |
1442 | }; | 1372 | }; |
1443 | 1373 | ||
1444 | #ifdef CONFIG_INPUT_PCSPKR | 1374 | #if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) |
1445 | #include <linux/platform_device.h> | 1375 | #include <linux/platform_device.h> |
1446 | static __init int add_pcspkr(void) | 1376 | static __init int add_pcspkr(void) |
1447 | { | 1377 | { |
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 8a691fa6d393..f5934cb4a2b6 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -3,7 +3,6 @@ | |||
3 | * Copyright (C) 1995 Linus Torvalds | 3 | * Copyright (C) 1995 Linus Torvalds |
4 | * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. | 4 | * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. |
5 | * See setup.c for older changelog. | 5 | * See setup.c for older changelog. |
6 | * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $ | ||
7 | */ | 6 | */ |
8 | #include <linux/config.h> | 7 | #include <linux/config.h> |
9 | #include <linux/init.h> | 8 | #include <linux/init.h> |
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; | |||
31 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 30 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
32 | 31 | ||
33 | struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; | 32 | struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; |
33 | EXPORT_SYMBOL(_cpu_pda); | ||
34 | struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; | 34 | struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; |
35 | 35 | ||
36 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 36 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |||
38 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | 38 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); |
39 | 39 | ||
40 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 40 | unsigned long __supported_pte_mask __read_mostly = ~0UL; |
41 | EXPORT_SYMBOL(__supported_pte_mask); | ||
41 | static int do_not_nx __cpuinitdata = 0; | 42 | static int do_not_nx __cpuinitdata = 0; |
42 | 43 | ||
43 | /* noexec=on|off | 44 | /* noexec=on|off |
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index e5f5ce7909a3..28161170fb0a 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -7,8 +7,6 @@ | |||
7 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | 7 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
8 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 8 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
9 | * 2000-2002 x86-64 support by Andi Kleen | 9 | * 2000-2002 x86-64 support by Andi Kleen |
10 | * | ||
11 | * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $ | ||
12 | */ | 10 | */ |
13 | 11 | ||
14 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | |||
239 | rsp = regs->rsp - 128; | 237 | rsp = regs->rsp - 128; |
240 | 238 | ||
241 | /* This is the X/Open sanctioned signal stack switching. */ | 239 | /* This is the X/Open sanctioned signal stack switching. */ |
242 | /* RED-PEN: redzone on that stack? */ | ||
243 | if (ka->sa.sa_flags & SA_ONSTACK) { | 240 | if (ka->sa.sa_flags & SA_ONSTACK) { |
244 | if (sas_ss_flags(rsp) == 0) | 241 | if (sas_ss_flags(rsp) == 0) |
245 | rsp = current->sas_ss_sp + current->sas_ss_size; | 242 | rsp = current->sas_ss_sp + current->sas_ss_size; |
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 4a6628b14d99..8188bae9c6d5 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void) | |||
224 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 224 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); |
225 | preempt_enable(); | 225 | preempt_enable(); |
226 | } | 226 | } |
227 | EXPORT_SYMBOL(flush_tlb_current_task); | ||
227 | 228 | ||
228 | void flush_tlb_mm (struct mm_struct * mm) | 229 | void flush_tlb_mm (struct mm_struct * mm) |
229 | { | 230 | { |
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm) | |||
244 | 245 | ||
245 | preempt_enable(); | 246 | preempt_enable(); |
246 | } | 247 | } |
248 | EXPORT_SYMBOL(flush_tlb_mm); | ||
247 | 249 | ||
248 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | 250 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) |
249 | { | 251 | { |
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | |||
266 | 268 | ||
267 | preempt_enable(); | 269 | preempt_enable(); |
268 | } | 270 | } |
271 | EXPORT_SYMBOL(flush_tlb_page); | ||
269 | 272 | ||
270 | static void do_flush_tlb_all(void* info) | 273 | static void do_flush_tlb_all(void* info) |
271 | { | 274 | { |
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
443 | spin_unlock(&call_lock); | 446 | spin_unlock(&call_lock); |
444 | return 0; | 447 | return 0; |
445 | } | 448 | } |
449 | EXPORT_SYMBOL(smp_call_function); | ||
446 | 450 | ||
447 | void smp_stop_cpu(void) | 451 | void smp_stop_cpu(void) |
448 | { | 452 | { |
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy) | |||
460 | { | 464 | { |
461 | smp_stop_cpu(); | 465 | smp_stop_cpu(); |
462 | for (;;) | 466 | for (;;) |
463 | asm("hlt"); | 467 | halt(); |
464 | } | 468 | } |
465 | 469 | ||
466 | void smp_send_stop(void) | 470 | void smp_send_stop(void) |
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void) | |||
520 | 524 | ||
521 | int safe_smp_processor_id(void) | 525 | int safe_smp_processor_id(void) |
522 | { | 526 | { |
523 | int apicid, i; | 527 | unsigned apicid, i; |
524 | 528 | ||
525 | if (disable_apic) | 529 | if (disable_apic) |
526 | return 0; | 530 | return 0; |
527 | 531 | ||
528 | apicid = hard_smp_processor_id(); | 532 | apicid = hard_smp_processor_id(); |
529 | if (x86_cpu_to_apicid[apicid] == apicid) | 533 | if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid) |
530 | return apicid; | 534 | return apicid; |
531 | 535 | ||
532 | for (i = 0; i < NR_CPUS; ++i) { | 536 | for (i = 0; i < NR_CPUS; ++i) { |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 71a7222cf9ce..4e9755179ecf 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -63,13 +63,11 @@ | |||
63 | 63 | ||
64 | /* Number of siblings per CPU package */ | 64 | /* Number of siblings per CPU package */ |
65 | int smp_num_siblings = 1; | 65 | int smp_num_siblings = 1; |
66 | /* Package ID of each logical CPU */ | 66 | EXPORT_SYMBOL(smp_num_siblings); |
67 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
68 | /* core ID of each logical CPU */ | ||
69 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
70 | 67 | ||
71 | /* Last level cache ID of each logical CPU */ | 68 | /* Last level cache ID of each logical CPU */ |
72 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | 69 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; |
70 | EXPORT_SYMBOL(cpu_llc_id); | ||
73 | 71 | ||
74 | /* Bitmask of currently online CPUs */ | 72 | /* Bitmask of currently online CPUs */ |
75 | cpumask_t cpu_online_map __read_mostly; | 73 | cpumask_t cpu_online_map __read_mostly; |
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map); | |||
82 | */ | 80 | */ |
83 | cpumask_t cpu_callin_map; | 81 | cpumask_t cpu_callin_map; |
84 | cpumask_t cpu_callout_map; | 82 | cpumask_t cpu_callout_map; |
83 | EXPORT_SYMBOL(cpu_callout_map); | ||
85 | 84 | ||
86 | cpumask_t cpu_possible_map; | 85 | cpumask_t cpu_possible_map; |
87 | EXPORT_SYMBOL(cpu_possible_map); | 86 | EXPORT_SYMBOL(cpu_possible_map); |
88 | 87 | ||
89 | /* Per CPU bogomips and other parameters */ | 88 | /* Per CPU bogomips and other parameters */ |
90 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 89 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
90 | EXPORT_SYMBOL(cpu_data); | ||
91 | 91 | ||
92 | /* Set when the idlers are all forked */ | 92 | /* Set when the idlers are all forked */ |
93 | int smp_threads_ready; | 93 | int smp_threads_ready; |
94 | 94 | ||
95 | /* representing HT siblings of each logical CPU */ | 95 | /* representing HT siblings of each logical CPU */ |
96 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 96 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
97 | EXPORT_SYMBOL(cpu_sibling_map); | ||
97 | 98 | ||
98 | /* representing HT and core siblings of each logical CPU */ | 99 | /* representing HT and core siblings of each logical CPU */ |
99 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 100 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu) | |||
472 | 473 | ||
473 | if (smp_num_siblings > 1) { | 474 | if (smp_num_siblings > 1) { |
474 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 475 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
475 | if (phys_proc_id[cpu] == phys_proc_id[i] && | 476 | if (c[cpu].phys_proc_id == c[i].phys_proc_id && |
476 | cpu_core_id[cpu] == cpu_core_id[i]) { | 477 | c[cpu].cpu_core_id == c[i].cpu_core_id) { |
477 | cpu_set(i, cpu_sibling_map[cpu]); | 478 | cpu_set(i, cpu_sibling_map[cpu]); |
478 | cpu_set(cpu, cpu_sibling_map[i]); | 479 | cpu_set(cpu, cpu_sibling_map[i]); |
479 | cpu_set(i, cpu_core_map[cpu]); | 480 | cpu_set(i, cpu_core_map[cpu]); |
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu) | |||
500 | cpu_set(i, c[cpu].llc_shared_map); | 501 | cpu_set(i, c[cpu].llc_shared_map); |
501 | cpu_set(cpu, c[i].llc_shared_map); | 502 | cpu_set(cpu, c[i].llc_shared_map); |
502 | } | 503 | } |
503 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 504 | if (c[cpu].phys_proc_id == c[i].phys_proc_id) { |
504 | cpu_set(i, cpu_core_map[cpu]); | 505 | cpu_set(i, cpu_core_map[cpu]); |
505 | cpu_set(cpu, cpu_core_map[i]); | 506 | cpu_set(cpu, cpu_core_map[i]); |
506 | /* | 507 | /* |
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
797 | } | 798 | } |
798 | 799 | ||
799 | 800 | ||
801 | alternatives_smp_switch(1); | ||
802 | |||
800 | c_idle.idle = get_idle_for_cpu(cpu); | 803 | c_idle.idle = get_idle_for_cpu(cpu); |
801 | 804 | ||
802 | if (c_idle.idle) { | 805 | if (c_idle.idle) { |
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu) | |||
1199 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1202 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1200 | cpus_clear(cpu_sibling_map[cpu]); | 1203 | cpus_clear(cpu_sibling_map[cpu]); |
1201 | cpus_clear(cpu_core_map[cpu]); | 1204 | cpus_clear(cpu_core_map[cpu]); |
1202 | phys_proc_id[cpu] = BAD_APICID; | 1205 | c[cpu].phys_proc_id = 0; |
1203 | cpu_core_id[cpu] = BAD_APICID; | 1206 | c[cpu].cpu_core_id = 0; |
1204 | cpu_clear(cpu, cpu_sibling_setup_map); | 1207 | cpu_clear(cpu, cpu_sibling_setup_map); |
1205 | } | 1208 | } |
1206 | 1209 | ||
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu) | |||
1259 | /* They ack this in play_dead by setting CPU_DEAD */ | 1262 | /* They ack this in play_dead by setting CPU_DEAD */ |
1260 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1263 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1261 | printk ("CPU %d is now offline\n", cpu); | 1264 | printk ("CPU %d is now offline\n", cpu); |
1265 | if (1 == num_online_cpus()) | ||
1266 | alternatives_smp_switch(0); | ||
1262 | return; | 1267 | return; |
1263 | } | 1268 | } |
1264 | msleep(100); | 1269 | msleep(100); |
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c new file mode 100644 index 000000000000..8d4c67f61b8e --- /dev/null +++ b/arch/x86_64/kernel/tce.c | |||
@@ -0,0 +1,202 @@ | |||
1 | /* | ||
2 | * Derived from arch/powerpc/platforms/pseries/iommu.c | ||
3 | * | ||
4 | * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation | ||
5 | * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include <linux/config.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | #include <linux/string.h> | ||
28 | #include <linux/pci.h> | ||
29 | #include <linux/dma-mapping.h> | ||
30 | #include <linux/bootmem.h> | ||
31 | #include <asm/tce.h> | ||
32 | #include <asm/calgary.h> | ||
33 | #include <asm/proto.h> | ||
34 | |||
35 | /* flush a tce at 'tceaddr' to main memory */ | ||
36 | static inline void flush_tce(void* tceaddr) | ||
37 | { | ||
38 | /* a single tce can't cross a cache line */ | ||
39 | if (cpu_has_clflush) | ||
40 | asm volatile("clflush (%0)" :: "r" (tceaddr)); | ||
41 | else | ||
42 | asm volatile("wbinvd":::"memory"); | ||
43 | } | ||
44 | |||
45 | void tce_build(struct iommu_table *tbl, unsigned long index, | ||
46 | unsigned int npages, unsigned long uaddr, int direction) | ||
47 | { | ||
48 | u64* tp; | ||
49 | u64 t; | ||
50 | u64 rpn; | ||
51 | |||
52 | t = (1 << TCE_READ_SHIFT); | ||
53 | if (direction != DMA_TO_DEVICE) | ||
54 | t |= (1 << TCE_WRITE_SHIFT); | ||
55 | |||
56 | tp = ((u64*)tbl->it_base) + index; | ||
57 | |||
58 | while (npages--) { | ||
59 | rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT; | ||
60 | t &= ~TCE_RPN_MASK; | ||
61 | t |= (rpn << TCE_RPN_SHIFT); | ||
62 | |||
63 | *tp = cpu_to_be64(t); | ||
64 | flush_tce(tp); | ||
65 | |||
66 | uaddr += PAGE_SIZE; | ||
67 | tp++; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | void tce_free(struct iommu_table *tbl, long index, unsigned int npages) | ||
72 | { | ||
73 | u64* tp; | ||
74 | |||
75 | tp = ((u64*)tbl->it_base) + index; | ||
76 | |||
77 | while (npages--) { | ||
78 | *tp = cpu_to_be64(0); | ||
79 | flush_tce(tp); | ||
80 | tp++; | ||
81 | } | ||
82 | } | ||
83 | |||
84 | static inline unsigned int table_size_to_number_of_entries(unsigned char size) | ||
85 | { | ||
86 | /* | ||
87 | * size is the order of the table, 0-7 | ||
88 | * smallest table is 8K entries, so shift result by 13 to | ||
89 | * multiply by 8K | ||
90 | */ | ||
91 | return (1 << size) << 13; | ||
92 | } | ||
93 | |||
94 | static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl) | ||
95 | { | ||
96 | unsigned int bitmapsz; | ||
97 | unsigned int tce_table_index; | ||
98 | unsigned long bmppages; | ||
99 | int ret; | ||
100 | |||
101 | tbl->it_busno = dev->bus->number; | ||
102 | |||
103 | /* set the tce table size - measured in entries */ | ||
104 | tbl->it_size = table_size_to_number_of_entries(specified_table_size); | ||
105 | |||
106 | tce_table_index = bus_to_phb(tbl->it_busno); | ||
107 | tbl->it_base = (unsigned long)tce_table_kva[tce_table_index]; | ||
108 | if (!tbl->it_base) { | ||
109 | printk(KERN_ERR "Calgary: iommu_table_setparms: " | ||
110 | "no table allocated?!\n"); | ||
111 | ret = -ENOMEM; | ||
112 | goto done; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * number of bytes needed for the bitmap size in number of | ||
117 | * entries; we need one bit per entry | ||
118 | */ | ||
119 | bitmapsz = tbl->it_size / BITS_PER_BYTE; | ||
120 | bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz)); | ||
121 | if (!bmppages) { | ||
122 | printk(KERN_ERR "Calgary: cannot allocate bitmap\n"); | ||
123 | ret = -ENOMEM; | ||
124 | goto done; | ||
125 | } | ||
126 | |||
127 | tbl->it_map = (unsigned long*)bmppages; | ||
128 | |||
129 | memset(tbl->it_map, 0, bitmapsz); | ||
130 | |||
131 | tbl->it_hint = 0; | ||
132 | |||
133 | spin_lock_init(&tbl->it_lock); | ||
134 | |||
135 | return 0; | ||
136 | |||
137 | done: | ||
138 | return ret; | ||
139 | } | ||
140 | |||
141 | int build_tce_table(struct pci_dev *dev, void __iomem *bbar) | ||
142 | { | ||
143 | struct iommu_table *tbl; | ||
144 | int ret; | ||
145 | |||
146 | if (dev->sysdata) { | ||
147 | printk(KERN_ERR "Calgary: dev %p has sysdata %p\n", | ||
148 | dev, dev->sysdata); | ||
149 | BUG(); | ||
150 | } | ||
151 | |||
152 | tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL); | ||
153 | if (!tbl) { | ||
154 | printk(KERN_ERR "Calgary: error allocating iommu_table\n"); | ||
155 | ret = -ENOMEM; | ||
156 | goto done; | ||
157 | } | ||
158 | |||
159 | ret = tce_table_setparms(dev, tbl); | ||
160 | if (ret) | ||
161 | goto free_tbl; | ||
162 | |||
163 | tce_free(tbl, 0, tbl->it_size); | ||
164 | |||
165 | tbl->bbar = bbar; | ||
166 | |||
167 | /* | ||
168 | * NUMA is already using the bus's sysdata pointer, so we use | ||
169 | * the bus's pci_dev's sysdata instead. | ||
170 | */ | ||
171 | dev->sysdata = tbl; | ||
172 | |||
173 | return 0; | ||
174 | |||
175 | free_tbl: | ||
176 | kfree(tbl); | ||
177 | done: | ||
178 | return ret; | ||
179 | } | ||
180 | |||
181 | void* alloc_tce_table(void) | ||
182 | { | ||
183 | unsigned int size; | ||
184 | |||
185 | size = table_size_to_number_of_entries(specified_table_size); | ||
186 | size *= TCE_ENTRY_SIZE; | ||
187 | |||
188 | return __alloc_bootmem_low(size, size, 0); | ||
189 | } | ||
190 | |||
191 | void free_tce_table(void *tbl) | ||
192 | { | ||
193 | unsigned int size; | ||
194 | |||
195 | if (!tbl) | ||
196 | return; | ||
197 | |||
198 | size = table_size_to_number_of_entries(specified_table_size); | ||
199 | size *= TCE_ENTRY_SIZE; | ||
200 | |||
201 | free_bootmem(__pa(tbl), size); | ||
202 | } | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7392570f975d..ebbee6f59ff5 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * Copyright (c) 1995 Markus Kuhn | 8 | * Copyright (c) 1995 Markus Kuhn |
9 | * Copyright (c) 1996 Ingo Molnar | 9 | * Copyright (c) 1996 Ingo Molnar |
10 | * Copyright (c) 1998 Andrea Arcangeli | 10 | * Copyright (c) 1998 Andrea Arcangeli |
11 | * Copyright (c) 2002 Vojtech Pavlik | 11 | * Copyright (c) 2002,2006 Vojtech Pavlik |
12 | * Copyright (c) 2003 Andi Kleen | 12 | * Copyright (c) 2003 Andi Kleen |
13 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c | 13 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c |
14 | */ | 14 | */ |
@@ -51,14 +51,21 @@ extern int using_apic_timer; | |||
51 | static char *time_init_gtod(void); | 51 | static char *time_init_gtod(void); |
52 | 52 | ||
53 | DEFINE_SPINLOCK(rtc_lock); | 53 | DEFINE_SPINLOCK(rtc_lock); |
54 | EXPORT_SYMBOL(rtc_lock); | ||
54 | DEFINE_SPINLOCK(i8253_lock); | 55 | DEFINE_SPINLOCK(i8253_lock); |
55 | 56 | ||
56 | int nohpet __initdata = 0; | 57 | int nohpet __initdata = 0; |
57 | static int notsc __initdata = 0; | 58 | static int notsc __initdata = 0; |
58 | 59 | ||
59 | #undef HPET_HACK_ENABLE_DANGEROUS | 60 | #define USEC_PER_TICK (USEC_PER_SEC / HZ) |
61 | #define NSEC_PER_TICK (NSEC_PER_SEC / HZ) | ||
62 | #define FSEC_PER_TICK (FSEC_PER_SEC / HZ) | ||
63 | |||
64 | #define NS_SCALE 10 /* 2^10, carefully chosen */ | ||
65 | #define US_SCALE 32 /* 2^32, arbitralrily chosen */ | ||
60 | 66 | ||
61 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 67 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
68 | EXPORT_SYMBOL(cpu_khz); | ||
62 | static unsigned long hpet_period; /* fsecs / HPET clock */ | 69 | static unsigned long hpet_period; /* fsecs / HPET clock */ |
63 | unsigned long hpet_tick; /* HPET clocks / interrupt */ | 70 | unsigned long hpet_tick; /* HPET clocks / interrupt */ |
64 | int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ | 71 | int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ |
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void) | |||
90 | t = get_cycles_sync(); | 97 | t = get_cycles_sync(); |
91 | if (t < vxtime.last_tsc) | 98 | if (t < vxtime.last_tsc) |
92 | t = vxtime.last_tsc; /* hack */ | 99 | t = vxtime.last_tsc; /* hack */ |
93 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; | 100 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE; |
94 | return x; | 101 | return x; |
95 | } | 102 | } |
96 | 103 | ||
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void) | |||
98 | { | 105 | { |
99 | /* cap counter read to one tick to avoid inconsistencies */ | 106 | /* cap counter read to one tick to avoid inconsistencies */ |
100 | unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; | 107 | unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; |
101 | return (min(counter,hpet_tick) * vxtime.quot) >> 32; | 108 | return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE; |
102 | } | 109 | } |
103 | 110 | ||
104 | unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; | 111 | unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; |
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv) | |||
118 | seq = read_seqbegin(&xtime_lock); | 125 | seq = read_seqbegin(&xtime_lock); |
119 | 126 | ||
120 | sec = xtime.tv_sec; | 127 | sec = xtime.tv_sec; |
121 | usec = xtime.tv_nsec / 1000; | 128 | usec = xtime.tv_nsec / NSEC_PER_USEC; |
122 | 129 | ||
123 | /* i386 does some correction here to keep the clock | 130 | /* i386 does some correction here to keep the clock |
124 | monotonous even when ntpd is fixing drift. | 131 | monotonous even when ntpd is fixing drift. |
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv) | |||
129 | in arch/x86_64/kernel/vsyscall.c and export all needed | 136 | in arch/x86_64/kernel/vsyscall.c and export all needed |
130 | variables in vmlinux.lds. -AK */ | 137 | variables in vmlinux.lds. -AK */ |
131 | 138 | ||
132 | t = (jiffies - wall_jiffies) * (1000000L / HZ) + | 139 | t = (jiffies - wall_jiffies) * USEC_PER_TICK + |
133 | do_gettimeoffset(); | 140 | do_gettimeoffset(); |
134 | usec += t; | 141 | usec += t; |
135 | 142 | ||
136 | } while (read_seqretry(&xtime_lock, seq)); | 143 | } while (read_seqretry(&xtime_lock, seq)); |
137 | 144 | ||
138 | tv->tv_sec = sec + usec / 1000000; | 145 | tv->tv_sec = sec + usec / USEC_PER_SEC; |
139 | tv->tv_usec = usec % 1000000; | 146 | tv->tv_usec = usec % USEC_PER_SEC; |
140 | } | 147 | } |
141 | 148 | ||
142 | EXPORT_SYMBOL(do_gettimeofday); | 149 | EXPORT_SYMBOL(do_gettimeofday); |
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv) | |||
157 | 164 | ||
158 | write_seqlock_irq(&xtime_lock); | 165 | write_seqlock_irq(&xtime_lock); |
159 | 166 | ||
160 | nsec -= do_gettimeoffset() * 1000 + | 167 | nsec -= do_gettimeoffset() * NSEC_PER_USEC + |
161 | (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); | 168 | (jiffies - wall_jiffies) * NSEC_PER_TICK; |
162 | 169 | ||
163 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | 170 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); |
164 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | 171 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); |
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void) | |||
288 | this_offset = hpet_readl(HPET_COUNTER); | 295 | this_offset = hpet_readl(HPET_COUNTER); |
289 | } while (read_seqretry(&xtime_lock, seq)); | 296 | } while (read_seqretry(&xtime_lock, seq)); |
290 | offset = (this_offset - last_offset); | 297 | offset = (this_offset - last_offset); |
291 | offset *= (NSEC_PER_SEC/HZ) / hpet_tick; | 298 | offset *= NSEC_PER_TICK / hpet_tick; |
292 | } else { | 299 | } else { |
293 | do { | 300 | do { |
294 | seq = read_seqbegin(&xtime_lock); | 301 | seq = read_seqbegin(&xtime_lock); |
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void) | |||
297 | base = monotonic_base; | 304 | base = monotonic_base; |
298 | } while (read_seqretry(&xtime_lock, seq)); | 305 | } while (read_seqretry(&xtime_lock, seq)); |
299 | this_offset = get_cycles_sync(); | 306 | this_offset = get_cycles_sync(); |
300 | offset = (this_offset - last_offset)*1000 / cpu_khz; | 307 | /* FIXME: 1000 or 1000000? */ |
308 | offset = (this_offset - last_offset)*1000 / cpu_khz; | ||
301 | } | 309 | } |
302 | return base + offset; | 310 | return base + offset; |
303 | } | 311 | } |
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs) | |||
382 | } | 390 | } |
383 | 391 | ||
384 | monotonic_base += | 392 | monotonic_base += |
385 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; | 393 | (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick; |
386 | 394 | ||
387 | vxtime.last = offset; | 395 | vxtime.last = offset; |
388 | #ifdef CONFIG_X86_PM_TIMER | 396 | #ifdef CONFIG_X86_PM_TIMER |
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs) | |||
391 | #endif | 399 | #endif |
392 | } else { | 400 | } else { |
393 | offset = (((tsc - vxtime.last_tsc) * | 401 | offset = (((tsc - vxtime.last_tsc) * |
394 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); | 402 | vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK; |
395 | 403 | ||
396 | if (offset < 0) | 404 | if (offset < 0) |
397 | offset = 0; | 405 | offset = 0; |
398 | 406 | ||
399 | if (offset > (USEC_PER_SEC / HZ)) { | 407 | if (offset > USEC_PER_TICK) { |
400 | lost = offset / (USEC_PER_SEC / HZ); | 408 | lost = offset / USEC_PER_TICK; |
401 | offset %= (USEC_PER_SEC / HZ); | 409 | offset %= USEC_PER_TICK; |
402 | } | 410 | } |
403 | 411 | ||
404 | monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; | 412 | /* FIXME: 1000 or 1000000? */ |
413 | monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz; | ||
405 | 414 | ||
406 | vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; | 415 | vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; |
407 | 416 | ||
408 | if ((((tsc - vxtime.last_tsc) * | 417 | if ((((tsc - vxtime.last_tsc) * |
409 | vxtime.tsc_quot) >> 32) < offset) | 418 | vxtime.tsc_quot) >> US_SCALE) < offset) |
410 | vxtime.last_tsc = tsc - | 419 | vxtime.last_tsc = tsc - |
411 | (((long) offset << 32) / vxtime.tsc_quot) - 1; | 420 | (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; |
412 | } | 421 | } |
413 | 422 | ||
414 | if (lost > 0) { | 423 | if (lost > 0) { |
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
468 | } | 477 | } |
469 | 478 | ||
470 | static unsigned int cyc2ns_scale __read_mostly; | 479 | static unsigned int cyc2ns_scale __read_mostly; |
471 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
472 | 480 | ||
473 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | 481 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
474 | { | 482 | { |
475 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | 483 | cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; |
476 | } | 484 | } |
477 | 485 | ||
478 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 486 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
479 | { | 487 | { |
480 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | 488 | return (cyc * cyc2ns_scale) >> NS_SCALE; |
481 | } | 489 | } |
482 | 490 | ||
483 | unsigned long long sched_clock(void) | 491 | unsigned long long sched_clock(void) |
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void) | |||
490 | Disadvantage is a small drift between CPUs in some configurations, | 498 | Disadvantage is a small drift between CPUs in some configurations, |
491 | but that should be tolerable. */ | 499 | but that should be tolerable. */ |
492 | if (__vxtime.mode == VXTIME_HPET) | 500 | if (__vxtime.mode == VXTIME_HPET) |
493 | return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32; | 501 | return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE; |
494 | #endif | 502 | #endif |
495 | 503 | ||
496 | /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, | 504 | /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, |
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
633 | 641 | ||
634 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | 642 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); |
635 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 643 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
636 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; | 644 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; |
637 | } | 645 | } |
638 | 646 | ||
639 | set_cyc2ns_scale(cpu_khz_ref); | 647 | set_cyc2ns_scale(cpu_khz_ref); |
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick) | |||
789 | if (hpet_use_timer) { | 797 | if (hpet_use_timer) { |
790 | hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | | 798 | hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | |
791 | HPET_TN_32BIT, HPET_T0_CFG); | 799 | HPET_TN_32BIT, HPET_T0_CFG); |
792 | hpet_writel(hpet_tick, HPET_T0_CMP); | 800 | hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ |
793 | hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ | 801 | hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ |
794 | cfg |= HPET_CFG_LEGACY; | 802 | cfg |= HPET_CFG_LEGACY; |
795 | } | 803 | } |
796 | /* | 804 | /* |
@@ -825,8 +833,7 @@ static int hpet_init(void) | |||
825 | if (hpet_period < 100000 || hpet_period > 100000000) | 833 | if (hpet_period < 100000 || hpet_period > 100000000) |
826 | return -1; | 834 | return -1; |
827 | 835 | ||
828 | hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / | 836 | hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; |
829 | hpet_period; | ||
830 | 837 | ||
831 | hpet_use_timer = (id & HPET_ID_LEGSUP); | 838 | hpet_use_timer = (id & HPET_ID_LEGSUP); |
832 | 839 | ||
@@ -890,18 +897,6 @@ void __init time_init(void) | |||
890 | char *timename; | 897 | char *timename; |
891 | char *gtod; | 898 | char *gtod; |
892 | 899 | ||
893 | #ifdef HPET_HACK_ENABLE_DANGEROUS | ||
894 | if (!vxtime.hpet_address) { | ||
895 | printk(KERN_WARNING "time.c: WARNING: Enabling HPET base " | ||
896 | "manually!\n"); | ||
897 | outl(0x800038a0, 0xcf8); | ||
898 | outl(0xff000001, 0xcfc); | ||
899 | outl(0x800038a0, 0xcf8); | ||
900 | vxtime.hpet_address = inl(0xcfc) & 0xfffffffe; | ||
901 | printk(KERN_WARNING "time.c: WARNING: Enabled HPET " | ||
902 | "at %#lx.\n", vxtime.hpet_address); | ||
903 | } | ||
904 | #endif | ||
905 | if (nohpet) | 900 | if (nohpet) |
906 | vxtime.hpet_address = 0; | 901 | vxtime.hpet_address = 0; |
907 | 902 | ||
@@ -912,7 +907,7 @@ void __init time_init(void) | |||
912 | -xtime.tv_sec, -xtime.tv_nsec); | 907 | -xtime.tv_sec, -xtime.tv_nsec); |
913 | 908 | ||
914 | if (!hpet_init()) | 909 | if (!hpet_init()) |
915 | vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; | 910 | vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period; |
916 | else | 911 | else |
917 | vxtime.hpet_address = 0; | 912 | vxtime.hpet_address = 0; |
918 | 913 | ||
@@ -941,8 +936,8 @@ void __init time_init(void) | |||
941 | vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); | 936 | vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); |
942 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | 937 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", |
943 | cpu_khz / 1000, cpu_khz % 1000); | 938 | cpu_khz / 1000, cpu_khz % 1000); |
944 | vxtime.quot = (1000000L << 32) / vxtime_hz; | 939 | vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; |
945 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; | 940 | vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; |
946 | vxtime.last_tsc = get_cycles_sync(); | 941 | vxtime.last_tsc = get_cycles_sync(); |
947 | setup_irq(0, &irq0); | 942 | setup_irq(0, &irq0); |
948 | 943 | ||
@@ -956,10 +951,10 @@ void __init time_init(void) | |||
956 | __cpuinit int unsynchronized_tsc(void) | 951 | __cpuinit int unsynchronized_tsc(void) |
957 | { | 952 | { |
958 | #ifdef CONFIG_SMP | 953 | #ifdef CONFIG_SMP |
959 | if (oem_force_hpet_timer()) | 954 | if (apic_is_clustered_box()) |
960 | return 1; | 955 | return 1; |
961 | /* Intel systems are normally all synchronized. Exceptions | 956 | /* Intel systems are normally all synchronized. Exceptions |
962 | are handled in the OEM check above. */ | 957 | are handled in the check above. */ |
963 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | 958 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) |
964 | return 0; | 959 | return 0; |
965 | #endif | 960 | #endif |
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index cea335e8746c..3d11a2fe45b7 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -6,8 +6,6 @@ | |||
6 | * | 6 | * |
7 | * Pentium III FXSR, SSE support | 7 | * Pentium III FXSR, SSE support |
8 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 8 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
9 | * | ||
10 | * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $ | ||
11 | */ | 9 | */ |
12 | 10 | ||
13 | /* | 11 | /* |
@@ -31,6 +29,7 @@ | |||
31 | #include <linux/nmi.h> | 29 | #include <linux/nmi.h> |
32 | #include <linux/kprobes.h> | 30 | #include <linux/kprobes.h> |
33 | #include <linux/kexec.h> | 31 | #include <linux/kexec.h> |
32 | #include <linux/unwind.h> | ||
34 | 33 | ||
35 | #include <asm/system.h> | 34 | #include <asm/system.h> |
36 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
@@ -41,7 +40,7 @@ | |||
41 | #include <asm/i387.h> | 40 | #include <asm/i387.h> |
42 | #include <asm/kdebug.h> | 41 | #include <asm/kdebug.h> |
43 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
44 | 43 | #include <asm/unwind.h> | |
45 | #include <asm/smp.h> | 44 | #include <asm/smp.h> |
46 | #include <asm/pgalloc.h> | 45 | #include <asm/pgalloc.h> |
47 | #include <asm/pda.h> | 46 | #include <asm/pda.h> |
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void); | |||
71 | asmlinkage void spurious_interrupt_bug(void); | 70 | asmlinkage void spurious_interrupt_bug(void); |
72 | 71 | ||
73 | ATOMIC_NOTIFIER_HEAD(die_chain); | 72 | ATOMIC_NOTIFIER_HEAD(die_chain); |
73 | EXPORT_SYMBOL(die_chain); | ||
74 | 74 | ||
75 | int register_die_notifier(struct notifier_block *nb) | 75 | int register_die_notifier(struct notifier_block *nb) |
76 | { | 76 | { |
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
107 | preempt_enable_no_resched(); | 107 | preempt_enable_no_resched(); |
108 | } | 108 | } |
109 | 109 | ||
110 | static int kstack_depth_to_print = 10; | 110 | static int kstack_depth_to_print = 12; |
111 | static int call_trace = 1; | ||
111 | 112 | ||
112 | #ifdef CONFIG_KALLSYMS | 113 | #ifdef CONFIG_KALLSYMS |
113 | #include <linux/kallsyms.h> | 114 | #include <linux/kallsyms.h> |
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
191 | return NULL; | 192 | return NULL; |
192 | } | 193 | } |
193 | 194 | ||
195 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) | ||
196 | { | ||
197 | int i = 11, n = 0; | ||
198 | |||
199 | while (unwind(info) == 0 && UNW_PC(info)) { | ||
200 | ++n; | ||
201 | if (i > 50) { | ||
202 | printk("\n "); | ||
203 | i = 7; | ||
204 | } else | ||
205 | i += printk(" "); | ||
206 | i += printk_address(UNW_PC(info)); | ||
207 | if (arch_unw_user_mode(info)) | ||
208 | break; | ||
209 | } | ||
210 | printk("\n"); | ||
211 | return n; | ||
212 | } | ||
213 | |||
194 | /* | 214 | /* |
195 | * x86-64 can have upto three kernel stacks: | 215 | * x86-64 can have upto three kernel stacks: |
196 | * process stack | 216 | * process stack |
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
198 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 218 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
199 | */ | 219 | */ |
200 | 220 | ||
201 | void show_trace(unsigned long *stack) | 221 | void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack) |
202 | { | 222 | { |
203 | const unsigned cpu = safe_smp_processor_id(); | 223 | const unsigned cpu = safe_smp_processor_id(); |
204 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 224 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
205 | int i; | 225 | int i = 11; |
206 | unsigned used = 0; | 226 | unsigned used = 0; |
207 | 227 | ||
208 | printk("\nCall Trace:"); | 228 | printk("\nCall Trace:"); |
209 | 229 | ||
230 | if (!tsk) | ||
231 | tsk = current; | ||
232 | |||
233 | if (call_trace >= 0) { | ||
234 | int unw_ret = 0; | ||
235 | struct unwind_frame_info info; | ||
236 | |||
237 | if (regs) { | ||
238 | if (unwind_init_frame_info(&info, tsk, regs) == 0) | ||
239 | unw_ret = show_trace_unwind(&info, NULL); | ||
240 | } else if (tsk == current) | ||
241 | unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); | ||
242 | else { | ||
243 | if (unwind_init_blocked(&info, tsk) == 0) | ||
244 | unw_ret = show_trace_unwind(&info, NULL); | ||
245 | } | ||
246 | if (unw_ret > 0) { | ||
247 | if (call_trace > 0) | ||
248 | return; | ||
249 | printk("Legacy call trace:"); | ||
250 | i = 18; | ||
251 | } | ||
252 | } | ||
253 | |||
210 | #define HANDLE_STACK(cond) \ | 254 | #define HANDLE_STACK(cond) \ |
211 | do while (cond) { \ | 255 | do while (cond) { \ |
212 | unsigned long addr = *stack++; \ | 256 | unsigned long addr = *stack++; \ |
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack) | |||
229 | } \ | 273 | } \ |
230 | } while (0) | 274 | } while (0) |
231 | 275 | ||
232 | for(i = 11; ; ) { | 276 | for(; ; ) { |
233 | const char *id; | 277 | const char *id; |
234 | unsigned long *estack_end; | 278 | unsigned long *estack_end; |
235 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 279 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack) | |||
264 | printk("\n"); | 308 | printk("\n"); |
265 | } | 309 | } |
266 | 310 | ||
267 | void show_stack(struct task_struct *tsk, unsigned long * rsp) | 311 | static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) |
268 | { | 312 | { |
269 | unsigned long *stack; | 313 | unsigned long *stack; |
270 | int i; | 314 | int i; |
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) | |||
298 | printk("%016lx ", *stack++); | 342 | printk("%016lx ", *stack++); |
299 | touch_nmi_watchdog(); | 343 | touch_nmi_watchdog(); |
300 | } | 344 | } |
301 | show_trace((unsigned long *)rsp); | 345 | show_trace(tsk, regs, rsp); |
346 | } | ||
347 | |||
348 | void show_stack(struct task_struct *tsk, unsigned long * rsp) | ||
349 | { | ||
350 | _show_stack(tsk, NULL, rsp); | ||
302 | } | 351 | } |
303 | 352 | ||
304 | /* | 353 | /* |
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) | |||
307 | void dump_stack(void) | 356 | void dump_stack(void) |
308 | { | 357 | { |
309 | unsigned long dummy; | 358 | unsigned long dummy; |
310 | show_trace(&dummy); | 359 | show_trace(NULL, NULL, &dummy); |
311 | } | 360 | } |
312 | 361 | ||
313 | EXPORT_SYMBOL(dump_stack); | 362 | EXPORT_SYMBOL(dump_stack); |
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs) | |||
334 | if (in_kernel) { | 383 | if (in_kernel) { |
335 | 384 | ||
336 | printk("Stack: "); | 385 | printk("Stack: "); |
337 | show_stack(NULL, (unsigned long*)rsp); | 386 | _show_stack(NULL, regs, (unsigned long*)rsp); |
338 | 387 | ||
339 | printk("\nCode: "); | 388 | printk("\nCode: "); |
340 | if (regs->rip < PAGE_OFFSET) | 389 | if (regs->rip < PAGE_OFFSET) |
@@ -383,6 +432,7 @@ void out_of_line_bug(void) | |||
383 | { | 432 | { |
384 | BUG(); | 433 | BUG(); |
385 | } | 434 | } |
435 | EXPORT_SYMBOL(out_of_line_bug); | ||
386 | #endif | 436 | #endif |
387 | 437 | ||
388 | static DEFINE_SPINLOCK(die_lock); | 438 | static DEFINE_SPINLOCK(die_lock); |
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s) | |||
1012 | } | 1062 | } |
1013 | __setup("kstack=", kstack_setup); | 1063 | __setup("kstack=", kstack_setup); |
1014 | 1064 | ||
1065 | static int __init call_trace_setup(char *s) | ||
1066 | { | ||
1067 | if (strcmp(s, "old") == 0) | ||
1068 | call_trace = -1; | ||
1069 | else if (strcmp(s, "both") == 0) | ||
1070 | call_trace = 0; | ||
1071 | else if (strcmp(s, "new") == 0) | ||
1072 | call_trace = 1; | ||
1073 | return 1; | ||
1074 | } | ||
1075 | __setup("call_trace=", call_trace_setup); | ||
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index b81f473c4a19..1c6a5f322919 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -45,6 +45,15 @@ SECTIONS | |||
45 | 45 | ||
46 | RODATA | 46 | RODATA |
47 | 47 | ||
48 | #ifdef CONFIG_STACK_UNWIND | ||
49 | . = ALIGN(8); | ||
50 | .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { | ||
51 | __start_unwind = .; | ||
52 | *(.eh_frame) | ||
53 | __end_unwind = .; | ||
54 | } | ||
55 | #endif | ||
56 | |||
48 | /* Data */ | 57 | /* Data */ |
49 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 58 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
50 | *(.data) | 59 | *(.data) |
@@ -131,6 +140,26 @@ SECTIONS | |||
131 | *(.data.page_aligned) | 140 | *(.data.page_aligned) |
132 | } | 141 | } |
133 | 142 | ||
143 | /* might get freed after init */ | ||
144 | . = ALIGN(4096); | ||
145 | __smp_alt_begin = .; | ||
146 | __smp_alt_instructions = .; | ||
147 | .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { | ||
148 | *(.smp_altinstructions) | ||
149 | } | ||
150 | __smp_alt_instructions_end = .; | ||
151 | . = ALIGN(8); | ||
152 | __smp_locks = .; | ||
153 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
154 | *(.smp_locks) | ||
155 | } | ||
156 | __smp_locks_end = .; | ||
157 | .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { | ||
158 | *(.smp_altinstr_replacement) | ||
159 | } | ||
160 | . = ALIGN(4096); | ||
161 | __smp_alt_end = .; | ||
162 | |||
134 | . = ALIGN(4096); /* Init code and data */ | 163 | . = ALIGN(4096); /* Init code and data */ |
135 | __init_begin = .; | 164 | __init_begin = .; |
136 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 165 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 9468fb20b0bc..f603037df162 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t) | |||
107 | 107 | ||
108 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 108 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) |
109 | { | 109 | { |
110 | if (unlikely(!__sysctl_vsyscall)) | 110 | if (!__sysctl_vsyscall) |
111 | return gettimeofday(tv,tz); | 111 | return gettimeofday(tv,tz); |
112 | if (tv) | 112 | if (tv) |
113 | do_vgettimeofday(tv); | 113 | do_vgettimeofday(tv); |
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | |||
120 | * unlikely */ | 120 | * unlikely */ |
121 | time_t __vsyscall(1) vtime(time_t *t) | 121 | time_t __vsyscall(1) vtime(time_t *t) |
122 | { | 122 | { |
123 | if (unlikely(!__sysctl_vsyscall)) | 123 | if (!__sysctl_vsyscall) |
124 | return time_syscall(t); | 124 | return time_syscall(t); |
125 | else if (t) | 125 | else if (t) |
126 | *t = __xtime.tv_sec; | 126 | *t = __xtime.tv_sec; |
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 1def21c9f7cd..370952c4ff22 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -1,66 +1,21 @@ | |||
1 | /* Exports for assembly files. | ||
2 | All C exports should go in the respective C files. */ | ||
3 | |||
1 | #include <linux/config.h> | 4 | #include <linux/config.h> |
2 | #include <linux/module.h> | 5 | #include <linux/module.h> |
3 | #include <linux/smp.h> | 6 | #include <linux/smp.h> |
4 | #include <linux/user.h> | ||
5 | #include <linux/sched.h> | ||
6 | #include <linux/in6.h> | ||
7 | #include <linux/interrupt.h> | ||
8 | #include <linux/smp_lock.h> | ||
9 | #include <linux/pm.h> | ||
10 | #include <linux/pci.h> | ||
11 | #include <linux/apm_bios.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/syscalls.h> | ||
15 | #include <linux/tty.h> | ||
16 | 7 | ||
17 | #include <asm/semaphore.h> | 8 | #include <asm/semaphore.h> |
18 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
19 | #include <asm/i387.h> | ||
20 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
21 | #include <asm/checksum.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/delay.h> | ||
24 | #include <asm/irq.h> | ||
25 | #include <asm/mmx.h> | ||
26 | #include <asm/desc.h> | ||
27 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
28 | #include <asm/pgalloc.h> | ||
29 | #include <asm/nmi.h> | ||
30 | #include <asm/kdebug.h> | ||
31 | #include <asm/unistd.h> | ||
32 | #include <asm/tlbflush.h> | ||
33 | #include <asm/kdebug.h> | ||
34 | |||
35 | extern spinlock_t rtc_lock; | ||
36 | 12 | ||
37 | #ifdef CONFIG_SMP | ||
38 | extern void __write_lock_failed(rwlock_t *rw); | ||
39 | extern void __read_lock_failed(rwlock_t *rw); | ||
40 | #endif | ||
41 | |||
42 | /* platform dependent support */ | ||
43 | EXPORT_SYMBOL(boot_cpu_data); | ||
44 | //EXPORT_SYMBOL(dump_fpu); | ||
45 | EXPORT_SYMBOL(__ioremap); | ||
46 | EXPORT_SYMBOL(ioremap_nocache); | ||
47 | EXPORT_SYMBOL(iounmap); | ||
48 | EXPORT_SYMBOL(kernel_thread); | 13 | EXPORT_SYMBOL(kernel_thread); |
49 | EXPORT_SYMBOL(pm_idle); | ||
50 | EXPORT_SYMBOL(pm_power_off); | ||
51 | 14 | ||
52 | EXPORT_SYMBOL(__down_failed); | 15 | EXPORT_SYMBOL(__down_failed); |
53 | EXPORT_SYMBOL(__down_failed_interruptible); | 16 | EXPORT_SYMBOL(__down_failed_interruptible); |
54 | EXPORT_SYMBOL(__down_failed_trylock); | 17 | EXPORT_SYMBOL(__down_failed_trylock); |
55 | EXPORT_SYMBOL(__up_wakeup); | 18 | EXPORT_SYMBOL(__up_wakeup); |
56 | /* Networking helper routines. */ | ||
57 | EXPORT_SYMBOL(csum_partial_copy_nocheck); | ||
58 | EXPORT_SYMBOL(ip_compute_csum); | ||
59 | /* Delay loops */ | ||
60 | EXPORT_SYMBOL(__udelay); | ||
61 | EXPORT_SYMBOL(__ndelay); | ||
62 | EXPORT_SYMBOL(__delay); | ||
63 | EXPORT_SYMBOL(__const_udelay); | ||
64 | 19 | ||
65 | EXPORT_SYMBOL(__get_user_1); | 20 | EXPORT_SYMBOL(__get_user_1); |
66 | EXPORT_SYMBOL(__get_user_2); | 21 | EXPORT_SYMBOL(__get_user_2); |
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2); | |||
71 | EXPORT_SYMBOL(__put_user_4); | 26 | EXPORT_SYMBOL(__put_user_4); |
72 | EXPORT_SYMBOL(__put_user_8); | 27 | EXPORT_SYMBOL(__put_user_8); |
73 | 28 | ||
74 | EXPORT_SYMBOL(strncpy_from_user); | ||
75 | EXPORT_SYMBOL(__strncpy_from_user); | ||
76 | EXPORT_SYMBOL(clear_user); | ||
77 | EXPORT_SYMBOL(__clear_user); | ||
78 | EXPORT_SYMBOL(copy_user_generic); | 29 | EXPORT_SYMBOL(copy_user_generic); |
79 | EXPORT_SYMBOL(copy_from_user); | 30 | EXPORT_SYMBOL(copy_from_user); |
80 | EXPORT_SYMBOL(copy_to_user); | 31 | EXPORT_SYMBOL(copy_to_user); |
81 | EXPORT_SYMBOL(copy_in_user); | ||
82 | EXPORT_SYMBOL(strnlen_user); | ||
83 | |||
84 | #ifdef CONFIG_PCI | ||
85 | EXPORT_SYMBOL(pci_mem_start); | ||
86 | #endif | ||
87 | 32 | ||
88 | EXPORT_SYMBOL(copy_page); | 33 | EXPORT_SYMBOL(copy_page); |
89 | EXPORT_SYMBOL(clear_page); | 34 | EXPORT_SYMBOL(clear_page); |
90 | 35 | ||
91 | EXPORT_SYMBOL(_cpu_pda); | ||
92 | #ifdef CONFIG_SMP | 36 | #ifdef CONFIG_SMP |
93 | EXPORT_SYMBOL(cpu_data); | 37 | extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); |
38 | extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); | ||
94 | EXPORT_SYMBOL(__write_lock_failed); | 39 | EXPORT_SYMBOL(__write_lock_failed); |
95 | EXPORT_SYMBOL(__read_lock_failed); | 40 | EXPORT_SYMBOL(__read_lock_failed); |
96 | |||
97 | EXPORT_SYMBOL(smp_call_function); | ||
98 | EXPORT_SYMBOL(cpu_callout_map); | ||
99 | #endif | ||
100 | |||
101 | #ifdef CONFIG_VT | ||
102 | EXPORT_SYMBOL(screen_info); | ||
103 | #endif | 41 | #endif |
104 | 42 | ||
105 | EXPORT_SYMBOL(rtc_lock); | ||
106 | |||
107 | EXPORT_SYMBOL_GPL(set_nmi_callback); | ||
108 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | ||
109 | |||
110 | /* Export string functions. We normally rely on gcc builtin for most of these, | 43 | /* Export string functions. We normally rely on gcc builtin for most of these, |
111 | but gcc sometimes decides not to inline them. */ | 44 | but gcc sometimes decides not to inline them. */ |
112 | #undef memcpy | 45 | #undef memcpy |
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); | |||
114 | #undef memmove | 47 | #undef memmove |
115 | 48 | ||
116 | extern void * memset(void *,int,__kernel_size_t); | 49 | extern void * memset(void *,int,__kernel_size_t); |
117 | extern size_t strlen(const char *); | ||
118 | extern void * memmove(void * dest,const void *src,size_t count); | ||
119 | extern void * memcpy(void *,const void *,__kernel_size_t); | 50 | extern void * memcpy(void *,const void *,__kernel_size_t); |
120 | extern void * __memcpy(void *,const void *,__kernel_size_t); | 51 | extern void * __memcpy(void *,const void *,__kernel_size_t); |
121 | 52 | ||
122 | EXPORT_SYMBOL(memset); | 53 | EXPORT_SYMBOL(memset); |
123 | EXPORT_SYMBOL(memmove); | ||
124 | EXPORT_SYMBOL(memcpy); | 54 | EXPORT_SYMBOL(memcpy); |
125 | EXPORT_SYMBOL(__memcpy); | 55 | EXPORT_SYMBOL(__memcpy); |
126 | 56 | ||
127 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
128 | /* prototypes are wrong, these are assembly with custom calling functions */ | ||
129 | extern void rwsem_down_read_failed_thunk(void); | ||
130 | extern void rwsem_wake_thunk(void); | ||
131 | extern void rwsem_downgrade_thunk(void); | ||
132 | extern void rwsem_down_write_failed_thunk(void); | ||
133 | EXPORT_SYMBOL(rwsem_down_read_failed_thunk); | ||
134 | EXPORT_SYMBOL(rwsem_wake_thunk); | ||
135 | EXPORT_SYMBOL(rwsem_downgrade_thunk); | ||
136 | EXPORT_SYMBOL(rwsem_down_write_failed_thunk); | ||
137 | #endif | ||
138 | |||
139 | EXPORT_SYMBOL(empty_zero_page); | 57 | EXPORT_SYMBOL(empty_zero_page); |
140 | |||
141 | EXPORT_SYMBOL(die_chain); | ||
142 | |||
143 | #ifdef CONFIG_SMP | ||
144 | EXPORT_SYMBOL(cpu_sibling_map); | ||
145 | EXPORT_SYMBOL(smp_num_siblings); | ||
146 | #endif | ||
147 | |||
148 | #ifdef CONFIG_BUG | ||
149 | EXPORT_SYMBOL(out_of_line_bug); | ||
150 | #endif | ||
151 | |||
152 | EXPORT_SYMBOL(init_level4_pgt); | 58 | EXPORT_SYMBOL(init_level4_pgt); |
153 | |||
154 | extern unsigned long __supported_pte_mask; | ||
155 | EXPORT_SYMBOL(__supported_pte_mask); | ||
156 | |||
157 | #ifdef CONFIG_SMP | ||
158 | EXPORT_SYMBOL(flush_tlb_page); | ||
159 | #endif | ||
160 | |||
161 | EXPORT_SYMBOL(cpu_khz); | ||
162 | |||
163 | EXPORT_SYMBOL(load_gs_index); | 59 | EXPORT_SYMBOL(load_gs_index); |
164 | 60 | ||