aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/Kconfig13
-rw-r--r--arch/i386/boot/Makefile9
-rw-r--r--arch/i386/boot/compressed/misc.c32
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/i386/kernel/alternative.c118
-rw-r--r--arch/i386/kernel/apic.c16
-rw-r--r--arch/i386/kernel/apm.c6
-rw-r--r--arch/i386/kernel/cpu/amd.c16
-rw-r--r--arch/i386/kernel/cpu/intel.c6
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c113
-rw-r--r--arch/i386/kernel/crash.c7
-rw-r--r--arch/i386/kernel/entry.S263
-rw-r--r--arch/i386/kernel/io_apic.c49
-rw-r--r--arch/i386/kernel/irq.c2
-rw-r--r--arch/i386/kernel/nmi.c72
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/smp.c12
-rw-r--r--arch/i386/kernel/smpboot.c1
-rw-r--r--arch/i386/kernel/traps.c70
-rw-r--r--arch/i386/kernel/vmlinux.lds.S9
-rw-r--r--arch/i386/oprofile/op_model_athlon.c1
-rw-r--r--arch/i386/oprofile/op_model_p4.c1
-rw-r--r--arch/i386/oprofile/op_model_ppro.c1
-rw-r--r--arch/ia64/kernel/process.c4
-rw-r--r--arch/x86_64/Kconfig51
-rw-r--r--arch/x86_64/Kconfig.debug18
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile9
-rw-r--r--arch/x86_64/boot/compressed/misc.c46
-rw-r--r--arch/x86_64/boot/tools/build.c6
-rw-r--r--arch/x86_64/defconfig159
-rw-r--r--arch/x86_64/ia32/fpu32.c1
-rw-r--r--arch/x86_64/ia32/ia32_signal.c2
-rw-r--r--arch/x86_64/ia32/ia32entry.S11
-rw-r--r--arch/x86_64/ia32/ptrace32.c43
-rw-r--r--arch/x86_64/ia32/sys_ia32.c25
-rw-r--r--arch/x86_64/kernel/Makefile8
-rw-r--r--arch/x86_64/kernel/aperture.c26
-rw-r--r--arch/x86_64/kernel/apic.c32
-rw-r--r--arch/x86_64/kernel/crash.c4
-rw-r--r--arch/x86_64/kernel/e820.c2
-rw-r--r--arch/x86_64/kernel/entry.S113
-rw-r--r--arch/x86_64/kernel/genapic_flat.c30
-rw-r--r--arch/x86_64/kernel/head64.c2
-rw-r--r--arch/x86_64/kernel/i8259.c14
-rw-r--r--arch/x86_64/kernel/io_apic.c45
-rw-r--r--arch/x86_64/kernel/irq.c30
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c506
-rw-r--r--arch/x86_64/kernel/module.c38
-rw-r--r--arch/x86_64/kernel/nmi.c89
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1018
-rw-r--r--arch/x86_64/kernel/pci-dma.c55
-rw-r--r--arch/x86_64/kernel/pci-gart.c155
-rw-r--r--arch/x86_64/kernel/pci-nommu.c9
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c16
-rw-r--r--arch/x86_64/kernel/reboot.c1
-rw-r--r--arch/x86_64/kernel/setup.c180
-rw-r--r--arch/x86_64/kernel/setup64.c3
-rw-r--r--arch/x86_64/kernel/signal.c3
-rw-r--r--arch/x86_64/kernel/smp.c10
-rw-r--r--arch/x86_64/kernel/smpboot.c23
-rw-r--r--arch/x86_64/kernel/tce.c202
-rw-r--r--arch/x86_64/kernel/time.c87
-rw-r--r--arch/x86_64/kernel/traps.c83
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S29
-rw-r--r--arch/x86_64/kernel/vsyscall.c4
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c114
-rw-r--r--arch/x86_64/lib/csum-partial.c1
-rw-r--r--arch/x86_64/lib/csum-wrappers.c1
-rw-r--r--arch/x86_64/lib/delay.c5
-rw-r--r--arch/x86_64/lib/memmove.c4
-rw-r--r--arch/x86_64/lib/usercopy.c13
-rw-r--r--arch/x86_64/mm/fault.c8
-rw-r--r--arch/x86_64/mm/init.c48
-rw-r--r--arch/x86_64/mm/ioremap.c5
-rw-r--r--arch/x86_64/pci/k8-bus.c10
79 files changed, 3326 insertions, 1030 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 374fb50608a0..f3eaf22f273d 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -328,6 +328,15 @@ config X86_MCE_P4THERMAL
328 Enabling this feature will cause a message to be printed when the P4 328 Enabling this feature will cause a message to be printed when the P4
329 enters thermal throttling. 329 enters thermal throttling.
330 330
331config VM86
332 default y
333 bool "Enable VM86 support" if EMBEDDED
334 help
335 This option is required by programs like DOSEMU to run 16-bit legacy
336 code on X86 processors. It also may be needed by software like
337 XFree86 to initialize some video cards via BIOS. Disabling this
338 option saves about 6k.
339
331config TOSHIBA 340config TOSHIBA
332 tristate "Toshiba Laptop support" 341 tristate "Toshiba Laptop support"
333 ---help--- 342 ---help---
@@ -1068,6 +1077,10 @@ config SCx200HR_TIMER
1068 processor goes idle (as is done by the scheduler). The 1077 processor goes idle (as is done by the scheduler). The
1069 other workaround is idle=poll boot option. 1078 other workaround is idle=poll boot option.
1070 1079
1080config K8_NB
1081 def_bool y
1082 depends on AGP_AMD64
1083
1071source "drivers/pcmcia/Kconfig" 1084source "drivers/pcmcia/Kconfig"
1072 1085
1073source "drivers/pci/hotplug/Kconfig" 1086source "drivers/pci/hotplug/Kconfig"
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e55476381b..e97946626064 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
109isoimage: $(BOOTIMAGE) 109isoimage: $(BOOTIMAGE)
110 -rm -rf $(obj)/isoimage 110 -rm -rf $(obj)/isoimage
111 mkdir $(obj)/isoimage 111 mkdir $(obj)/isoimage
112 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ 112 for i in lib lib64 share end ; do \
113 $(obj)/isoimage 113 if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
114 cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
115 break ; \
116 fi ; \
117 if [ $$i = end ] ; then exit 1 ; fi ; \
118 done
114 cp $(BOOTIMAGE) $(obj)/isoimage/linux 119 cp $(BOOTIMAGE) $(obj)/isoimage/linux
115 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 120 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
116 if [ -f '$(FDINITRD)' ] ; then \ 121 if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7492a5..b2ccd543410d 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
24 24
25#undef memset 25#undef memset
26#undef memcpy 26#undef memcpy
27
28/*
29 * Why do we do this? Don't ask me..
30 *
31 * Incomprehensible are the ways of bootloaders.
32 */
33static void* memset(void *, int, size_t);
34static void* memcpy(void *, __const void *, size_t);
35#define memzero(s, n) memset ((s), 0, (n)) 27#define memzero(s, n) memset ((s), 0, (n))
36 28
37typedef unsigned char uch; 29typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
93#endif 85#endif
94#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) 86#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
95 87
96extern char input_data[]; 88extern unsigned char input_data[];
97extern int input_len; 89extern int input_len;
98 90
99static long bytes_out = 0; 91static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
103static void *malloc(int size); 95static void *malloc(int size);
104static void free(void *where); 96static void free(void *where);
105 97
98static void *memset(void *s, int c, unsigned n);
99static void *memcpy(void *dest, const void *src, unsigned n);
100
106static void putstr(const char *); 101static void putstr(const char *);
107 102
108extern int end; 103extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
205 outb_p(0xff & (pos >> 1), vidport+1); 200 outb_p(0xff & (pos >> 1), vidport+1);
206} 201}
207 202
208static void* memset(void* s, int c, size_t n) 203static void* memset(void* s, int c, unsigned n)
209{ 204{
210 int i; 205 int i;
211 char *ss = (char*)s; 206 char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
214 return s; 209 return s;
215} 210}
216 211
217static void* memcpy(void* __dest, __const void* __src, 212static void* memcpy(void* dest, const void* src, unsigned n)
218 size_t __n)
219{ 213{
220 int i; 214 int i;
221 char *d = (char *)__dest, *s = (char *)__src; 215 char *d = (char *)dest, *s = (char *)src;
222 216
223 for (i=0;i<__n;i++) d[i] = s[i]; 217 for (i=0;i<n;i++) d[i] = s[i];
224 return __dest; 218 return dest;
225} 219}
226 220
227/* =========================================================================== 221/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
309#else 303#else
310 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); 304 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
311#endif 305#endif
312 output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ 306 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
313 free_mem_end_ptr = (long)real_mode; 307 free_mem_end_ptr = (long)real_mode;
314} 308}
315 309
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
324#ifdef STANDARD_MEMORY_BIOS_CALL 318#ifdef STANDARD_MEMORY_BIOS_CALL
325 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); 319 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
326#else 320#else
327 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 321 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
328 (3*1024))
329 error("Less than 4MB of memory");
330#endif 322#endif
331 mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; 323 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
332 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX 324 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
333 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; 325 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
334 low_buffer_size = low_buffer_end - LOW_BUFFER_START; 326 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 0fac85df64f1..5e70c2fb273a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
37obj-$(CONFIG_VM86) += vm86.o 37obj-$(CONFIG_VM86) += vm86.o
38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o 39obj-$(CONFIG_HPET_TIMER) += hpet.o
40obj-$(CONFIG_K8_NB) += k8.o
40 41
41EXTRA_AFLAGS := -traditional 42EXTRA_AFLAGS := -traditional
42 43
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
76$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ 77$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
77 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE 78 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
78 $(call if_changed,syscall) 79 $(call if_changed,syscall)
80
81k8-y += ../../x86_64/kernel/k8.o
82
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f99fb2a..50eb0e03777e 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
4#include <asm/alternative.h> 4#include <asm/alternative.h>
5#include <asm/sections.h> 5#include <asm/sections.h>
6 6
7#define DEBUG 0 7static int no_replacement = 0;
8#if DEBUG 8static int smp_alt_once = 0;
9# define DPRINTK(fmt, args...) printk(fmt, args) 9static int debug_alternative = 0;
10#else 10
11# define DPRINTK(fmt, args...) 11static int __init noreplacement_setup(char *s)
12#endif 12{
13 no_replacement = 1;
14 return 1;
15}
16static int __init bootonly(char *str)
17{
18 smp_alt_once = 1;
19 return 1;
20}
21static int __init debug_alt(char *str)
22{
23 debug_alternative = 1;
24 return 1;
25}
13 26
27__setup("noreplacement", noreplacement_setup);
28__setup("smp-alt-boot", bootonly);
29__setup("debug-alternative", debug_alt);
30
31#define DPRINTK(fmt, args...) if (debug_alternative) \
32 printk(KERN_DEBUG fmt, args)
33
34#ifdef GENERIC_NOP1
14/* Use inline assembly to define this because the nops are defined 35/* Use inline assembly to define this because the nops are defined
15 as inline assembly strings in the include files and we cannot 36 as inline assembly strings in the include files and we cannot
16 get them easily into strings. */ 37 get them easily into strings. */
17asm("\t.data\nintelnops: " 38asm("\t.data\nintelnops: "
18 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 39 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
19 GENERIC_NOP7 GENERIC_NOP8); 40 GENERIC_NOP7 GENERIC_NOP8);
20asm("\t.data\nk8nops: " 41extern unsigned char intelnops[];
21 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
22 K8_NOP7 K8_NOP8);
23asm("\t.data\nk7nops: "
24 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
25 K7_NOP7 K7_NOP8);
26
27extern unsigned char intelnops[], k8nops[], k7nops[];
28static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 42static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
29 NULL, 43 NULL,
30 intelnops, 44 intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
36 intelnops + 1 + 2 + 3 + 4 + 5 + 6, 50 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
37 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 51 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
38}; 52};
53#endif
54
55#ifdef K8_NOP1
56asm("\t.data\nk8nops: "
57 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
58 K8_NOP7 K8_NOP8);
59extern unsigned char k8nops[];
39static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 60static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
40 NULL, 61 NULL,
41 k8nops, 62 k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
47 k8nops + 1 + 2 + 3 + 4 + 5 + 6, 68 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
48 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 69 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
49}; 70};
71#endif
72
73#ifdef K7_NOP1
74asm("\t.data\nk7nops: "
75 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
76 K7_NOP7 K7_NOP8);
77extern unsigned char k7nops[];
50static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 78static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
51 NULL, 79 NULL,
52 k7nops, 80 k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
58 k7nops + 1 + 2 + 3 + 4 + 5 + 6, 86 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
59 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 87 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
60}; 88};
89#endif
90
91#ifdef CONFIG_X86_64
92
93extern char __vsyscall_0;
94static inline unsigned char** find_nop_table(void)
95{
96 return k8_nops;
97}
98
99#else /* CONFIG_X86_64 */
100
61static struct nop { 101static struct nop {
62 int cpuid; 102 int cpuid;
63 unsigned char **noptable; 103 unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
67 { -1, NULL } 107 { -1, NULL }
68}; 108};
69 109
70
71extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
72extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
73extern u8 *__smp_locks[], *__smp_locks_end[];
74
75extern u8 __smp_alt_begin[], __smp_alt_end[];
76
77
78static unsigned char** find_nop_table(void) 110static unsigned char** find_nop_table(void)
79{ 111{
80 unsigned char **noptable = intel_nops; 112 unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
89 return noptable; 121 return noptable;
90} 122}
91 123
124#endif /* CONFIG_X86_64 */
125
126extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
127extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
128extern u8 *__smp_locks[], *__smp_locks_end[];
129
130extern u8 __smp_alt_begin[], __smp_alt_end[];
131
92/* Replace instructions with better alternatives for this CPU type. 132/* Replace instructions with better alternatives for this CPU type.
93 This runs before SMP is initialized to avoid SMP problems with 133 This runs before SMP is initialized to avoid SMP problems with
94 self modifying code. This implies that assymetric systems where 134 self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
99{ 139{
100 unsigned char **noptable = find_nop_table(); 140 unsigned char **noptable = find_nop_table();
101 struct alt_instr *a; 141 struct alt_instr *a;
142 u8 *instr;
102 int diff, i, k; 143 int diff, i, k;
103 144
104 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 145 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
106 BUG_ON(a->replacementlen > a->instrlen); 147 BUG_ON(a->replacementlen > a->instrlen);
107 if (!boot_cpu_has(a->cpuid)) 148 if (!boot_cpu_has(a->cpuid))
108 continue; 149 continue;
109 memcpy(a->instr, a->replacement, a->replacementlen); 150 instr = a->instr;
151#ifdef CONFIG_X86_64
152 /* vsyscall code is not mapped yet. resolve it manually. */
153 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
154 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
155 DPRINTK("%s: vsyscall fixup: %p => %p\n",
156 __FUNCTION__, a->instr, instr);
157 }
158#endif
159 memcpy(instr, a->replacement, a->replacementlen);
110 diff = a->instrlen - a->replacementlen; 160 diff = a->instrlen - a->replacementlen;
111 /* Pad the rest with nops */ 161 /* Pad the rest with nops */
112 for (i = a->replacementlen; diff > 0; diff -= k, i += k) { 162 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
186static LIST_HEAD(smp_alt_modules); 236static LIST_HEAD(smp_alt_modules);
187static DEFINE_SPINLOCK(smp_alt); 237static DEFINE_SPINLOCK(smp_alt);
188 238
189static int smp_alt_once = 0;
190static int __init bootonly(char *str)
191{
192 smp_alt_once = 1;
193 return 1;
194}
195__setup("smp-alt-boot", bootonly);
196
197void alternatives_smp_module_add(struct module *mod, char *name, 239void alternatives_smp_module_add(struct module *mod, char *name,
198 void *locks, void *locks_end, 240 void *locks, void *locks_end,
199 void *text, void *text_end) 241 void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
201 struct smp_alt_module *smp; 243 struct smp_alt_module *smp;
202 unsigned long flags; 244 unsigned long flags;
203 245
246 if (no_replacement)
247 return;
248
204 if (smp_alt_once) { 249 if (smp_alt_once) {
205 if (boot_cpu_has(X86_FEATURE_UP)) 250 if (boot_cpu_has(X86_FEATURE_UP))
206 alternatives_smp_unlock(locks, locks_end, 251 alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
235 struct smp_alt_module *item; 280 struct smp_alt_module *item;
236 unsigned long flags; 281 unsigned long flags;
237 282
238 if (smp_alt_once) 283 if (no_replacement || smp_alt_once)
239 return; 284 return;
240 285
241 spin_lock_irqsave(&smp_alt, flags); 286 spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
256 struct smp_alt_module *mod; 301 struct smp_alt_module *mod;
257 unsigned long flags; 302 unsigned long flags;
258 303
259 if (smp_alt_once) 304 if (no_replacement || smp_alt_once)
260 return; 305 return;
261 BUG_ON(!smp && (num_online_cpus() > 1)); 306 BUG_ON(!smp && (num_online_cpus() > 1));
262 307
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
285 330
286void __init alternative_instructions(void) 331void __init alternative_instructions(void)
287{ 332{
333 if (no_replacement) {
334 printk(KERN_INFO "(SMP-)alternatives turned off\n");
335 free_init_pages("SMP alternatives",
336 (unsigned long)__smp_alt_begin,
337 (unsigned long)__smp_alt_end);
338 return;
339 }
288 apply_alternatives(__alt_instructions, __alt_instructions_end); 340 apply_alternatives(__alt_instructions, __alt_instructions_end);
289 341
290 /* switch to patch-once-at-boottime-only mode and free the 342 /* switch to patch-once-at-boottime-only mode and free the
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 5ab59c12335b..7ce09492fc0c 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
36#include <asm/arch_hooks.h> 36#include <asm/arch_hooks.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/i8253.h> 38#include <asm/i8253.h>
39#include <asm/nmi.h>
39 40
40#include <mach_apic.h> 41#include <mach_apic.h>
41#include <mach_apicdef.h> 42#include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
156 maxlvt = get_maxlvt(); 157 maxlvt = get_maxlvt();
157 158
158 /* 159 /*
159 * Masking an LVT entry on a P6 can trigger a local APIC error 160 * Masking an LVT entry can trigger a local APIC error
160 * if the vector is zero. Mask LVTERR first to prevent this. 161 * if the vector is zero. Mask LVTERR first to prevent this.
161 */ 162 */
162 if (maxlvt >= 3) { 163 if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
1117 unsigned long v; 1118 unsigned long v;
1118 1119
1119 v = apic_read(APIC_LVTT); 1120 v = apic_read(APIC_LVTT);
1120 apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); 1121 /*
1122 * When an illegal vector value (0-15) is written to an LVT
1123 * entry and delivery mode is Fixed, the APIC may signal an
1124 * illegal vector error, with out regard to whether the mask
1125 * bit is set or whether an interrupt is actually seen on input.
1126 *
1127 * Boot sequence might call this function when the LVTT has
1128 * '0' vector value. So make sure vector field is set to
1129 * valid value.
1130 */
1131 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1132 apic_write_around(APIC_LVTT, v);
1121 } 1133 }
1122} 1134}
1123 1135
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 9e819eb68229..7c5729d1fd06 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
764 int idled = 0; 764 int idled = 0;
765 int polling; 765 int polling;
766 766
767 polling = test_thread_flag(TIF_POLLING_NRFLAG); 767 polling = !!(current_thread_info()->status & TS_POLLING);
768 if (polling) { 768 if (polling) {
769 clear_thread_flag(TIF_POLLING_NRFLAG); 769 current_thread_info()->status &= ~TS_POLLING;
770 smp_mb__after_clear_bit(); 770 smp_mb__after_clear_bit();
771 } 771 }
772 if (!need_resched()) { 772 if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); 774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
775 } 775 }
776 if (polling) 776 if (polling)
777 set_thread_flag(TIF_POLLING_NRFLAG); 777 current_thread_info()->status |= TS_POLLING;
778 778
779 if (!idled) 779 if (!idled)
780 return 0; 780 return 0;
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a57048b..fd0457c9c827 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
224 224
225#ifdef CONFIG_X86_HT 225#ifdef CONFIG_X86_HT
226 /* 226 /*
227 * On a AMD dual core setup the lower bits of the APIC id 227 * On a AMD multi core setup the lower bits of the APIC id
228 * distingush the cores. Assumes number of cores is a power 228 * distingush the cores.
229 * of two.
230 */ 229 */
231 if (c->x86_max_cores > 1) { 230 if (c->x86_max_cores > 1) {
232 int cpu = smp_processor_id(); 231 int cpu = smp_processor_id();
233 unsigned bits = 0; 232 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
234 while ((1 << bits) < c->x86_max_cores) 233
235 bits++; 234 if (bits == 0) {
235 while ((1 << bits) < c->x86_max_cores)
236 bits++;
237 }
236 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); 238 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
237 phys_proc_id[cpu] >>= bits; 239 phys_proc_id[cpu] >>= bits;
238 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 240 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
240 } 242 }
241#endif 243#endif
242 244
245 if (cpuid_eax(0x80000000) >= 0x80000006)
246 num_cache_leaves = 3;
243} 247}
244 248
245static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 249static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29bb5a5..10afc645c540 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
122 122
123 select_idle_routine(c); 123 select_idle_routine(c);
124 l2 = init_intel_cacheinfo(c); 124 l2 = init_intel_cacheinfo(c);
125 if (c->cpuid_level > 9 ) {
126 unsigned eax = cpuid_eax(10);
127 /* Check for version and the number of counters */
128 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
129 set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
130 }
125 131
126 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 132 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
127 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 133 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6fa7e6..6c37b4fd8ce2 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen : CPUID4 emulation on AMD.
7 */ 8 */
8 9
9#include <linux/init.h> 10#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
130 cpumask_t shared_cpu_map; 131 cpumask_t shared_cpu_map;
131}; 132};
132 133
133static unsigned short num_cache_leaves; 134unsigned short num_cache_leaves;
135
136/* AMD doesn't have CPUID4. Emulate it here to report the same
137 information to the user. This makes some assumptions about the machine:
138 No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
139
140 In theory the TLBs could be reported as fake type (they are in "dummy").
141 Maybe later */
142union l1_cache {
143 struct {
144 unsigned line_size : 8;
145 unsigned lines_per_tag : 8;
146 unsigned assoc : 8;
147 unsigned size_in_kb : 8;
148 };
149 unsigned val;
150};
151
152union l2_cache {
153 struct {
154 unsigned line_size : 8;
155 unsigned lines_per_tag : 4;
156 unsigned assoc : 4;
157 unsigned size_in_kb : 16;
158 };
159 unsigned val;
160};
161
162static unsigned short assocs[] = {
163 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
164 [8] = 16,
165 [0xf] = 0xffff // ??
166 };
167static unsigned char levels[] = { 1, 1, 2 };
168static unsigned char types[] = { 1, 2, 3 };
169
170static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
171 union _cpuid4_leaf_ebx *ebx,
172 union _cpuid4_leaf_ecx *ecx)
173{
174 unsigned dummy;
175 unsigned line_size, lines_per_tag, assoc, size_in_kb;
176 union l1_cache l1i, l1d;
177 union l2_cache l2;
178
179 eax->full = 0;
180 ebx->full = 0;
181 ecx->full = 0;
182
183 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
184 cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
185
186 if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
187 return;
188
189 eax->split.is_self_initializing = 1;
190 eax->split.type = types[leaf];
191 eax->split.level = levels[leaf];
192 eax->split.num_threads_sharing = 0;
193 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
194
195 if (leaf <= 1) {
196 union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
197 assoc = l1->assoc;
198 line_size = l1->line_size;
199 lines_per_tag = l1->lines_per_tag;
200 size_in_kb = l1->size_in_kb;
201 } else {
202 assoc = l2.assoc;
203 line_size = l2.line_size;
204 lines_per_tag = l2.lines_per_tag;
205 /* cpu_data has errata corrections for K7 applied */
206 size_in_kb = current_cpu_data.x86_cache_size;
207 }
208
209 if (assoc == 0xf)
210 eax->split.is_fully_associative = 1;
211 ebx->split.coherency_line_size = line_size - 1;
212 ebx->split.ways_of_associativity = assocs[assoc] - 1;
213 ebx->split.physical_line_partition = lines_per_tag - 1;
214 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
215 (ebx->split.ways_of_associativity + 1) - 1;
216}
134 217
135static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 218static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
136{ 219{
137 unsigned int eax, ebx, ecx, edx; 220 union _cpuid4_leaf_eax eax;
138 union _cpuid4_leaf_eax cache_eax; 221 union _cpuid4_leaf_ebx ebx;
222 union _cpuid4_leaf_ecx ecx;
223 unsigned edx;
139 224
140 cpuid_count(4, index, &eax, &ebx, &ecx, &edx); 225 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
141 cache_eax.full = eax; 226 amd_cpuid4(index, &eax, &ebx, &ecx);
142 if (cache_eax.split.type == CACHE_TYPE_NULL) 227 else
228 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
229 if (eax.split.type == CACHE_TYPE_NULL)
143 return -EIO; /* better error ? */ 230 return -EIO; /* better error ? */
144 231
145 this_leaf->eax.full = eax; 232 this_leaf->eax = eax;
146 this_leaf->ebx.full = ebx; 233 this_leaf->ebx = ebx;
147 this_leaf->ecx.full = ecx; 234 this_leaf->ecx = ecx;
148 this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * 235 this_leaf->size = (ecx.split.number_of_sets + 1) *
149 (this_leaf->ebx.split.coherency_line_size + 1) * 236 (ebx.split.coherency_line_size + 1) *
150 (this_leaf->ebx.split.physical_line_partition + 1) * 237 (ebx.split.physical_line_partition + 1) *
151 (this_leaf->ebx.split.ways_of_associativity + 1); 238 (ebx.split.ways_of_associativity + 1);
152 return 0; 239 return 0;
153} 240}
154 241
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 21dc1bbb8067..0c88d3ec8c18 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
120 return 1; 120 return 1;
121} 121}
122 122
123/*
124 * By using the NMI code instead of a vector we just sneak thru the
125 * word generator coming out with just what we want. AND it does
126 * not matter if clustered_apic_mode is set or not.
127 */
128static void smp_send_nmi_allbutself(void) 123static void smp_send_nmi_allbutself(void)
129{ 124{
130 send_IPI_allbutself(APIC_DM_NMI); 125 send_IPI_allbutself(NMI_VECTOR);
131} 126}
132 127
133static void nmi_shootdown_cpus(void) 128static void nmi_shootdown_cpus(void)
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f153b9..e6e4506e749a 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/page.h> 49#include <asm/page.h>
50#include <asm/desc.h> 50#include <asm/desc.h>
51#include <asm/dwarf2.h>
51#include "irq_vectors.h" 52#include "irq_vectors.h"
52 53
53#define nr_syscalls ((syscall_table_size)/4) 54#define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000
85#define SAVE_ALL \ 86#define SAVE_ALL \
86 cld; \ 87 cld; \
87 pushl %es; \ 88 pushl %es; \
89 CFI_ADJUST_CFA_OFFSET 4;\
90 /*CFI_REL_OFFSET es, 0;*/\
88 pushl %ds; \ 91 pushl %ds; \
92 CFI_ADJUST_CFA_OFFSET 4;\
93 /*CFI_REL_OFFSET ds, 0;*/\
89 pushl %eax; \ 94 pushl %eax; \
95 CFI_ADJUST_CFA_OFFSET 4;\
96 CFI_REL_OFFSET eax, 0;\
90 pushl %ebp; \ 97 pushl %ebp; \
98 CFI_ADJUST_CFA_OFFSET 4;\
99 CFI_REL_OFFSET ebp, 0;\
91 pushl %edi; \ 100 pushl %edi; \
101 CFI_ADJUST_CFA_OFFSET 4;\
102 CFI_REL_OFFSET edi, 0;\
92 pushl %esi; \ 103 pushl %esi; \
104 CFI_ADJUST_CFA_OFFSET 4;\
105 CFI_REL_OFFSET esi, 0;\
93 pushl %edx; \ 106 pushl %edx; \
107 CFI_ADJUST_CFA_OFFSET 4;\
108 CFI_REL_OFFSET edx, 0;\
94 pushl %ecx; \ 109 pushl %ecx; \
110 CFI_ADJUST_CFA_OFFSET 4;\
111 CFI_REL_OFFSET ecx, 0;\
95 pushl %ebx; \ 112 pushl %ebx; \
113 CFI_ADJUST_CFA_OFFSET 4;\
114 CFI_REL_OFFSET ebx, 0;\
96 movl $(__USER_DS), %edx; \ 115 movl $(__USER_DS), %edx; \
97 movl %edx, %ds; \ 116 movl %edx, %ds; \
98 movl %edx, %es; 117 movl %edx, %es;
99 118
100#define RESTORE_INT_REGS \ 119#define RESTORE_INT_REGS \
101 popl %ebx; \ 120 popl %ebx; \
121 CFI_ADJUST_CFA_OFFSET -4;\
122 CFI_RESTORE ebx;\
102 popl %ecx; \ 123 popl %ecx; \
124 CFI_ADJUST_CFA_OFFSET -4;\
125 CFI_RESTORE ecx;\
103 popl %edx; \ 126 popl %edx; \
127 CFI_ADJUST_CFA_OFFSET -4;\
128 CFI_RESTORE edx;\
104 popl %esi; \ 129 popl %esi; \
130 CFI_ADJUST_CFA_OFFSET -4;\
131 CFI_RESTORE esi;\
105 popl %edi; \ 132 popl %edi; \
133 CFI_ADJUST_CFA_OFFSET -4;\
134 CFI_RESTORE edi;\
106 popl %ebp; \ 135 popl %ebp; \
107 popl %eax 136 CFI_ADJUST_CFA_OFFSET -4;\
137 CFI_RESTORE ebp;\
138 popl %eax; \
139 CFI_ADJUST_CFA_OFFSET -4;\
140 CFI_RESTORE eax
108 141
109#define RESTORE_REGS \ 142#define RESTORE_REGS \
110 RESTORE_INT_REGS; \ 143 RESTORE_INT_REGS; \
1111: popl %ds; \ 1441: popl %ds; \
145 CFI_ADJUST_CFA_OFFSET -4;\
146 /*CFI_RESTORE ds;*/\
1122: popl %es; \ 1472: popl %es; \
148 CFI_ADJUST_CFA_OFFSET -4;\
149 /*CFI_RESTORE es;*/\
113.section .fixup,"ax"; \ 150.section .fixup,"ax"; \
1143: movl $0,(%esp); \ 1513: movl $0,(%esp); \
115 jmp 1b; \ 152 jmp 1b; \
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000
122 .long 2b,4b; \ 159 .long 2b,4b; \
123.previous 160.previous
124 161
162#define RING0_INT_FRAME \
163 CFI_STARTPROC simple;\
164 CFI_DEF_CFA esp, 3*4;\
165 /*CFI_OFFSET cs, -2*4;*/\
166 CFI_OFFSET eip, -3*4
167
168#define RING0_EC_FRAME \
169 CFI_STARTPROC simple;\
170 CFI_DEF_CFA esp, 4*4;\
171 /*CFI_OFFSET cs, -2*4;*/\
172 CFI_OFFSET eip, -3*4
173
174#define RING0_PTREGS_FRAME \
175 CFI_STARTPROC simple;\
176 CFI_DEF_CFA esp, OLDESP-EBX;\
177 /*CFI_OFFSET cs, CS-OLDESP;*/\
178 CFI_OFFSET eip, EIP-OLDESP;\
179 /*CFI_OFFSET es, ES-OLDESP;*/\
180 /*CFI_OFFSET ds, DS-OLDESP;*/\
181 CFI_OFFSET eax, EAX-OLDESP;\
182 CFI_OFFSET ebp, EBP-OLDESP;\
183 CFI_OFFSET edi, EDI-OLDESP;\
184 CFI_OFFSET esi, ESI-OLDESP;\
185 CFI_OFFSET edx, EDX-OLDESP;\
186 CFI_OFFSET ecx, ECX-OLDESP;\
187 CFI_OFFSET ebx, EBX-OLDESP
125 188
126ENTRY(ret_from_fork) 189ENTRY(ret_from_fork)
190 CFI_STARTPROC
127 pushl %eax 191 pushl %eax
192 CFI_ADJUST_CFA_OFFSET -4
128 call schedule_tail 193 call schedule_tail
129 GET_THREAD_INFO(%ebp) 194 GET_THREAD_INFO(%ebp)
130 popl %eax 195 popl %eax
196 CFI_ADJUST_CFA_OFFSET -4
131 jmp syscall_exit 197 jmp syscall_exit
198 CFI_ENDPROC
132 199
133/* 200/*
134 * Return to user mode is not as complex as all this looks, 201 * Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
139 206
140 # userspace resumption stub bypassing syscall exit tracing 207 # userspace resumption stub bypassing syscall exit tracing
141 ALIGN 208 ALIGN
209 RING0_PTREGS_FRAME
142ret_from_exception: 210ret_from_exception:
143 preempt_stop 211 preempt_stop
144ret_from_intr: 212ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
171 call preempt_schedule_irq 239 call preempt_schedule_irq
172 jmp need_resched 240 jmp need_resched
173#endif 241#endif
242 CFI_ENDPROC
174 243
175/* SYSENTER_RETURN points to after the "sysenter" instruction in 244/* SYSENTER_RETURN points to after the "sysenter" instruction in
176 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 245 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
177 246
178 # sysenter call handler stub 247 # sysenter call handler stub
179ENTRY(sysenter_entry) 248ENTRY(sysenter_entry)
249 CFI_STARTPROC simple
250 CFI_DEF_CFA esp, 0
251 CFI_REGISTER esp, ebp
180 movl TSS_sysenter_esp0(%esp),%esp 252 movl TSS_sysenter_esp0(%esp),%esp
181sysenter_past_esp: 253sysenter_past_esp:
182 sti 254 sti
183 pushl $(__USER_DS) 255 pushl $(__USER_DS)
256 CFI_ADJUST_CFA_OFFSET 4
257 /*CFI_REL_OFFSET ss, 0*/
184 pushl %ebp 258 pushl %ebp
259 CFI_ADJUST_CFA_OFFSET 4
260 CFI_REL_OFFSET esp, 0
185 pushfl 261 pushfl
262 CFI_ADJUST_CFA_OFFSET 4
186 pushl $(__USER_CS) 263 pushl $(__USER_CS)
264 CFI_ADJUST_CFA_OFFSET 4
265 /*CFI_REL_OFFSET cs, 0*/
187 pushl $SYSENTER_RETURN 266 pushl $SYSENTER_RETURN
267 CFI_ADJUST_CFA_OFFSET 4
268 CFI_REL_OFFSET eip, 0
188 269
189/* 270/*
190 * Load the potential sixth argument from user stack. 271 * Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
199.previous 280.previous
200 281
201 pushl %eax 282 pushl %eax
283 CFI_ADJUST_CFA_OFFSET 4
202 SAVE_ALL 284 SAVE_ALL
203 GET_THREAD_INFO(%ebp) 285 GET_THREAD_INFO(%ebp)
204 286
@@ -219,11 +301,14 @@ sysenter_past_esp:
219 xorl %ebp,%ebp 301 xorl %ebp,%ebp
220 sti 302 sti
221 sysexit 303 sysexit
304 CFI_ENDPROC
222 305
223 306
224 # system call handler stub 307 # system call handler stub
225ENTRY(system_call) 308ENTRY(system_call)
309 RING0_INT_FRAME # can't unwind into user space anyway
226 pushl %eax # save orig_eax 310 pushl %eax # save orig_eax
311 CFI_ADJUST_CFA_OFFSET 4
227 SAVE_ALL 312 SAVE_ALL
228 GET_THREAD_INFO(%ebp) 313 GET_THREAD_INFO(%ebp)
229 testl $TF_MASK,EFLAGS(%esp) 314 testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
256 movb CS(%esp), %al 341 movb CS(%esp), %al
257 andl $(VM_MASK | (4 << 8) | 3), %eax 342 andl $(VM_MASK | (4 << 8) | 3), %eax
258 cmpl $((4 << 8) | 3), %eax 343 cmpl $((4 << 8) | 3), %eax
344 CFI_REMEMBER_STATE
259 je ldt_ss # returning to user-space with LDT SS 345 je ldt_ss # returning to user-space with LDT SS
260restore_nocheck: 346restore_nocheck:
261 RESTORE_REGS 347 RESTORE_REGS
262 addl $4, %esp 348 addl $4, %esp
349 CFI_ADJUST_CFA_OFFSET -4
2631: iret 3501: iret
264.section .fixup,"ax" 351.section .fixup,"ax"
265iret_exc: 352iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
273 .long 1b,iret_exc 360 .long 1b,iret_exc
274.previous 361.previous
275 362
363 CFI_RESTORE_STATE
276ldt_ss: 364ldt_ss:
277 larl OLDSS(%esp), %eax 365 larl OLDSS(%esp), %eax
278 jnz restore_nocheck 366 jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
285 * CPUs, which we can try to work around to make 373 * CPUs, which we can try to work around to make
286 * dosemu and wine happy. */ 374 * dosemu and wine happy. */
287 subl $8, %esp # reserve space for switch16 pointer 375 subl $8, %esp # reserve space for switch16 pointer
376 CFI_ADJUST_CFA_OFFSET 8
288 cli 377 cli
289 movl %esp, %eax 378 movl %esp, %eax
290 /* Set up the 16bit stack frame with switch32 pointer on top, 379 /* Set up the 16bit stack frame with switch32 pointer on top,
291 * and a switch16 pointer on top of the current frame. */ 380 * and a switch16 pointer on top of the current frame. */
292 call setup_x86_bogus_stack 381 call setup_x86_bogus_stack
382 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
293 RESTORE_REGS 383 RESTORE_REGS
294 lss 20+4(%esp), %esp # switch to 16bit stack 384 lss 20+4(%esp), %esp # switch to 16bit stack
2951: iret 3851: iret
@@ -297,9 +387,11 @@ ldt_ss:
297 .align 4 387 .align 4
298 .long 1b,iret_exc 388 .long 1b,iret_exc
299.previous 389.previous
390 CFI_ENDPROC
300 391
301 # perform work that needs to be done immediately before resumption 392 # perform work that needs to be done immediately before resumption
302 ALIGN 393 ALIGN
394 RING0_PTREGS_FRAME # can't unwind into user space anyway
303work_pending: 395work_pending:
304 testb $_TIF_NEED_RESCHED, %cl 396 testb $_TIF_NEED_RESCHED, %cl
305 jz work_notifysig 397 jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and
329work_notifysig_v86: 421work_notifysig_v86:
330#ifdef CONFIG_VM86 422#ifdef CONFIG_VM86
331 pushl %ecx # save ti_flags for do_notify_resume 423 pushl %ecx # save ti_flags for do_notify_resume
424 CFI_ADJUST_CFA_OFFSET 4
332 call save_v86_state # %eax contains pt_regs pointer 425 call save_v86_state # %eax contains pt_regs pointer
333 popl %ecx 426 popl %ecx
427 CFI_ADJUST_CFA_OFFSET -4
334 movl %eax, %esp 428 movl %eax, %esp
335 xorl %edx, %edx 429 xorl %edx, %edx
336 call do_notify_resume 430 call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
363 movl $1, %edx 457 movl $1, %edx
364 call do_syscall_trace 458 call do_syscall_trace
365 jmp resume_userspace 459 jmp resume_userspace
460 CFI_ENDPROC
366 461
367 ALIGN 462 RING0_INT_FRAME # can't unwind into user space anyway
368syscall_fault: 463syscall_fault:
369 pushl %eax # save orig_eax 464 pushl %eax # save orig_eax
465 CFI_ADJUST_CFA_OFFSET 4
370 SAVE_ALL 466 SAVE_ALL
371 GET_THREAD_INFO(%ebp) 467 GET_THREAD_INFO(%ebp)
372 movl $-EFAULT,EAX(%esp) 468 movl $-EFAULT,EAX(%esp)
373 jmp resume_userspace 469 jmp resume_userspace
374 470
375 ALIGN
376syscall_badsys: 471syscall_badsys:
377 movl $-ENOSYS,EAX(%esp) 472 movl $-ENOSYS,EAX(%esp)
378 jmp resume_userspace 473 jmp resume_userspace
474 CFI_ENDPROC
379 475
380#define FIXUP_ESPFIX_STACK \ 476#define FIXUP_ESPFIX_STACK \
381 movl %esp, %eax; \ 477 movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
387 movl %eax, %esp; 483 movl %eax, %esp;
388#define UNWIND_ESPFIX_STACK \ 484#define UNWIND_ESPFIX_STACK \
389 pushl %eax; \ 485 pushl %eax; \
486 CFI_ADJUST_CFA_OFFSET 4; \
390 movl %ss, %eax; \ 487 movl %ss, %eax; \
391 /* see if on 16bit stack */ \ 488 /* see if on 16bit stack */ \
392 cmpw $__ESPFIX_SS, %ax; \ 489 cmpw $__ESPFIX_SS, %ax; \
393 jne 28f; \ 490 je 28f; \
394 movl $__KERNEL_DS, %edx; \ 49127: popl %eax; \
395 movl %edx, %ds; \ 492 CFI_ADJUST_CFA_OFFSET -4; \
396 movl %edx, %es; \ 493.section .fixup,"ax"; \
49428: movl $__KERNEL_DS, %eax; \
495 movl %eax, %ds; \
496 movl %eax, %es; \
397 /* switch to 32bit stack */ \ 497 /* switch to 32bit stack */ \
398 FIXUP_ESPFIX_STACK \ 498 FIXUP_ESPFIX_STACK; \
39928: popl %eax; 499 jmp 27b; \
500.previous
400 501
401/* 502/*
402 * Build the entry stubs and pointer table with 503 * Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
408 509
409vector=0 510vector=0
410ENTRY(irq_entries_start) 511ENTRY(irq_entries_start)
512 RING0_INT_FRAME
411.rept NR_IRQS 513.rept NR_IRQS
412 ALIGN 514 ALIGN
515 .if vector
516 CFI_ADJUST_CFA_OFFSET -4
517 .endif
4131: pushl $vector-256 5181: pushl $vector-256
519 CFI_ADJUST_CFA_OFFSET 4
414 jmp common_interrupt 520 jmp common_interrupt
415.data 521.data
416 .long 1b 522 .long 1b
@@ -424,60 +530,99 @@ common_interrupt:
424 movl %esp,%eax 530 movl %esp,%eax
425 call do_IRQ 531 call do_IRQ
426 jmp ret_from_intr 532 jmp ret_from_intr
533 CFI_ENDPROC
427 534
428#define BUILD_INTERRUPT(name, nr) \ 535#define BUILD_INTERRUPT(name, nr) \
429ENTRY(name) \ 536ENTRY(name) \
537 RING0_INT_FRAME; \
430 pushl $nr-256; \ 538 pushl $nr-256; \
431 SAVE_ALL \ 539 CFI_ADJUST_CFA_OFFSET 4; \
540 SAVE_ALL; \
432 movl %esp,%eax; \ 541 movl %esp,%eax; \
433 call smp_/**/name; \ 542 call smp_/**/name; \
434 jmp ret_from_intr; 543 jmp ret_from_intr; \
544 CFI_ENDPROC
435 545
436/* The include is where all of the SMP etc. interrupts come from */ 546/* The include is where all of the SMP etc. interrupts come from */
437#include "entry_arch.h" 547#include "entry_arch.h"
438 548
439ENTRY(divide_error) 549ENTRY(divide_error)
550 RING0_INT_FRAME
440 pushl $0 # no error code 551 pushl $0 # no error code
552 CFI_ADJUST_CFA_OFFSET 4
441 pushl $do_divide_error 553 pushl $do_divide_error
554 CFI_ADJUST_CFA_OFFSET 4
442 ALIGN 555 ALIGN
443error_code: 556error_code:
444 pushl %ds 557 pushl %ds
558 CFI_ADJUST_CFA_OFFSET 4
559 /*CFI_REL_OFFSET ds, 0*/
445 pushl %eax 560 pushl %eax
561 CFI_ADJUST_CFA_OFFSET 4
562 CFI_REL_OFFSET eax, 0
446 xorl %eax, %eax 563 xorl %eax, %eax
447 pushl %ebp 564 pushl %ebp
565 CFI_ADJUST_CFA_OFFSET 4
566 CFI_REL_OFFSET ebp, 0
448 pushl %edi 567 pushl %edi
568 CFI_ADJUST_CFA_OFFSET 4
569 CFI_REL_OFFSET edi, 0
449 pushl %esi 570 pushl %esi
571 CFI_ADJUST_CFA_OFFSET 4
572 CFI_REL_OFFSET esi, 0
450 pushl %edx 573 pushl %edx
574 CFI_ADJUST_CFA_OFFSET 4
575 CFI_REL_OFFSET edx, 0
451 decl %eax # eax = -1 576 decl %eax # eax = -1
452 pushl %ecx 577 pushl %ecx
578 CFI_ADJUST_CFA_OFFSET 4
579 CFI_REL_OFFSET ecx, 0
453 pushl %ebx 580 pushl %ebx
581 CFI_ADJUST_CFA_OFFSET 4
582 CFI_REL_OFFSET ebx, 0
454 cld 583 cld
455 pushl %es 584 pushl %es
585 CFI_ADJUST_CFA_OFFSET 4
586 /*CFI_REL_OFFSET es, 0*/
456 UNWIND_ESPFIX_STACK 587 UNWIND_ESPFIX_STACK
457 popl %ecx 588 popl %ecx
589 CFI_ADJUST_CFA_OFFSET -4
590 /*CFI_REGISTER es, ecx*/
458 movl ES(%esp), %edi # get the function address 591 movl ES(%esp), %edi # get the function address
459 movl ORIG_EAX(%esp), %edx # get the error code 592 movl ORIG_EAX(%esp), %edx # get the error code
460 movl %eax, ORIG_EAX(%esp) 593 movl %eax, ORIG_EAX(%esp)
461 movl %ecx, ES(%esp) 594 movl %ecx, ES(%esp)
595 /*CFI_REL_OFFSET es, ES*/
462 movl $(__USER_DS), %ecx 596 movl $(__USER_DS), %ecx
463 movl %ecx, %ds 597 movl %ecx, %ds
464 movl %ecx, %es 598 movl %ecx, %es
465 movl %esp,%eax # pt_regs pointer 599 movl %esp,%eax # pt_regs pointer
466 call *%edi 600 call *%edi
467 jmp ret_from_exception 601 jmp ret_from_exception
602 CFI_ENDPROC
468 603
469ENTRY(coprocessor_error) 604ENTRY(coprocessor_error)
605 RING0_INT_FRAME
470 pushl $0 606 pushl $0
607 CFI_ADJUST_CFA_OFFSET 4
471 pushl $do_coprocessor_error 608 pushl $do_coprocessor_error
609 CFI_ADJUST_CFA_OFFSET 4
472 jmp error_code 610 jmp error_code
611 CFI_ENDPROC
473 612
474ENTRY(simd_coprocessor_error) 613ENTRY(simd_coprocessor_error)
614 RING0_INT_FRAME
475 pushl $0 615 pushl $0
616 CFI_ADJUST_CFA_OFFSET 4
476 pushl $do_simd_coprocessor_error 617 pushl $do_simd_coprocessor_error
618 CFI_ADJUST_CFA_OFFSET 4
477 jmp error_code 619 jmp error_code
620 CFI_ENDPROC
478 621
479ENTRY(device_not_available) 622ENTRY(device_not_available)
623 RING0_INT_FRAME
480 pushl $-1 # mark this as an int 624 pushl $-1 # mark this as an int
625 CFI_ADJUST_CFA_OFFSET 4
481 SAVE_ALL 626 SAVE_ALL
482 movl %cr0, %eax 627 movl %cr0, %eax
483 testl $0x4, %eax # EM (math emulation bit) 628 testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
487 jmp ret_from_exception 632 jmp ret_from_exception
488device_not_available_emulate: 633device_not_available_emulate:
489 pushl $0 # temporary storage for ORIG_EIP 634 pushl $0 # temporary storage for ORIG_EIP
635 CFI_ADJUST_CFA_OFFSET 4
490 call math_emulate 636 call math_emulate
491 addl $4, %esp 637 addl $4, %esp
638 CFI_ADJUST_CFA_OFFSET -4
492 jmp ret_from_exception 639 jmp ret_from_exception
640 CFI_ENDPROC
493 641
494/* 642/*
495 * Debug traps and NMI can happen at the one SYSENTER instruction 643 * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label: \
514 pushl $sysenter_past_esp 662 pushl $sysenter_past_esp
515 663
516KPROBE_ENTRY(debug) 664KPROBE_ENTRY(debug)
665 RING0_INT_FRAME
517 cmpl $sysenter_entry,(%esp) 666 cmpl $sysenter_entry,(%esp)
518 jne debug_stack_correct 667 jne debug_stack_correct
519 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 668 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
520debug_stack_correct: 669debug_stack_correct:
521 pushl $-1 # mark this as an int 670 pushl $-1 # mark this as an int
671 CFI_ADJUST_CFA_OFFSET 4
522 SAVE_ALL 672 SAVE_ALL
523 xorl %edx,%edx # error code 0 673 xorl %edx,%edx # error code 0
524 movl %esp,%eax # pt_regs pointer 674 movl %esp,%eax # pt_regs pointer
525 call do_debug 675 call do_debug
526 jmp ret_from_exception 676 jmp ret_from_exception
677 CFI_ENDPROC
527 .previous .text 678 .previous .text
528/* 679/*
529 * NMI is doubly nasty. It can happen _while_ we're handling 680 * NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
534 * fault happened on the sysenter path. 685 * fault happened on the sysenter path.
535 */ 686 */
536ENTRY(nmi) 687ENTRY(nmi)
688 RING0_INT_FRAME
537 pushl %eax 689 pushl %eax
690 CFI_ADJUST_CFA_OFFSET 4
538 movl %ss, %eax 691 movl %ss, %eax
539 cmpw $__ESPFIX_SS, %ax 692 cmpw $__ESPFIX_SS, %ax
540 popl %eax 693 popl %eax
694 CFI_ADJUST_CFA_OFFSET -4
541 je nmi_16bit_stack 695 je nmi_16bit_stack
542 cmpl $sysenter_entry,(%esp) 696 cmpl $sysenter_entry,(%esp)
543 je nmi_stack_fixup 697 je nmi_stack_fixup
544 pushl %eax 698 pushl %eax
699 CFI_ADJUST_CFA_OFFSET 4
545 movl %esp,%eax 700 movl %esp,%eax
546 /* Do not access memory above the end of our stack page, 701 /* Do not access memory above the end of our stack page,
547 * it might not exist. 702 * it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
549 andl $(THREAD_SIZE-1),%eax 704 andl $(THREAD_SIZE-1),%eax
550 cmpl $(THREAD_SIZE-20),%eax 705 cmpl $(THREAD_SIZE-20),%eax
551 popl %eax 706 popl %eax
707 CFI_ADJUST_CFA_OFFSET -4
552 jae nmi_stack_correct 708 jae nmi_stack_correct
553 cmpl $sysenter_entry,12(%esp) 709 cmpl $sysenter_entry,12(%esp)
554 je nmi_debug_stack_check 710 je nmi_debug_stack_check
555nmi_stack_correct: 711nmi_stack_correct:
556 pushl %eax 712 pushl %eax
713 CFI_ADJUST_CFA_OFFSET 4
557 SAVE_ALL 714 SAVE_ALL
558 xorl %edx,%edx # zero error code 715 xorl %edx,%edx # zero error code
559 movl %esp,%eax # pt_regs pointer 716 movl %esp,%eax # pt_regs pointer
560 call do_nmi 717 call do_nmi
561 jmp restore_all 718 jmp restore_all
719 CFI_ENDPROC
562 720
563nmi_stack_fixup: 721nmi_stack_fixup:
564 FIX_STACK(12,nmi_stack_correct, 1) 722 FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
574 jmp nmi_stack_correct 732 jmp nmi_stack_correct
575 733
576nmi_16bit_stack: 734nmi_16bit_stack:
735 RING0_INT_FRAME
577 /* create the pointer to lss back */ 736 /* create the pointer to lss back */
578 pushl %ss 737 pushl %ss
738 CFI_ADJUST_CFA_OFFSET 4
579 pushl %esp 739 pushl %esp
740 CFI_ADJUST_CFA_OFFSET 4
580 movzwl %sp, %esp 741 movzwl %sp, %esp
581 addw $4, (%esp) 742 addw $4, (%esp)
582 /* copy the iret frame of 12 bytes */ 743 /* copy the iret frame of 12 bytes */
583 .rept 3 744 .rept 3
584 pushl 16(%esp) 745 pushl 16(%esp)
746 CFI_ADJUST_CFA_OFFSET 4
585 .endr 747 .endr
586 pushl %eax 748 pushl %eax
749 CFI_ADJUST_CFA_OFFSET 4
587 SAVE_ALL 750 SAVE_ALL
588 FIXUP_ESPFIX_STACK # %eax == %esp 751 FIXUP_ESPFIX_STACK # %eax == %esp
752 CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
589 xorl %edx,%edx # zero error code 753 xorl %edx,%edx # zero error code
590 call do_nmi 754 call do_nmi
591 RESTORE_REGS 755 RESTORE_REGS
592 lss 12+4(%esp), %esp # back to 16bit stack 756 lss 12+4(%esp), %esp # back to 16bit stack
5931: iret 7571: iret
758 CFI_ENDPROC
594.section __ex_table,"a" 759.section __ex_table,"a"
595 .align 4 760 .align 4
596 .long 1b,iret_exc 761 .long 1b,iret_exc
597.previous 762.previous
598 763
599KPROBE_ENTRY(int3) 764KPROBE_ENTRY(int3)
765 RING0_INT_FRAME
600 pushl $-1 # mark this as an int 766 pushl $-1 # mark this as an int
767 CFI_ADJUST_CFA_OFFSET 4
601 SAVE_ALL 768 SAVE_ALL
602 xorl %edx,%edx # zero error code 769 xorl %edx,%edx # zero error code
603 movl %esp,%eax # pt_regs pointer 770 movl %esp,%eax # pt_regs pointer
604 call do_int3 771 call do_int3
605 jmp ret_from_exception 772 jmp ret_from_exception
773 CFI_ENDPROC
606 .previous .text 774 .previous .text
607 775
608ENTRY(overflow) 776ENTRY(overflow)
777 RING0_INT_FRAME
609 pushl $0 778 pushl $0
779 CFI_ADJUST_CFA_OFFSET 4
610 pushl $do_overflow 780 pushl $do_overflow
781 CFI_ADJUST_CFA_OFFSET 4
611 jmp error_code 782 jmp error_code
783 CFI_ENDPROC
612 784
613ENTRY(bounds) 785ENTRY(bounds)
786 RING0_INT_FRAME
614 pushl $0 787 pushl $0
788 CFI_ADJUST_CFA_OFFSET 4
615 pushl $do_bounds 789 pushl $do_bounds
790 CFI_ADJUST_CFA_OFFSET 4
616 jmp error_code 791 jmp error_code
792 CFI_ENDPROC
617 793
618ENTRY(invalid_op) 794ENTRY(invalid_op)
795 RING0_INT_FRAME
619 pushl $0 796 pushl $0
797 CFI_ADJUST_CFA_OFFSET 4
620 pushl $do_invalid_op 798 pushl $do_invalid_op
799 CFI_ADJUST_CFA_OFFSET 4
621 jmp error_code 800 jmp error_code
801 CFI_ENDPROC
622 802
623ENTRY(coprocessor_segment_overrun) 803ENTRY(coprocessor_segment_overrun)
804 RING0_INT_FRAME
624 pushl $0 805 pushl $0
806 CFI_ADJUST_CFA_OFFSET 4
625 pushl $do_coprocessor_segment_overrun 807 pushl $do_coprocessor_segment_overrun
808 CFI_ADJUST_CFA_OFFSET 4
626 jmp error_code 809 jmp error_code
810 CFI_ENDPROC
627 811
628ENTRY(invalid_TSS) 812ENTRY(invalid_TSS)
813 RING0_EC_FRAME
629 pushl $do_invalid_TSS 814 pushl $do_invalid_TSS
815 CFI_ADJUST_CFA_OFFSET 4
630 jmp error_code 816 jmp error_code
817 CFI_ENDPROC
631 818
632ENTRY(segment_not_present) 819ENTRY(segment_not_present)
820 RING0_EC_FRAME
633 pushl $do_segment_not_present 821 pushl $do_segment_not_present
822 CFI_ADJUST_CFA_OFFSET 4
634 jmp error_code 823 jmp error_code
824 CFI_ENDPROC
635 825
636ENTRY(stack_segment) 826ENTRY(stack_segment)
827 RING0_EC_FRAME
637 pushl $do_stack_segment 828 pushl $do_stack_segment
829 CFI_ADJUST_CFA_OFFSET 4
638 jmp error_code 830 jmp error_code
831 CFI_ENDPROC
639 832
640KPROBE_ENTRY(general_protection) 833KPROBE_ENTRY(general_protection)
834 RING0_EC_FRAME
641 pushl $do_general_protection 835 pushl $do_general_protection
836 CFI_ADJUST_CFA_OFFSET 4
642 jmp error_code 837 jmp error_code
838 CFI_ENDPROC
643 .previous .text 839 .previous .text
644 840
645ENTRY(alignment_check) 841ENTRY(alignment_check)
842 RING0_EC_FRAME
646 pushl $do_alignment_check 843 pushl $do_alignment_check
844 CFI_ADJUST_CFA_OFFSET 4
647 jmp error_code 845 jmp error_code
846 CFI_ENDPROC
648 847
649KPROBE_ENTRY(page_fault) 848KPROBE_ENTRY(page_fault)
849 RING0_EC_FRAME
650 pushl $do_page_fault 850 pushl $do_page_fault
851 CFI_ADJUST_CFA_OFFSET 4
651 jmp error_code 852 jmp error_code
853 CFI_ENDPROC
652 .previous .text 854 .previous .text
653 855
654#ifdef CONFIG_X86_MCE 856#ifdef CONFIG_X86_MCE
655ENTRY(machine_check) 857ENTRY(machine_check)
858 RING0_INT_FRAME
656 pushl $0 859 pushl $0
860 CFI_ADJUST_CFA_OFFSET 4
657 pushl machine_check_vector 861 pushl machine_check_vector
862 CFI_ADJUST_CFA_OFFSET 4
658 jmp error_code 863 jmp error_code
864 CFI_ENDPROC
659#endif 865#endif
660 866
661ENTRY(spurious_interrupt_bug) 867ENTRY(spurious_interrupt_bug)
868 RING0_INT_FRAME
662 pushl $0 869 pushl $0
870 CFI_ADJUST_CFA_OFFSET 4
663 pushl $do_spurious_interrupt_bug 871 pushl $do_spurious_interrupt_bug
872 CFI_ADJUST_CFA_OFFSET 4
664 jmp error_code 873 jmp error_code
874 CFI_ENDPROC
875
876#ifdef CONFIG_STACK_UNWIND
877ENTRY(arch_unwind_init_running)
878 CFI_STARTPROC
879 movl 4(%esp), %edx
880 movl (%esp), %ecx
881 leal 4(%esp), %eax
882 movl %ebx, EBX(%edx)
883 xorl %ebx, %ebx
884 movl %ebx, ECX(%edx)
885 movl %ebx, EDX(%edx)
886 movl %esi, ESI(%edx)
887 movl %edi, EDI(%edx)
888 movl %ebp, EBP(%edx)
889 movl %ebx, EAX(%edx)
890 movl $__USER_DS, DS(%edx)
891 movl $__USER_DS, ES(%edx)
892 movl %ebx, ORIG_EAX(%edx)
893 movl %ecx, EIP(%edx)
894 movl 12(%esp), %ecx
895 movl $__KERNEL_CS, CS(%edx)
896 movl %ebx, EFLAGS(%edx)
897 movl %eax, OLDESP(%edx)
898 movl 8(%esp), %eax
899 movl %ecx, 8(%esp)
900 movl EBX(%edx), %ebx
901 movl $__KERNEL_DS, OLDSS(%edx)
902 jmpl *%eax
903 CFI_ENDPROC
904ENDPROC(arch_unwind_init_running)
905#endif
665 906
666.section .rodata,"a" 907.section .rodata,"a"
667#include "syscall_table.S" 908#include "syscall_table.S"
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index a62df3e764c5..72ae414e4d49 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40#include <asm/i8259.h> 40#include <asm/i8259.h>
41#include <asm/nmi.h>
41 42
42#include <mach_apic.h> 43#include <mach_apic.h>
43 44
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
50static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 51static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
51 52
52static DEFINE_SPINLOCK(ioapic_lock); 53static DEFINE_SPINLOCK(ioapic_lock);
54static DEFINE_SPINLOCK(vector_lock);
53 55
54int timer_over_8254 __initdata = 1; 56int timer_over_8254 __initdata = 1;
55 57
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1161int assign_irq_vector(int irq) 1163int assign_irq_vector(int irq)
1162{ 1164{
1163 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 1165 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
1166 unsigned long flags;
1167 int vector;
1168
1169 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1164 1170
1165 BUG_ON(irq >= NR_IRQ_VECTORS); 1171 spin_lock_irqsave(&vector_lock, flags);
1166 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 1172
1173 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1174 spin_unlock_irqrestore(&vector_lock, flags);
1167 return IO_APIC_VECTOR(irq); 1175 return IO_APIC_VECTOR(irq);
1176 }
1168next: 1177next:
1169 current_vector += 8; 1178 current_vector += 8;
1170 if (current_vector == SYSCALL_VECTOR) 1179 if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
1172 1181
1173 if (current_vector >= FIRST_SYSTEM_VECTOR) { 1182 if (current_vector >= FIRST_SYSTEM_VECTOR) {
1174 offset++; 1183 offset++;
1175 if (!(offset%8)) 1184 if (!(offset%8)) {
1185 spin_unlock_irqrestore(&vector_lock, flags);
1176 return -ENOSPC; 1186 return -ENOSPC;
1187 }
1177 current_vector = FIRST_DEVICE_VECTOR + offset; 1188 current_vector = FIRST_DEVICE_VECTOR + offset;
1178 } 1189 }
1179 1190
1180 vector_irq[current_vector] = irq; 1191 vector = current_vector;
1192 vector_irq[vector] = irq;
1181 if (irq != AUTO_ASSIGN) 1193 if (irq != AUTO_ASSIGN)
1182 IO_APIC_VECTOR(irq) = current_vector; 1194 IO_APIC_VECTOR(irq) = vector;
1183 1195
1184 return current_vector; 1196 spin_unlock_irqrestore(&vector_lock, flags);
1197
1198 return vector;
1185} 1199}
1186 1200
1187static struct hw_interrupt_type ioapic_level_type; 1201static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
1193 1207
1194static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 1208static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1195{ 1209{
1196 if (use_pci_vector() && !platform_legacy_irq(irq)) { 1210 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1197 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1211
1198 trigger == IOAPIC_LEVEL) 1212 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1199 irq_desc[vector].handler = &ioapic_level_type; 1213 trigger == IOAPIC_LEVEL)
1200 else 1214 irq_desc[idx].handler = &ioapic_level_type;
1201 irq_desc[vector].handler = &ioapic_edge_type; 1215 else
1202 set_intr_gate(vector, interrupt[vector]); 1216 irq_desc[idx].handler = &ioapic_edge_type;
1203 } else { 1217 set_intr_gate(vector, interrupt[idx]);
1204 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1205 trigger == IOAPIC_LEVEL)
1206 irq_desc[irq].handler = &ioapic_level_type;
1207 else
1208 irq_desc[irq].handler = &ioapic_edge_type;
1209 set_intr_gate(vector, interrupt[irq]);
1210 }
1211} 1218}
1212 1219
1213static void __init setup_IO_APIC_irqs(void) 1220static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 49ce4c31b713..061533e0cb5e 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
227 if (i == 0) { 227 if (i == 0) {
228 seq_printf(p, " "); 228 seq_printf(p, " ");
229 for_each_online_cpu(j) 229 for_each_online_cpu(j)
230 seq_printf(p, "CPU%d ",j); 230 seq_printf(p, "CPU%-8d",j);
231 seq_putc(p, '\n'); 231 seq_putc(p, '\n');
232 } 232 }
233 233
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498ec745..a76e93146585 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
14 */ 14 */
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/mm.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/bootmem.h>
20#include <linux/smp_lock.h>
21#include <linux/interrupt.h> 18#include <linux/interrupt.h>
22#include <linux/mc146818rtc.h>
23#include <linux/kernel_stat.h>
24#include <linux/module.h> 19#include <linux/module.h>
25#include <linux/nmi.h> 20#include <linux/nmi.h>
26#include <linux/sysdev.h> 21#include <linux/sysdev.h>
27#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/div64.h>
31#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/intel_arch_perfmon.h>
32 28
33#include "mach_traps.h" 29#include "mach_traps.h"
34 30
@@ -100,6 +96,9 @@ int nmi_active;
100 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ 96 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
101 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) 97 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
102 98
99#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
100#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
101
103#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
104/* The performance counters used by NMI_LOCAL_APIC don't trigger when 103/* The performance counters used by NMI_LOCAL_APIC don't trigger when
105 * the CPU is idle. To make sure the NMI watchdog really ticks on all 104 * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
212 211
213__setup("nmi_watchdog=", setup_nmi_watchdog); 212__setup("nmi_watchdog=", setup_nmi_watchdog);
214 213
214static void disable_intel_arch_watchdog(void);
215
215static void disable_lapic_nmi_watchdog(void) 216static void disable_lapic_nmi_watchdog(void)
216{ 217{
217 if (nmi_active <= 0) 218 if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
221 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 222 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
222 break; 223 break;
223 case X86_VENDOR_INTEL: 224 case X86_VENDOR_INTEL:
225 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
226 disable_intel_arch_watchdog();
227 break;
228 }
224 switch (boot_cpu_data.x86) { 229 switch (boot_cpu_data.x86) {
225 case 6: 230 case 6:
226 if (boot_cpu_data.x86_model > 0xd) 231 if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
449 return 1; 454 return 1;
450} 455}
451 456
457static void disable_intel_arch_watchdog(void)
458{
459 unsigned ebx;
460
461 /*
462 * Check whether the Architectural PerfMon supports
463 * Unhalted Core Cycles Event or not.
464 * NOTE: Corresponding bit = 0 in ebx indicates event present.
465 */
466 ebx = cpuid_ebx(10);
467 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
468 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
469}
470
471static int setup_intel_arch_watchdog(void)
472{
473 unsigned int evntsel;
474 unsigned ebx;
475
476 /*
477 * Check whether the Architectural PerfMon supports
478 * Unhalted Core Cycles Event or not.
479 * NOTE: Corresponding bit = 0 in ebx indicates event present.
480 */
481 ebx = cpuid_ebx(10);
482 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
483 return 0;
484
485 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
486
487 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
488 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
489
490 evntsel = ARCH_PERFMON_EVENTSEL_INT
491 | ARCH_PERFMON_EVENTSEL_OS
492 | ARCH_PERFMON_EVENTSEL_USR
493 | ARCH_PERFMON_NMI_EVENT_SEL
494 | ARCH_PERFMON_NMI_EVENT_UMASK;
495
496 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
497 write_watchdog_counter("INTEL_ARCH_PERFCTR0");
498 apic_write(APIC_LVTPC, APIC_DM_NMI);
499 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
500 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
501 return 1;
502}
503
452void setup_apic_nmi_watchdog (void) 504void setup_apic_nmi_watchdog (void)
453{ 505{
454 switch (boot_cpu_data.x86_vendor) { 506 switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
458 setup_k7_watchdog(); 510 setup_k7_watchdog();
459 break; 511 break;
460 case X86_VENDOR_INTEL: 512 case X86_VENDOR_INTEL:
513 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
514 if (!setup_intel_arch_watchdog())
515 return;
516 break;
517 }
461 switch (boot_cpu_data.x86) { 518 switch (boot_cpu_data.x86) {
462 case 6: 519 case 6:
463 if (boot_cpu_data.x86_model > 0xd) 520 if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
561 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 618 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
562 apic_write(APIC_LVTPC, APIC_DM_NMI); 619 apic_write(APIC_LVTPC, APIC_DM_NMI);
563 } 620 }
564 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { 621 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
622 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
565 /* Only P6 based Pentium M need to re-unmask 623 /* Only P6 based Pentium M need to re-unmask
566 * the apic vector but it doesn't hurt 624 * the apic vector but it doesn't hurt
567 * other P6 variant */ 625 * other P6 variant */
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afea46d1..6946b06e2784 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
102 local_irq_enable(); 102 local_irq_enable();
103 103
104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) { 104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
105 clear_thread_flag(TIF_POLLING_NRFLAG); 105 current_thread_info()->status &= ~TS_POLLING;
106 smp_mb__after_clear_bit(); 106 smp_mb__after_clear_bit();
107 while (!need_resched()) { 107 while (!need_resched()) {
108 local_irq_disable(); 108 local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
111 else 111 else
112 local_irq_enable(); 112 local_irq_enable();
113 } 113 }
114 set_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status |= TS_POLLING;
115 } else { 115 } else {
116 while (!need_resched()) 116 while (!need_resched())
117 cpu_relax(); 117 cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
174{ 174{
175 int cpu = smp_processor_id(); 175 int cpu = smp_processor_id();
176 176
177 set_thread_flag(TIF_POLLING_NRFLAG); 177 current_thread_info()->status |= TS_POLLING;
178 178
179 /* endless idle loop with no priority at all */ 179 /* endless idle loop with no priority at all */
180 while (1) { 180 while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
312 cr3 = read_cr3(); 312 cr3 = read_cr3();
313 cr4 = read_cr4_safe(); 313 cr4 = read_cr4_safe();
314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
315 show_trace(NULL, &regs->esp); 315 show_trace(NULL, regs, &regs->esp);
316} 316}
317 317
318/* 318/*
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e9643a58..c10789d7a9d3 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
114 114
115static inline int __prepare_ICR (unsigned int shortcut, int vector) 115static inline int __prepare_ICR (unsigned int shortcut, int vector)
116{ 116{
117 return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; 117 unsigned int icr = shortcut | APIC_DEST_LOGICAL;
118
119 switch (vector) {
120 default:
121 icr |= APIC_DM_FIXED | vector;
122 break;
123 case NMI_VECTOR:
124 icr |= APIC_DM_NMI;
125 break;
126 }
127 return icr;
118} 128}
119 129
120static inline int __prepare_ICR2 (unsigned int mask) 130static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bd0ca5c9f053..bce5470ecb42 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/desc.h> 53#include <asm/desc.h>
54#include <asm/arch_hooks.h> 54#include <asm/arch_hooks.h>
55#include <asm/nmi.h>
55 56
56#include <mach_apic.h> 57#include <mach_apic.h>
57#include <mach_wakecpu.h> 58#include <mach_wakecpu.h>
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index dcc14477af1f..78464097470a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
28#include <linux/utsname.h> 28#include <linux/utsname.h>
29#include <linux/kprobes.h> 29#include <linux/kprobes.h>
30#include <linux/kexec.h> 30#include <linux/kexec.h>
31#include <linux/unwind.h>
31 32
32#ifdef CONFIG_EISA 33#ifdef CONFIG_EISA
33#include <linux/ioport.h> 34#include <linux/ioport.h>
@@ -47,7 +48,7 @@
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/i387.h> 49#include <asm/i387.h>
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50 51#include <asm/unwind.h>
51#include <asm/smp.h> 52#include <asm/smp.h>
52#include <asm/arch_hooks.h> 53#include <asm/arch_hooks.h>
53#include <asm/kdebug.h> 54#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
92asmlinkage void machine_check(void); 93asmlinkage void machine_check(void);
93 94
94static int kstack_depth_to_print = 24; 95static int kstack_depth_to_print = 24;
96static int call_trace = 1;
95ATOMIC_NOTIFIER_HEAD(i386die_chain); 97ATOMIC_NOTIFIER_HEAD(i386die_chain);
96 98
97int register_die_notifier(struct notifier_block *nb) 99int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
170 return ebp; 172 return ebp;
171} 173}
172 174
173static void show_trace_log_lvl(struct task_struct *task, 175static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
176{
177 int n = 0;
178 int printed = 0; /* nr of entries already printed on current line */
179
180 while (unwind(info) == 0 && UNW_PC(info)) {
181 ++n;
182 printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
183 if (arch_unw_user_mode(info))
184 break;
185 }
186 if (printed)
187 printk("\n");
188 return n;
189}
190
191static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *stack, char *log_lvl) 192 unsigned long *stack, char *log_lvl)
175{ 193{
176 unsigned long ebp; 194 unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
178 if (!task) 196 if (!task)
179 task = current; 197 task = current;
180 198
199 if (call_trace >= 0) {
200 int unw_ret = 0;
201 struct unwind_frame_info info;
202
203 if (regs) {
204 if (unwind_init_frame_info(&info, task, regs) == 0)
205 unw_ret = show_trace_unwind(&info, log_lvl);
206 } else if (task == current)
207 unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
208 else {
209 if (unwind_init_blocked(&info, task) == 0)
210 unw_ret = show_trace_unwind(&info, log_lvl);
211 }
212 if (unw_ret > 0) {
213 if (call_trace > 0)
214 return;
215 printk("%sLegacy call trace:\n", log_lvl);
216 }
217 }
218
181 if (task == current) { 219 if (task == current) {
182 /* Grab ebp right from our regs */ 220 /* Grab ebp right from our regs */
183 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 221 asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
198 } 236 }
199} 237}
200 238
201void show_trace(struct task_struct *task, unsigned long * stack) 239void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
202{ 240{
203 show_trace_log_lvl(task, stack, ""); 241 show_trace_log_lvl(task, regs, stack, "");
204} 242}
205 243
206static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, 244static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
207 char *log_lvl) 245 unsigned long *esp, char *log_lvl)
208{ 246{
209 unsigned long *stack; 247 unsigned long *stack;
210 int i; 248 int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
225 printk("%08lx ", *stack++); 263 printk("%08lx ", *stack++);
226 } 264 }
227 printk("\n%sCall Trace:\n", log_lvl); 265 printk("\n%sCall Trace:\n", log_lvl);
228 show_trace_log_lvl(task, esp, log_lvl); 266 show_trace_log_lvl(task, regs, esp, log_lvl);
229} 267}
230 268
231void show_stack(struct task_struct *task, unsigned long *esp) 269void show_stack(struct task_struct *task, unsigned long *esp)
232{ 270{
233 printk(" "); 271 printk(" ");
234 show_stack_log_lvl(task, esp, ""); 272 show_stack_log_lvl(task, NULL, esp, "");
235} 273}
236 274
237/* 275/*
@@ -241,7 +279,7 @@ void dump_stack(void)
241{ 279{
242 unsigned long stack; 280 unsigned long stack;
243 281
244 show_trace(current, &stack); 282 show_trace(current, NULL, &stack);
245} 283}
246 284
247EXPORT_SYMBOL(dump_stack); 285EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
285 u8 __user *eip; 323 u8 __user *eip;
286 324
287 printk("\n" KERN_EMERG "Stack: "); 325 printk("\n" KERN_EMERG "Stack: ");
288 show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); 326 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
289 327
290 printk(KERN_EMERG "Code: "); 328 printk(KERN_EMERG "Code: ");
291 329
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
1215 return 1; 1253 return 1;
1216} 1254}
1217__setup("kstack=", kstack_setup); 1255__setup("kstack=", kstack_setup);
1256
1257static int __init call_trace_setup(char *s)
1258{
1259 if (strcmp(s, "old") == 0)
1260 call_trace = -1;
1261 else if (strcmp(s, "both") == 0)
1262 call_trace = 0;
1263 else if (strcmp(s, "new") == 0)
1264 call_trace = 1;
1265 return 1;
1266}
1267__setup("call_trace=", call_trace_setup);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7512f39c9f25..2d4f1386e2b1 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -71,6 +71,15 @@ SECTIONS
71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } 71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
72 _edata = .; /* End of data section */ 72 _edata = .; /* End of data section */
73 73
74#ifdef CONFIG_STACK_UNWIND
75 . = ALIGN(4);
76 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
77 __start_unwind = .;
78 *(.eh_frame)
79 __end_unwind = .;
80 }
81#endif
82
74 . = ALIGN(THREAD_SIZE); /* init_task */ 83 . = ALIGN(THREAD_SIZE); /* init_task */
75 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 84 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
76 *(.data.init_task) 85 *(.data.init_task)
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72a5036..693bdea4a52b 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
13#include <linux/oprofile.h> 13#include <linux/oprofile.h>
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/nmi.h>
16 17
17#include "op_x86_model.h" 18#include "op_x86_model.h"
18#include "op_counter.h" 19#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066035c2..7c61d357b82b 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/fixmap.h> 15#include <asm/fixmap.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015fc044..5c3ab4b027ad 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 355d57970ba3..b045c279136c 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -272,9 +272,9 @@ cpu_idle (void)
272 /* endless idle loop with no priority at all */ 272 /* endless idle loop with no priority at all */
273 while (1) { 273 while (1) {
274 if (can_do_pal_halt) 274 if (can_do_pal_halt)
275 clear_thread_flag(TIF_POLLING_NRFLAG); 275 current_thread_info()->status &= ~TS_POLLING;
276 else 276 else
277 set_thread_flag(TIF_POLLING_NRFLAG); 277 current_thread_info()->status |= TS_POLLING;
278 278
279 if (!need_resched()) { 279 if (!need_resched()) {
280 void (*idle)(void); 280 void (*idle)(void);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index af44130f0d65..ccc4a7fb97a3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -386,24 +386,45 @@ config HPET_EMULATE_RTC
386 bool "Provide RTC interrupt" 386 bool "Provide RTC interrupt"
387 depends on HPET_TIMER && RTC=y 387 depends on HPET_TIMER && RTC=y
388 388
389config GART_IOMMU 389# Mark as embedded because too many people got it wrong.
390 bool "K8 GART IOMMU support" 390# The code disables itself when not needed.
391config IOMMU
392 bool "IOMMU support" if EMBEDDED
391 default y 393 default y
392 select SWIOTLB 394 select SWIOTLB
393 select AGP 395 select AGP
394 depends on PCI 396 depends on PCI
395 help 397 help
396 Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors 398 Support for full DMA access of devices with 32bit memory access only
397 and for the bounce buffering software IOMMU. 399 on systems with more than 3GB. This is usually needed for USB,
398 Needed to run systems with more than 3GB of memory properly with 400 sound, many IDE/SATA chipsets and some other devices.
399 32-bit PCI devices that do not support DAC (Double Address Cycle). 401 Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
400 The IOMMU can be turned off at runtime with the iommu=off parameter. 402 based IOMMU and a software bounce buffer based IOMMU used on Intel
401 Normally the kernel will take the right choice by itself. 403 systems and as fallback.
402 This option includes a driver for the AMD Opteron/Athlon64 IOMMU 404 The code is only active when needed (enough memory and limited
403 northbridge and a software emulation used on other systems without 405 device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
404 hardware IOMMU. If unsure, say Y. 406 too.
405 407
406# need this always selected by GART_IOMMU for the VIA workaround 408config CALGARY_IOMMU
409 bool "IBM Calgary IOMMU support"
410 default y
411 select SWIOTLB
412 depends on PCI && EXPERIMENTAL
413 help
414 Support for hardware IOMMUs in IBM's xSeries x366 and x460
415 systems. Needed to run systems with more than 3GB of memory
416 properly with 32-bit PCI devices that do not support DAC
417 (Double Address Cycle). Calgary also supports bus level
418 isolation, where all DMAs pass through the IOMMU. This
419 prevents them from going anywhere except their intended
420 destination. This catches hard-to-find kernel bugs and
421 mis-behaving drivers and devices that do not use the DMA-API
422 properly to set up their DMA buffers. The IOMMU can be
423 turned off at boot time with the iommu=off parameter.
424 Normally the kernel will make the right choice by itself.
425 If unsure, say Y.
426
427# need this always selected by IOMMU for the VIA workaround
407config SWIOTLB 428config SWIOTLB
408 bool 429 bool
409 430
@@ -501,6 +522,10 @@ config REORDER
501 optimal TLB usage. If you have pretty much any version of binutils, 522 optimal TLB usage. If you have pretty much any version of binutils,
502 this can increase your kernel build time by roughly one minute. 523 this can increase your kernel build time by roughly one minute.
503 524
525config K8_NB
526 def_bool y
527 depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
528
504endmenu 529endmenu
505 530
506# 531#
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index ea31b4c62105..1d92ab56c0f9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -13,7 +13,7 @@ config DEBUG_RODATA
13 If in doubt, say "N". 13 If in doubt, say "N".
14 14
15config IOMMU_DEBUG 15config IOMMU_DEBUG
16 depends on GART_IOMMU && DEBUG_KERNEL 16 depends on IOMMU && DEBUG_KERNEL
17 bool "Enable IOMMU debugging" 17 bool "Enable IOMMU debugging"
18 help 18 help
19 Force the IOMMU to on even when you have less than 4GB of 19 Force the IOMMU to on even when you have less than 4GB of
@@ -35,6 +35,22 @@ config IOMMU_LEAK
35 Add a simple leak tracer to the IOMMU code. This is useful when you 35 Add a simple leak tracer to the IOMMU code. This is useful when you
36 are debugging a buggy device driver that leaks IOMMU mappings. 36 are debugging a buggy device driver that leaks IOMMU mappings.
37 37
38config DEBUG_STACKOVERFLOW
39 bool "Check for stack overflows"
40 depends on DEBUG_KERNEL
41 help
42 This option will cause messages to be printed if free stack space
43 drops below a certain limit.
44
45config DEBUG_STACK_USAGE
46 bool "Stack utilization instrumentation"
47 depends on DEBUG_KERNEL
48 help
49 Enables the display of the minimum amount of free stack which each
50 task has ever had available in the sysrq-T and sysrq-P debug output.
51
52 This option will slow down process creation somewhat.
53
38#config X86_REMOTE_DEBUG 54#config X86_REMOTE_DEBUG
39# bool "kgdb debugging stub" 55# bool "kgdb debugging stub"
40 56
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index e573e2ab5510..431bb4bc36cd 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -27,6 +27,7 @@ LDFLAGS_vmlinux :=
27CHECKFLAGS += -D__x86_64__ -m64 27CHECKFLAGS += -D__x86_64__ -m64
28 28
29cflags-y := 29cflags-y :=
30cflags-kernel-y :=
30cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) 31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
31cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) 32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
32cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) 33cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
@@ -35,7 +36,7 @@ cflags-y += -m64
35cflags-y += -mno-red-zone 36cflags-y += -mno-red-zone
36cflags-y += -mcmodel=kernel 37cflags-y += -mcmodel=kernel
37cflags-y += -pipe 38cflags-y += -pipe
38cflags-$(CONFIG_REORDER) += -ffunction-sections 39cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
39# this makes reading assembly source easier, but produces worse code 40# this makes reading assembly source easier, but produces worse code
40# actually it makes the kernel smaller too. 41# actually it makes the kernel smaller too.
41cflags-y += -fno-reorder-blocks 42cflags-y += -fno-reorder-blocks
@@ -55,6 +56,7 @@ cflags-y += $(call cc-option,-funit-at-a-time)
55cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 56cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
56 57
57CFLAGS += $(cflags-y) 58CFLAGS += $(cflags-y)
59CFLAGS_KERNEL += $(cflags-kernel-y)
58AFLAGS += -m64 60AFLAGS += -m64
59 61
60head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o 62head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index 43ee6c50c277..deb063e7762d 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -107,8 +107,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
107isoimage: $(BOOTIMAGE) 107isoimage: $(BOOTIMAGE)
108 -rm -rf $(obj)/isoimage 108 -rm -rf $(obj)/isoimage
109 mkdir $(obj)/isoimage 109 mkdir $(obj)/isoimage
110 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ 110 for i in lib lib64 share end ; do \
111 $(obj)/isoimage 111 if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
112 cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
113 break ; \
114 fi ; \
115 if [ $$i = end ] ; then exit 1 ; fi ; \
116 done
112 cp $(BOOTIMAGE) $(obj)/isoimage/linux 117 cp $(BOOTIMAGE) $(obj)/isoimage/linux
113 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 118 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
114 if [ -f '$(FDINITRD)' ] ; then \ 119 if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index cf4b88c416dc..3755b2e394d0 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -77,11 +77,11 @@ static void gzip_release(void **);
77 */ 77 */
78static unsigned char *real_mode; /* Pointer to real-mode data */ 78static unsigned char *real_mode; /* Pointer to real-mode data */
79 79
80#define EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) 80#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
81#ifndef STANDARD_MEMORY_BIOS_CALL 81#ifndef STANDARD_MEMORY_BIOS_CALL
82#define ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) 82#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
83#endif 83#endif
84#define SCREEN_INFO (*(struct screen_info *)(real_mode+0)) 84#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
85 85
86extern unsigned char input_data[]; 86extern unsigned char input_data[];
87extern int input_len; 87extern int input_len;
@@ -92,9 +92,9 @@ static unsigned long output_ptr = 0;
92 92
93static void *malloc(int size); 93static void *malloc(int size);
94static void free(void *where); 94static void free(void *where);
95 95
96void* memset(void* s, int c, unsigned n); 96static void *memset(void *s, int c, unsigned n);
97void* memcpy(void* dest, const void* src, unsigned n); 97static void *memcpy(void *dest, const void *src, unsigned n);
98 98
99static void putstr(const char *); 99static void putstr(const char *);
100 100
@@ -162,8 +162,8 @@ static void putstr(const char *s)
162 int x,y,pos; 162 int x,y,pos;
163 char c; 163 char c;
164 164
165 x = SCREEN_INFO.orig_x; 165 x = RM_SCREEN_INFO.orig_x;
166 y = SCREEN_INFO.orig_y; 166 y = RM_SCREEN_INFO.orig_y;
167 167
168 while ( ( c = *s++ ) != '\0' ) { 168 while ( ( c = *s++ ) != '\0' ) {
169 if ( c == '\n' ) { 169 if ( c == '\n' ) {
@@ -184,8 +184,8 @@ static void putstr(const char *s)
184 } 184 }
185 } 185 }
186 186
187 SCREEN_INFO.orig_x = x; 187 RM_SCREEN_INFO.orig_x = x;
188 SCREEN_INFO.orig_y = y; 188 RM_SCREEN_INFO.orig_y = y;
189 189
190 pos = (x + cols * y) * 2; /* Update cursor position */ 190 pos = (x + cols * y) * 2; /* Update cursor position */
191 outb_p(14, vidport); 191 outb_p(14, vidport);
@@ -194,7 +194,7 @@ static void putstr(const char *s)
194 outb_p(0xff & (pos >> 1), vidport+1); 194 outb_p(0xff & (pos >> 1), vidport+1);
195} 195}
196 196
197void* memset(void* s, int c, unsigned n) 197static void* memset(void* s, int c, unsigned n)
198{ 198{
199 int i; 199 int i;
200 char *ss = (char*)s; 200 char *ss = (char*)s;
@@ -203,7 +203,7 @@ void* memset(void* s, int c, unsigned n)
203 return s; 203 return s;
204} 204}
205 205
206void* memcpy(void* dest, const void* src, unsigned n) 206static void* memcpy(void* dest, const void* src, unsigned n)
207{ 207{
208 int i; 208 int i;
209 char *d = (char *)dest, *s = (char *)src; 209 char *d = (char *)dest, *s = (char *)src;
@@ -278,15 +278,15 @@ static void error(char *x)
278 putstr(x); 278 putstr(x);
279 putstr("\n\n -- System halted"); 279 putstr("\n\n -- System halted");
280 280
281 while(1); 281 while(1); /* Halt */
282} 282}
283 283
284void setup_normal_output_buffer(void) 284static void setup_normal_output_buffer(void)
285{ 285{
286#ifdef STANDARD_MEMORY_BIOS_CALL 286#ifdef STANDARD_MEMORY_BIOS_CALL
287 if (EXT_MEM_K < 1024) error("Less than 2MB of memory"); 287 if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
288#else 288#else
289 if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory"); 289 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
290#endif 290#endif
291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ 291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
292 free_mem_end_ptr = (long)real_mode; 292 free_mem_end_ptr = (long)real_mode;
@@ -297,13 +297,13 @@ struct moveparams {
297 uch *high_buffer_start; int hcount; 297 uch *high_buffer_start; int hcount;
298}; 298};
299 299
300void setup_output_buffer_if_we_run_high(struct moveparams *mv) 300static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
301{ 301{
302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); 302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
303#ifdef STANDARD_MEMORY_BIOS_CALL 303#ifdef STANDARD_MEMORY_BIOS_CALL
304 if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); 304 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
305#else 305#else
306 if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); 306 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
307#endif 307#endif
308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; 308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX 309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
@@ -319,7 +319,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
319 mv->high_buffer_start = high_buffer_start; 319 mv->high_buffer_start = high_buffer_start;
320} 320}
321 321
322void close_output_buffer_if_we_run_high(struct moveparams *mv) 322static void close_output_buffer_if_we_run_high(struct moveparams *mv)
323{ 323{
324 if (bytes_out > low_buffer_size) { 324 if (bytes_out > low_buffer_size) {
325 mv->lcount = low_buffer_size; 325 mv->lcount = low_buffer_size;
@@ -335,7 +335,7 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
335{ 335{
336 real_mode = rmode; 336 real_mode = rmode;
337 337
338 if (SCREEN_INFO.orig_video_mode == 7) { 338 if (RM_SCREEN_INFO.orig_video_mode == 7) {
339 vidmem = (char *) 0xb0000; 339 vidmem = (char *) 0xb0000;
340 vidport = 0x3b4; 340 vidport = 0x3b4;
341 } else { 341 } else {
@@ -343,8 +343,8 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
343 vidport = 0x3d4; 343 vidport = 0x3d4;
344 } 344 }
345 345
346 lines = SCREEN_INFO.orig_video_lines; 346 lines = RM_SCREEN_INFO.orig_video_lines;
347 cols = SCREEN_INFO.orig_video_cols; 347 cols = RM_SCREEN_INFO.orig_video_cols;
348 348
349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); 349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
350 else setup_output_buffer_if_we_run_high(mv); 350 else setup_output_buffer_if_we_run_high(mv);
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index c44f5e2ec100..eae86691709a 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -149,10 +149,8 @@ int main(int argc, char ** argv)
149 sz = sb.st_size; 149 sz = sb.st_size;
150 fprintf (stderr, "System is %d kB\n", sz/1024); 150 fprintf (stderr, "System is %d kB\n", sz/1024);
151 sys_size = (sz + 15) / 16; 151 sys_size = (sz + 15) / 16;
152 /* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */ 152 if (!is_big_kernel && sys_size > DEF_SYSSIZE)
153 if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE)) 153 die("System is too big. Try using bzImage or modules.");
154 die("System is too big. Try using %smodules.",
155 is_big_kernel ? "" : "bzImage or ");
156 while (sz > 0) { 154 while (sz > 0) {
157 int l, n; 155 int l, n;
158 156
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 69db0c0721d1..e69d403949c8 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.17-rc1-git11 3# Linux kernel version: 2.6.17-git6
4# Sun Apr 16 07:22:36 2006 4# Sat Jun 24 00:52:28 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -42,7 +42,6 @@ CONFIG_IKCONFIG_PROC=y
42# CONFIG_RELAY is not set 42# CONFIG_RELAY is not set
43CONFIG_INITRAMFS_SOURCE="" 43CONFIG_INITRAMFS_SOURCE=""
44CONFIG_UID16=y 44CONFIG_UID16=y
45CONFIG_VM86=y
46CONFIG_CC_OPTIMIZE_FOR_SIZE=y 45CONFIG_CC_OPTIMIZE_FOR_SIZE=y
47# CONFIG_EMBEDDED is not set 46# CONFIG_EMBEDDED is not set
48CONFIG_KALLSYMS=y 47CONFIG_KALLSYMS=y
@@ -57,7 +56,6 @@ CONFIG_FUTEX=y
57CONFIG_EPOLL=y 56CONFIG_EPOLL=y
58CONFIG_SHMEM=y 57CONFIG_SHMEM=y
59CONFIG_SLAB=y 58CONFIG_SLAB=y
60CONFIG_DOUBLEFAULT=y
61# CONFIG_TINY_SHMEM is not set 59# CONFIG_TINY_SHMEM is not set
62CONFIG_BASE_SMALL=0 60CONFIG_BASE_SMALL=0
63# CONFIG_SLOB is not set 61# CONFIG_SLOB is not set
@@ -144,7 +142,8 @@ CONFIG_NR_CPUS=32
144CONFIG_HOTPLUG_CPU=y 142CONFIG_HOTPLUG_CPU=y
145CONFIG_HPET_TIMER=y 143CONFIG_HPET_TIMER=y
146CONFIG_HPET_EMULATE_RTC=y 144CONFIG_HPET_EMULATE_RTC=y
147CONFIG_GART_IOMMU=y 145CONFIG_IOMMU=y
146# CONFIG_CALGARY_IOMMU is not set
148CONFIG_SWIOTLB=y 147CONFIG_SWIOTLB=y
149CONFIG_X86_MCE=y 148CONFIG_X86_MCE=y
150CONFIG_X86_MCE_INTEL=y 149CONFIG_X86_MCE_INTEL=y
@@ -158,6 +157,7 @@ CONFIG_HZ_250=y
158# CONFIG_HZ_1000 is not set 157# CONFIG_HZ_1000 is not set
159CONFIG_HZ=250 158CONFIG_HZ=250
160# CONFIG_REORDER is not set 159# CONFIG_REORDER is not set
160CONFIG_K8_NB=y
161CONFIG_GENERIC_HARDIRQS=y 161CONFIG_GENERIC_HARDIRQS=y
162CONFIG_GENERIC_IRQ_PROBE=y 162CONFIG_GENERIC_IRQ_PROBE=y
163CONFIG_ISA_DMA_API=y 163CONFIG_ISA_DMA_API=y
@@ -293,6 +293,8 @@ CONFIG_IP_PNP_DHCP=y
293# CONFIG_INET_IPCOMP is not set 293# CONFIG_INET_IPCOMP is not set
294# CONFIG_INET_XFRM_TUNNEL is not set 294# CONFIG_INET_XFRM_TUNNEL is not set
295# CONFIG_INET_TUNNEL is not set 295# CONFIG_INET_TUNNEL is not set
296# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
297# CONFIG_INET_XFRM_MODE_TUNNEL is not set
296CONFIG_INET_DIAG=y 298CONFIG_INET_DIAG=y
297CONFIG_INET_TCP_DIAG=y 299CONFIG_INET_TCP_DIAG=y
298# CONFIG_TCP_CONG_ADVANCED is not set 300# CONFIG_TCP_CONG_ADVANCED is not set
@@ -305,7 +307,10 @@ CONFIG_IPV6=y
305# CONFIG_INET6_IPCOMP is not set 307# CONFIG_INET6_IPCOMP is not set
306# CONFIG_INET6_XFRM_TUNNEL is not set 308# CONFIG_INET6_XFRM_TUNNEL is not set
307# CONFIG_INET6_TUNNEL is not set 309# CONFIG_INET6_TUNNEL is not set
310# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
311# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
308# CONFIG_IPV6_TUNNEL is not set 312# CONFIG_IPV6_TUNNEL is not set
313# CONFIG_NETWORK_SECMARK is not set
309# CONFIG_NETFILTER is not set 314# CONFIG_NETFILTER is not set
310 315
311# 316#
@@ -344,6 +349,7 @@ CONFIG_IPV6=y
344# Network testing 349# Network testing
345# 350#
346# CONFIG_NET_PKTGEN is not set 351# CONFIG_NET_PKTGEN is not set
352# CONFIG_NET_TCPPROBE is not set
347# CONFIG_HAMRADIO is not set 353# CONFIG_HAMRADIO is not set
348# CONFIG_IRDA is not set 354# CONFIG_IRDA is not set
349# CONFIG_BT is not set 355# CONFIG_BT is not set
@@ -360,6 +366,7 @@ CONFIG_STANDALONE=y
360CONFIG_PREVENT_FIRMWARE_BUILD=y 366CONFIG_PREVENT_FIRMWARE_BUILD=y
361CONFIG_FW_LOADER=y 367CONFIG_FW_LOADER=y
362# CONFIG_DEBUG_DRIVER is not set 368# CONFIG_DEBUG_DRIVER is not set
369# CONFIG_SYS_HYPERVISOR is not set
363 370
364# 371#
365# Connector - unified userspace <-> kernelspace linker 372# Connector - unified userspace <-> kernelspace linker
@@ -526,6 +533,7 @@ CONFIG_SCSI_ATA_PIIX=y
526# CONFIG_SCSI_SATA_MV is not set 533# CONFIG_SCSI_SATA_MV is not set
527CONFIG_SCSI_SATA_NV=y 534CONFIG_SCSI_SATA_NV=y
528# CONFIG_SCSI_PDC_ADMA is not set 535# CONFIG_SCSI_PDC_ADMA is not set
536# CONFIG_SCSI_HPTIOP is not set
529# CONFIG_SCSI_SATA_QSTOR is not set 537# CONFIG_SCSI_SATA_QSTOR is not set
530# CONFIG_SCSI_SATA_PROMISE is not set 538# CONFIG_SCSI_SATA_PROMISE is not set
531# CONFIG_SCSI_SATA_SX4 is not set 539# CONFIG_SCSI_SATA_SX4 is not set
@@ -591,10 +599,7 @@ CONFIG_IEEE1394=y
591# 599#
592# Device Drivers 600# Device Drivers
593# 601#
594 602# CONFIG_IEEE1394_PCILYNX is not set
595#
596# Texas Instruments PCILynx requires I2C
597#
598CONFIG_IEEE1394_OHCI1394=y 603CONFIG_IEEE1394_OHCI1394=y
599 604
600# 605#
@@ -645,7 +650,16 @@ CONFIG_VORTEX=y
645# 650#
646# Tulip family network device support 651# Tulip family network device support
647# 652#
648# CONFIG_NET_TULIP is not set 653CONFIG_NET_TULIP=y
654# CONFIG_DE2104X is not set
655CONFIG_TULIP=y
656# CONFIG_TULIP_MWI is not set
657# CONFIG_TULIP_MMIO is not set
658# CONFIG_TULIP_NAPI is not set
659# CONFIG_DE4X5 is not set
660# CONFIG_WINBOND_840 is not set
661# CONFIG_DM9102 is not set
662# CONFIG_ULI526X is not set
649# CONFIG_HP100 is not set 663# CONFIG_HP100 is not set
650CONFIG_NET_PCI=y 664CONFIG_NET_PCI=y
651# CONFIG_PCNET32 is not set 665# CONFIG_PCNET32 is not set
@@ -697,6 +711,7 @@ CONFIG_TIGON3=y
697# CONFIG_IXGB is not set 711# CONFIG_IXGB is not set
698CONFIG_S2IO=m 712CONFIG_S2IO=m
699# CONFIG_S2IO_NAPI is not set 713# CONFIG_S2IO_NAPI is not set
714# CONFIG_MYRI10GE is not set
700 715
701# 716#
702# Token Ring devices 717# Token Ring devices
@@ -887,7 +902,56 @@ CONFIG_HPET_MMAP=y
887# 902#
888# I2C support 903# I2C support
889# 904#
890# CONFIG_I2C is not set 905CONFIG_I2C=m
906CONFIG_I2C_CHARDEV=m
907
908#
909# I2C Algorithms
910#
911# CONFIG_I2C_ALGOBIT is not set
912# CONFIG_I2C_ALGOPCF is not set
913# CONFIG_I2C_ALGOPCA is not set
914
915#
916# I2C Hardware Bus support
917#
918# CONFIG_I2C_ALI1535 is not set
919# CONFIG_I2C_ALI1563 is not set
920# CONFIG_I2C_ALI15X3 is not set
921# CONFIG_I2C_AMD756 is not set
922# CONFIG_I2C_AMD8111 is not set
923# CONFIG_I2C_I801 is not set
924# CONFIG_I2C_I810 is not set
925# CONFIG_I2C_PIIX4 is not set
926CONFIG_I2C_ISA=m
927# CONFIG_I2C_NFORCE2 is not set
928# CONFIG_I2C_OCORES is not set
929# CONFIG_I2C_PARPORT_LIGHT is not set
930# CONFIG_I2C_PROSAVAGE is not set
931# CONFIG_I2C_SAVAGE4 is not set
932# CONFIG_I2C_SIS5595 is not set
933# CONFIG_I2C_SIS630 is not set
934# CONFIG_I2C_SIS96X is not set
935# CONFIG_I2C_STUB is not set
936# CONFIG_I2C_VIA is not set
937# CONFIG_I2C_VIAPRO is not set
938# CONFIG_I2C_VOODOO3 is not set
939# CONFIG_I2C_PCA_ISA is not set
940
941#
942# Miscellaneous I2C Chip support
943#
944# CONFIG_SENSORS_DS1337 is not set
945# CONFIG_SENSORS_DS1374 is not set
946# CONFIG_SENSORS_EEPROM is not set
947# CONFIG_SENSORS_PCF8574 is not set
948# CONFIG_SENSORS_PCA9539 is not set
949# CONFIG_SENSORS_PCF8591 is not set
950# CONFIG_SENSORS_MAX6875 is not set
951# CONFIG_I2C_DEBUG_CORE is not set
952# CONFIG_I2C_DEBUG_ALGO is not set
953# CONFIG_I2C_DEBUG_BUS is not set
954# CONFIG_I2C_DEBUG_CHIP is not set
891 955
892# 956#
893# SPI support 957# SPI support
@@ -898,14 +962,51 @@ CONFIG_HPET_MMAP=y
898# 962#
899# Dallas's 1-wire bus 963# Dallas's 1-wire bus
900# 964#
901# CONFIG_W1 is not set
902 965
903# 966#
904# Hardware Monitoring support 967# Hardware Monitoring support
905# 968#
906CONFIG_HWMON=y 969CONFIG_HWMON=y
907# CONFIG_HWMON_VID is not set 970# CONFIG_HWMON_VID is not set
971# CONFIG_SENSORS_ABITUGURU is not set
972# CONFIG_SENSORS_ADM1021 is not set
973# CONFIG_SENSORS_ADM1025 is not set
974# CONFIG_SENSORS_ADM1026 is not set
975# CONFIG_SENSORS_ADM1031 is not set
976# CONFIG_SENSORS_ADM9240 is not set
977# CONFIG_SENSORS_ASB100 is not set
978# CONFIG_SENSORS_ATXP1 is not set
979# CONFIG_SENSORS_DS1621 is not set
908# CONFIG_SENSORS_F71805F is not set 980# CONFIG_SENSORS_F71805F is not set
981# CONFIG_SENSORS_FSCHER is not set
982# CONFIG_SENSORS_FSCPOS is not set
983# CONFIG_SENSORS_GL518SM is not set
984# CONFIG_SENSORS_GL520SM is not set
985# CONFIG_SENSORS_IT87 is not set
986# CONFIG_SENSORS_LM63 is not set
987# CONFIG_SENSORS_LM75 is not set
988# CONFIG_SENSORS_LM77 is not set
989# CONFIG_SENSORS_LM78 is not set
990# CONFIG_SENSORS_LM80 is not set
991# CONFIG_SENSORS_LM83 is not set
992# CONFIG_SENSORS_LM85 is not set
993# CONFIG_SENSORS_LM87 is not set
994# CONFIG_SENSORS_LM90 is not set
995# CONFIG_SENSORS_LM92 is not set
996# CONFIG_SENSORS_MAX1619 is not set
997# CONFIG_SENSORS_PC87360 is not set
998# CONFIG_SENSORS_SIS5595 is not set
999# CONFIG_SENSORS_SMSC47M1 is not set
1000# CONFIG_SENSORS_SMSC47M192 is not set
1001CONFIG_SENSORS_SMSC47B397=m
1002# CONFIG_SENSORS_VIA686A is not set
1003# CONFIG_SENSORS_VT8231 is not set
1004# CONFIG_SENSORS_W83781D is not set
1005# CONFIG_SENSORS_W83791D is not set
1006# CONFIG_SENSORS_W83792D is not set
1007# CONFIG_SENSORS_W83L785TS is not set
1008# CONFIG_SENSORS_W83627HF is not set
1009# CONFIG_SENSORS_W83627EHF is not set
909# CONFIG_SENSORS_HDAPS is not set 1010# CONFIG_SENSORS_HDAPS is not set
910# CONFIG_HWMON_DEBUG_CHIP is not set 1011# CONFIG_HWMON_DEBUG_CHIP is not set
911 1012
@@ -918,6 +1019,7 @@ CONFIG_HWMON=y
918# Multimedia devices 1019# Multimedia devices
919# 1020#
920# CONFIG_VIDEO_DEV is not set 1021# CONFIG_VIDEO_DEV is not set
1022CONFIG_VIDEO_V4L2=y
921 1023
922# 1024#
923# Digital Video Broadcasting Devices 1025# Digital Video Broadcasting Devices
@@ -953,28 +1055,17 @@ CONFIG_SOUND=y
953# Open Sound System 1055# Open Sound System
954# 1056#
955CONFIG_SOUND_PRIME=y 1057CONFIG_SOUND_PRIME=y
956CONFIG_OBSOLETE_OSS_DRIVER=y
957# CONFIG_SOUND_BT878 is not set 1058# CONFIG_SOUND_BT878 is not set
958# CONFIG_SOUND_CMPCI is not set
959# CONFIG_SOUND_EMU10K1 is not set 1059# CONFIG_SOUND_EMU10K1 is not set
960# CONFIG_SOUND_FUSION is not set 1060# CONFIG_SOUND_FUSION is not set
961# CONFIG_SOUND_CS4281 is not set
962# CONFIG_SOUND_ES1370 is not set
963# CONFIG_SOUND_ES1371 is not set 1061# CONFIG_SOUND_ES1371 is not set
964# CONFIG_SOUND_ESSSOLO1 is not set
965# CONFIG_SOUND_MAESTRO is not set
966# CONFIG_SOUND_MAESTRO3 is not set
967CONFIG_SOUND_ICH=y 1062CONFIG_SOUND_ICH=y
968# CONFIG_SOUND_SONICVIBES is not set
969# CONFIG_SOUND_TRIDENT is not set 1063# CONFIG_SOUND_TRIDENT is not set
970# CONFIG_SOUND_MSNDCLAS is not set 1064# CONFIG_SOUND_MSNDCLAS is not set
971# CONFIG_SOUND_MSNDPIN is not set 1065# CONFIG_SOUND_MSNDPIN is not set
972# CONFIG_SOUND_VIA82CXXX is not set 1066# CONFIG_SOUND_VIA82CXXX is not set
973# CONFIG_SOUND_OSS is not set 1067# CONFIG_SOUND_OSS is not set
974# CONFIG_SOUND_ALI5455 is not set 1068# CONFIG_SOUND_TVMIXER is not set
975# CONFIG_SOUND_FORTE is not set
976# CONFIG_SOUND_RME96XX is not set
977# CONFIG_SOUND_AD1980 is not set
978 1069
979# 1070#
980# USB support 1071# USB support
@@ -1000,6 +1091,7 @@ CONFIG_USB_DEVICEFS=y
1000CONFIG_USB_EHCI_HCD=y 1091CONFIG_USB_EHCI_HCD=y
1001# CONFIG_USB_EHCI_SPLIT_ISO is not set 1092# CONFIG_USB_EHCI_SPLIT_ISO is not set
1002# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1093# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1094# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1003# CONFIG_USB_ISP116X_HCD is not set 1095# CONFIG_USB_ISP116X_HCD is not set
1004CONFIG_USB_OHCI_HCD=y 1096CONFIG_USB_OHCI_HCD=y
1005# CONFIG_USB_OHCI_BIG_ENDIAN is not set 1097# CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1089,10 +1181,12 @@ CONFIG_USB_MON=y
1089# CONFIG_USB_LEGOTOWER is not set 1181# CONFIG_USB_LEGOTOWER is not set
1090# CONFIG_USB_LCD is not set 1182# CONFIG_USB_LCD is not set
1091# CONFIG_USB_LED is not set 1183# CONFIG_USB_LED is not set
1184# CONFIG_USB_CY7C63 is not set
1092# CONFIG_USB_CYTHERM is not set 1185# CONFIG_USB_CYTHERM is not set
1093# CONFIG_USB_PHIDGETKIT is not set 1186# CONFIG_USB_PHIDGETKIT is not set
1094# CONFIG_USB_PHIDGETSERVO is not set 1187# CONFIG_USB_PHIDGETSERVO is not set
1095# CONFIG_USB_IDMOUSE is not set 1188# CONFIG_USB_IDMOUSE is not set
1189# CONFIG_USB_APPLEDISPLAY is not set
1096# CONFIG_USB_SISUSBVGA is not set 1190# CONFIG_USB_SISUSBVGA is not set
1097# CONFIG_USB_LD is not set 1191# CONFIG_USB_LD is not set
1098# CONFIG_USB_TEST is not set 1192# CONFIG_USB_TEST is not set
@@ -1141,6 +1235,19 @@ CONFIG_USB_MON=y
1141# CONFIG_RTC_CLASS is not set 1235# CONFIG_RTC_CLASS is not set
1142 1236
1143# 1237#
1238# DMA Engine support
1239#
1240# CONFIG_DMA_ENGINE is not set
1241
1242#
1243# DMA Clients
1244#
1245
1246#
1247# DMA Devices
1248#
1249
1250#
1144# Firmware Drivers 1251# Firmware Drivers
1145# 1252#
1146# CONFIG_EDD is not set 1253# CONFIG_EDD is not set
@@ -1175,6 +1282,7 @@ CONFIG_FS_POSIX_ACL=y
1175# CONFIG_MINIX_FS is not set 1282# CONFIG_MINIX_FS is not set
1176# CONFIG_ROMFS_FS is not set 1283# CONFIG_ROMFS_FS is not set
1177CONFIG_INOTIFY=y 1284CONFIG_INOTIFY=y
1285CONFIG_INOTIFY_USER=y
1178# CONFIG_QUOTA is not set 1286# CONFIG_QUOTA is not set
1179CONFIG_DNOTIFY=y 1287CONFIG_DNOTIFY=y
1180CONFIG_AUTOFS_FS=y 1288CONFIG_AUTOFS_FS=y
@@ -1331,7 +1439,8 @@ CONFIG_DETECT_SOFTLOCKUP=y
1331CONFIG_DEBUG_FS=y 1439CONFIG_DEBUG_FS=y
1332# CONFIG_DEBUG_VM is not set 1440# CONFIG_DEBUG_VM is not set
1333# CONFIG_FRAME_POINTER is not set 1441# CONFIG_FRAME_POINTER is not set
1334# CONFIG_UNWIND_INFO is not set 1442CONFIG_UNWIND_INFO=y
1443CONFIG_STACK_UNWIND=y
1335# CONFIG_FORCED_INLINING is not set 1444# CONFIG_FORCED_INLINING is not set
1336# CONFIG_RCU_TORTURE_TEST is not set 1445# CONFIG_RCU_TORTURE_TEST is not set
1337# CONFIG_DEBUG_RODATA is not set 1446# CONFIG_DEBUG_RODATA is not set
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
index 1c23095f1813..2c8209a3605a 100644
--- a/arch/x86_64/ia32/fpu32.c
+++ b/arch/x86_64/ia32/fpu32.c
@@ -2,7 +2,6 @@
2 * Copyright 2002 Andi Kleen, SuSE Labs. 2 * Copyright 2002 Andi Kleen, SuSE Labs.
3 * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. 3 * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
4 * This is used for ptrace, signals and coredumps in 32bit emulation. 4 * This is used for ptrace, signals and coredumps in 32bit emulation.
5 * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
6 */ 5 */
7 6
8#include <linux/sched.h> 7#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index e0a92439f634..25e5ca22204c 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -6,8 +6,6 @@
6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
8 * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen 8 * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
9 *
10 * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
11 */ 9 */
12 10
13#include <linux/sched.h> 11#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 4ec594ab1a98..c536fa98ea37 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -155,6 +155,7 @@ sysenter_tracesys:
155 .previous 155 .previous
156 jmp sysenter_do_call 156 jmp sysenter_do_call
157 CFI_ENDPROC 157 CFI_ENDPROC
158ENDPROC(ia32_sysenter_target)
158 159
159/* 160/*
160 * 32bit SYSCALL instruction entry. 161 * 32bit SYSCALL instruction entry.
@@ -178,7 +179,7 @@ sysenter_tracesys:
178 */ 179 */
179ENTRY(ia32_cstar_target) 180ENTRY(ia32_cstar_target)
180 CFI_STARTPROC32 simple 181 CFI_STARTPROC32 simple
181 CFI_DEF_CFA rsp,0 182 CFI_DEF_CFA rsp,PDA_STACKOFFSET
182 CFI_REGISTER rip,rcx 183 CFI_REGISTER rip,rcx
183 /*CFI_REGISTER rflags,r11*/ 184 /*CFI_REGISTER rflags,r11*/
184 swapgs 185 swapgs
@@ -249,6 +250,7 @@ cstar_tracesys:
249 .quad 1b,ia32_badarg 250 .quad 1b,ia32_badarg
250 .previous 251 .previous
251 jmp cstar_do_call 252 jmp cstar_do_call
253END(ia32_cstar_target)
252 254
253ia32_badarg: 255ia32_badarg:
254 movq $-EFAULT,%rax 256 movq $-EFAULT,%rax
@@ -314,16 +316,13 @@ ia32_tracesys:
314 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 316 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
315 RESTORE_REST 317 RESTORE_REST
316 jmp ia32_do_syscall 318 jmp ia32_do_syscall
319END(ia32_syscall)
317 320
318ia32_badsys: 321ia32_badsys:
319 movq $0,ORIG_RAX-ARGOFFSET(%rsp) 322 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
320 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 323 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
321 jmp int_ret_from_sys_call 324 jmp int_ret_from_sys_call
322 325
323ni_syscall:
324 movq %rax,%rdi
325 jmp sys32_ni_syscall
326
327quiet_ni_syscall: 326quiet_ni_syscall:
328 movq $-ENOSYS,%rax 327 movq $-ENOSYS,%rax
329 ret 328 ret
@@ -370,10 +369,10 @@ ENTRY(ia32_ptregs_common)
370 RESTORE_REST 369 RESTORE_REST
371 jmp ia32_sysret /* misbalances the return cache */ 370 jmp ia32_sysret /* misbalances the return cache */
372 CFI_ENDPROC 371 CFI_ENDPROC
372END(ia32_ptregs_common)
373 373
374 .section .rodata,"a" 374 .section .rodata,"a"
375 .align 8 375 .align 8
376 .globl ia32_sys_call_table
377ia32_sys_call_table: 376ia32_sys_call_table:
378 .quad sys_restart_syscall 377 .quad sys_restart_syscall
379 .quad sys_exit 378 .quad sys_exit
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 23a4515a73b4..a590b7a0d92d 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -7,8 +7,6 @@
7 * 7 *
8 * This allows to access 64bit processes too; but there is no way to see the extended 8 * This allows to access 64bit processes too; but there is no way to see the extended
9 * register contents. 9 * register contents.
10 *
11 * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
12 */ 10 */
13 11
14#include <linux/kernel.h> 12#include <linux/kernel.h>
@@ -27,6 +25,7 @@
27#include <asm/debugreg.h> 25#include <asm/debugreg.h>
28#include <asm/i387.h> 26#include <asm/i387.h>
29#include <asm/fpu32.h> 27#include <asm/fpu32.h>
28#include <asm/ia32.h>
30 29
31/* 30/*
32 * Determines which flags the user has access to [1 = access, 0 = no access]. 31 * Determines which flags the user has access to [1 = access, 0 = no access].
@@ -199,6 +198,24 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
199 198
200#undef R32 199#undef R32
201 200
201static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
202{
203 int ret;
204 compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data);
205 siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t));
206 if (request == PTRACE_SETSIGINFO) {
207 ret = copy_siginfo_from_user32(si, si32);
208 if (ret)
209 return ret;
210 }
211 ret = sys_ptrace(request, pid, addr, (unsigned long)si);
212 if (ret)
213 return ret;
214 if (request == PTRACE_GETSIGINFO)
215 ret = copy_siginfo_to_user32(si32, si);
216 return ret;
217}
218
202asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) 219asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
203{ 220{
204 struct task_struct *child; 221 struct task_struct *child;
@@ -208,9 +225,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
208 __u32 val; 225 __u32 val;
209 226
210 switch (request) { 227 switch (request) {
211 default: 228 case PTRACE_TRACEME:
229 case PTRACE_ATTACH:
230 case PTRACE_KILL:
231 case PTRACE_CONT:
232 case PTRACE_SINGLESTEP:
233 case PTRACE_DETACH:
234 case PTRACE_SYSCALL:
235 case PTRACE_SETOPTIONS:
212 return sys_ptrace(request, pid, addr, data); 236 return sys_ptrace(request, pid, addr, data);
213 237
238 default:
239 return -EINVAL;
240
214 case PTRACE_PEEKTEXT: 241 case PTRACE_PEEKTEXT:
215 case PTRACE_PEEKDATA: 242 case PTRACE_PEEKDATA:
216 case PTRACE_POKEDATA: 243 case PTRACE_POKEDATA:
@@ -225,10 +252,11 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
225 case PTRACE_GETFPXREGS: 252 case PTRACE_GETFPXREGS:
226 case PTRACE_GETEVENTMSG: 253 case PTRACE_GETEVENTMSG:
227 break; 254 break;
228 }
229 255
230 if (request == PTRACE_TRACEME) 256 case PTRACE_SETSIGINFO:
231 return ptrace_traceme(); 257 case PTRACE_GETSIGINFO:
258 return ptrace32_siginfo(request, pid, addr, data);
259 }
232 260
233 child = ptrace_get_task_struct(pid); 261 child = ptrace_get_task_struct(pid);
234 if (IS_ERR(child)) 262 if (IS_ERR(child))
@@ -349,8 +377,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
349 break; 377 break;
350 378
351 default: 379 default:
352 ret = -EINVAL; 380 BUG();
353 break;
354 } 381 }
355 382
356 out: 383 out:
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index f182b20858e2..dc88154c412b 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -508,19 +508,6 @@ sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
508 return compat_sys_wait4(pid, stat_addr, options, NULL); 508 return compat_sys_wait4(pid, stat_addr, options, NULL);
509} 509}
510 510
511int sys32_ni_syscall(int call)
512{
513 struct task_struct *me = current;
514 static char lastcomm[sizeof(me->comm)];
515
516 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
517 printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
518 call, me->comm);
519 strncpy(lastcomm, me->comm, sizeof(lastcomm));
520 }
521 return -ENOSYS;
522}
523
524/* 32-bit timeval and related flotsam. */ 511/* 32-bit timeval and related flotsam. */
525 512
526asmlinkage long 513asmlinkage long
@@ -916,7 +903,7 @@ long sys32_vm86_warning(void)
916 struct task_struct *me = current; 903 struct task_struct *me = current;
917 static char lastcomm[sizeof(me->comm)]; 904 static char lastcomm[sizeof(me->comm)];
918 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { 905 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
919 printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", 906 compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
920 me->comm); 907 me->comm);
921 strncpy(lastcomm, me->comm, sizeof(lastcomm)); 908 strncpy(lastcomm, me->comm, sizeof(lastcomm));
922 } 909 }
@@ -929,13 +916,3 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
929 return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); 916 return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
930} 917}
931 918
932static int __init ia32_init (void)
933{
934 printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
935 return 0;
936}
937
938__initcall(ia32_init);
939
940extern unsigned long ia32_sys_call_table[];
941EXPORT_SYMBOL(ia32_sys_call_table);
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 059c88313f4e..aeb9c560be88 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM) += suspend.o
28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o 28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
29obj-$(CONFIG_CPU_FREQ) += cpufreq/ 29obj-$(CONFIG_CPU_FREQ) += cpufreq/
30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
31obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o 31obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
32obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
32obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 33obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 35obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35obj-$(CONFIG_X86_VSMP) += vsmp.o 36obj-$(CONFIG_X86_VSMP) += vsmp.o
37obj-$(CONFIG_K8_NB) += k8.o
36 38
37obj-$(CONFIG_MODULES) += module.o 39obj-$(CONFIG_MODULES) += module.o
38 40
@@ -49,3 +51,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
49quirks-y += ../../i386/kernel/quirks.o 51quirks-y += ../../i386/kernel/quirks.o
50i8237-y += ../../i386/kernel/i8237.o 52i8237-y += ../../i386/kernel/i8237.o
51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 53msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
54alternative-y += ../../i386/kernel/alternative.o
55
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a195ef06ec55 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -8,7 +8,6 @@
8 * because only the bootmem allocator can allocate 32+MB. 8 * because only the bootmem allocator can allocate 32+MB.
9 * 9 *
10 * Copyright 2002 Andi Kleen, SuSE Labs. 10 * Copyright 2002 Andi Kleen, SuSE Labs.
11 * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
12 */ 11 */
13#include <linux/config.h> 12#include <linux/config.h>
14#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -24,6 +23,7 @@
24#include <asm/proto.h> 23#include <asm/proto.h>
25#include <asm/pci-direct.h> 24#include <asm/pci-direct.h>
26#include <asm/dma.h> 25#include <asm/dma.h>
26#include <asm/k8.h>
27 27
28int iommu_aperture; 28int iommu_aperture;
29int iommu_aperture_disabled __initdata = 0; 29int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
37/* This code runs before the PCI subsystem is initialized, so just 37/* This code runs before the PCI subsystem is initialized, so just
38 access the northbridge directly. */ 38 access the northbridge directly. */
39 39
40#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
41
42static u32 __init allocate_aperture(void) 40static u32 __init allocate_aperture(void)
43{ 41{
44 pg_data_t *nd0 = NODE_DATA(0); 42 pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
68 return (u32)__pa(p); 66 return (u32)__pa(p);
69} 67}
70 68
71static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 69static int __init aperture_valid(u64 aper_base, u32 aper_size)
72{ 70{
73 if (!aper_base) 71 if (!aper_base)
74 return 0; 72 return 0;
75 if (aper_size < 64*1024*1024) { 73 if (aper_size < 64*1024*1024) {
76 printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 74 printk("Aperture too small (%d MB)\n", aper_size>>20);
77 return 0; 75 return 0;
78 } 76 }
79 if (aper_base + aper_size >= 0xffffffff) { 77 if (aper_base + aper_size >= 0xffffffff) {
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 78 printk("Aperture beyond 4GB. Ignoring.\n");
81 return 0; 79 return 0;
82 } 80 }
83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 81 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 82 printk("Aperture pointing to e820 RAM. Ignoring.\n");
85 return 0; 83 return 0;
86 } 84 }
87 return 1; 85 return 1;
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
140 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 138 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
141 aper, 32 << *order, apsizereg); 139 aper, 32 << *order, apsizereg);
142 140
143 if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) 141 if (!aperture_valid(aper, (32*1024*1024) << *order))
144 return 0; 142 return 0;
145 return (u32)aper; 143 return (u32)aper;
146} 144}
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
208 206
209 fix = 0; 207 fix = 0;
210 for (num = 24; num < 32; num++) { 208 for (num = 24; num < 32; num++) {
211 char name[30]; 209 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
212 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 210 continue;
213 continue;
214 211
212 iommu_detected = 1;
215 iommu_aperture = 1; 213 iommu_aperture = 1;
216 214
217 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 215 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
222 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 220 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
223 aper_base, aper_size>>20); 221 aper_base, aper_size>>20);
224 222
225 sprintf(name, "northbridge cpu %d", num-24); 223 if (!aperture_valid(aper_base, aper_size)) {
226
227 if (!aperture_valid(name, aper_base, aper_size)) {
228 fix = 1; 224 fix = 1;
229 break; 225 break;
230 } 226 }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
273 269
274 /* Fix up the north bridges */ 270 /* Fix up the north bridges */
275 for (num = 24; num < 32; num++) { 271 for (num = 24; num < 32; num++) {
276 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 272 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
277 continue; 273 continue;
278 274
279 /* Don't enable translation yet. That is done later. 275 /* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 29ef99001e05..b2ead91df218 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -100,7 +100,7 @@ void clear_local_APIC(void)
100 maxlvt = get_maxlvt(); 100 maxlvt = get_maxlvt();
101 101
102 /* 102 /*
103 * Masking an LVT entry on a P6 can trigger a local APIC error 103 * Masking an LVT entry can trigger a local APIC error
104 * if the vector is zero. Mask LVTERR first to prevent this. 104 * if the vector is zero. Mask LVTERR first to prevent this.
105 */ 105 */
106 if (maxlvt >= 3) { 106 if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
851 unsigned long v; 851 unsigned long v;
852 852
853 v = apic_read(APIC_LVTT); 853 v = apic_read(APIC_LVTT);
854 apic_write(APIC_LVTT, v | APIC_LVT_MASKED); 854 /*
855 * When an illegal vector value (0-15) is written to an LVT
856 * entry and delivery mode is Fixed, the APIC may signal an
857 * illegal vector error, with out regard to whether the mask
858 * bit is set or whether an interrupt is actually seen on input.
859 *
860 * Boot sequence might call this function when the LVTT has
861 * '0' vector value. So make sure vector field is set to
862 * valid value.
863 */
864 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
865 apic_write(APIC_LVTT, v);
855 } 866 }
856} 867}
857 868
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
909 return -EINVAL; 920 return -EINVAL;
910} 921}
911 922
912#ifdef CONFIG_X86_MCE_AMD 923void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
913void setup_threshold_lvt(unsigned long lvt_off) 924 unsigned char msg_type, unsigned char mask)
914{ 925{
915 unsigned int v = 0; 926 unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
916 unsigned long reg = (lvt_off << 4) + 0x500; 927 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
917 v |= THRESHOLD_APIC_VECTOR;
918 apic_write(reg, v); 928 apic_write(reg, v);
919} 929}
920#endif /* CONFIG_X86_MCE_AMD */
921 930
922#undef APIC_DIVISOR 931#undef APIC_DIVISOR
923 932
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
983} 992}
984 993
985/* 994/*
986 * oem_force_hpet_timer -- force HPET mode for some boxes. 995 * apic_is_clustered_box() -- Check if we can expect good TSC
987 * 996 *
988 * Thus far, the major user of this is IBM's Summit2 series: 997 * Thus far, the major user of this is IBM's Summit2 series:
989 * 998 *
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
991 * multi-chassis. Use available data to take a good guess. 1000 * multi-chassis. Use available data to take a good guess.
992 * If in doubt, go HPET. 1001 * If in doubt, go HPET.
993 */ 1002 */
994__cpuinit int oem_force_hpet_timer(void) 1003__cpuinit int apic_is_clustered_box(void)
995{ 1004{
996 int i, clusters, zeros; 1005 int i, clusters, zeros;
997 unsigned id; 1006 unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
1022 } 1031 }
1023 1032
1024 /* 1033 /*
1025 * If clusters > 2, then should be multi-chassis. Return 1 for HPET. 1034 * If clusters > 2, then should be multi-chassis.
1026 * Else return 0 to use TSC.
1027 * May have to revisit this when multi-core + hyperthreaded CPUs come 1035 * May have to revisit this when multi-core + hyperthreaded CPUs come
1028 * out, but AFAIK this will work even for them. 1036 * out, but AFAIK this will work even for them.
1029 */ 1037 */
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b729e39..8ca04912b1cc 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
111 atomic_dec(&waiting_for_crash_ipi); 111 atomic_dec(&waiting_for_crash_ipi);
112 /* Assume hlt works */ 112 /* Assume hlt works */
113 for(;;) 113 for(;;)
114 asm("hlt"); 114 halt();
115 115
116 return 1; 116 return 1;
117} 117}
118 118
119static void smp_send_nmi_allbutself(void) 119static void smp_send_nmi_allbutself(void)
120{ 120{
121 send_IPI_allbutself(APIC_DM_NMI); 121 send_IPI_allbutself(NMI_VECTOR);
122} 122}
123 123
124/* 124/*
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 1ef6028f721e..9e94d834624b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -1,7 +1,6 @@
1/* 1/*
2 * Handle the memory map. 2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over. 3 * The functions here do the job until bootmem takes over.
4 * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
5 * 4 *
6 * Getting sanitize_e820_map() in sync with i386 version by applying change: 5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
7 * - Provisions for empty E820 memory regions (reported by certain BIOSes). 6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
621} 620}
622 621
623unsigned long pci_mem_start = 0xaeedbabe; 622unsigned long pci_mem_start = 0xaeedbabe;
623EXPORT_SYMBOL(pci_mem_start);
624 624
625/* 625/*
626 * Search for the biggest gap in the low 32 bits of the e820 626 * Search for the biggest gap in the low 32 bits of the e820
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 586b34c00c48..7290e72b9a34 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -154,6 +154,7 @@ rff_trace:
154 GET_THREAD_INFO(%rcx) 154 GET_THREAD_INFO(%rcx)
155 jmp rff_action 155 jmp rff_action
156 CFI_ENDPROC 156 CFI_ENDPROC
157END(ret_from_fork)
157 158
158/* 159/*
159 * System call entry. Upto 6 arguments in registers are supported. 160 * System call entry. Upto 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
188 189
189ENTRY(system_call) 190ENTRY(system_call)
190 CFI_STARTPROC simple 191 CFI_STARTPROC simple
191 CFI_DEF_CFA rsp,0 192 CFI_DEF_CFA rsp,PDA_STACKOFFSET
192 CFI_REGISTER rip,rcx 193 CFI_REGISTER rip,rcx
193 /*CFI_REGISTER rflags,r11*/ 194 /*CFI_REGISTER rflags,r11*/
194 swapgs 195 swapgs
@@ -285,6 +286,7 @@ tracesys:
285 /* Use IRET because user could have changed frame */ 286 /* Use IRET because user could have changed frame */
286 jmp int_ret_from_sys_call 287 jmp int_ret_from_sys_call
287 CFI_ENDPROC 288 CFI_ENDPROC
289END(system_call)
288 290
289/* 291/*
290 * Syscall return path ending with IRET. 292 * Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
364 cli 366 cli
365 jmp int_with_check 367 jmp int_with_check
366 CFI_ENDPROC 368 CFI_ENDPROC
369END(int_ret_from_sys_call)
367 370
368/* 371/*
369 * Certain special system calls that need to save a complete full stack frame. 372 * Certain special system calls that need to save a complete full stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
375 leaq \func(%rip),%rax 378 leaq \func(%rip),%rax
376 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 379 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
377 jmp ptregscall_common 380 jmp ptregscall_common
381END(\label)
378 .endm 382 .endm
379 383
380 CFI_STARTPROC 384 CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
404 CFI_REL_OFFSET rip, 0 408 CFI_REL_OFFSET rip, 0
405 ret 409 ret
406 CFI_ENDPROC 410 CFI_ENDPROC
411END(ptregscall_common)
407 412
408ENTRY(stub_execve) 413ENTRY(stub_execve)
409 CFI_STARTPROC 414 CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
418 RESTORE_REST 423 RESTORE_REST
419 jmp int_ret_from_sys_call 424 jmp int_ret_from_sys_call
420 CFI_ENDPROC 425 CFI_ENDPROC
426END(stub_execve)
421 427
422/* 428/*
423 * sigreturn is special because it needs to restore all registers on return. 429 * sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
435 RESTORE_REST 441 RESTORE_REST
436 jmp int_ret_from_sys_call 442 jmp int_ret_from_sys_call
437 CFI_ENDPROC 443 CFI_ENDPROC
444END(stub_rt_sigreturn)
438 445
439/* 446/*
440 * initial frame state for interrupts and exceptions 447 * initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
466/* 0(%rsp): interrupt number */ 473/* 0(%rsp): interrupt number */
467 .macro interrupt func 474 .macro interrupt func
468 cld 475 cld
469#ifdef CONFIG_DEBUG_INFO
470 SAVE_ALL
471 movq %rsp,%rdi
472 /*
473 * Setup a stack frame pointer. This allows gdb to trace
474 * back to the original stack.
475 */
476 movq %rsp,%rbp
477 CFI_DEF_CFA_REGISTER rbp
478#else
479 SAVE_ARGS 476 SAVE_ARGS
480 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 477 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
481#endif 478 pushq %rbp
479 CFI_ADJUST_CFA_OFFSET 8
480 CFI_REL_OFFSET rbp, 0
481 movq %rsp,%rbp
482 CFI_DEF_CFA_REGISTER rbp
482 testl $3,CS(%rdi) 483 testl $3,CS(%rdi)
483 je 1f 484 je 1f
484 swapgs 485 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 4861: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 movq %gs:pda_irqstackptr,%rax 487 cmoveq %gs:pda_irqstackptr,%rsp
487 cmoveq %rax,%rsp /*todo This needs CFI annotation! */
488 pushq %rdi # save old stack
489#ifndef CONFIG_DEBUG_INFO
490 CFI_ADJUST_CFA_OFFSET 8
491#endif
492 call \func 488 call \func
493 .endm 489 .endm
494 490
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
497 interrupt do_IRQ 493 interrupt do_IRQ
498 /* 0(%rsp): oldrsp-ARGOFFSET */ 494 /* 0(%rsp): oldrsp-ARGOFFSET */
499ret_from_intr: 495ret_from_intr:
500 popq %rdi
501#ifndef CONFIG_DEBUG_INFO
502 CFI_ADJUST_CFA_OFFSET -8
503#endif
504 cli 496 cli
505 decl %gs:pda_irqcount 497 decl %gs:pda_irqcount
506#ifdef CONFIG_DEBUG_INFO 498 leaveq
507 movq RBP(%rdi),%rbp
508 CFI_DEF_CFA_REGISTER rsp 499 CFI_DEF_CFA_REGISTER rsp
509#endif 500 CFI_ADJUST_CFA_OFFSET -8
510 leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
511exit_intr: 501exit_intr:
512 GET_THREAD_INFO(%rcx) 502 GET_THREAD_INFO(%rcx)
513 testl $3,CS-ARGOFFSET(%rsp) 503 testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
589 call preempt_schedule_irq 579 call preempt_schedule_irq
590 jmp exit_intr 580 jmp exit_intr
591#endif 581#endif
582
592 CFI_ENDPROC 583 CFI_ENDPROC
584END(common_interrupt)
593 585
594/* 586/*
595 * APIC interrupts. 587 * APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
605 597
606ENTRY(thermal_interrupt) 598ENTRY(thermal_interrupt)
607 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 599 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
600END(thermal_interrupt)
608 601
609ENTRY(threshold_interrupt) 602ENTRY(threshold_interrupt)
610 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 603 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
604END(threshold_interrupt)
611 605
612#ifdef CONFIG_SMP 606#ifdef CONFIG_SMP
613ENTRY(reschedule_interrupt) 607ENTRY(reschedule_interrupt)
614 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 608 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
609END(reschedule_interrupt)
615 610
616 .macro INVALIDATE_ENTRY num 611 .macro INVALIDATE_ENTRY num
617ENTRY(invalidate_interrupt\num) 612ENTRY(invalidate_interrupt\num)
618 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 613 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
614END(invalidate_interrupt\num)
619 .endm 615 .endm
620 616
621 INVALIDATE_ENTRY 0 617 INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
629 625
630ENTRY(call_function_interrupt) 626ENTRY(call_function_interrupt)
631 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 627 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
628END(call_function_interrupt)
632#endif 629#endif
633 630
634#ifdef CONFIG_X86_LOCAL_APIC 631#ifdef CONFIG_X86_LOCAL_APIC
635ENTRY(apic_timer_interrupt) 632ENTRY(apic_timer_interrupt)
636 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 633 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
634END(apic_timer_interrupt)
637 635
638ENTRY(error_interrupt) 636ENTRY(error_interrupt)
639 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 637 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
638END(error_interrupt)
640 639
641ENTRY(spurious_interrupt) 640ENTRY(spurious_interrupt)
642 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 641 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
642END(spurious_interrupt)
643#endif 643#endif
644 644
645/* 645/*
@@ -777,6 +777,7 @@ error_kernelspace:
777 cmpq $gs_change,RIP(%rsp) 777 cmpq $gs_change,RIP(%rsp)
778 je error_swapgs 778 je error_swapgs
779 jmp error_sti 779 jmp error_sti
780END(error_entry)
780 781
781 /* Reload gs selector with exception handling */ 782 /* Reload gs selector with exception handling */
782 /* edi: new selector */ 783 /* edi: new selector */
@@ -794,6 +795,7 @@ gs_change:
794 CFI_ADJUST_CFA_OFFSET -8 795 CFI_ADJUST_CFA_OFFSET -8
795 ret 796 ret
796 CFI_ENDPROC 797 CFI_ENDPROC
798ENDPROC(load_gs_index)
797 799
798 .section __ex_table,"a" 800 .section __ex_table,"a"
799 .align 8 801 .align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
847 UNFAKE_STACK_FRAME 849 UNFAKE_STACK_FRAME
848 ret 850 ret
849 CFI_ENDPROC 851 CFI_ENDPROC
850 852ENDPROC(kernel_thread)
851 853
852child_rip: 854child_rip:
853 /* 855 /*
@@ -860,6 +862,7 @@ child_rip:
860 # exit 862 # exit
861 xorl %edi, %edi 863 xorl %edi, %edi
862 call do_exit 864 call do_exit
865ENDPROC(child_rip)
863 866
864/* 867/*
865 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 868 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
889 UNFAKE_STACK_FRAME 892 UNFAKE_STACK_FRAME
890 ret 893 ret
891 CFI_ENDPROC 894 CFI_ENDPROC
895ENDPROC(execve)
892 896
893KPROBE_ENTRY(page_fault) 897KPROBE_ENTRY(page_fault)
894 errorentry do_page_fault 898 errorentry do_page_fault
899END(page_fault)
895 .previous .text 900 .previous .text
896 901
897ENTRY(coprocessor_error) 902ENTRY(coprocessor_error)
898 zeroentry do_coprocessor_error 903 zeroentry do_coprocessor_error
904END(coprocessor_error)
899 905
900ENTRY(simd_coprocessor_error) 906ENTRY(simd_coprocessor_error)
901 zeroentry do_simd_coprocessor_error 907 zeroentry do_simd_coprocessor_error
908END(simd_coprocessor_error)
902 909
903ENTRY(device_not_available) 910ENTRY(device_not_available)
904 zeroentry math_state_restore 911 zeroentry math_state_restore
912END(device_not_available)
905 913
906 /* runs on exception stack */ 914 /* runs on exception stack */
907KPROBE_ENTRY(debug) 915KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
911 paranoidentry do_debug, DEBUG_STACK 919 paranoidentry do_debug, DEBUG_STACK
912 jmp paranoid_exit 920 jmp paranoid_exit
913 CFI_ENDPROC 921 CFI_ENDPROC
922END(debug)
914 .previous .text 923 .previous .text
915 924
916 /* runs on exception stack */ 925 /* runs on exception stack */
@@ -961,6 +970,7 @@ paranoid_schedule:
961 cli 970 cli
962 jmp paranoid_userspace 971 jmp paranoid_userspace
963 CFI_ENDPROC 972 CFI_ENDPROC
973END(nmi)
964 .previous .text 974 .previous .text
965 975
966KPROBE_ENTRY(int3) 976KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
970 paranoidentry do_int3, DEBUG_STACK 980 paranoidentry do_int3, DEBUG_STACK
971 jmp paranoid_exit 981 jmp paranoid_exit
972 CFI_ENDPROC 982 CFI_ENDPROC
983END(int3)
973 .previous .text 984 .previous .text
974 985
975ENTRY(overflow) 986ENTRY(overflow)
976 zeroentry do_overflow 987 zeroentry do_overflow
988END(overflow)
977 989
978ENTRY(bounds) 990ENTRY(bounds)
979 zeroentry do_bounds 991 zeroentry do_bounds
992END(bounds)
980 993
981ENTRY(invalid_op) 994ENTRY(invalid_op)
982 zeroentry do_invalid_op 995 zeroentry do_invalid_op
996END(invalid_op)
983 997
984ENTRY(coprocessor_segment_overrun) 998ENTRY(coprocessor_segment_overrun)
985 zeroentry do_coprocessor_segment_overrun 999 zeroentry do_coprocessor_segment_overrun
1000END(coprocessor_segment_overrun)
986 1001
987ENTRY(reserved) 1002ENTRY(reserved)
988 zeroentry do_reserved 1003 zeroentry do_reserved
1004END(reserved)
989 1005
990 /* runs on exception stack */ 1006 /* runs on exception stack */
991ENTRY(double_fault) 1007ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
993 paranoidentry do_double_fault 1009 paranoidentry do_double_fault
994 jmp paranoid_exit 1010 jmp paranoid_exit
995 CFI_ENDPROC 1011 CFI_ENDPROC
1012END(double_fault)
996 1013
997ENTRY(invalid_TSS) 1014ENTRY(invalid_TSS)
998 errorentry do_invalid_TSS 1015 errorentry do_invalid_TSS
1016END(invalid_TSS)
999 1017
1000ENTRY(segment_not_present) 1018ENTRY(segment_not_present)
1001 errorentry do_segment_not_present 1019 errorentry do_segment_not_present
1020END(segment_not_present)
1002 1021
1003 /* runs on exception stack */ 1022 /* runs on exception stack */
1004ENTRY(stack_segment) 1023ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
1006 paranoidentry do_stack_segment 1025 paranoidentry do_stack_segment
1007 jmp paranoid_exit 1026 jmp paranoid_exit
1008 CFI_ENDPROC 1027 CFI_ENDPROC
1028END(stack_segment)
1009 1029
1010KPROBE_ENTRY(general_protection) 1030KPROBE_ENTRY(general_protection)
1011 errorentry do_general_protection 1031 errorentry do_general_protection
1032END(general_protection)
1012 .previous .text 1033 .previous .text
1013 1034
1014ENTRY(alignment_check) 1035ENTRY(alignment_check)
1015 errorentry do_alignment_check 1036 errorentry do_alignment_check
1037END(alignment_check)
1016 1038
1017ENTRY(divide_error) 1039ENTRY(divide_error)
1018 zeroentry do_divide_error 1040 zeroentry do_divide_error
1041END(divide_error)
1019 1042
1020ENTRY(spurious_interrupt_bug) 1043ENTRY(spurious_interrupt_bug)
1021 zeroentry do_spurious_interrupt_bug 1044 zeroentry do_spurious_interrupt_bug
1045END(spurious_interrupt_bug)
1022 1046
1023#ifdef CONFIG_X86_MCE 1047#ifdef CONFIG_X86_MCE
1024 /* runs on exception stack */ 1048 /* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
1029 paranoidentry do_machine_check 1053 paranoidentry do_machine_check
1030 jmp paranoid_exit 1054 jmp paranoid_exit
1031 CFI_ENDPROC 1055 CFI_ENDPROC
1056END(machine_check)
1032#endif 1057#endif
1033 1058
1034ENTRY(call_softirq) 1059ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
1046 decl %gs:pda_irqcount 1071 decl %gs:pda_irqcount
1047 ret 1072 ret
1048 CFI_ENDPROC 1073 CFI_ENDPROC
1074ENDPROC(call_softirq)
1075
1076#ifdef CONFIG_STACK_UNWIND
/*
 * arch_unwind_init_running
 *
 * %rdi points at a register frame (addressed with the same R15..SS offsets
 * used for pt_regs elsewhere in this file).  The frame is filled in so that
 * it describes the state the caller will have right after this routine
 * returns, then control is transferred to the callback.
 *
 * The value that arrived in %rsi is the jump target; the value that arrived
 * in %rdx is handed on in %rsi (the two are exchanged before the jump).
 * %rdi is passed through unchanged.  This is a tail jump, so the callback
 * returns directly to our caller.
 */
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC

	/* Callee-saved registers: record the caller's live values. */
	movq	%rbx, RBX(%rdi)
	movq	%rbp, RBP(%rdi)
	movq	%r12, R12(%rdi)
	movq	%r13, R13(%rdi)
	movq	%r14, R14(%rdi)
	movq	%r15, R15(%rdi)

	/* Every other general register slot, ORIG_RAX and EFLAGS are zeroed. */
	xorl	%eax, %eax
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, R8(%rdi)
	movq	%rax, R9(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, R11(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	movq	%rax, EFLAGS(%rdi)

	/* RIP = our return address, RSP = stack pointer once it is popped. */
	movq	(%rsp), %rcx
	movq	%rcx, RIP(%rdi)
	leaq	8(%rsp), %rcx
	movq	%rcx, RSP(%rdi)

	/* Kernel code/data selectors. */
	movq	$__KERNEL_CS, CS(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)

	/* Swap 2nd/3rd argument and jump to what was passed in %rsi. */
	xchgq	%rsi, %rdx
	jmpq	*%rdx
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
1107#endif
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 1a2ab825be98..21c7066e236a 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
78 78
79static void flat_send_IPI_allbutself(int vector) 79static void flat_send_IPI_allbutself(int vector)
80{ 80{
81#ifndef CONFIG_HOTPLUG_CPU 81#ifdef CONFIG_HOTPLUG_CPU
82 if (((num_online_cpus()) - 1) >= 1) 82 int hotplug = 1;
83 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
84#else 83#else
85 cpumask_t allbutme = cpu_online_map; 84 int hotplug = 0;
85#endif
86 if (hotplug || vector == NMI_VECTOR) {
87 cpumask_t allbutme = cpu_online_map;
86 88
87 cpu_clear(smp_processor_id(), allbutme); 89 cpu_clear(smp_processor_id(), allbutme);
88 90
89 if (!cpus_empty(allbutme)) 91 if (!cpus_empty(allbutme))
90 flat_send_IPI_mask(allbutme, vector); 92 flat_send_IPI_mask(allbutme, vector);
91#endif 93 } else if (num_online_cpus() > 1) {
94 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
95 }
92} 96}
93 97
94static void flat_send_IPI_all(int vector) 98static void flat_send_IPI_all(int vector)
95{ 99{
96 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 100 if (vector == NMI_VECTOR)
101 flat_send_IPI_mask(cpu_online_map, vector);
102 else
103 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
97} 104}
98 105
99static int flat_apic_id_registered(void) 106static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
108 115
109static unsigned int phys_pkg_id(int index_msb) 116static unsigned int phys_pkg_id(int index_msb)
110{ 117{
111 u32 ebx; 118 return hard_smp_processor_id() >> index_msb;
112
113 ebx = cpuid_ebx(1);
114 return ((ebx >> 24) & 0xFF) >> index_msb;
115} 119}
116 120
117struct genapic apic_flat = { 121struct genapic apic_flat = {
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cea20a66c150..e6a71c9556d9 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -2,8 +2,6 @@
2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code 2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 *
6 * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
7 */ 5 */
8 6
9#include <linux/init.h> 7#include <linux/init.h>
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 5ecd34ab8c2b..9b1a4e147321 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -44,11 +44,11 @@
44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
45 BI(x,c) BI(x,d) BI(x,e) BI(x,f) 45 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
46 46
47#define BUILD_14_IRQS(x) \ 47#define BUILD_15_IRQS(x) \
48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ 48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ 49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
51 BI(x,c) BI(x,d) 51 BI(x,c) BI(x,d) BI(x,e)
52 52
53/* 53/*
54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) 73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
74 74
75#ifdef CONFIG_PCI_MSI 75#ifdef CONFIG_PCI_MSI
76 BUILD_14_IRQS(0xe) 76 BUILD_15_IRQS(0xe)
77#endif 77#endif
78 78
79#endif 79#endif
80 80
81#undef BUILD_16_IRQS 81#undef BUILD_16_IRQS
82#undef BUILD_14_IRQS 82#undef BUILD_15_IRQS
83#undef BI 83#undef BI
84 84
85 85
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
94 94
95#define IRQLIST_14(x) \ 95#define IRQLIST_15(x) \
96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ 96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ 97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
99 IRQ(x,c), IRQ(x,d) 99 IRQ(x,c), IRQ(x,d), IRQ(x,e)
100 100
101void (*interrupt[NR_IRQS])(void) = { 101void (*interrupt[NR_IRQS])(void) = {
102 IRQLIST_16(0x0), 102 IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
108 IRQLIST_16(0xc), IRQLIST_16(0xd) 108 IRQLIST_16(0xc), IRQLIST_16(0xd)
109 109
110#ifdef CONFIG_PCI_MSI 110#ifdef CONFIG_PCI_MSI
111 , IRQLIST_14(0xe) 111 , IRQLIST_15(0xe)
112#endif 112#endif
113 113
114#endif 114#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9cc7031b7151..c768d8a036d0 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -41,6 +41,7 @@
41#include <asm/mach_apic.h> 41#include <asm/mach_apic.h>
42#include <asm/acpi.h> 42#include <asm/acpi.h>
43#include <asm/dma.h> 43#include <asm/dma.h>
44#include <asm/nmi.h>
44 45
45#define __apicdebuginit __init 46#define __apicdebuginit __init
46 47
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
56static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
57 58
58static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock);
59 61
60/* 62/*
61 * # of IRQ routing registers 63 * # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
317 vendor &= 0xffff; 319 vendor &= 0xffff;
318 switch (vendor) { 320 switch (vendor) {
319 case PCI_VENDOR_ID_VIA: 321 case PCI_VENDOR_ID_VIA:
320#ifdef CONFIG_GART_IOMMU 322#ifdef CONFIG_IOMMU
321 if ((end_pfn > MAX_DMA32_PFN || 323 if ((end_pfn > MAX_DMA32_PFN ||
322 force_iommu) && 324 force_iommu) &&
323 !iommu_aperture_allowed) { 325 !iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
834int assign_irq_vector(int irq) 836int assign_irq_vector(int irq)
835{ 837{
836 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 838 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
839 unsigned long flags;
840 int vector;
837 841
838 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); 842 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
839 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 843
844 spin_lock_irqsave(&vector_lock, flags);
845
846 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
847 spin_unlock_irqrestore(&vector_lock, flags);
840 return IO_APIC_VECTOR(irq); 848 return IO_APIC_VECTOR(irq);
849 }
841next: 850next:
842 current_vector += 8; 851 current_vector += 8;
843 if (current_vector == IA32_SYSCALL_VECTOR) 852 if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
849 current_vector = FIRST_DEVICE_VECTOR + offset; 858 current_vector = FIRST_DEVICE_VECTOR + offset;
850 } 859 }
851 860
852 vector_irq[current_vector] = irq; 861 vector = current_vector;
862 vector_irq[vector] = irq;
853 if (irq != AUTO_ASSIGN) 863 if (irq != AUTO_ASSIGN)
854 IO_APIC_VECTOR(irq) = current_vector; 864 IO_APIC_VECTOR(irq) = vector;
865
866 spin_unlock_irqrestore(&vector_lock, flags);
855 867
856 return current_vector; 868 return vector;
857} 869}
858 870
859extern void (*interrupt[NR_IRQS])(void); 871extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
866 878
867static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 879static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
868{ 880{
869 if (use_pci_vector() && !platform_legacy_irq(irq)) { 881 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
870 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 882
871 trigger == IOAPIC_LEVEL) 883 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
872 irq_desc[vector].handler = &ioapic_level_type; 884 trigger == IOAPIC_LEVEL)
873 else 885 irq_desc[idx].handler = &ioapic_level_type;
874 irq_desc[vector].handler = &ioapic_edge_type; 886 else
875 set_intr_gate(vector, interrupt[vector]); 887 irq_desc[idx].handler = &ioapic_edge_type;
876 } else { 888 set_intr_gate(vector, interrupt[idx]);
877 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
878 trigger == IOAPIC_LEVEL)
879 irq_desc[irq].handler = &ioapic_level_type;
880 else
881 irq_desc[irq].handler = &ioapic_edge_type;
882 set_intr_gate(vector, interrupt[irq]);
883 }
884} 889}
885 890
886static void __init setup_IO_APIC_irqs(void) 891static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index d8bd0b345b1e..59518d4d4358 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
26#endif 26#endif
27#endif 27#endif
28 28
29#ifdef CONFIG_DEBUG_STACKOVERFLOW
30/*
31 * Probabilistic stack overflow check:
32 *
33 * Only check the stack in process context, because everything else
34 * runs on the big interrupt stacks. Checking reliably is too expensive,
35 * so we just check from interrupts.
36 */
37static inline void stack_overflow_check(struct pt_regs *regs)
38{
39 u64 curbase = (u64) current->thread_info;
40 static unsigned long warned = -60*HZ;
41
42 if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
43 regs->rsp < curbase + sizeof(struct thread_info) + 128 &&
44 time_after(jiffies, warned + 60*HZ)) {
45 printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
46 current->comm, curbase, regs->rsp);
47 show_stack(NULL,NULL);
48 warned = jiffies;
49 }
50}
51#endif
52
29/* 53/*
30 * Generic, controller-independent functions: 54 * Generic, controller-independent functions:
31 */ 55 */
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
39 if (i == 0) { 63 if (i == 0) {
40 seq_printf(p, " "); 64 seq_printf(p, " ");
41 for_each_online_cpu(j) 65 for_each_online_cpu(j)
42 seq_printf(p, "CPU%d ",j); 66 seq_printf(p, "CPU%-8d",j);
43 seq_putc(p, '\n'); 67 seq_putc(p, '\n');
44 } 68 }
45 69
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
96 120
97 exit_idle(); 121 exit_idle();
98 irq_enter(); 122 irq_enter();
99 123#ifdef CONFIG_DEBUG_STACKOVERFLOW
124 stack_overflow_check(regs);
125#endif
100 __do_IRQ(irq, regs); 126 __do_IRQ(irq, regs);
101 irq_exit(); 127 irq_exit();
102 128
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
1/*
2 * Shared support code for AMD K8 northbridges and derivatives.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */
5#include <linux/gfp.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <asm/k8.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15
16static u32 *flush_words;
17
18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
21 {}
22};
23EXPORT_SYMBOL(k8_nb_ids);
24
25struct pci_dev **k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges);
27
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
29{
30 do {
31 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
32 if (!dev)
33 break;
34 } while (!pci_match_id(&k8_nb_ids[0], dev));
35 return dev;
36}
37
38int cache_k8_northbridges(void)
39{
40 int i;
41 struct pci_dev *dev;
42 if (num_k8_northbridges)
43 return 0;
44
45 num_k8_northbridges = 0;
46 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++;
49
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
51 GFP_KERNEL);
52 if (!k8_northbridges)
53 return -ENOMEM;
54
55 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
56 if (!flush_words) {
57 kfree(k8_northbridges);
58 return -ENOMEM;
59 }
60
61 dev = NULL;
62 i = 0;
63 while ((dev = next_k8_northbridge(dev)) != NULL) {
64 k8_northbridges[i++] = dev;
65 pci_read_config_dword(dev, 0x9c, &flush_words[i]);
66 }
67 k8_northbridges[i] = NULL;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(cache_k8_northbridges);
71
72/* Ignores subdevice/subvendor but as far as I can figure out
73 they're useless anyways */
74int __init early_is_k8_nb(u32 device)
75{
76 struct pci_device_id *id;
77 u32 vendor = device & 0xffff;
78 device >>= 16;
79 for (id = k8_nb_ids; id->vendor; id++)
80 if (vendor == id->vendor && device == id->device)
81 return 1;
82 return 0;
83}
84
85void k8_flush_garts(void)
86{
87 int flushed, i;
88 unsigned long flags;
89 static DEFINE_SPINLOCK(gart_lock);
90
91 /* Avoid races between AGP and IOMMU. In theory it's not needed
92 but I'm not sure if the hardware won't lose flush requests
93 when another is pending. This whole thing is so expensive anyways
94 that it doesn't matter to serialize more. -AK */
95 spin_lock_irqsave(&gart_lock, flags);
96 flushed = 0;
97 for (i = 0; i < num_k8_northbridges; i++) {
98 pci_write_config_dword(k8_northbridges[i], 0x9c,
99 flush_words[i]|1);
100 flushed++;
101 }
102 for (i = 0; i < num_k8_northbridges; i++) {
103 u32 w;
104 /* Make sure the hardware actually executed the flush*/
105 for (;;) {
106 pci_read_config_dword(k8_northbridges[i],
107 0x9c, &w);
108 if (!(w & 1))
109 break;
110 cpu_relax();
111 }
112 }
113 spin_unlock_irqrestore(&gart_lock, flags);
114 if (!flushed)
115 printk("nothing to flush?\n");
116}
117EXPORT_SYMBOL_GPL(k8_flush_garts);
118
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index c69fc43cee7b..acd5816b1a6f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
562 set_kset_name("machinecheck"), 562 set_kset_name("machinecheck"),
563}; 563};
564 564
565static DEFINE_PER_CPU(struct sys_device, device_mce); 565DEFINE_PER_CPU(struct sys_device, device_mce);
566 566
567/* Why are there no generic functions for this? */ 567/* Why are there no generic functions for this? */
568#define ACCESSOR(name, var, start) \ 568#define ACCESSOR(name, var, start) \
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d13b241ad094..335200aa2737 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * (c) 2005 Advanced Micro Devices, Inc. 2 * (c) 2005, 2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
8 * 8 *
9 * Support : jacob.shin@amd.com 9 * Support : jacob.shin@amd.com
10 * 10 *
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. 11 * April 2006
12 * MC4_MISC0 exists per physical processor. 12 * - added support for AMD Family 0x10 processors
13 * 13 *
14 * All MC4_MISCi registers are shared between multi-cores
14 */ 15 */
15 16
16#include <linux/cpu.h> 17#include <linux/cpu.h>
@@ -29,32 +30,45 @@
29#include <asm/percpu.h> 30#include <asm/percpu.h>
30#include <asm/idle.h> 31#include <asm/idle.h>
31 32
32#define PFX "mce_threshold: " 33#define PFX "mce_threshold: "
33#define VERSION "version 1.00.9" 34#define VERSION "version 1.1.1"
34#define NR_BANKS 5 35#define NR_BANKS 6
35#define THRESHOLD_MAX 0xFFF 36#define NR_BLOCKS 9
36#define INT_TYPE_APIC 0x00020000 37#define THRESHOLD_MAX 0xFFF
37#define MASK_VALID_HI 0x80000000 38#define INT_TYPE_APIC 0x00020000
38#define MASK_LVTOFF_HI 0x00F00000 39#define MASK_VALID_HI 0x80000000
39#define MASK_COUNT_EN_HI 0x00080000 40#define MASK_LVTOFF_HI 0x00F00000
40#define MASK_INT_TYPE_HI 0x00060000 41#define MASK_COUNT_EN_HI 0x00080000
41#define MASK_OVERFLOW_HI 0x00010000 42#define MASK_INT_TYPE_HI 0x00060000
43#define MASK_OVERFLOW_HI 0x00010000
42#define MASK_ERR_COUNT_HI 0x00000FFF 44#define MASK_ERR_COUNT_HI 0x00000FFF
43#define MASK_OVERFLOW 0x0001000000000000L 45#define MASK_BLKPTR_LO 0xFF000000
46#define MCG_XBLK_ADDR 0xC0000400
44 47
45struct threshold_bank { 48struct threshold_block {
49 unsigned int block;
50 unsigned int bank;
46 unsigned int cpu; 51 unsigned int cpu;
47 u8 bank; 52 u32 address;
48 u8 interrupt_enable; 53 u16 interrupt_enable;
49 u16 threshold_limit; 54 u16 threshold_limit;
50 struct kobject kobj; 55 struct kobject kobj;
56 struct list_head miscj;
51}; 57};
52 58
53static struct threshold_bank threshold_defaults = { 59/* defaults used early on boot */
60static struct threshold_block threshold_defaults = {
54 .interrupt_enable = 0, 61 .interrupt_enable = 0,
55 .threshold_limit = THRESHOLD_MAX, 62 .threshold_limit = THRESHOLD_MAX,
56}; 63};
57 64
65struct threshold_bank {
66 struct kobject kobj;
67 struct threshold_block *blocks;
68 cpumask_t cpus;
69};
70static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
71
58#ifdef CONFIG_SMP 72#ifdef CONFIG_SMP
59static unsigned char shared_bank[NR_BANKS] = { 73static unsigned char shared_bank[NR_BANKS] = {
60 0, 0, 0, 0, 1 74 0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
68 */ 82 */
69 83
70/* must be called with correct cpu affinity */ 84/* must be called with correct cpu affinity */
71static void threshold_restart_bank(struct threshold_bank *b, 85static void threshold_restart_bank(struct threshold_block *b,
72 int reset, u16 old_limit) 86 int reset, u16 old_limit)
73{ 87{
74 u32 mci_misc_hi, mci_misc_lo; 88 u32 mci_misc_hi, mci_misc_lo;
75 89
76 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 90 rdmsr(b->address, mci_misc_lo, mci_misc_hi);
77 91
78 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 92 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
79 reset = 1; /* limit cannot be lower than err count */ 93 reset = 1; /* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
94 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 108 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
95 109
96 mci_misc_hi |= MASK_COUNT_EN_HI; 110 mci_misc_hi |= MASK_COUNT_EN_HI;
97 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 111 wrmsr(b->address, mci_misc_lo, mci_misc_hi);
98} 112}
99 113
114/* cpu init entry point, called from mce.c with preempt off */
100void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 115void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
101{ 116{
102 int bank; 117 unsigned int bank, block;
103 u32 mci_misc_lo, mci_misc_hi;
104 unsigned int cpu = smp_processor_id(); 118 unsigned int cpu = smp_processor_id();
119 u32 low = 0, high = 0, address = 0;
105 120
106 for (bank = 0; bank < NR_BANKS; ++bank) { 121 for (bank = 0; bank < NR_BANKS; ++bank) {
107 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); 122 for (block = 0; block < NR_BLOCKS; ++block) {
123 if (block == 0)
124 address = MSR_IA32_MC0_MISC + bank * 4;
125 else if (block == 1)
126 address = MCG_XBLK_ADDR
127 + ((low & MASK_BLKPTR_LO) >> 21);
128 else
129 ++address;
130
131 if (rdmsr_safe(address, &low, &high))
132 continue;
108 133
109 /* !valid, !counter present, bios locked */ 134 if (!(high & MASK_VALID_HI)) {
110 if (!(mci_misc_hi & MASK_VALID_HI) || 135 if (block)
111 !(mci_misc_hi & MASK_VALID_HI >> 1) || 136 continue;
112 (mci_misc_hi & MASK_VALID_HI >> 2)) 137 else
113 continue; 138 break;
139 }
114 140
115 per_cpu(bank_map, cpu) |= (1 << bank); 141 if (!(high & MASK_VALID_HI >> 1) ||
142 (high & MASK_VALID_HI >> 2))
143 continue;
116 144
145 if (!block)
146 per_cpu(bank_map, cpu) |= (1 << bank);
117#ifdef CONFIG_SMP 147#ifdef CONFIG_SMP
118 if (shared_bank[bank] && cpu_core_id[cpu]) 148 if (shared_bank[bank] && c->cpu_core_id)
119 continue; 149 break;
120#endif 150#endif
151 high &= ~MASK_LVTOFF_HI;
152 high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
153 wrmsr(address, low, high);
121 154
122 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); 155 setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
123 threshold_defaults.cpu = cpu; 156 THRESHOLD_APIC_VECTOR,
124 threshold_defaults.bank = bank; 157 K8_APIC_EXT_INT_MSG_FIX, 0);
125 threshold_restart_bank(&threshold_defaults, 0, 0); 158
159 threshold_defaults.address = address;
160 threshold_restart_bank(&threshold_defaults, 0, 0);
161 }
126 } 162 }
127} 163}
128 164
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
137 */ 173 */
138asmlinkage void mce_threshold_interrupt(void) 174asmlinkage void mce_threshold_interrupt(void)
139{ 175{
140 int bank; 176 unsigned int bank, block;
141 struct mce m; 177 struct mce m;
178 u32 low = 0, high = 0, address = 0;
142 179
143 ack_APIC_irq(); 180 ack_APIC_irq();
144 exit_idle(); 181 exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
150 187
151 /* assume first bank caused it */ 188 /* assume first bank caused it */
152 for (bank = 0; bank < NR_BANKS; ++bank) { 189 for (bank = 0; bank < NR_BANKS; ++bank) {
153 m.bank = MCE_THRESHOLD_BASE + bank; 190 for (block = 0; block < NR_BLOCKS; ++block) {
154 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); 191 if (block == 0)
192 address = MSR_IA32_MC0_MISC + bank * 4;
193 else if (block == 1)
194 address = MCG_XBLK_ADDR
195 + ((low & MASK_BLKPTR_LO) >> 21);
196 else
197 ++address;
198
199 if (rdmsr_safe(address, &low, &high))
200 continue;
155 201
156 if (m.misc & MASK_OVERFLOW) { 202 if (!(high & MASK_VALID_HI)) {
157 mce_log(&m); 203 if (block)
158 goto out; 204 continue;
205 else
206 break;
207 }
208
209 if (!(high & MASK_VALID_HI >> 1) ||
210 (high & MASK_VALID_HI >> 2))
211 continue;
212
213 if (high & MASK_OVERFLOW_HI) {
214 rdmsrl(address, m.misc);
215 rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
216 m.status);
217 m.bank = K8_MCE_THRESHOLD_BASE
218 + bank * NR_BLOCKS
219 + block;
220 mce_log(&m);
221 goto out;
222 }
159 } 223 }
160 } 224 }
161 out: 225out:
162 irq_exit(); 226 irq_exit();
163} 227}
164 228
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
166 * Sysfs Interface 230 * Sysfs Interface
167 */ 231 */
168 232
169static struct sysdev_class threshold_sysclass = {
170 set_kset_name("threshold"),
171};
172
173static DEFINE_PER_CPU(struct sys_device, device_threshold);
174
175struct threshold_attr { 233struct threshold_attr {
176 struct attribute attr; 234 struct attribute attr;
177 ssize_t(*show) (struct threshold_bank *, char *); 235 ssize_t(*show) (struct threshold_block *, char *);
178 ssize_t(*store) (struct threshold_bank *, const char *, size_t count); 236 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
179}; 237};
180 238
181static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
182
183static cpumask_t affinity_set(unsigned int cpu) 239static cpumask_t affinity_set(unsigned int cpu)
184{ 240{
185 cpumask_t oldmask = current->cpus_allowed; 241 cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
194 set_cpus_allowed(current, oldmask); 250 set_cpus_allowed(current, oldmask);
195} 251}
196 252
197#define SHOW_FIELDS(name) \ 253#define SHOW_FIELDS(name) \
198 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ 254static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
199 { \ 255{ \
200 return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 256 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
201 } 257}
202SHOW_FIELDS(interrupt_enable) 258SHOW_FIELDS(interrupt_enable)
203SHOW_FIELDS(threshold_limit) 259SHOW_FIELDS(threshold_limit)
204 260
205static ssize_t store_interrupt_enable(struct threshold_bank *b, 261static ssize_t store_interrupt_enable(struct threshold_block *b,
206 const char *buf, size_t count) 262 const char *buf, size_t count)
207{ 263{
208 char *end; 264 char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
219 return end - buf; 275 return end - buf;
220} 276}
221 277
222static ssize_t store_threshold_limit(struct threshold_bank *b, 278static ssize_t store_threshold_limit(struct threshold_block *b,
223 const char *buf, size_t count) 279 const char *buf, size_t count)
224{ 280{
225 char *end; 281 char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
242 return end - buf; 298 return end - buf;
243} 299}
244 300
245static ssize_t show_error_count(struct threshold_bank *b, char *buf) 301static ssize_t show_error_count(struct threshold_block *b, char *buf)
246{ 302{
247 u32 high, low; 303 u32 high, low;
248 cpumask_t oldmask; 304 cpumask_t oldmask;
249 oldmask = affinity_set(b->cpu); 305 oldmask = affinity_set(b->cpu);
250 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ 306 rdmsr(b->address, low, high);
251 affinity_restore(oldmask); 307 affinity_restore(oldmask);
252 return sprintf(buf, "%x\n", 308 return sprintf(buf, "%x\n",
253 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 309 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
254} 310}
255 311
256static ssize_t store_error_count(struct threshold_bank *b, 312static ssize_t store_error_count(struct threshold_block *b,
257 const char *buf, size_t count) 313 const char *buf, size_t count)
258{ 314{
259 cpumask_t oldmask; 315 cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
269 .store = _store, \ 325 .store = _store, \
270}; 326};
271 327
272#define ATTR_FIELDS(name) \ 328#define RW_ATTR(name) \
273 static struct threshold_attr name = \ 329static struct threshold_attr name = \
274 THRESHOLD_ATTR(name, 0644, show_## name, store_## name) 330 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
275 331
276ATTR_FIELDS(interrupt_enable); 332RW_ATTR(interrupt_enable);
277ATTR_FIELDS(threshold_limit); 333RW_ATTR(threshold_limit);
278ATTR_FIELDS(error_count); 334RW_ATTR(error_count);
279 335
280static struct attribute *default_attrs[] = { 336static struct attribute *default_attrs[] = {
281 &interrupt_enable.attr, 337 &interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
284 NULL 340 NULL
285}; 341};
286 342
287#define to_bank(k) container_of(k,struct threshold_bank,kobj) 343#define to_block(k) container_of(k, struct threshold_block, kobj)
288#define to_attr(a) container_of(a,struct threshold_attr,attr) 344#define to_attr(a) container_of(a, struct threshold_attr, attr)
289 345
290static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 346static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
291{ 347{
292 struct threshold_bank *b = to_bank(kobj); 348 struct threshold_block *b = to_block(kobj);
293 struct threshold_attr *a = to_attr(attr); 349 struct threshold_attr *a = to_attr(attr);
294 ssize_t ret; 350 ssize_t ret;
295 ret = a->show ? a->show(b, buf) : -EIO; 351 ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
299static ssize_t store(struct kobject *kobj, struct attribute *attr, 355static ssize_t store(struct kobject *kobj, struct attribute *attr,
300 const char *buf, size_t count) 356 const char *buf, size_t count)
301{ 357{
302 struct threshold_bank *b = to_bank(kobj); 358 struct threshold_block *b = to_block(kobj);
303 struct threshold_attr *a = to_attr(attr); 359 struct threshold_attr *a = to_attr(attr);
304 ssize_t ret; 360 ssize_t ret;
305 ret = a->store ? a->store(b, buf, count) : -EIO; 361 ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
316 .default_attrs = default_attrs, 372 .default_attrs = default_attrs,
317}; 373};
318 374
375static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
376 unsigned int bank,
377 unsigned int block,
378 u32 address)
379{
380 int err;
381 u32 low, high;
382 struct threshold_block *b = NULL;
383
384 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
385 return 0;
386
387 if (rdmsr_safe(address, &low, &high))
388 goto recurse;
389
390 if (!(high & MASK_VALID_HI)) {
391 if (block)
392 goto recurse;
393 else
394 return 0;
395 }
396
397 if (!(high & MASK_VALID_HI >> 1) ||
398 (high & MASK_VALID_HI >> 2))
399 goto recurse;
400
401 b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
402 if (!b)
403 return -ENOMEM;
404 memset(b, 0, sizeof(struct threshold_block));
405
406 b->block = block;
407 b->bank = bank;
408 b->cpu = cpu;
409 b->address = address;
410 b->interrupt_enable = 0;
411 b->threshold_limit = THRESHOLD_MAX;
412
413 INIT_LIST_HEAD(&b->miscj);
414
415 if (per_cpu(threshold_banks, cpu)[bank]->blocks)
416 list_add(&b->miscj,
417 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
418 else
419 per_cpu(threshold_banks, cpu)[bank]->blocks = b;
420
421 kobject_set_name(&b->kobj, "misc%i", block);
422 b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
423 b->kobj.ktype = &threshold_ktype;
424 err = kobject_register(&b->kobj);
425 if (err)
426 goto out_free;
427recurse:
428 if (!block) {
429 address = (low & MASK_BLKPTR_LO) >> 21;
430 if (!address)
431 return 0;
432 address += MCG_XBLK_ADDR;
433 } else
434 ++address;
435
436 err = allocate_threshold_blocks(cpu, bank, ++block, address);
437 if (err)
438 goto out_free;
439
440 return err;
441
442out_free:
443 if (b) {
444 kobject_unregister(&b->kobj);
445 kfree(b);
446 }
447 return err;
448}
449
319/* symlinks sibling shared banks to first core. first core owns dir/files. */ 450/* symlinks sibling shared banks to first core. first core owns dir/files. */
320static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) 451static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
321{ 452{
322 int err = 0; 453 int i, err = 0;
323 struct threshold_bank *b = NULL; 454 struct threshold_bank *b = NULL;
455 cpumask_t oldmask = CPU_MASK_NONE;
456 char name[32];
457
458 sprintf(name, "threshold_bank%i", bank);
324 459
325#ifdef CONFIG_SMP 460#ifdef CONFIG_SMP
326 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ 461 if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
327 char name[16]; 462 i = first_cpu(cpu_core_map[cpu]);
328 unsigned lcpu = first_cpu(cpu_core_map[cpu]); 463
329 if (cpu_core_id[lcpu]) 464 /* first core not up yet */
330 goto out; /* first core not up yet */ 465 if (cpu_data[i].cpu_core_id)
466 goto out;
467
468 /* already linked */
469 if (per_cpu(threshold_banks, cpu)[bank])
470 goto out;
471
472 b = per_cpu(threshold_banks, i)[bank];
331 473
332 b = per_cpu(threshold_banks, lcpu)[bank];
333 if (!b) 474 if (!b)
334 goto out; 475 goto out;
335 sprintf(name, "bank%i", bank); 476
336 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, 477 err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
337 &b->kobj, name); 478 &b->kobj, name);
338 if (err) 479 if (err)
339 goto out; 480 goto out;
481
482 b->cpus = cpu_core_map[cpu];
340 per_cpu(threshold_banks, cpu)[bank] = b; 483 per_cpu(threshold_banks, cpu)[bank] = b;
341 goto out; 484 goto out;
342 } 485 }
343#endif 486#endif
344 487
345 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); 488 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
346 if (!b) { 489 if (!b) {
347 err = -ENOMEM; 490 err = -ENOMEM;
348 goto out; 491 goto out;
349 } 492 }
350 memset(b, 0, sizeof(struct threshold_bank)); 493 memset(b, 0, sizeof(struct threshold_bank));
351 494
352 b->cpu = cpu; 495 kobject_set_name(&b->kobj, "threshold_bank%i", bank);
353 b->bank = bank; 496 b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
354 b->interrupt_enable = 0; 497#ifndef CONFIG_SMP
355 b->threshold_limit = THRESHOLD_MAX; 498 b->cpus = CPU_MASK_ALL;
356 kobject_set_name(&b->kobj, "bank%i", bank); 499#else
357 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; 500 b->cpus = cpu_core_map[cpu];
358 b->kobj.ktype = &threshold_ktype; 501#endif
359
360 err = kobject_register(&b->kobj); 502 err = kobject_register(&b->kobj);
361 if (err) { 503 if (err)
362 kfree(b); 504 goto out_free;
363 goto out; 505
364 }
365 per_cpu(threshold_banks, cpu)[bank] = b; 506 per_cpu(threshold_banks, cpu)[bank] = b;
366 out: 507
508 oldmask = affinity_set(cpu);
509 err = allocate_threshold_blocks(cpu, bank, 0,
510 MSR_IA32_MC0_MISC + bank * 4);
511 affinity_restore(oldmask);
512
513 if (err)
514 goto out_free;
515
516 for_each_cpu_mask(i, b->cpus) {
517 if (i == cpu)
518 continue;
519
520 err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
521 &b->kobj, name);
522 if (err)
523 goto out;
524
525 per_cpu(threshold_banks, i)[bank] = b;
526 }
527
528 goto out;
529
530out_free:
531 per_cpu(threshold_banks, cpu)[bank] = NULL;
532 kfree(b);
533out:
367 return err; 534 return err;
368} 535}
369 536
370/* create dir/files for all valid threshold banks */ 537/* create dir/files for all valid threshold banks */
371static __cpuinit int threshold_create_device(unsigned int cpu) 538static __cpuinit int threshold_create_device(unsigned int cpu)
372{ 539{
373 int bank; 540 unsigned int bank;
374 int err = 0; 541 int err = 0;
375 542
376 per_cpu(device_threshold, cpu).id = cpu;
377 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
378 err = sysdev_register(&per_cpu(device_threshold, cpu));
379 if (err)
380 goto out;
381
382 for (bank = 0; bank < NR_BANKS; ++bank) { 543 for (bank = 0; bank < NR_BANKS; ++bank) {
383 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 544 if (!(per_cpu(bank_map, cpu) & 1 << bank))
384 continue; 545 continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
386 if (err) 547 if (err)
387 goto out; 548 goto out;
388 } 549 }
389 out: 550out:
390 return err; 551 return err;
391} 552}
392 553
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
397 * of shared sysfs dir/files, and rest of the cores will be symlinked to it. 558 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
398 */ 559 */
399 560
400/* cpu hotplug call removes all symlinks before first core dies */ 561static __cpuinit void deallocate_threshold_block(unsigned int cpu,
562 unsigned int bank)
563{
564 struct threshold_block *pos = NULL;
565 struct threshold_block *tmp = NULL;
566 struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
567
568 if (!head)
569 return;
570
571 list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
572 kobject_unregister(&pos->kobj);
573 list_del(&pos->miscj);
574 kfree(pos);
575 }
576
577 kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
578 per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
579}
580
401static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) 581static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
402{ 582{
583 int i = 0;
403 struct threshold_bank *b; 584 struct threshold_bank *b;
404 char name[16]; 585 char name[32];
405 586
406 b = per_cpu(threshold_banks, cpu)[bank]; 587 b = per_cpu(threshold_banks, cpu)[bank];
588
407 if (!b) 589 if (!b)
408 return; 590 return;
409 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { 591
410 sprintf(name, "bank%i", bank); 592 if (!b->blocks)
411 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); 593 goto free_out;
412 per_cpu(threshold_banks, cpu)[bank] = NULL; 594
413 } else { 595 sprintf(name, "threshold_bank%i", bank);
414 kobject_unregister(&b->kobj); 596
415 kfree(per_cpu(threshold_banks, cpu)[bank]); 597 /* sibling symlink */
598 if (shared_bank[bank] && b->blocks->cpu != cpu) {
599 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
600 per_cpu(threshold_banks, i)[bank] = NULL;
601 return;
602 }
603
604 /* remove all sibling symlinks before unregistering */
605 for_each_cpu_mask(i, b->cpus) {
606 if (i == cpu)
607 continue;
608
609 sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
610 per_cpu(threshold_banks, i)[bank] = NULL;
416 } 611 }
612
613 deallocate_threshold_block(cpu, bank);
614
615free_out:
616 kobject_unregister(&b->kobj);
617 kfree(b);
618 per_cpu(threshold_banks, cpu)[bank] = NULL;
417} 619}
418 620
419static __cpuinit void threshold_remove_device(unsigned int cpu) 621static __cpuinit void threshold_remove_device(unsigned int cpu)
420{ 622{
421 int bank; 623 unsigned int bank;
422 624
423 for (bank = 0; bank < NR_BANKS; ++bank) { 625 for (bank = 0; bank < NR_BANKS; ++bank) {
424 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 626 if (!(per_cpu(bank_map, cpu) & 1 << bank))
425 continue; 627 continue;
426 threshold_remove_bank(cpu, bank); 628 threshold_remove_bank(cpu, bank);
427 } 629 }
428 sysdev_unregister(&per_cpu(device_threshold, cpu));
429} 630}
430 631
431/* link all existing siblings when first core comes up */
432static __cpuinit int threshold_create_symlinks(unsigned int cpu)
433{
434 int bank, err = 0;
435 unsigned int lcpu = 0;
436
437 if (cpu_core_id[cpu])
438 return 0;
439 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
440 if (lcpu == cpu)
441 continue;
442 for (bank = 0; bank < NR_BANKS; ++bank) {
443 if (!(per_cpu(bank_map, cpu) & 1 << bank))
444 continue;
445 if (!shared_bank[bank])
446 continue;
447 err = threshold_create_bank(lcpu, bank);
448 }
449 }
450 return err;
451}
452
453/* remove all symlinks before first core dies. */
454static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
455{
456 int bank;
457 unsigned int lcpu = 0;
458 if (cpu_core_id[cpu])
459 return;
460 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
461 if (lcpu == cpu)
462 continue;
463 for (bank = 0; bank < NR_BANKS; ++bank) {
464 if (!(per_cpu(bank_map, cpu) & 1 << bank))
465 continue;
466 if (!shared_bank[bank])
467 continue;
468 threshold_remove_bank(lcpu, bank);
469 }
470 }
471}
472#else /* !CONFIG_HOTPLUG_CPU */ 632#else /* !CONFIG_HOTPLUG_CPU */
473static __cpuinit void threshold_create_symlinks(unsigned int cpu)
474{
475}
476static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
477{
478}
479static void threshold_remove_device(unsigned int cpu) 633static void threshold_remove_device(unsigned int cpu)
480{ 634{
481} 635}
482#endif 636#endif
483 637
484/* get notified when a cpu comes on/off */ 638/* get notified when a cpu comes on/off */
485static int threshold_cpu_callback(struct notifier_block *nfb, 639static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu) 640 unsigned long action, void *hcpu)
487{ 641{
488 /* cpu was unsigned int to begin with */ 642 /* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
494 switch (action) { 648 switch (action) {
495 case CPU_ONLINE: 649 case CPU_ONLINE:
496 threshold_create_device(cpu); 650 threshold_create_device(cpu);
497 threshold_create_symlinks(cpu);
498 break;
499 case CPU_DOWN_PREPARE:
500 threshold_remove_symlinks(cpu);
501 break;
502 case CPU_DOWN_FAILED:
503 threshold_create_symlinks(cpu);
504 break; 651 break;
505 case CPU_DEAD: 652 case CPU_DEAD:
506 threshold_remove_device(cpu); 653 threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
512 return NOTIFY_OK; 659 return NOTIFY_OK;
513} 660}
514 661
515static struct notifier_block threshold_cpu_notifier = { 662static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
516 .notifier_call = threshold_cpu_callback, 663 .notifier_call = threshold_cpu_callback,
517}; 664};
518 665
519static __init int threshold_init_device(void) 666static __init int threshold_init_device(void)
520{ 667{
521 int err; 668 unsigned lcpu = 0;
522 int lcpu = 0;
523
524 err = sysdev_class_register(&threshold_sysclass);
525 if (err)
526 goto out;
527 669
528 /* to hit CPUs online before the notifier is up */ 670 /* to hit CPUs online before the notifier is up */
529 for_each_online_cpu(lcpu) { 671 for_each_online_cpu(lcpu) {
530 err = threshold_create_device(lcpu); 672 int err = threshold_create_device(lcpu);
531 if (err) 673 if (err)
532 goto out; 674 return err;
533 } 675 }
534 register_cpu_notifier(&threshold_cpu_notifier); 676 register_cpu_notifier(&threshold_cpu_notifier);
535 677 return 0;
536 out:
537 return err;
538} 678}
539 679
540device_initcall(threshold_init_device); 680device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index bac195c74bcc..9d0958ff547f 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
145 return -ENOSYS; 145 return -ENOSYS;
146} 146}
147 147
148extern void apply_alternatives(void *start, void *end);
149
150int module_finalize(const Elf_Ehdr *hdr, 148int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 149 const Elf_Shdr *sechdrs,
152 struct module *me) 150 struct module *me)
153{ 151{
154 const Elf_Shdr *s; 152 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
155 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 153 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
156 154
157 /* look for .altinstructions to patch */ 155 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
158 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 156 if (!strcmp(".text", secstrings + s->sh_name))
159 void *seg; 157 text = s;
160 if (strcmp(".altinstructions", secstrings + s->sh_name)) 158 if (!strcmp(".altinstructions", secstrings + s->sh_name))
161 continue; 159 alt = s;
162 seg = (void *)s->sh_addr; 160 if (!strcmp(".smp_locks", secstrings + s->sh_name))
163 apply_alternatives(seg, seg + s->sh_size); 161 locks= s;
164 } 162 }
163
164 if (alt) {
165 /* patch .altinstructions */
166 void *aseg = (void *)alt->sh_addr;
167 apply_alternatives(aseg, aseg + alt->sh_size);
168 }
169 if (locks && text) {
170 void *lseg = (void *)locks->sh_addr;
171 void *tseg = (void *)text->sh_addr;
172 alternatives_smp_module_add(me, me->name,
173 lseg, lseg + locks->sh_size,
174 tseg, tseg + text->sh_size);
175 }
165 return 0; 176 return 0;
166} 177}
167 178
168void module_arch_cleanup(struct module *mod) 179void module_arch_cleanup(struct module *mod)
169{ 180{
181 alternatives_smp_module_del(mod);
170} 182}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e6357fe0ec3..399489c93132 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -15,11 +15,7 @@
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/bootmem.h>
19#include <linux/smp_lock.h>
20#include <linux/interrupt.h> 18#include <linux/interrupt.h>
21#include <linux/mc146818rtc.h>
22#include <linux/kernel_stat.h>
23#include <linux/module.h> 19#include <linux/module.h>
24#include <linux/sysdev.h> 20#include <linux/sysdev.h>
25#include <linux/nmi.h> 21#include <linux/nmi.h>
@@ -27,14 +23,11 @@
27#include <linux/kprobes.h> 23#include <linux/kprobes.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/mtrr.h>
31#include <asm/mpspec.h>
32#include <asm/nmi.h> 26#include <asm/nmi.h>
33#include <asm/msr.h>
34#include <asm/proto.h> 27#include <asm/proto.h>
35#include <asm/kdebug.h> 28#include <asm/kdebug.h>
36#include <asm/local.h>
37#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
38 31
39/* 32/*
40 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 33 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
74#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 67#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
75#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 68#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
76 69
70#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
71#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
72
77#define MSR_P4_MISC_ENABLE 0x1A0 73#define MSR_P4_MISC_ENABLE 0x1A0
78#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 74#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
79#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) 75#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
105 case X86_VENDOR_AMD: 101 case X86_VENDOR_AMD:
106 return boot_cpu_data.x86 == 15; 102 return boot_cpu_data.x86 == 15;
107 case X86_VENDOR_INTEL: 103 case X86_VENDOR_INTEL:
108 return boot_cpu_data.x86 == 15; 104 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
105 return 1;
106 else
107 return (boot_cpu_data.x86 == 15);
109 } 108 }
110 return 0; 109 return 0;
111} 110}
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
211 210
212__setup("nmi_watchdog=", setup_nmi_watchdog); 211__setup("nmi_watchdog=", setup_nmi_watchdog);
213 212
213static void disable_intel_arch_watchdog(void);
214
214static void disable_lapic_nmi_watchdog(void) 215static void disable_lapic_nmi_watchdog(void)
215{ 216{
216 if (nmi_active <= 0) 217 if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
223 if (boot_cpu_data.x86 == 15) { 224 if (boot_cpu_data.x86 == 15) {
224 wrmsr(MSR_P4_IQ_CCCR0, 0, 0); 225 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
225 wrmsr(MSR_P4_CRU_ESCR0, 0, 0); 226 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
227 } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
228 disable_intel_arch_watchdog();
226 } 229 }
227 break; 230 break;
228 } 231 }
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
375 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 378 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
376} 379}
377 380
381static void disable_intel_arch_watchdog(void)
382{
383 unsigned ebx;
384
385 /*
386 * Check whether the Architectural PerfMon supports
387 * Unhalted Core Cycles Event or not.
388 * NOTE: Corresponding bit = 0 in ebx indicates event present.
389 */
390 ebx = cpuid_ebx(10);
391 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
392 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
393}
394
395static int setup_intel_arch_watchdog(void)
396{
397 unsigned int evntsel;
398 unsigned ebx;
399
400 /*
401 * Check whether the Architectural PerfMon supports
402 * Unhalted Core Cycles Event or not.
403 * NOTE: Corresponding bit = 0 in ebx indicates event present.
404 */
405 ebx = cpuid_ebx(10);
406 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
407 return 0;
408
409 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
410
411 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
412 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
413
414 evntsel = ARCH_PERFMON_EVENTSEL_INT
415 | ARCH_PERFMON_EVENTSEL_OS
416 | ARCH_PERFMON_EVENTSEL_USR
417 | ARCH_PERFMON_NMI_EVENT_SEL
418 | ARCH_PERFMON_NMI_EVENT_UMASK;
419
420 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
421 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
422 apic_write(APIC_LVTPC, APIC_DM_NMI);
423 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
424 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
425 return 1;
426}
427
378 428
379static int setup_p4_watchdog(void) 429static int setup_p4_watchdog(void)
380{ 430{
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
428 setup_k7_watchdog(); 478 setup_k7_watchdog();
429 break; 479 break;
430 case X86_VENDOR_INTEL: 480 case X86_VENDOR_INTEL:
431 if (boot_cpu_data.x86 != 15) 481 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
432 return; 482 if (!setup_intel_arch_watchdog())
433 if (!setup_p4_watchdog()) 483 return;
484 } else if (boot_cpu_data.x86 == 15) {
485 if (!setup_p4_watchdog())
486 return;
487 } else {
434 return; 488 return;
489 }
490
435 break; 491 break;
436 492
437 default: 493 default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
516 */ 572 */
517 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 573 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
518 apic_write(APIC_LVTPC, APIC_DM_NMI); 574 apic_write(APIC_LVTPC, APIC_DM_NMI);
519 } 575 } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
576 /*
577 * For Intel based architectural perfmon
578 * - LVTPC is masked on interrupt and must be
579 * unmasked by the LVTPC handler.
580 */
581 apic_write(APIC_LVTPC, APIC_DM_NMI);
582 }
520 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); 583 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
521 } 584 }
522} 585}
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644
index 000000000000..d91cb843f54d
--- /dev/null
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -0,0 +1,1018 @@
1/*
2 * Derived from arch/powerpc/kernel/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/spinlock.h>
29#include <linux/string.h>
30#include <linux/dma-mapping.h>
31#include <linux/init.h>
32#include <linux/bitops.h>
33#include <linux/pci_ids.h>
34#include <linux/pci.h>
35#include <linux/delay.h>
36#include <asm/proto.h>
37#include <asm/calgary.h>
38#include <asm/tce.h>
39#include <asm/pci-direct.h>
40#include <asm/system.h>
41#include <asm/dma.h>
42
43#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
44#define PCI_VENDOR_DEVICE_ID_CALGARY \
45 (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
46
47/* we need these for register space address calculation */
48#define START_ADDRESS 0xfe000000
49#define CHASSIS_BASE 0
50#define ONE_BASED_CHASSIS_NUM 1
51
52/* register offsets inside the host bridge space */
53#define PHB_CSR_OFFSET 0x0110
54#define PHB_PLSSR_OFFSET 0x0120
55#define PHB_CONFIG_RW_OFFSET 0x0160
56#define PHB_IOBASE_BAR_LOW 0x0170
57#define PHB_IOBASE_BAR_HIGH 0x0180
58#define PHB_MEM_1_LOW 0x0190
59#define PHB_MEM_1_HIGH 0x01A0
60#define PHB_IO_ADDR_SIZE 0x01B0
61#define PHB_MEM_1_SIZE 0x01C0
62#define PHB_MEM_ST_OFFSET 0x01D0
63#define PHB_AER_OFFSET 0x0200
64#define PHB_CONFIG_0_HIGH 0x0220
65#define PHB_CONFIG_0_LOW 0x0230
66#define PHB_CONFIG_0_END 0x0240
67#define PHB_MEM_2_LOW 0x02B0
68#define PHB_MEM_2_HIGH 0x02C0
69#define PHB_MEM_2_SIZE_HIGH 0x02D0
70#define PHB_MEM_2_SIZE_LOW 0x02E0
71#define PHB_DOSHOLE_OFFSET 0x08E0
72
73/* PHB_CONFIG_RW */
74#define PHB_TCE_ENABLE 0x20000000
75#define PHB_SLOT_DISABLE 0x1C000000
76#define PHB_DAC_DISABLE 0x01000000
77#define PHB_MEM2_ENABLE 0x00400000
78#define PHB_MCSR_ENABLE 0x00100000
79/* TAR (Table Address Register) */
80#define TAR_SW_BITS 0x0000ffffffff800fUL
81#define TAR_VALID 0x0000000000000008UL
82/* CSR (Channel/DMA Status Register) */
83#define CSR_AGENT_MASK 0xffe0ffff
84
85#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
86#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
87#define PHBS_PER_CALGARY 4
88
/*
 * Register offsets in Calgary's internal register space, one entry per
 * PHB of a single Calgary chip (indexed by busno_to_phbid()).
 */
static const unsigned long tar_offsets[] = {
	[0] = 0x0580,	/* TAR0 */
	[1] = 0x0588,	/* TAR1 */
	[2] = 0x0590,	/* TAR2 */
	[3] = 0x0598,	/* TAR3 */
};

static const unsigned long split_queue_offsets[] = {
	[0] = 0x4870,	/* SPLIT QUEUE 0 */
	[1] = 0x5870,	/* SPLIT QUEUE 1 */
	[2] = 0x6870,	/* SPLIT QUEUE 2 */
	[3] = 0x7870,	/* SPLIT QUEUE 3 */
};

static const unsigned long phb_offsets[] = {
	[0] = 0x8000,	/* PHB0 */
	[1] = 0x9000,	/* PHB1 */
	[2] = 0xA000,	/* PHB2 */
	[3] = 0xB000,	/* PHB3 */
};
110
111void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
112unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
113static int translate_empty_slots __read_mostly = 0;
114static int calgary_detected __read_mostly = 0;
115
116/*
117 * the bitmap of PHBs the user requested that we disable
118 * translation on.
119 */
120static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
121
122static void tce_cache_blast(struct iommu_table *tbl);
123
/*
 * Stress-test hook for the chip's TCE cache: with CONFIG_IOMMU_DEBUG the
 * cache is blasted on every call, otherwise this is a no-op.
 */
static inline void tce_cache_blast_stress(struct iommu_table *tbl)
{
#ifdef CONFIG_IOMMU_DEBUG
	tce_cache_blast(tbl);
#endif /* CONFIG_IOMMU_DEBUG */
}
135
136static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
137{
138 unsigned int npages;
139
140 npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
141 npages >>= PAGE_SHIFT;
142
143 return npages;
144}
145
146static inline int translate_phb(struct pci_dev* dev)
147{
148 int disabled = test_bit(dev->bus->number, translation_disabled);
149 return !disabled;
150}
151
152static void iommu_range_reserve(struct iommu_table *tbl,
153 unsigned long start_addr, unsigned int npages)
154{
155 unsigned long index;
156 unsigned long end;
157
158 index = start_addr >> PAGE_SHIFT;
159
160 /* bail out if we're asked to reserve a region we don't cover */
161 if (index >= tbl->it_size)
162 return;
163
164 end = index + npages;
165 if (end > tbl->it_size) /* don't go off the table */
166 end = tbl->it_size;
167
168 while (index < end) {
169 if (test_bit(index, tbl->it_map))
170 printk(KERN_ERR "Calgary: entry already allocated at "
171 "0x%lx tbl %p dma 0x%lx npages %u\n",
172 index, tbl, start_addr, npages);
173 ++index;
174 }
175 set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
176}
177
178static unsigned long iommu_range_alloc(struct iommu_table *tbl,
179 unsigned int npages)
180{
181 unsigned long offset;
182
183 BUG_ON(npages == 0);
184
185 offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
186 tbl->it_size, npages);
187 if (offset == ~0UL) {
188 tce_cache_blast(tbl);
189 offset = find_next_zero_string(tbl->it_map, 0,
190 tbl->it_size, npages);
191 if (offset == ~0UL) {
192 printk(KERN_WARNING "Calgary: IOMMU full.\n");
193 if (panic_on_overflow)
194 panic("Calgary: fix the allocator.\n");
195 else
196 return bad_dma_address;
197 }
198 }
199
200 set_bit_string(tbl->it_map, offset, npages);
201 tbl->it_hint = offset + npages;
202 BUG_ON(tbl->it_hint > tbl->it_size);
203
204 return offset;
205}
206
207static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
208 unsigned int npages, int direction)
209{
210 unsigned long entry, flags;
211 dma_addr_t ret = bad_dma_address;
212
213 spin_lock_irqsave(&tbl->it_lock, flags);
214
215 entry = iommu_range_alloc(tbl, npages);
216
217 if (unlikely(entry == bad_dma_address))
218 goto error;
219
220 /* set the return dma address */
221 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
222
223 /* put the TCEs in the HW table */
224 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
225 direction);
226
227 spin_unlock_irqrestore(&tbl->it_lock, flags);
228
229 return ret;
230
231error:
232 spin_unlock_irqrestore(&tbl->it_lock, flags);
233 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
234 "iommu %p\n", npages, tbl);
235 return bad_dma_address;
236}
237
238static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
239 unsigned int npages)
240{
241 unsigned long entry;
242 unsigned long i;
243
244 entry = dma_addr >> PAGE_SHIFT;
245
246 BUG_ON(entry + npages > tbl->it_size);
247
248 tce_free(tbl, entry, npages);
249
250 for (i = 0; i < npages; ++i) {
251 if (!test_bit(entry + i, tbl->it_map))
252 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
253 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
254 entry + i, tbl, dma_addr, entry, npages);
255 }
256
257 __clear_bit_string(tbl->it_map, entry, npages);
258
259 tce_cache_blast_stress(tbl);
260}
261
262static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
263 unsigned int npages)
264{
265 unsigned long flags;
266
267 spin_lock_irqsave(&tbl->it_lock, flags);
268
269 __iommu_free(tbl, dma_addr, npages);
270
271 spin_unlock_irqrestore(&tbl->it_lock, flags);
272}
273
274static void __calgary_unmap_sg(struct iommu_table *tbl,
275 struct scatterlist *sglist, int nelems, int direction)
276{
277 while (nelems--) {
278 unsigned int npages;
279 dma_addr_t dma = sglist->dma_address;
280 unsigned int dmalen = sglist->dma_length;
281
282 if (dmalen == 0)
283 break;
284
285 npages = num_dma_pages(dma, dmalen);
286 __iommu_free(tbl, dma, npages);
287 sglist++;
288 }
289}
290
291void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
292 int nelems, int direction)
293{
294 unsigned long flags;
295 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
296
297 if (!translate_phb(to_pci_dev(dev)))
298 return;
299
300 spin_lock_irqsave(&tbl->it_lock, flags);
301
302 __calgary_unmap_sg(tbl, sglist, nelems, direction);
303
304 spin_unlock_irqrestore(&tbl->it_lock, flags);
305}
306
307static int calgary_nontranslate_map_sg(struct device* dev,
308 struct scatterlist *sg, int nelems, int direction)
309{
310 int i;
311
312 for (i = 0; i < nelems; i++ ) {
313 struct scatterlist *s = &sg[i];
314 BUG_ON(!s->page);
315 s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
316 s->dma_length = s->length;
317 }
318 return nelems;
319}
320
321int calgary_map_sg(struct device *dev, struct scatterlist *sg,
322 int nelems, int direction)
323{
324 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
325 unsigned long flags;
326 unsigned long vaddr;
327 unsigned int npages;
328 unsigned long entry;
329 int i;
330
331 if (!translate_phb(to_pci_dev(dev)))
332 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
333
334 spin_lock_irqsave(&tbl->it_lock, flags);
335
336 for (i = 0; i < nelems; i++ ) {
337 struct scatterlist *s = &sg[i];
338 BUG_ON(!s->page);
339
340 vaddr = (unsigned long)page_address(s->page) + s->offset;
341 npages = num_dma_pages(vaddr, s->length);
342
343 entry = iommu_range_alloc(tbl, npages);
344 if (entry == bad_dma_address) {
345 /* makes sure unmap knows to stop */
346 s->dma_length = 0;
347 goto error;
348 }
349
350 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
351
352 /* insert into HW table */
353 tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
354 direction);
355
356 s->dma_length = s->length;
357 }
358
359 spin_unlock_irqrestore(&tbl->it_lock, flags);
360
361 return nelems;
362error:
363 __calgary_unmap_sg(tbl, sg, nelems, direction);
364 for (i = 0; i < nelems; i++) {
365 sg[i].dma_address = bad_dma_address;
366 sg[i].dma_length = 0;
367 }
368 spin_unlock_irqrestore(&tbl->it_lock, flags);
369 return 0;
370}
371
372dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
373 size_t size, int direction)
374{
375 dma_addr_t dma_handle = bad_dma_address;
376 unsigned long uaddr;
377 unsigned int npages;
378 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
379
380 uaddr = (unsigned long)vaddr;
381 npages = num_dma_pages(uaddr, size);
382
383 if (translate_phb(to_pci_dev(dev)))
384 dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
385 else
386 dma_handle = virt_to_bus(vaddr);
387
388 return dma_handle;
389}
390
391void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
392 size_t size, int direction)
393{
394 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
395 unsigned int npages;
396
397 if (!translate_phb(to_pci_dev(dev)))
398 return;
399
400 npages = num_dma_pages(dma_handle, size);
401 iommu_free(tbl, dma_handle, npages);
402}
403
404void* calgary_alloc_coherent(struct device *dev, size_t size,
405 dma_addr_t *dma_handle, gfp_t flag)
406{
407 void *ret = NULL;
408 dma_addr_t mapping;
409 unsigned int npages, order;
410 struct iommu_table *tbl;
411
412 tbl = to_pci_dev(dev)->bus->self->sysdata;
413
414 size = PAGE_ALIGN(size); /* size rounded up to full pages */
415 npages = size >> PAGE_SHIFT;
416 order = get_order(size);
417
418 /* alloc enough pages (and possibly more) */
419 ret = (void *)__get_free_pages(flag, order);
420 if (!ret)
421 goto error;
422 memset(ret, 0, size);
423
424 if (translate_phb(to_pci_dev(dev))) {
425 /* set up tces to cover the allocated range */
426 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
427 if (mapping == bad_dma_address)
428 goto free;
429
430 *dma_handle = mapping;
431 } else /* non translated slot */
432 *dma_handle = virt_to_bus(ret);
433
434 return ret;
435
436free:
437 free_pages((unsigned long)ret, get_order(size));
438 ret = NULL;
439error:
440 return ret;
441}
442
443static struct dma_mapping_ops calgary_dma_ops = {
444 .alloc_coherent = calgary_alloc_coherent,
445 .map_single = calgary_map_single,
446 .unmap_single = calgary_unmap_single,
447 .map_sg = calgary_map_sg,
448 .unmap_sg = calgary_unmap_sg,
449};
450
451static inline int busno_to_phbid(unsigned char num)
452{
453 return bus_to_phb(num) % PHBS_PER_CALGARY;
454}
455
456static inline unsigned long split_queue_offset(unsigned char num)
457{
458 size_t idx = busno_to_phbid(num);
459
460 return split_queue_offsets[idx];
461}
462
463static inline unsigned long tar_offset(unsigned char num)
464{
465 size_t idx = busno_to_phbid(num);
466
467 return tar_offsets[idx];
468}
469
470static inline unsigned long phb_offset(unsigned char num)
471{
472 size_t idx = busno_to_phbid(num);
473
474 return phb_offsets[idx];
475}
476
477static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
478{
479 unsigned long target = ((unsigned long)bar) | offset;
480 return (void __iomem*)target;
481}
482
483static void tce_cache_blast(struct iommu_table *tbl)
484{
485 u64 val;
486 u32 aer;
487 int i = 0;
488 void __iomem *bbar = tbl->bbar;
489 void __iomem *target;
490
491 /* disable arbitration on the bus */
492 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
493 aer = readl(target);
494 writel(0, target);
495
496 /* read plssr to ensure it got there */
497 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
498 val = readl(target);
499
500 /* poll split queues until all DMA activity is done */
501 target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
502 do {
503 val = readq(target);
504 i++;
505 } while ((val & 0xff) != 0xff && i < 100);
506 if (i == 100)
507 printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
508 "continuing anyway\n");
509
510 /* invalidate TCE cache */
511 target = calgary_reg(bbar, tar_offset(tbl->it_busno));
512 writeq(tbl->tar_val, target);
513
514 /* enable arbitration */
515 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
516 writel(aer, target);
517 (void)readl(target); /* flush */
518}
519
520static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
521 u64 limit)
522{
523 unsigned int numpages;
524
525 limit = limit | 0xfffff;
526 limit++;
527
528 numpages = ((limit - start) >> PAGE_SHIFT);
529 iommu_range_reserve(dev->sysdata, start, numpages);
530}
531
532static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
533{
534 void __iomem *target;
535 u64 low, high, sizelow;
536 u64 start, limit;
537 struct iommu_table *tbl = dev->sysdata;
538 unsigned char busnum = dev->bus->number;
539 void __iomem *bbar = tbl->bbar;
540
541 /* peripheral MEM_1 region */
542 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
543 low = be32_to_cpu(readl(target));
544 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
545 high = be32_to_cpu(readl(target));
546 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
547 sizelow = be32_to_cpu(readl(target));
548
549 start = (high << 32) | low;
550 limit = sizelow;
551
552 calgary_reserve_mem_region(dev, start, limit);
553}
554
555static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
556{
557 void __iomem *target;
558 u32 val32;
559 u64 low, high, sizelow, sizehigh;
560 u64 start, limit;
561 struct iommu_table *tbl = dev->sysdata;
562 unsigned char busnum = dev->bus->number;
563 void __iomem *bbar = tbl->bbar;
564
565 /* is it enabled? */
566 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
567 val32 = be32_to_cpu(readl(target));
568 if (!(val32 & PHB_MEM2_ENABLE))
569 return;
570
571 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
572 low = be32_to_cpu(readl(target));
573 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
574 high = be32_to_cpu(readl(target));
575 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
576 sizelow = be32_to_cpu(readl(target));
577 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
578 sizehigh = be32_to_cpu(readl(target));
579
580 start = (high << 32) | low;
581 limit = (sizehigh << 32) | sizelow;
582
583 calgary_reserve_mem_region(dev, start, limit);
584}
585
586/*
587 * some regions of the IO address space do not get translated, so we
588 * must not give devices IO addresses in those regions. The regions
589 * are the 640KB-1MB region and the two PCI peripheral memory holes.
590 * Reserve all of them in the IOMMU bitmap to avoid giving them out
591 * later.
592 */
593static void __init calgary_reserve_regions(struct pci_dev *dev)
594{
595 unsigned int npages;
596 void __iomem *bbar;
597 unsigned char busnum;
598 u64 start;
599 struct iommu_table *tbl = dev->sysdata;
600
601 bbar = tbl->bbar;
602 busnum = dev->bus->number;
603
604 /* reserve bad_dma_address in case it's a legal address */
605 iommu_range_reserve(tbl, bad_dma_address, 1);
606
607 /* avoid the BIOS/VGA first 640KB-1MB region */
608 start = (640 * 1024);
609 npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
610 iommu_range_reserve(tbl, start, npages);
611
612 /* reserve the two PCI peripheral memory regions in IO space */
613 calgary_reserve_peripheral_mem_1(dev);
614 calgary_reserve_peripheral_mem_2(dev);
615}
616
617static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
618{
619 u64 val64;
620 u64 table_phys;
621 void __iomem *target;
622 int ret;
623 struct iommu_table *tbl;
624
625 /* build TCE tables for each PHB */
626 ret = build_tce_table(dev, bbar);
627 if (ret)
628 return ret;
629
630 calgary_reserve_regions(dev);
631
632 /* set TARs for each PHB */
633 target = calgary_reg(bbar, tar_offset(dev->bus->number));
634 val64 = be64_to_cpu(readq(target));
635
636 /* zero out all TAR bits under sw control */
637 val64 &= ~TAR_SW_BITS;
638
639 tbl = dev->sysdata;
640 table_phys = (u64)__pa(tbl->it_base);
641 val64 |= table_phys;
642
643 BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
644 val64 |= (u64) specified_table_size;
645
646 tbl->tar_val = cpu_to_be64(val64);
647 writeq(tbl->tar_val, target);
648 readq(target); /* flush */
649
650 return 0;
651}
652
653static void __init calgary_free_tar(struct pci_dev *dev)
654{
655 u64 val64;
656 struct iommu_table *tbl = dev->sysdata;
657 void __iomem *target;
658
659 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
660 val64 = be64_to_cpu(readq(target));
661 val64 &= ~TAR_SW_BITS;
662 writeq(cpu_to_be64(val64), target);
663 readq(target); /* flush */
664
665 kfree(tbl);
666 dev->sysdata = NULL;
667}
668
669static void calgary_watchdog(unsigned long data)
670{
671 struct pci_dev *dev = (struct pci_dev *)data;
672 struct iommu_table *tbl = dev->sysdata;
673 void __iomem *bbar = tbl->bbar;
674 u32 val32;
675 void __iomem *target;
676
677 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
678 val32 = be32_to_cpu(readl(target));
679
680 /* If no error, the agent ID in the CSR is not valid */
681 if (val32 & CSR_AGENT_MASK) {
682 printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
683 "CSR = %#x\n", dev->bus->number, val32);
684 writel(0, target);
685
686 /* Disable bus that caused the error */
687 target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
688 PHB_CONFIG_RW_OFFSET);
689 val32 = be32_to_cpu(readl(target));
690 val32 |= PHB_SLOT_DISABLE;
691 writel(cpu_to_be32(val32), target);
692 readl(target); /* flush */
693 } else {
694 /* Reset the timer */
695 mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
696 }
697}
698
699static void __init calgary_enable_translation(struct pci_dev *dev)
700{
701 u32 val32;
702 unsigned char busnum;
703 void __iomem *target;
704 void __iomem *bbar;
705 struct iommu_table *tbl;
706
707 busnum = dev->bus->number;
708 tbl = dev->sysdata;
709 bbar = tbl->bbar;
710
711 /* enable TCE in PHB Config Register */
712 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
713 val32 = be32_to_cpu(readl(target));
714 val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
715
716 printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
717 printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
718 "bus.\n");
719
720 writel(cpu_to_be32(val32), target);
721 readl(target); /* flush */
722
723 init_timer(&tbl->watchdog_timer);
724 tbl->watchdog_timer.function = &calgary_watchdog;
725 tbl->watchdog_timer.data = (unsigned long)dev;
726 mod_timer(&tbl->watchdog_timer, jiffies);
727}
728
729static void __init calgary_disable_translation(struct pci_dev *dev)
730{
731 u32 val32;
732 unsigned char busnum;
733 void __iomem *target;
734 void __iomem *bbar;
735 struct iommu_table *tbl;
736
737 busnum = dev->bus->number;
738 tbl = dev->sysdata;
739 bbar = tbl->bbar;
740
741 /* disable TCE in PHB Config Register */
742 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
743 val32 = be32_to_cpu(readl(target));
744 val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
745
746 printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
747 writel(cpu_to_be32(val32), target);
748 readl(target); /* flush */
749
750 del_timer_sync(&tbl->watchdog_timer);
751}
752
753static inline unsigned int __init locate_register_space(struct pci_dev *dev)
754{
755 int rionodeid;
756 u32 address;
757
758 rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
759 /*
760 * register space address calculation as follows:
761 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
762 * ChassisBase is always zero for x366/x260/x460
763 * RioNodeId is 2 for first Calgary, 3 for second Calgary
764 */
765 address = START_ADDRESS -
766 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
767 (0x100000) * (rionodeid - CHASSIS_BASE);
768 return address;
769}
770
771static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
772{
773 dev->sysdata = NULL;
774 dev->bus->self = dev;
775
776 return 0;
777}
778
779static int __init calgary_init_one(struct pci_dev *dev)
780{
781 u32 address;
782 void __iomem *bbar;
783 int ret;
784
785 address = locate_register_space(dev);
786 /* map entire 1MB of Calgary config space */
787 bbar = ioremap_nocache(address, 1024 * 1024);
788 if (!bbar) {
789 ret = -ENODATA;
790 goto done;
791 }
792
793 ret = calgary_setup_tar(dev, bbar);
794 if (ret)
795 goto iounmap;
796
797 dev->bus->self = dev;
798 calgary_enable_translation(dev);
799
800 return 0;
801
802iounmap:
803 iounmap(bbar);
804done:
805 return ret;
806}
807
808static int __init calgary_init(void)
809{
810 int i, ret = -ENODEV;
811 struct pci_dev *dev = NULL;
812
813 for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
814 dev = pci_get_device(PCI_VENDOR_ID_IBM,
815 PCI_DEVICE_ID_IBM_CALGARY,
816 dev);
817 if (!dev)
818 break;
819 if (!translate_phb(dev)) {
820 calgary_init_one_nontraslated(dev);
821 continue;
822 }
823 if (!tce_table_kva[i] && !translate_empty_slots) {
824 pci_dev_put(dev);
825 continue;
826 }
827 ret = calgary_init_one(dev);
828 if (ret)
829 goto error;
830 }
831
832 return ret;
833
834error:
835 for (i--; i >= 0; i--) {
836 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
837 PCI_DEVICE_ID_IBM_CALGARY,
838 dev);
839 if (!translate_phb(dev)) {
840 pci_dev_put(dev);
841 continue;
842 }
843 if (!tce_table_kva[i] && !translate_empty_slots)
844 continue;
845 calgary_disable_translation(dev);
846 calgary_free_tar(dev);
847 pci_dev_put(dev);
848 }
849
850 return ret;
851}
852
853static inline int __init determine_tce_table_size(u64 ram)
854{
855 int ret;
856
857 if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
858 return specified_table_size;
859
860 /*
861 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
862 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
863 * larger table size has twice as many entries, so shift the
864 * max ram address by 13 to divide by 8K and then look at the
865 * order of the result to choose between 0-7.
866 */
867 ret = get_order(ram >> 13);
868 if (ret > TCE_TABLE_SIZE_8M)
869 ret = TCE_TABLE_SIZE_8M;
870
871 return ret;
872}
873
874void __init detect_calgary(void)
875{
876 u32 val;
877 int bus, table_idx;
878 void *tbl;
879 int detected = 0;
880
881 /*
882 * if the user specified iommu=off or iommu=soft or we found
883 * another HW IOMMU already, bail out.
884 */
885 if (swiotlb || no_iommu || iommu_detected)
886 return;
887
888 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
889
890 for (bus = 0, table_idx = 0;
891 bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
892 bus++) {
893 BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
894 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
895 continue;
896 if (test_bit(bus, translation_disabled)) {
897 printk(KERN_INFO "Calgary: translation is disabled for "
898 "PHB 0x%x\n", bus);
899 /* skip this phb, don't allocate a tbl for it */
900 tce_table_kva[table_idx] = NULL;
901 table_idx++;
902 continue;
903 }
904 /*
905 * scan the first slot of the PCI bus to see if there
906 * are any devices present
907 */
908 val = read_pci_config(bus, 1, 0, 0);
909 if (val != 0xffffffff || translate_empty_slots) {
910 tbl = alloc_tce_table();
911 if (!tbl)
912 goto cleanup;
913 detected = 1;
914 } else
915 tbl = NULL;
916
917 tce_table_kva[table_idx] = tbl;
918 table_idx++;
919 }
920
921 if (detected) {
922 iommu_detected = 1;
923 calgary_detected = 1;
924 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
925 "TCE table spec is %d.\n", specified_table_size);
926 }
927 return;
928
929cleanup:
930 for (--table_idx; table_idx >= 0; --table_idx)
931 if (tce_table_kva[table_idx])
932 free_tce_table(tce_table_kva[table_idx]);
933}
934
935int __init calgary_iommu_init(void)
936{
937 int ret;
938
939 if (no_iommu || swiotlb)
940 return -ENODEV;
941
942 if (!calgary_detected)
943 return -ENODEV;
944
945 /* ok, we're trying to use Calgary - let's roll */
946 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
947
948 ret = calgary_init();
949 if (ret) {
950 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
951 "falling back to no_iommu\n", ret);
952 if (end_pfn > MAX_DMA32_PFN)
953 printk(KERN_ERR "WARNING more than 4GB of memory, "
954 "32bit PCI may malfunction.\n");
955 return ret;
956 }
957
958 force_iommu = 1;
959 dma_ops = &calgary_dma_ops;
960
961 return 0;
962}
963
964static int __init calgary_parse_options(char *p)
965{
966 unsigned int bridge;
967 size_t len;
968 char* endp;
969
970 while (*p) {
971 if (!strncmp(p, "64k", 3))
972 specified_table_size = TCE_TABLE_SIZE_64K;
973 else if (!strncmp(p, "128k", 4))
974 specified_table_size = TCE_TABLE_SIZE_128K;
975 else if (!strncmp(p, "256k", 4))
976 specified_table_size = TCE_TABLE_SIZE_256K;
977 else if (!strncmp(p, "512k", 4))
978 specified_table_size = TCE_TABLE_SIZE_512K;
979 else if (!strncmp(p, "1M", 2))
980 specified_table_size = TCE_TABLE_SIZE_1M;
981 else if (!strncmp(p, "2M", 2))
982 specified_table_size = TCE_TABLE_SIZE_2M;
983 else if (!strncmp(p, "4M", 2))
984 specified_table_size = TCE_TABLE_SIZE_4M;
985 else if (!strncmp(p, "8M", 2))
986 specified_table_size = TCE_TABLE_SIZE_8M;
987
988 len = strlen("translate_empty_slots");
989 if (!strncmp(p, "translate_empty_slots", len))
990 translate_empty_slots = 1;
991
992 len = strlen("disable");
993 if (!strncmp(p, "disable", len)) {
994 p += len;
995 if (*p == '=')
996 ++p;
997 if (*p == '\0')
998 break;
999 bridge = simple_strtol(p, &endp, 0);
1000 if (p == endp)
1001 break;
1002
1003 if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
1004 printk(KERN_INFO "Calgary: disabling "
1005 "translation for PHB 0x%x\n", bridge);
1006 set_bit(bridge, translation_disabled);
1007 }
1008 }
1009
1010 p = strpbrk(p, ",");
1011 if (!p)
1012 break;
1013
1014 p++; /* skip ',' */
1015 }
1016 return 1;
1017}
1018__setup("calgary=", calgary_parse_options);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index a9275c9557cf..9c44f4f2433d 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h> 11#include <asm/proto.h>
12#include <asm/calgary.h>
12 13
13int iommu_merge __read_mostly = 0; 14int iommu_merge __read_mostly = 0;
14EXPORT_SYMBOL(iommu_merge); 15EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0; 34int force_iommu __read_mostly= 0;
34#endif 35#endif
35 36
37/* Set this to 1 if there is a HW IOMMU in the system */
38int iommu_detected __read_mostly = 0;
39
36/* Dummy device used for NULL arguments (normally ISA). Better would 40/* Dummy device used for NULL arguments (normally ISA). Better would
37 be probably a smaller DMA mask, but this is bug-to-bug compatible 41 be probably a smaller DMA mask, but this is bug-to-bug compatible
38 to i386. */ 42 to i386. */
39struct device fallback_dev = { 43struct device fallback_dev = {
40 .bus_id = "fallback device", 44 .bus_id = "fallback device",
41 .coherent_dma_mask = 0xffffffff, 45 .coherent_dma_mask = DMA_32BIT_MASK,
42 .dma_mask = &fallback_dev.coherent_dma_mask, 46 .dma_mask = &fallback_dev.coherent_dma_mask,
43}; 47};
44 48
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
77 dev = &fallback_dev; 81 dev = &fallback_dev;
78 dma_mask = dev->coherent_dma_mask; 82 dma_mask = dev->coherent_dma_mask;
79 if (dma_mask == 0) 83 if (dma_mask == 0)
80 dma_mask = 0xffffffff; 84 dma_mask = DMA_32BIT_MASK;
81 85
82 /* Don't invoke OOM killer */ 86 /* Don't invoke OOM killer */
83 gfp |= __GFP_NORETRY; 87 gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
90 larger than 16MB and in this case we have a chance of 94 larger than 16MB and in this case we have a chance of
91 finding fitting memory in the next higher zone first. If 95 finding fitting memory in the next higher zone first. If
92 not retry with true GFP_DMA. -AK */ 96 not retry with true GFP_DMA. -AK */
93 if (dma_mask <= 0xffffffff) 97 if (dma_mask <= DMA_32BIT_MASK)
94 gfp |= GFP_DMA32; 98 gfp |= GFP_DMA32;
95 99
96 again: 100 again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
111 115
112 /* Don't use the 16MB ZONE_DMA unless absolutely 116 /* Don't use the 16MB ZONE_DMA unless absolutely
113 needed. It's better to use remapping first. */ 117 needed. It's better to use remapping first. */
114 if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { 118 if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
115 gfp = (gfp & ~GFP_DMA32) | GFP_DMA; 119 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
116 goto again; 120 goto again;
117 } 121 }
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
174 /* Copied from i386. Doesn't make much sense, because it will 178 /* Copied from i386. Doesn't make much sense, because it will
175 only work for pci_alloc_coherent. 179 only work for pci_alloc_coherent.
176 The caller just has to use GFP_DMA in this case. */ 180 The caller just has to use GFP_DMA in this case. */
177 if (mask < 0x00ffffff) 181 if (mask < DMA_24BIT_MASK)
178 return 0; 182 return 0;
179 183
180 /* Tell the device to use SAC when IOMMU force is on. This 184 /* Tell the device to use SAC when IOMMU force is on. This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
189 SAC for these. Assume all masks <= 40 bits are of this 193 SAC for these. Assume all masks <= 40 bits are of this
190 type. Normally this doesn't make any difference, but gives 194 type. Normally this doesn't make any difference, but gives
191 more gentle handling of IOMMU overflow. */ 195 more gentle handling of IOMMU overflow. */
192 if (iommu_sac_force && (mask >= 0xffffffffffULL)) { 196 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
193 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); 197 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
194 return 0; 198 return 0;
195 } 199 }
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
266 swiotlb = 1; 270 swiotlb = 1;
267#endif 271#endif
268 272
269#ifdef CONFIG_GART_IOMMU 273#ifdef CONFIG_IOMMU
270 gart_parse_options(p); 274 gart_parse_options(p);
271#endif 275#endif
272 276
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
276 } 280 }
277 return 1; 281 return 1;
278} 282}
283__setup("iommu=", iommu_setup);
284
285void __init pci_iommu_alloc(void)
286{
287 /*
288 * The order of these functions is important for
289 * fall-back/fail-over reasons
290 */
291#ifdef CONFIG_IOMMU
292 iommu_hole_init();
293#endif
294
295#ifdef CONFIG_CALGARY_IOMMU
296 detect_calgary();
297#endif
298
299#ifdef CONFIG_SWIOTLB
300 pci_swiotlb_init();
301#endif
302}
303
304static int __init pci_iommu_init(void)
305{
306#ifdef CONFIG_CALGARY_IOMMU
307 calgary_iommu_init();
308#endif
309
310#ifdef CONFIG_IOMMU
311 gart_iommu_init();
312#endif
313
314 no_iommu_init();
315 return 0;
316}
317
318/* Must execute after PCI subsystem */
319fs_initcall(pci_iommu_init);
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 82a7c9bfdfa0..4ca674d16b09 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33#include <asm/swiotlb.h> 33#include <asm/swiotlb.h>
34#include <asm/dma.h> 34#include <asm/dma.h>
35#include <asm/k8.h>
35 36
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 37unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 38static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
46 also seen with Qlogic at least). */ 47 also seen with Qlogic at least). */
47int iommu_fullflush = 1; 48int iommu_fullflush = 1;
48 49
49#define MAX_NB 8
50
51/* Allocation bitmap for the remapping area */ 50/* Allocation bitmap for the remapping area */
52static DEFINE_SPINLOCK(iommu_bitmap_lock); 51static DEFINE_SPINLOCK(iommu_bitmap_lock);
53static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ 52static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
63#define to_pages(addr,size) \ 62#define to_pages(addr,size) \
64 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 63 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
65 64
66#define for_all_nb(dev) \
67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69
70static struct pci_dev *northbridges[MAX_NB];
71static u32 northbridge_flush_word[MAX_NB];
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 65#define EMERGENCY_PAGES 32 /* = 128KB */
74 66
75#ifdef CONFIG_AGP 67#ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
93 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); 85 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
94 if (offset == -1) { 86 if (offset == -1) {
95 need_flush = 1; 87 need_flush = 1;
96 offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); 88 offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
97 } 89 }
98 if (offset != -1) { 90 if (offset != -1) {
99 set_bit_string(iommu_gart_bitmap, offset, size); 91 set_bit_string(iommu_gart_bitmap, offset, size);
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
120/* 112/*
121 * Use global flush state to avoid races with multiple flushers. 113 * Use global flush state to avoid races with multiple flushers.
122 */ 114 */
123static void flush_gart(struct device *dev) 115static void flush_gart(void)
124{ 116{
125 unsigned long flags; 117 unsigned long flags;
126 int flushed = 0;
127 int i, max;
128
129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 118 spin_lock_irqsave(&iommu_bitmap_lock, flags);
130 if (need_flush) { 119 if (need_flush) {
131 max = 0; 120 k8_flush_garts();
132 for (i = 0; i < MAX_NB; i++) {
133 if (!northbridges[i])
134 continue;
135 pci_write_config_dword(northbridges[i], 0x9c,
136 northbridge_flush_word[i] | 1);
137 flushed++;
138 max = i;
139 }
140 for (i = 0; i <= max; i++) {
141 u32 w;
142 if (!northbridges[i])
143 continue;
144 /* Make sure the hardware actually executed the flush. */
145 for (;;) {
146 pci_read_config_dword(northbridges[i], 0x9c, &w);
147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
151 }
152 if (!flushed)
153 printk("nothing to flush?\n");
154 need_flush = 0; 121 need_flush = 0;
155 } 122 }
156 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
157} 124}
158 125
159
160
161#ifdef CONFIG_IOMMU_LEAK 126#ifdef CONFIG_IOMMU_LEAK
162 127
163#define SET_LEAK(x) if (iommu_leak_tab) \ 128#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
266 size_t size, int dir) 231 size_t size, int dir)
267{ 232{
268 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 233 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
269 flush_gart(dev); 234 flush_gart();
270 return map; 235 return map;
271} 236}
272 237
@@ -289,6 +254,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
289} 254}
290 255
291/* 256/*
257 * Free a DMA mapping.
258 */
259void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
260 size_t size, int direction)
261{
262 unsigned long iommu_page;
263 int npages;
264 int i;
265
266 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
267 dma_addr >= iommu_bus_base + iommu_size)
268 return;
269 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
270 npages = to_pages(dma_addr, size);
271 for (i = 0; i < npages; i++) {
272 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
273 CLEAR_LEAK(iommu_page + i);
274 }
275 free_iommu(iommu_page, npages);
276}
277
278/*
292 * Wrapper for pci_unmap_single working with scatterlists. 279 * Wrapper for pci_unmap_single working with scatterlists.
293 */ 280 */
294void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 281void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
299 struct scatterlist *s = &sg[i]; 286 struct scatterlist *s = &sg[i];
300 if (!s->dma_length || !s->length) 287 if (!s->dma_length || !s->length)
301 break; 288 break;
302 dma_unmap_single(dev, s->dma_address, s->dma_length, dir); 289 gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
303 } 290 }
304} 291}
305 292
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
329 s->dma_address = addr; 316 s->dma_address = addr;
330 s->dma_length = s->length; 317 s->dma_length = s->length;
331 } 318 }
332 flush_gart(dev); 319 flush_gart();
333 return nents; 320 return nents;
334} 321}
335 322
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
436 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) 423 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
437 goto error; 424 goto error;
438 out++; 425 out++;
439 flush_gart(dev); 426 flush_gart();
440 if (out < nents) 427 if (out < nents)
441 sg[out].dma_length = 0; 428 sg[out].dma_length = 0;
442 return out; 429 return out;
443 430
444error: 431error:
445 flush_gart(NULL); 432 flush_gart();
446 gart_unmap_sg(dev, sg, nents, dir); 433 gart_unmap_sg(dev, sg, nents, dir);
447 /* When it was forced or merged try again in a dumb way */ 434 /* When it was forced or merged try again in a dumb way */
448 if (force_iommu || iommu_merge) { 435 if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
458 return 0; 445 return 0;
459} 446}
460 447
461/*
462 * Free a DMA mapping.
463 */
464void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
465 size_t size, int direction)
466{
467 unsigned long iommu_page;
468 int npages;
469 int i;
470
471 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
472 dma_addr >= iommu_bus_base + iommu_size)
473 return;
474 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
475 npages = to_pages(dma_addr, size);
476 for (i = 0; i < npages; i++) {
477 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
478 CLEAR_LEAK(iommu_page + i);
479 }
480 free_iommu(iommu_page, npages);
481}
482
483static int no_agp; 448static int no_agp;
484 449
485static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 450static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
532 void *gatt; 497 void *gatt;
533 unsigned aper_base, new_aper_base; 498 unsigned aper_base, new_aper_base;
534 unsigned aper_size, gatt_size, new_aper_size; 499 unsigned aper_size, gatt_size, new_aper_size;
535 500 int i;
501
536 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 502 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
537 aper_size = aper_base = info->aper_size = 0; 503 aper_size = aper_base = info->aper_size = 0;
538 for_all_nb(dev) { 504 dev = NULL;
505 for (i = 0; i < num_k8_northbridges; i++) {
506 dev = k8_northbridges[i];
539 new_aper_base = read_aperture(dev, &new_aper_size); 507 new_aper_base = read_aperture(dev, &new_aper_size);
540 if (!new_aper_base) 508 if (!new_aper_base)
541 goto nommu; 509 goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 panic("Cannot allocate GATT table"); 526 panic("Cannot allocate GATT table");
559 memset(gatt, 0, gatt_size); 527 memset(gatt, 0, gatt_size);
560 agp_gatt_table = gatt; 528 agp_gatt_table = gatt;
561 529
562 for_all_nb(dev) { 530 for (i = 0; i < num_k8_northbridges; i++) {
563 u32 ctl; 531 u32 ctl;
564 u32 gatt_reg; 532 u32 gatt_reg;
565 533
534 dev = k8_northbridges[i];
566 gatt_reg = __pa(gatt) >> 12; 535 gatt_reg = __pa(gatt) >> 12;
567 gatt_reg <<= 4; 536 gatt_reg <<= 4;
568 pci_write_config_dword(dev, 0x98, gatt_reg); 537 pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
573 542
574 pci_write_config_dword(dev, 0x90, ctl); 543 pci_write_config_dword(dev, 0x90, ctl);
575 } 544 }
576 flush_gart(NULL); 545 flush_gart();
577 546
578 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 547 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
579 return 0; 548 return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
602 .unmap_sg = gart_unmap_sg, 571 .unmap_sg = gart_unmap_sg,
603}; 572};
604 573
605static int __init pci_iommu_init(void) 574void __init gart_iommu_init(void)
606{ 575{
607 struct agp_kern_info info; 576 struct agp_kern_info info;
608 unsigned long aper_size; 577 unsigned long aper_size;
609 unsigned long iommu_start; 578 unsigned long iommu_start;
610 struct pci_dev *dev;
611 unsigned long scratch; 579 unsigned long scratch;
612 long i; 580 long i;
613 581
582 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
583 printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
584 return;
585 }
586
614#ifndef CONFIG_AGP_AMD64 587#ifndef CONFIG_AGP_AMD64
615 no_agp = 1; 588 no_agp = 1;
616#else 589#else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
622#endif 595#endif
623 596
624 if (swiotlb) 597 if (swiotlb)
625 return -1; 598 return;
599
600 /* Did we detect a different HW IOMMU? */
601 if (iommu_detected && !iommu_aperture)
602 return;
626 603
627 if (no_iommu || 604 if (no_iommu ||
628 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 605 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
634 "but IOMMU not available.\n" 611 "but IOMMU not available.\n"
635 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 612 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
636 } 613 }
637 return -1; 614 return;
638 }
639
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 } 615 }
647 616
648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 617 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
707 for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 676 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
708 iommu_gatt_base[i] = gart_unmapped_entry; 677 iommu_gatt_base[i] = gart_unmapped_entry;
709 678
710 for_all_nb(dev) { 679 flush_gart();
711 u32 flag;
712 int cpu = PCI_SLOT(dev->devfn) - 24;
713 if (cpu >= MAX_NB)
714 continue;
715 northbridges[cpu] = dev;
716 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
717 northbridge_flush_word[cpu] = flag;
718 }
719
720 flush_gart(NULL);
721
722 dma_ops = &gart_dma_ops; 680 dma_ops = &gart_dma_ops;
723
724 return 0;
725} 681}
726 682
727/* Must execute after PCI subsystem */
728fs_initcall(pci_iommu_init);
729
730void gart_parse_options(char *p) 683void gart_parse_options(char *p)
731{ 684{
732 int arg; 685 int arg;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 1f6ecc62061d..c4c3cc36ac5b 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -4,6 +4,8 @@
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/dma-mapping.h>
8
7#include <asm/proto.h> 9#include <asm/proto.h>
8#include <asm/processor.h> 10#include <asm/processor.h>
9#include <asm/dma.h> 11#include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 14check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
13{ 15{
14 if (hwdev && bus + size > *hwdev->dma_mask) { 16 if (hwdev && bus + size > *hwdev->dma_mask) {
15 if (*hwdev->dma_mask >= 0xffffffffULL) 17 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
16 printk(KERN_ERR 18 printk(KERN_ERR
17 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", 19 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
18 name, (long long)bus, size, (long long)*hwdev->dma_mask); 20 name, (long long)bus, size,
21 (long long)*hwdev->dma_mask);
19 return 0; 22 return 0;
20 } 23 }
21 return 1; 24 return 1;
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 990ed67896f2..ebdb77fe2057 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
31void pci_swiotlb_init(void) 31void pci_swiotlb_init(void)
32{ 32{
33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
34 if (!iommu_aperture && !no_iommu && 34 if (!iommu_detected && !no_iommu &&
35 (end_pfn > MAX_DMA32_PFN || force_iommu)) 35 (end_pfn > MAX_DMA32_PFN || force_iommu))
36 swiotlb = 1; 36 swiotlb = 1;
37 if (swiotlb) { 37 if (swiotlb) {
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index fb903e65e079..ca56e19b8b6e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -10,7 +10,6 @@
10 * Andi Kleen. 10 * Andi Kleen.
11 * 11 *
12 * CPU hotplug support - ashok.raj@intel.com 12 * CPU hotplug support - ashok.raj@intel.com
13 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
14 */ 13 */
15 14
16/* 15/*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
64 * Powermanagement idle function, if any.. 63 * Powermanagement idle function, if any..
65 */ 64 */
66void (*pm_idle)(void); 65void (*pm_idle)(void);
66EXPORT_SYMBOL(pm_idle);
67static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 67static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68 68
69static ATOMIC_NOTIFIER_HEAD(idle_notifier); 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
111{ 111{
112 local_irq_enable(); 112 local_irq_enable();
113 113
114 clear_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status &= ~TS_POLLING;
115 smp_mb__after_clear_bit(); 115 smp_mb__after_clear_bit();
116 while (!need_resched()) { 116 while (!need_resched()) {
117 local_irq_disable(); 117 local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
120 else 120 else
121 local_irq_enable(); 121 local_irq_enable();
122 } 122 }
123 set_thread_flag(TIF_POLLING_NRFLAG); 123 current_thread_info()->status |= TS_POLLING;
124} 124}
125 125
126/* 126/*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
203 */ 203 */
204void cpu_idle (void) 204void cpu_idle (void)
205{ 205{
206 set_thread_flag(TIF_POLLING_NRFLAG); 206 current_thread_info()->status |= TS_POLLING;
207
208 /* endless idle loop with no priority at all */ 207 /* endless idle loop with no priority at all */
209 while (1) { 208 while (1) {
210 while (!need_resched()) { 209 while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
335{ 334{
336 printk("CPU %d:", smp_processor_id()); 335 printk("CPU %d:", smp_processor_id());
337 __show_regs(regs); 336 __show_regs(regs);
338 show_trace(&regs->rsp); 337 show_trace(NULL, regs, (void *)(regs + 1));
339} 338}
340 339
341/* 340/*
@@ -365,8 +364,11 @@ void flush_thread(void)
365 struct task_struct *tsk = current; 364 struct task_struct *tsk = current;
366 struct thread_info *t = current_thread_info(); 365 struct thread_info *t = current_thread_info();
367 366
368 if (t->flags & _TIF_ABI_PENDING) 367 if (t->flags & _TIF_ABI_PENDING) {
369 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); 368 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
369 if (t->flags & _TIF_IA32)
370 current_thread_info()->status |= TS_COMPAT;
371 }
370 372
371 tsk->thread.debugreg0 = 0; 373 tsk->thread.debugreg0 = 0;
372 tsk->thread.debugreg1 = 0; 374 tsk->thread.debugreg1 = 0;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 57117b8beb2b..2d6769847456 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -20,6 +20,7 @@
20 * Power off function, if any 20 * Power off function, if any
21 */ 21 */
22void (*pm_power_off)(void); 22void (*pm_power_off)(void);
23EXPORT_SYMBOL(pm_power_off);
23 24
24static long no_idt[3]; 25static long no_idt[3];
25static enum { 26static enum {
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 143c65031539..1129918ede82 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Nov 2001 Dave Jones <davej@suse.de> 6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code. 7 * Forked from i386 setup code.
8 *
9 * $Id$
10 */ 8 */
11 9
12/* 10/*
@@ -65,9 +63,7 @@
65#include <asm/setup.h> 63#include <asm/setup.h>
66#include <asm/mach_apic.h> 64#include <asm/mach_apic.h>
67#include <asm/numa.h> 65#include <asm/numa.h>
68#include <asm/swiotlb.h>
69#include <asm/sections.h> 66#include <asm/sections.h>
70#include <asm/gart-mapping.h>
71#include <asm/dmi.h> 67#include <asm/dmi.h>
72 68
73/* 69/*
@@ -75,6 +71,7 @@
75 */ 71 */
76 72
77struct cpuinfo_x86 boot_cpu_data __read_mostly; 73struct cpuinfo_x86 boot_cpu_data __read_mostly;
74EXPORT_SYMBOL(boot_cpu_data);
78 75
79unsigned long mmu_cr4_features; 76unsigned long mmu_cr4_features;
80 77
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA];
103 * Setup options 100 * Setup options
104 */ 101 */
105struct screen_info screen_info; 102struct screen_info screen_info;
103EXPORT_SYMBOL(screen_info);
106struct sys_desc_table_struct { 104struct sys_desc_table_struct {
107 unsigned short length; 105 unsigned short length;
108 unsigned char table[0]; 106 unsigned char table[0];
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
474} 472}
475#endif 473#endif
476 474
477/* Use inline assembly to define this because the nops are defined
478 as inline assembly strings in the include files and we cannot
479 get them easily into strings. */
480asm("\t.data\nk8nops: "
481 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
482 K8_NOP7 K8_NOP8);
483
484extern unsigned char k8nops[];
485static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
486 NULL,
487 k8nops,
488 k8nops + 1,
489 k8nops + 1 + 2,
490 k8nops + 1 + 2 + 3,
491 k8nops + 1 + 2 + 3 + 4,
492 k8nops + 1 + 2 + 3 + 4 + 5,
493 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
494 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
495};
496
497extern char __vsyscall_0;
498
499/* Replace instructions with better alternatives for this CPU type.
500
501 This runs before SMP is initialized to avoid SMP problems with
502 self modifying code. This implies that assymetric systems where
503 APs have less capabilities than the boot processor are not handled.
504 In this case boot with "noreplacement". */
505void apply_alternatives(void *start, void *end)
506{
507 struct alt_instr *a;
508 int diff, i, k;
509 for (a = start; (void *)a < end; a++) {
510 u8 *instr;
511
512 if (!boot_cpu_has(a->cpuid))
513 continue;
514
515 BUG_ON(a->replacementlen > a->instrlen);
516 instr = a->instr;
517 /* vsyscall code is not mapped yet. resolve it manually. */
518 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
519 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
520 __inline_memcpy(instr, a->replacement, a->replacementlen);
521 diff = a->instrlen - a->replacementlen;
522
523 /* Pad the rest with nops */
524 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
525 k = diff;
526 if (k > ASM_NOP_MAX)
527 k = ASM_NOP_MAX;
528 __inline_memcpy(instr + i, k8_nops[k], k);
529 }
530 }
531}
532
533static int no_replacement __initdata = 0;
534
535void __init alternative_instructions(void)
536{
537 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
538 if (no_replacement)
539 return;
540 apply_alternatives(__alt_instructions, __alt_instructions_end);
541}
542
543static int __init noreplacement_setup(char *s)
544{
545 no_replacement = 1;
546 return 1;
547}
548
549__setup("noreplacement", noreplacement_setup);
550
551#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 475#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
552struct edd edd; 476struct edd edd;
553#ifdef CONFIG_EDD_MODULE 477#ifdef CONFIG_EDD_MODULE
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
780 704
781 e820_setup_gap(); 705 e820_setup_gap();
782 706
783#ifdef CONFIG_GART_IOMMU
784 iommu_hole_init();
785#endif
786
787#ifdef CONFIG_VT 707#ifdef CONFIG_VT
788#if defined(CONFIG_VGA_CONSOLE) 708#if defined(CONFIG_VGA_CONSOLE)
789 conswitchp = &vga_con; 709 conswitchp = &vga_con;
@@ -868,24 +788,32 @@ static int nearby_node(int apicid)
868static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 788static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
869{ 789{
870#ifdef CONFIG_SMP 790#ifdef CONFIG_SMP
871 int cpu = smp_processor_id();
872 unsigned bits; 791 unsigned bits;
873#ifdef CONFIG_NUMA 792#ifdef CONFIG_NUMA
793 int cpu = smp_processor_id();
874 int node = 0; 794 int node = 0;
875 unsigned apicid = hard_smp_processor_id(); 795 unsigned apicid = hard_smp_processor_id();
876#endif 796#endif
797 unsigned ecx = cpuid_ecx(0x80000008);
798
799 c->x86_max_cores = (ecx & 0xff) + 1;
877 800
878 bits = 0; 801 /* CPU telling us the core id bits shift? */
879 while ((1 << bits) < c->x86_max_cores) 802 bits = (ecx >> 12) & 0xF;
880 bits++; 803
804 /* Otherwise recompute */
805 if (bits == 0) {
806 while ((1 << bits) < c->x86_max_cores)
807 bits++;
808 }
881 809
882 /* Low order bits define the core id (index of core in socket) */ 810 /* Low order bits define the core id (index of core in socket) */
883 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); 811 c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
884 /* Convert the APIC ID into the socket ID */ 812 /* Convert the APIC ID into the socket ID */
885 phys_proc_id[cpu] = phys_pkg_id(bits); 813 c->phys_proc_id = phys_pkg_id(bits);
886 814
887#ifdef CONFIG_NUMA 815#ifdef CONFIG_NUMA
888 node = phys_proc_id[cpu]; 816 node = c->phys_proc_id;
889 if (apicid_to_node[apicid] != NUMA_NO_NODE) 817 if (apicid_to_node[apicid] != NUMA_NO_NODE)
890 node = apicid_to_node[apicid]; 818 node = apicid_to_node[apicid];
891 if (!node_online(node)) { 819 if (!node_online(node)) {
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
898 but in the same order as the HT nodeids. 826 but in the same order as the HT nodeids.
899 If that doesn't result in a usable node fall back to the 827 If that doesn't result in a usable node fall back to the
900 path for the previous case. */ 828 path for the previous case. */
901 int ht_nodeid = apicid - (phys_proc_id[0] << bits); 829 int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
902 if (ht_nodeid >= 0 && 830 if (ht_nodeid >= 0 &&
903 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 831 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
904 node = apicid_to_node[ht_nodeid]; 832 node = apicid_to_node[ht_nodeid];
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
908 } 836 }
909 numa_set_node(cpu, node); 837 numa_set_node(cpu, node);
910 838
911 printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", 839 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
912 cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
913#endif 840#endif
914#endif 841#endif
915} 842}
916 843
917static int __init init_amd(struct cpuinfo_x86 *c) 844static void __init init_amd(struct cpuinfo_x86 *c)
918{ 845{
919 int r;
920 unsigned level; 846 unsigned level;
921 847
922#ifdef CONFIG_SMP 848#ifdef CONFIG_SMP
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
949 if (c->x86 >= 6) 875 if (c->x86 >= 6)
950 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); 876 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
951 877
952 r = get_model_name(c); 878 level = get_model_name(c);
953 if (!r) { 879 if (!level) {
954 switch (c->x86) { 880 switch (c->x86) {
955 case 15: 881 case 15:
956 /* Should distinguish Models here, but this is only 882 /* Should distinguish Models here, but this is only
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
965 if (c->x86_power & (1<<8)) 891 if (c->x86_power & (1<<8))
966 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 892 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
967 893
968 if (c->extended_cpuid_level >= 0x80000008) { 894 /* Multi core CPU? */
969 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 895 if (c->extended_cpuid_level >= 0x80000008)
970
971 amd_detect_cmp(c); 896 amd_detect_cmp(c);
972 }
973 897
974 return r; 898 /* Fix cpuid4 emulation for more */
899 num_cache_leaves = 3;
975} 900}
976 901
977static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 902static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
979#ifdef CONFIG_SMP 904#ifdef CONFIG_SMP
980 u32 eax, ebx, ecx, edx; 905 u32 eax, ebx, ecx, edx;
981 int index_msb, core_bits; 906 int index_msb, core_bits;
982 int cpu = smp_processor_id();
983 907
984 cpuid(1, &eax, &ebx, &ecx, &edx); 908 cpuid(1, &eax, &ebx, &ecx, &edx);
985 909
986 910
987 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 911 if (!cpu_has(c, X86_FEATURE_HT))
988 return; 912 return;
913 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
914 goto out;
989 915
990 smp_num_siblings = (ebx & 0xff0000) >> 16; 916 smp_num_siblings = (ebx & 0xff0000) >> 16;
991 917
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1000 } 926 }
1001 927
1002 index_msb = get_count_order(smp_num_siblings); 928 index_msb = get_count_order(smp_num_siblings);
1003 phys_proc_id[cpu] = phys_pkg_id(index_msb); 929 c->phys_proc_id = phys_pkg_id(index_msb);
1004
1005 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
1006 phys_proc_id[cpu]);
1007 930
1008 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 931 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
1009 932
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1011 934
1012 core_bits = get_count_order(c->x86_max_cores); 935 core_bits = get_count_order(c->x86_max_cores);
1013 936
1014 cpu_core_id[cpu] = phys_pkg_id(index_msb) & 937 c->cpu_core_id = phys_pkg_id(index_msb) &
1015 ((1 << core_bits) - 1); 938 ((1 << core_bits) - 1);
1016
1017 if (c->x86_max_cores > 1)
1018 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
1019 cpu_core_id[cpu]);
1020 } 939 }
940out:
941 if ((c->x86_max_cores * smp_num_siblings) > 1) {
942 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
943 printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
944 }
945
1021#endif 946#endif
1022} 947}
1023 948
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1026 */ 951 */
1027static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) 952static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
1028{ 953{
1029 unsigned int eax; 954 unsigned int eax, t;
1030 955
1031 if (c->cpuid_level < 4) 956 if (c->cpuid_level < 4)
1032 return 1; 957 return 1;
1033 958
1034 __asm__("cpuid" 959 cpuid_count(4, 0, &eax, &t, &t, &t);
1035 : "=a" (eax)
1036 : "0" (4), "c" (0)
1037 : "bx", "dx");
1038 960
1039 if (eax & 0x1f) 961 if (eax & 0x1f)
1040 return ((eax >> 26) + 1); 962 return ((eax >> 26) + 1);
@@ -1047,16 +969,17 @@ static void srat_detect_node(void)
1047#ifdef CONFIG_NUMA 969#ifdef CONFIG_NUMA
1048 unsigned node; 970 unsigned node;
1049 int cpu = smp_processor_id(); 971 int cpu = smp_processor_id();
972 int apicid = hard_smp_processor_id();
1050 973
1051 /* Don't do the funky fallback heuristics the AMD version employs 974 /* Don't do the funky fallback heuristics the AMD version employs
1052 for now. */ 975 for now. */
1053 node = apicid_to_node[hard_smp_processor_id()]; 976 node = apicid_to_node[apicid];
1054 if (node == NUMA_NO_NODE) 977 if (node == NUMA_NO_NODE)
1055 node = first_node(node_online_map); 978 node = first_node(node_online_map);
1056 numa_set_node(cpu, node); 979 numa_set_node(cpu, node);
1057 980
1058 if (acpi_numa > 0) 981 if (acpi_numa > 0)
1059 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); 982 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
1060#endif 983#endif
1061} 984}
1062 985
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1066 unsigned n; 989 unsigned n;
1067 990
1068 init_intel_cacheinfo(c); 991 init_intel_cacheinfo(c);
992 if (c->cpuid_level > 9 ) {
993 unsigned eax = cpuid_eax(10);
994 /* Check for version and the number of counters */
995 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
996 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
997 }
998
1069 n = c->extended_cpuid_level; 999 n = c->extended_cpuid_level;
1070 if (n >= 0x80000008) { 1000 if (n >= 0x80000008) {
1071 unsigned eax = cpuid_eax(0x80000008); 1001 unsigned eax = cpuid_eax(0x80000008);
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1157 } 1087 }
1158 1088
1159#ifdef CONFIG_SMP 1089#ifdef CONFIG_SMP
1160 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; 1090 c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
1161#endif 1091#endif
1162} 1092}
1163 1093
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1284 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1214 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1285 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 1215 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1286 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, 1216 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1287 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", 1217 NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
1288 1218
1289 /* Transmeta-defined */ 1219 /* Transmeta-defined */
1290 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 1220 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1295 /* Other (Linux-defined) */ 1225 /* Other (Linux-defined) */
1296 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, 1226 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1297 "constant_tsc", NULL, NULL, 1227 "constant_tsc", NULL, NULL,
1298 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1228 "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1299 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1229 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1300 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1230 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1301 1231
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1365#ifdef CONFIG_SMP 1295#ifdef CONFIG_SMP
1366 if (smp_num_siblings * c->x86_max_cores > 1) { 1296 if (smp_num_siblings * c->x86_max_cores > 1) {
1367 int cpu = c - cpu_data; 1297 int cpu = c - cpu_data;
1368 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1298 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
1369 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); 1299 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1370 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1300 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
1371 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 1301 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1372 } 1302 }
1373#endif 1303#endif
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
1441 .show = show_cpuinfo, 1371 .show = show_cpuinfo,
1442}; 1372};
1443 1373
1444#ifdef CONFIG_INPUT_PCSPKR 1374#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
1445#include <linux/platform_device.h> 1375#include <linux/platform_device.h>
1446static __init int add_pcspkr(void) 1376static __init int add_pcspkr(void)
1447{ 1377{
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 8a691fa6d393..f5934cb4a2b6 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 1995 Linus Torvalds 3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. 4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog. 5 * See setup.c for older changelog.
6 * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
7 */ 6 */
8#include <linux/config.h> 7#include <linux/config.h>
9#include <linux/init.h> 8#include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
31cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 30cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
32 31
33struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; 32struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
33EXPORT_SYMBOL(_cpu_pda);
34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; 34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
35 35
36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); 38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
39 39
40unsigned long __supported_pte_mask __read_mostly = ~0UL; 40unsigned long __supported_pte_mask __read_mostly = ~0UL;
41EXPORT_SYMBOL(__supported_pte_mask);
41static int do_not_nx __cpuinitdata = 0; 42static int do_not_nx __cpuinitdata = 0;
42 43
43/* noexec=on|off 44/* noexec=on|off
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index e5f5ce7909a3..28161170fb0a 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -7,8 +7,6 @@
7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
9 * 2000-2002 x86-64 support by Andi Kleen 9 * 2000-2002 x86-64 support by Andi Kleen
10 *
11 * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
12 */ 10 */
13 11
14#include <linux/sched.h> 12#include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
239 rsp = regs->rsp - 128; 237 rsp = regs->rsp - 128;
240 238
241 /* This is the X/Open sanctioned signal stack switching. */ 239 /* This is the X/Open sanctioned signal stack switching. */
242 /* RED-PEN: redzone on that stack? */
243 if (ka->sa.sa_flags & SA_ONSTACK) { 240 if (ka->sa.sa_flags & SA_ONSTACK) {
244 if (sas_ss_flags(rsp) == 0) 241 if (sas_ss_flags(rsp) == 0)
245 rsp = current->sas_ss_sp + current->sas_ss_size; 242 rsp = current->sas_ss_sp + current->sas_ss_size;
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4a6628b14d99..8188bae9c6d5 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
225 preempt_enable(); 225 preempt_enable();
226} 226}
227EXPORT_SYMBOL(flush_tlb_current_task);
227 228
228void flush_tlb_mm (struct mm_struct * mm) 229void flush_tlb_mm (struct mm_struct * mm)
229{ 230{
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
244 245
245 preempt_enable(); 246 preempt_enable();
246} 247}
248EXPORT_SYMBOL(flush_tlb_mm);
247 249
248void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) 250void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
249{ 251{
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
266 268
267 preempt_enable(); 269 preempt_enable();
268} 270}
271EXPORT_SYMBOL(flush_tlb_page);
269 272
270static void do_flush_tlb_all(void* info) 273static void do_flush_tlb_all(void* info)
271{ 274{
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
443 spin_unlock(&call_lock); 446 spin_unlock(&call_lock);
444 return 0; 447 return 0;
445} 448}
449EXPORT_SYMBOL(smp_call_function);
446 450
447void smp_stop_cpu(void) 451void smp_stop_cpu(void)
448{ 452{
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
460{ 464{
461 smp_stop_cpu(); 465 smp_stop_cpu();
462 for (;;) 466 for (;;)
463 asm("hlt"); 467 halt();
464} 468}
465 469
466void smp_send_stop(void) 470void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
520 524
521int safe_smp_processor_id(void) 525int safe_smp_processor_id(void)
522{ 526{
523 int apicid, i; 527 unsigned apicid, i;
524 528
525 if (disable_apic) 529 if (disable_apic)
526 return 0; 530 return 0;
527 531
528 apicid = hard_smp_processor_id(); 532 apicid = hard_smp_processor_id();
529 if (x86_cpu_to_apicid[apicid] == apicid) 533 if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
530 return apicid; 534 return apicid;
531 535
532 for (i = 0; i < NR_CPUS; ++i) { 536 for (i = 0; i < NR_CPUS; ++i) {
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 71a7222cf9ce..4e9755179ecf 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -63,13 +63,11 @@
63 63
64/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 65int smp_num_siblings = 1;
66/* Package ID of each logical CPU */ 66EXPORT_SYMBOL(smp_num_siblings);
67u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68/* core ID of each logical CPU */
69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
70 67
71/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
72u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
70EXPORT_SYMBOL(cpu_llc_id);
73 71
74/* Bitmask of currently online CPUs */ 72/* Bitmask of currently online CPUs */
75cpumask_t cpu_online_map __read_mostly; 73cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
82 */ 80 */
83cpumask_t cpu_callin_map; 81cpumask_t cpu_callin_map;
84cpumask_t cpu_callout_map; 82cpumask_t cpu_callout_map;
83EXPORT_SYMBOL(cpu_callout_map);
85 84
86cpumask_t cpu_possible_map; 85cpumask_t cpu_possible_map;
87EXPORT_SYMBOL(cpu_possible_map); 86EXPORT_SYMBOL(cpu_possible_map);
88 87
89/* Per CPU bogomips and other parameters */ 88/* Per CPU bogomips and other parameters */
90struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 89struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
90EXPORT_SYMBOL(cpu_data);
91 91
92/* Set when the idlers are all forked */ 92/* Set when the idlers are all forked */
93int smp_threads_ready; 93int smp_threads_ready;
94 94
95/* representing HT siblings of each logical CPU */ 95/* representing HT siblings of each logical CPU */
96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
97EXPORT_SYMBOL(cpu_sibling_map);
97 98
98/* representing HT and core siblings of each logical CPU */ 99/* representing HT and core siblings of each logical CPU */
99cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 100cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
472 473
473 if (smp_num_siblings > 1) { 474 if (smp_num_siblings > 1) {
474 for_each_cpu_mask(i, cpu_sibling_setup_map) { 475 for_each_cpu_mask(i, cpu_sibling_setup_map) {
475 if (phys_proc_id[cpu] == phys_proc_id[i] && 476 if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
476 cpu_core_id[cpu] == cpu_core_id[i]) { 477 c[cpu].cpu_core_id == c[i].cpu_core_id) {
477 cpu_set(i, cpu_sibling_map[cpu]); 478 cpu_set(i, cpu_sibling_map[cpu]);
478 cpu_set(cpu, cpu_sibling_map[i]); 479 cpu_set(cpu, cpu_sibling_map[i]);
479 cpu_set(i, cpu_core_map[cpu]); 480 cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
500 cpu_set(i, c[cpu].llc_shared_map); 501 cpu_set(i, c[cpu].llc_shared_map);
501 cpu_set(cpu, c[i].llc_shared_map); 502 cpu_set(cpu, c[i].llc_shared_map);
502 } 503 }
503 if (phys_proc_id[cpu] == phys_proc_id[i]) { 504 if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
504 cpu_set(i, cpu_core_map[cpu]); 505 cpu_set(i, cpu_core_map[cpu]);
505 cpu_set(cpu, cpu_core_map[i]); 506 cpu_set(cpu, cpu_core_map[i]);
506 /* 507 /*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
797 } 798 }
798 799
799 800
801 alternatives_smp_switch(1);
802
800 c_idle.idle = get_idle_for_cpu(cpu); 803 c_idle.idle = get_idle_for_cpu(cpu);
801 804
802 if (c_idle.idle) { 805 if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
1199 cpu_clear(cpu, cpu_sibling_map[sibling]); 1202 cpu_clear(cpu, cpu_sibling_map[sibling]);
1200 cpus_clear(cpu_sibling_map[cpu]); 1203 cpus_clear(cpu_sibling_map[cpu]);
1201 cpus_clear(cpu_core_map[cpu]); 1204 cpus_clear(cpu_core_map[cpu]);
1202 phys_proc_id[cpu] = BAD_APICID; 1205 c[cpu].phys_proc_id = 0;
1203 cpu_core_id[cpu] = BAD_APICID; 1206 c[cpu].cpu_core_id = 0;
1204 cpu_clear(cpu, cpu_sibling_setup_map); 1207 cpu_clear(cpu, cpu_sibling_setup_map);
1205} 1208}
1206 1209
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
1259 /* They ack this in play_dead by setting CPU_DEAD */ 1262 /* They ack this in play_dead by setting CPU_DEAD */
1260 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1263 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1261 printk ("CPU %d is now offline\n", cpu); 1264 printk ("CPU %d is now offline\n", cpu);
1265 if (1 == num_online_cpus())
1266 alternatives_smp_switch(0);
1262 return; 1267 return;
1263 } 1268 }
1264 msleep(100); 1269 msleep(100);
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644
index 000000000000..8d4c67f61b8e
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
1/*
2 * Derived from arch/powerpc/platforms/pseries/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/types.h>
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/spinlock.h>
27#include <linux/string.h>
28#include <linux/pci.h>
29#include <linux/dma-mapping.h>
30#include <linux/bootmem.h>
31#include <asm/tce.h>
32#include <asm/calgary.h>
33#include <asm/proto.h>
34
35/* flush a tce at 'tceaddr' to main memory */
36static inline void flush_tce(void* tceaddr)
37{
38 /* a single tce can't cross a cache line */
39 if (cpu_has_clflush)
40 asm volatile("clflush (%0)" :: "r" (tceaddr));
41 else
42 asm volatile("wbinvd":::"memory");
43}
44
45void tce_build(struct iommu_table *tbl, unsigned long index,
46 unsigned int npages, unsigned long uaddr, int direction)
47{
48 u64* tp;
49 u64 t;
50 u64 rpn;
51
52 t = (1 << TCE_READ_SHIFT);
53 if (direction != DMA_TO_DEVICE)
54 t |= (1 << TCE_WRITE_SHIFT);
55
56 tp = ((u64*)tbl->it_base) + index;
57
58 while (npages--) {
59 rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
60 t &= ~TCE_RPN_MASK;
61 t |= (rpn << TCE_RPN_SHIFT);
62
63 *tp = cpu_to_be64(t);
64 flush_tce(tp);
65
66 uaddr += PAGE_SIZE;
67 tp++;
68 }
69}
70
71void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
72{
73 u64* tp;
74
75 tp = ((u64*)tbl->it_base) + index;
76
77 while (npages--) {
78 *tp = cpu_to_be64(0);
79 flush_tce(tp);
80 tp++;
81 }
82}
83
84static inline unsigned int table_size_to_number_of_entries(unsigned char size)
85{
86 /*
87 * size is the order of the table, 0-7
88 * smallest table is 8K entries, so shift result by 13 to
89 * multiply by 8K
90 */
91 return (1 << size) << 13;
92}
93
94static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
95{
96 unsigned int bitmapsz;
97 unsigned int tce_table_index;
98 unsigned long bmppages;
99 int ret;
100
101 tbl->it_busno = dev->bus->number;
102
103 /* set the tce table size - measured in entries */
104 tbl->it_size = table_size_to_number_of_entries(specified_table_size);
105
106 tce_table_index = bus_to_phb(tbl->it_busno);
107 tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
108 if (!tbl->it_base) {
109 printk(KERN_ERR "Calgary: iommu_table_setparms: "
110 "no table allocated?!\n");
111 ret = -ENOMEM;
112 goto done;
113 }
114
115 /*
116 * number of bytes needed for the bitmap size in number of
117 * entries; we need one bit per entry
118 */
119 bitmapsz = tbl->it_size / BITS_PER_BYTE;
120 bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
121 if (!bmppages) {
122 printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
123 ret = -ENOMEM;
124 goto done;
125 }
126
127 tbl->it_map = (unsigned long*)bmppages;
128
129 memset(tbl->it_map, 0, bitmapsz);
130
131 tbl->it_hint = 0;
132
133 spin_lock_init(&tbl->it_lock);
134
135 return 0;
136
137done:
138 return ret;
139}
140
141int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
142{
143 struct iommu_table *tbl;
144 int ret;
145
146 if (dev->sysdata) {
147 printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
148 dev, dev->sysdata);
149 BUG();
150 }
151
152 tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
153 if (!tbl) {
154 printk(KERN_ERR "Calgary: error allocating iommu_table\n");
155 ret = -ENOMEM;
156 goto done;
157 }
158
159 ret = tce_table_setparms(dev, tbl);
160 if (ret)
161 goto free_tbl;
162
163 tce_free(tbl, 0, tbl->it_size);
164
165 tbl->bbar = bbar;
166
167 /*
168 * NUMA is already using the bus's sysdata pointer, so we use
169 * the bus's pci_dev's sysdata instead.
170 */
171 dev->sysdata = tbl;
172
173 return 0;
174
175free_tbl:
176 kfree(tbl);
177done:
178 return ret;
179}
180
181void* alloc_tce_table(void)
182{
183 unsigned int size;
184
185 size = table_size_to_number_of_entries(specified_table_size);
186 size *= TCE_ENTRY_SIZE;
187
188 return __alloc_bootmem_low(size, size, 0);
189}
190
191void free_tce_table(void *tbl)
192{
193 unsigned int size;
194
195 if (!tbl)
196 return;
197
198 size = table_size_to_number_of_entries(specified_table_size);
199 size *= TCE_ENTRY_SIZE;
200
201 free_bootmem(__pa(tbl), size);
202}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7392570f975d..ebbee6f59ff5 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -8,7 +8,7 @@
8 * Copyright (c) 1995 Markus Kuhn 8 * Copyright (c) 1995 Markus Kuhn
9 * Copyright (c) 1996 Ingo Molnar 9 * Copyright (c) 1996 Ingo Molnar
10 * Copyright (c) 1998 Andrea Arcangeli 10 * Copyright (c) 1998 Andrea Arcangeli
11 * Copyright (c) 2002 Vojtech Pavlik 11 * Copyright (c) 2002,2006 Vojtech Pavlik
12 * Copyright (c) 2003 Andi Kleen 12 * Copyright (c) 2003 Andi Kleen
13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c 13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
14 */ 14 */
@@ -51,14 +51,21 @@ extern int using_apic_timer;
51static char *time_init_gtod(void); 51static char *time_init_gtod(void);
52 52
53DEFINE_SPINLOCK(rtc_lock); 53DEFINE_SPINLOCK(rtc_lock);
54EXPORT_SYMBOL(rtc_lock);
54DEFINE_SPINLOCK(i8253_lock); 55DEFINE_SPINLOCK(i8253_lock);
55 56
56int nohpet __initdata = 0; 57int nohpet __initdata = 0;
57static int notsc __initdata = 0; 58static int notsc __initdata = 0;
58 59
59#undef HPET_HACK_ENABLE_DANGEROUS 60#define USEC_PER_TICK (USEC_PER_SEC / HZ)
61#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
62#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
63
64#define NS_SCALE 10 /* 2^10, carefully chosen */
65#define US_SCALE 32 /* 2^32, arbitralrily chosen */
60 66
61unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 67unsigned int cpu_khz; /* TSC clocks / usec, not used here */
68EXPORT_SYMBOL(cpu_khz);
62static unsigned long hpet_period; /* fsecs / HPET clock */ 69static unsigned long hpet_period; /* fsecs / HPET clock */
63unsigned long hpet_tick; /* HPET clocks / interrupt */ 70unsigned long hpet_tick; /* HPET clocks / interrupt */
64int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ 71int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
90 t = get_cycles_sync(); 97 t = get_cycles_sync();
91 if (t < vxtime.last_tsc) 98 if (t < vxtime.last_tsc)
92 t = vxtime.last_tsc; /* hack */ 99 t = vxtime.last_tsc; /* hack */
93 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; 100 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
94 return x; 101 return x;
95} 102}
96 103
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void)
98{ 105{
99 /* cap counter read to one tick to avoid inconsistencies */ 106 /* cap counter read to one tick to avoid inconsistencies */
100 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; 107 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
101 return (min(counter,hpet_tick) * vxtime.quot) >> 32; 108 return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
102} 109}
103 110
104unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; 111unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv)
118 seq = read_seqbegin(&xtime_lock); 125 seq = read_seqbegin(&xtime_lock);
119 126
120 sec = xtime.tv_sec; 127 sec = xtime.tv_sec;
121 usec = xtime.tv_nsec / 1000; 128 usec = xtime.tv_nsec / NSEC_PER_USEC;
122 129
123 /* i386 does some correction here to keep the clock 130 /* i386 does some correction here to keep the clock
124 monotonous even when ntpd is fixing drift. 131 monotonous even when ntpd is fixing drift.
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv)
129 in arch/x86_64/kernel/vsyscall.c and export all needed 136 in arch/x86_64/kernel/vsyscall.c and export all needed
130 variables in vmlinux.lds. -AK */ 137 variables in vmlinux.lds. -AK */
131 138
132 t = (jiffies - wall_jiffies) * (1000000L / HZ) + 139 t = (jiffies - wall_jiffies) * USEC_PER_TICK +
133 do_gettimeoffset(); 140 do_gettimeoffset();
134 usec += t; 141 usec += t;
135 142
136 } while (read_seqretry(&xtime_lock, seq)); 143 } while (read_seqretry(&xtime_lock, seq));
137 144
138 tv->tv_sec = sec + usec / 1000000; 145 tv->tv_sec = sec + usec / USEC_PER_SEC;
139 tv->tv_usec = usec % 1000000; 146 tv->tv_usec = usec % USEC_PER_SEC;
140} 147}
141 148
142EXPORT_SYMBOL(do_gettimeofday); 149EXPORT_SYMBOL(do_gettimeofday);
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv)
157 164
158 write_seqlock_irq(&xtime_lock); 165 write_seqlock_irq(&xtime_lock);
159 166
160 nsec -= do_gettimeoffset() * 1000 + 167 nsec -= do_gettimeoffset() * NSEC_PER_USEC +
161 (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); 168 (jiffies - wall_jiffies) * NSEC_PER_TICK;
162 169
163 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); 170 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
164 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); 171 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void)
288 this_offset = hpet_readl(HPET_COUNTER); 295 this_offset = hpet_readl(HPET_COUNTER);
289 } while (read_seqretry(&xtime_lock, seq)); 296 } while (read_seqretry(&xtime_lock, seq));
290 offset = (this_offset - last_offset); 297 offset = (this_offset - last_offset);
291 offset *= (NSEC_PER_SEC/HZ) / hpet_tick; 298 offset *= NSEC_PER_TICK / hpet_tick;
292 } else { 299 } else {
293 do { 300 do {
294 seq = read_seqbegin(&xtime_lock); 301 seq = read_seqbegin(&xtime_lock);
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void)
297 base = monotonic_base; 304 base = monotonic_base;
298 } while (read_seqretry(&xtime_lock, seq)); 305 } while (read_seqretry(&xtime_lock, seq));
299 this_offset = get_cycles_sync(); 306 this_offset = get_cycles_sync();
300 offset = (this_offset - last_offset)*1000 / cpu_khz; 307 /* FIXME: 1000 or 1000000? */
308 offset = (this_offset - last_offset)*1000 / cpu_khz;
301 } 309 }
302 return base + offset; 310 return base + offset;
303} 311}
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs)
382 } 390 }
383 391
384 monotonic_base += 392 monotonic_base +=
385 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 393 (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
386 394
387 vxtime.last = offset; 395 vxtime.last = offset;
388#ifdef CONFIG_X86_PM_TIMER 396#ifdef CONFIG_X86_PM_TIMER
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs)
391#endif 399#endif
392 } else { 400 } else {
393 offset = (((tsc - vxtime.last_tsc) * 401 offset = (((tsc - vxtime.last_tsc) *
394 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 402 vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
395 403
396 if (offset < 0) 404 if (offset < 0)
397 offset = 0; 405 offset = 0;
398 406
399 if (offset > (USEC_PER_SEC / HZ)) { 407 if (offset > USEC_PER_TICK) {
400 lost = offset / (USEC_PER_SEC / HZ); 408 lost = offset / USEC_PER_TICK;
401 offset %= (USEC_PER_SEC / HZ); 409 offset %= USEC_PER_TICK;
402 } 410 }
403 411
404 monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; 412 /* FIXME: 1000 or 1000000? */
413 monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
405 414
406 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; 415 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
407 416
408 if ((((tsc - vxtime.last_tsc) * 417 if ((((tsc - vxtime.last_tsc) *
409 vxtime.tsc_quot) >> 32) < offset) 418 vxtime.tsc_quot) >> US_SCALE) < offset)
410 vxtime.last_tsc = tsc - 419 vxtime.last_tsc = tsc -
411 (((long) offset << 32) / vxtime.tsc_quot) - 1; 420 (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
412 } 421 }
413 422
414 if (lost > 0) { 423 if (lost > 0) {
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
468} 477}
469 478
470static unsigned int cyc2ns_scale __read_mostly; 479static unsigned int cyc2ns_scale __read_mostly;
471#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
472 480
473static inline void set_cyc2ns_scale(unsigned long cpu_khz) 481static inline void set_cyc2ns_scale(unsigned long cpu_khz)
474{ 482{
475 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; 483 cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
476} 484}
477 485
478static inline unsigned long long cycles_2_ns(unsigned long long cyc) 486static inline unsigned long long cycles_2_ns(unsigned long long cyc)
479{ 487{
480 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; 488 return (cyc * cyc2ns_scale) >> NS_SCALE;
481} 489}
482 490
483unsigned long long sched_clock(void) 491unsigned long long sched_clock(void)
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void)
490 Disadvantage is a small drift between CPUs in some configurations, 498 Disadvantage is a small drift between CPUs in some configurations,
491 but that should be tolerable. */ 499 but that should be tolerable. */
492 if (__vxtime.mode == VXTIME_HPET) 500 if (__vxtime.mode == VXTIME_HPET)
493 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32; 501 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
494#endif 502#endif
495 503
496 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, 504 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
633 641
634 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 642 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
635 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 643 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
636 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 644 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
637 } 645 }
638 646
639 set_cyc2ns_scale(cpu_khz_ref); 647 set_cyc2ns_scale(cpu_khz_ref);
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick)
789 if (hpet_use_timer) { 797 if (hpet_use_timer) {
790 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | 798 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
791 HPET_TN_32BIT, HPET_T0_CFG); 799 HPET_TN_32BIT, HPET_T0_CFG);
792 hpet_writel(hpet_tick, HPET_T0_CMP); 800 hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
793 hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ 801 hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
794 cfg |= HPET_CFG_LEGACY; 802 cfg |= HPET_CFG_LEGACY;
795 } 803 }
796/* 804/*
@@ -825,8 +833,7 @@ static int hpet_init(void)
825 if (hpet_period < 100000 || hpet_period > 100000000) 833 if (hpet_period < 100000 || hpet_period > 100000000)
826 return -1; 834 return -1;
827 835
828 hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / 836 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
829 hpet_period;
830 837
831 hpet_use_timer = (id & HPET_ID_LEGSUP); 838 hpet_use_timer = (id & HPET_ID_LEGSUP);
832 839
@@ -890,18 +897,6 @@ void __init time_init(void)
890 char *timename; 897 char *timename;
891 char *gtod; 898 char *gtod;
892 899
893#ifdef HPET_HACK_ENABLE_DANGEROUS
894 if (!vxtime.hpet_address) {
895 printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
896 "manually!\n");
897 outl(0x800038a0, 0xcf8);
898 outl(0xff000001, 0xcfc);
899 outl(0x800038a0, 0xcf8);
900 vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
901 printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
902 "at %#lx.\n", vxtime.hpet_address);
903 }
904#endif
905 if (nohpet) 900 if (nohpet)
906 vxtime.hpet_address = 0; 901 vxtime.hpet_address = 0;
907 902
@@ -912,7 +907,7 @@ void __init time_init(void)
912 -xtime.tv_sec, -xtime.tv_nsec); 907 -xtime.tv_sec, -xtime.tv_nsec);
913 908
914 if (!hpet_init()) 909 if (!hpet_init())
915 vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; 910 vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
916 else 911 else
917 vxtime.hpet_address = 0; 912 vxtime.hpet_address = 0;
918 913
@@ -941,8 +936,8 @@ void __init time_init(void)
941 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); 936 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
942 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 937 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
943 cpu_khz / 1000, cpu_khz % 1000); 938 cpu_khz / 1000, cpu_khz % 1000);
944 vxtime.quot = (1000000L << 32) / vxtime_hz; 939 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
945 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 940 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
946 vxtime.last_tsc = get_cycles_sync(); 941 vxtime.last_tsc = get_cycles_sync();
947 setup_irq(0, &irq0); 942 setup_irq(0, &irq0);
948 943
@@ -956,10 +951,10 @@ void __init time_init(void)
956__cpuinit int unsynchronized_tsc(void) 951__cpuinit int unsynchronized_tsc(void)
957{ 952{
958#ifdef CONFIG_SMP 953#ifdef CONFIG_SMP
959 if (oem_force_hpet_timer()) 954 if (apic_is_clustered_box())
960 return 1; 955 return 1;
961 /* Intel systems are normally all synchronized. Exceptions 956 /* Intel systems are normally all synchronized. Exceptions
962 are handled in the OEM check above. */ 957 are handled in the check above. */
963 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 958 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
964 return 0; 959 return 0;
965#endif 960#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index cea335e8746c..3d11a2fe45b7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -6,8 +6,6 @@
6 * 6 *
7 * Pentium III FXSR, SSE support 7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000 8 * Gareth Hughes <gareth@valinux.com>, May 2000
9 *
10 * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
11 */ 9 */
12 10
13/* 11/*
@@ -31,6 +29,7 @@
31#include <linux/nmi.h> 29#include <linux/nmi.h>
32#include <linux/kprobes.h> 30#include <linux/kprobes.h>
33#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h>
34 33
35#include <asm/system.h> 34#include <asm/system.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -41,7 +40,7 @@
41#include <asm/i387.h> 40#include <asm/i387.h>
42#include <asm/kdebug.h> 41#include <asm/kdebug.h>
43#include <asm/processor.h> 42#include <asm/processor.h>
44 43#include <asm/unwind.h>
45#include <asm/smp.h> 44#include <asm/smp.h>
46#include <asm/pgalloc.h> 45#include <asm/pgalloc.h>
47#include <asm/pda.h> 46#include <asm/pda.h>
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void);
71asmlinkage void spurious_interrupt_bug(void); 70asmlinkage void spurious_interrupt_bug(void);
72 71
73ATOMIC_NOTIFIER_HEAD(die_chain); 72ATOMIC_NOTIFIER_HEAD(die_chain);
73EXPORT_SYMBOL(die_chain);
74 74
75int register_die_notifier(struct notifier_block *nb) 75int register_die_notifier(struct notifier_block *nb)
76{ 76{
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
107 preempt_enable_no_resched(); 107 preempt_enable_no_resched();
108} 108}
109 109
110static int kstack_depth_to_print = 10; 110static int kstack_depth_to_print = 12;
111static int call_trace = 1;
111 112
112#ifdef CONFIG_KALLSYMS 113#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 114#include <linux/kallsyms.h>
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
191 return NULL; 192 return NULL;
192} 193}
193 194
195static int show_trace_unwind(struct unwind_frame_info *info, void *context)
196{
197 int i = 11, n = 0;
198
199 while (unwind(info) == 0 && UNW_PC(info)) {
200 ++n;
201 if (i > 50) {
202 printk("\n ");
203 i = 7;
204 } else
205 i += printk(" ");
206 i += printk_address(UNW_PC(info));
207 if (arch_unw_user_mode(info))
208 break;
209 }
210 printk("\n");
211 return n;
212}
213
194/* 214/*
195 * x86-64 can have upto three kernel stacks: 215 * x86-64 can have upto three kernel stacks:
196 * process stack 216 * process stack
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
198 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 218 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
199 */ 219 */
200 220
201void show_trace(unsigned long *stack) 221void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
202{ 222{
203 const unsigned cpu = safe_smp_processor_id(); 223 const unsigned cpu = safe_smp_processor_id();
204 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 224 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
205 int i; 225 int i = 11;
206 unsigned used = 0; 226 unsigned used = 0;
207 227
208 printk("\nCall Trace:"); 228 printk("\nCall Trace:");
209 229
230 if (!tsk)
231 tsk = current;
232
233 if (call_trace >= 0) {
234 int unw_ret = 0;
235 struct unwind_frame_info info;
236
237 if (regs) {
238 if (unwind_init_frame_info(&info, tsk, regs) == 0)
239 unw_ret = show_trace_unwind(&info, NULL);
240 } else if (tsk == current)
241 unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
242 else {
243 if (unwind_init_blocked(&info, tsk) == 0)
244 unw_ret = show_trace_unwind(&info, NULL);
245 }
246 if (unw_ret > 0) {
247 if (call_trace > 0)
248 return;
249 printk("Legacy call trace:");
250 i = 18;
251 }
252 }
253
210#define HANDLE_STACK(cond) \ 254#define HANDLE_STACK(cond) \
211 do while (cond) { \ 255 do while (cond) { \
212 unsigned long addr = *stack++; \ 256 unsigned long addr = *stack++; \
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack)
229 } \ 273 } \
230 } while (0) 274 } while (0)
231 275
232 for(i = 11; ; ) { 276 for(; ; ) {
233 const char *id; 277 const char *id;
234 unsigned long *estack_end; 278 unsigned long *estack_end;
235 estack_end = in_exception_stack(cpu, (unsigned long)stack, 279 estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack)
264 printk("\n"); 308 printk("\n");
265} 309}
266 310
267void show_stack(struct task_struct *tsk, unsigned long * rsp) 311static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
268{ 312{
269 unsigned long *stack; 313 unsigned long *stack;
270 int i; 314 int i;
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
298 printk("%016lx ", *stack++); 342 printk("%016lx ", *stack++);
299 touch_nmi_watchdog(); 343 touch_nmi_watchdog();
300 } 344 }
301 show_trace((unsigned long *)rsp); 345 show_trace(tsk, regs, rsp);
346}
347
348void show_stack(struct task_struct *tsk, unsigned long * rsp)
349{
350 _show_stack(tsk, NULL, rsp);
302} 351}
303 352
304/* 353/*
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
307void dump_stack(void) 356void dump_stack(void)
308{ 357{
309 unsigned long dummy; 358 unsigned long dummy;
310 show_trace(&dummy); 359 show_trace(NULL, NULL, &dummy);
311} 360}
312 361
313EXPORT_SYMBOL(dump_stack); 362EXPORT_SYMBOL(dump_stack);
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs)
334 if (in_kernel) { 383 if (in_kernel) {
335 384
336 printk("Stack: "); 385 printk("Stack: ");
337 show_stack(NULL, (unsigned long*)rsp); 386 _show_stack(NULL, regs, (unsigned long*)rsp);
338 387
339 printk("\nCode: "); 388 printk("\nCode: ");
340 if (regs->rip < PAGE_OFFSET) 389 if (regs->rip < PAGE_OFFSET)
@@ -383,6 +432,7 @@ void out_of_line_bug(void)
383{ 432{
384 BUG(); 433 BUG();
385} 434}
435EXPORT_SYMBOL(out_of_line_bug);
386#endif 436#endif
387 437
388static DEFINE_SPINLOCK(die_lock); 438static DEFINE_SPINLOCK(die_lock);
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s)
1012} 1062}
1013__setup("kstack=", kstack_setup); 1063__setup("kstack=", kstack_setup);
1014 1064
1065static int __init call_trace_setup(char *s)
1066{
1067 if (strcmp(s, "old") == 0)
1068 call_trace = -1;
1069 else if (strcmp(s, "both") == 0)
1070 call_trace = 0;
1071 else if (strcmp(s, "new") == 0)
1072 call_trace = 1;
1073 return 1;
1074}
1075__setup("call_trace=", call_trace_setup);
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b81f473c4a19..1c6a5f322919 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -45,6 +45,15 @@ SECTIONS
45 45
46 RODATA 46 RODATA
47 47
48#ifdef CONFIG_STACK_UNWIND
49 . = ALIGN(8);
50 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
51 __start_unwind = .;
52 *(.eh_frame)
53 __end_unwind = .;
54 }
55#endif
56
48 /* Data */ 57 /* Data */
49 .data : AT(ADDR(.data) - LOAD_OFFSET) { 58 .data : AT(ADDR(.data) - LOAD_OFFSET) {
50 *(.data) 59 *(.data)
@@ -131,6 +140,26 @@ SECTIONS
131 *(.data.page_aligned) 140 *(.data.page_aligned)
132 } 141 }
133 142
143 /* might get freed after init */
144 . = ALIGN(4096);
145 __smp_alt_begin = .;
146 __smp_alt_instructions = .;
147 .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
148 *(.smp_altinstructions)
149 }
150 __smp_alt_instructions_end = .;
151 . = ALIGN(8);
152 __smp_locks = .;
153 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
154 *(.smp_locks)
155 }
156 __smp_locks_end = .;
157 .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
158 *(.smp_altinstr_replacement)
159 }
160 . = ALIGN(4096);
161 __smp_alt_end = .;
162
134 . = ALIGN(4096); /* Init code and data */ 163 . = ALIGN(4096); /* Init code and data */
135 __init_begin = .; 164 __init_begin = .;
136 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 165 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 9468fb20b0bc..f603037df162 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t)
107 107
108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
109{ 109{
110 if (unlikely(!__sysctl_vsyscall)) 110 if (!__sysctl_vsyscall)
111 return gettimeofday(tv,tz); 111 return gettimeofday(tv,tz);
112 if (tv) 112 if (tv)
113 do_vgettimeofday(tv); 113 do_vgettimeofday(tv);
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
120 * unlikely */ 120 * unlikely */
121time_t __vsyscall(1) vtime(time_t *t) 121time_t __vsyscall(1) vtime(time_t *t)
122{ 122{
123 if (unlikely(!__sysctl_vsyscall)) 123 if (!__sysctl_vsyscall)
124 return time_syscall(t); 124 return time_syscall(t);
125 else if (t) 125 else if (t)
126 *t = __xtime.tv_sec; 126 *t = __xtime.tv_sec;
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 1def21c9f7cd..370952c4ff22 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -1,66 +1,21 @@
1/* Exports for assembly files.
2 All C exports should go in the respective C files. */
3
1#include <linux/config.h> 4#include <linux/config.h>
2#include <linux/module.h> 5#include <linux/module.h>
3#include <linux/smp.h> 6#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/sched.h>
6#include <linux/in6.h>
7#include <linux/interrupt.h>
8#include <linux/smp_lock.h>
9#include <linux/pm.h>
10#include <linux/pci.h>
11#include <linux/apm_bios.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/syscalls.h>
15#include <linux/tty.h>
16 7
17#include <asm/semaphore.h> 8#include <asm/semaphore.h>
18#include <asm/processor.h> 9#include <asm/processor.h>
19#include <asm/i387.h>
20#include <asm/uaccess.h> 10#include <asm/uaccess.h>
21#include <asm/checksum.h>
22#include <asm/io.h>
23#include <asm/delay.h>
24#include <asm/irq.h>
25#include <asm/mmx.h>
26#include <asm/desc.h>
27#include <asm/pgtable.h> 11#include <asm/pgtable.h>
28#include <asm/pgalloc.h>
29#include <asm/nmi.h>
30#include <asm/kdebug.h>
31#include <asm/unistd.h>
32#include <asm/tlbflush.h>
33#include <asm/kdebug.h>
34
35extern spinlock_t rtc_lock;
36 12
37#ifdef CONFIG_SMP
38extern void __write_lock_failed(rwlock_t *rw);
39extern void __read_lock_failed(rwlock_t *rw);
40#endif
41
42/* platform dependent support */
43EXPORT_SYMBOL(boot_cpu_data);
44//EXPORT_SYMBOL(dump_fpu);
45EXPORT_SYMBOL(__ioremap);
46EXPORT_SYMBOL(ioremap_nocache);
47EXPORT_SYMBOL(iounmap);
48EXPORT_SYMBOL(kernel_thread); 13EXPORT_SYMBOL(kernel_thread);
49EXPORT_SYMBOL(pm_idle);
50EXPORT_SYMBOL(pm_power_off);
51 14
52EXPORT_SYMBOL(__down_failed); 15EXPORT_SYMBOL(__down_failed);
53EXPORT_SYMBOL(__down_failed_interruptible); 16EXPORT_SYMBOL(__down_failed_interruptible);
54EXPORT_SYMBOL(__down_failed_trylock); 17EXPORT_SYMBOL(__down_failed_trylock);
55EXPORT_SYMBOL(__up_wakeup); 18EXPORT_SYMBOL(__up_wakeup);
56/* Networking helper routines. */
57EXPORT_SYMBOL(csum_partial_copy_nocheck);
58EXPORT_SYMBOL(ip_compute_csum);
59/* Delay loops */
60EXPORT_SYMBOL(__udelay);
61EXPORT_SYMBOL(__ndelay);
62EXPORT_SYMBOL(__delay);
63EXPORT_SYMBOL(__const_udelay);
64 19
65EXPORT_SYMBOL(__get_user_1); 20EXPORT_SYMBOL(__get_user_1);
66EXPORT_SYMBOL(__get_user_2); 21EXPORT_SYMBOL(__get_user_2);
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2);
71EXPORT_SYMBOL(__put_user_4); 26EXPORT_SYMBOL(__put_user_4);
72EXPORT_SYMBOL(__put_user_8); 27EXPORT_SYMBOL(__put_user_8);
73 28
74EXPORT_SYMBOL(strncpy_from_user);
75EXPORT_SYMBOL(__strncpy_from_user);
76EXPORT_SYMBOL(clear_user);
77EXPORT_SYMBOL(__clear_user);
78EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic);
79EXPORT_SYMBOL(copy_from_user); 30EXPORT_SYMBOL(copy_from_user);
80EXPORT_SYMBOL(copy_to_user); 31EXPORT_SYMBOL(copy_to_user);
81EXPORT_SYMBOL(copy_in_user);
82EXPORT_SYMBOL(strnlen_user);
83
84#ifdef CONFIG_PCI
85EXPORT_SYMBOL(pci_mem_start);
86#endif
87 32
88EXPORT_SYMBOL(copy_page); 33EXPORT_SYMBOL(copy_page);
89EXPORT_SYMBOL(clear_page); 34EXPORT_SYMBOL(clear_page);
90 35
91EXPORT_SYMBOL(_cpu_pda);
92#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
93EXPORT_SYMBOL(cpu_data); 37extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
38extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
94EXPORT_SYMBOL(__write_lock_failed); 39EXPORT_SYMBOL(__write_lock_failed);
95EXPORT_SYMBOL(__read_lock_failed); 40EXPORT_SYMBOL(__read_lock_failed);
96
97EXPORT_SYMBOL(smp_call_function);
98EXPORT_SYMBOL(cpu_callout_map);
99#endif
100
101#ifdef CONFIG_VT
102EXPORT_SYMBOL(screen_info);
103#endif 41#endif
104 42
105EXPORT_SYMBOL(rtc_lock);
106
107EXPORT_SYMBOL_GPL(set_nmi_callback);
108EXPORT_SYMBOL_GPL(unset_nmi_callback);
109
110/* Export string functions. We normally rely on gcc builtin for most of these, 43/* Export string functions. We normally rely on gcc builtin for most of these,
111 but gcc sometimes decides not to inline them. */ 44 but gcc sometimes decides not to inline them. */
112#undef memcpy 45#undef memcpy
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
114#undef memmove 47#undef memmove
115 48
116extern void * memset(void *,int,__kernel_size_t); 49extern void * memset(void *,int,__kernel_size_t);
117extern size_t strlen(const char *);
118extern void * memmove(void * dest,const void *src,size_t count);
119extern void * memcpy(void *,const void *,__kernel_size_t); 50extern void * memcpy(void *,const void *,__kernel_size_t);
120extern void * __memcpy(void *,const void *,__kernel_size_t); 51extern void * __memcpy(void *,const void *,__kernel_size_t);
121 52
122EXPORT_SYMBOL(memset); 53EXPORT_SYMBOL(memset);
123EXPORT_SYMBOL(memmove);
124EXPORT_SYMBOL(memcpy); 54EXPORT_SYMBOL(memcpy);
125EXPORT_SYMBOL(__memcpy); 55EXPORT_SYMBOL(__memcpy);
126 56
127#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
128/* prototypes are wrong, these are assembly with custom calling functions */
129extern void rwsem_down_read_failed_thunk(void);
130extern void rwsem_wake_thunk(void);
131extern void rwsem_downgrade_thunk(void);
132extern void rwsem_down_write_failed_thunk(void);
133EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
134EXPORT_SYMBOL(rwsem_wake_thunk);
135EXPORT_SYMBOL(rwsem_downgrade_thunk);
136EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
137#endif
138
139EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
140
141EXPORT_SYMBOL(die_chain);
142
143#ifdef CONFIG_SMP
144EXPORT_SYMBOL(cpu_sibling_map);
145EXPORT_SYMBOL(smp_num_siblings);
146#endif
147
148#ifdef CONFIG_BUG
149EXPORT_SYMBOL(out_of_line_bug);
150#endif
151
152EXPORT_SYMBOL(init_level4_pgt); 58EXPORT_SYMBOL(init_level4_pgt);
153
154extern unsigned long __supported_pte_mask;
155EXPORT_SYMBOL(__supported_pte_mask);
156
157#ifdef CONFIG_SMP
158EXPORT_SYMBOL(flush_tlb_page);
159#endif
160
161EXPORT_SYMBOL(cpu_khz);
162
163EXPORT_SYMBOL(load_gs_index); 59EXPORT_SYMBOL(load_gs_index);
164 60
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 5384e227cdf6..c493735218da 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -147,4 +147,5 @@ unsigned short ip_compute_csum(unsigned char * buff, int len)
147{ 147{
148 return csum_fold(csum_partial(buff,len,0)); 148 return csum_fold(csum_partial(buff,len,0));
149} 149}
150EXPORT_SYMBOL(ip_compute_csum);
150 151
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index 94323f20816e..b1320ec58428 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -109,6 +109,7 @@ csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len,
109{ 109{
110 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); 110 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
111} 111}
112EXPORT_SYMBOL(csum_partial_copy_nocheck);
112 113
113unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, 114unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
114 __u32 len, unsigned short proto, unsigned int sum) 115 __u32 len, unsigned short proto, unsigned int sum)
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 03c460cbdd1c..b6cd3cca2f45 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/config.h> 11#include <linux/config.h>
12#include <linux/module.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <linux/delay.h> 14#include <linux/delay.h>
14#include <asm/delay.h> 15#include <asm/delay.h>
@@ -36,18 +37,22 @@ void __delay(unsigned long loops)
36 } 37 }
37 while((now-bclock) < loops); 38 while((now-bclock) < loops);
38} 39}
40EXPORT_SYMBOL(__delay);
39 41
40inline void __const_udelay(unsigned long xloops) 42inline void __const_udelay(unsigned long xloops)
41{ 43{
42 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); 44 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
43} 45}
46EXPORT_SYMBOL(__const_udelay);
44 47
45void __udelay(unsigned long usecs) 48void __udelay(unsigned long usecs)
46{ 49{
47 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ 50 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */
48} 51}
52EXPORT_SYMBOL(__udelay);
49 53
50void __ndelay(unsigned long nsecs) 54void __ndelay(unsigned long nsecs)
51{ 55{
52 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 56 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
53} 57}
58EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c
index e93d5255fdc9..751ebae8ec42 100644
--- a/arch/x86_64/lib/memmove.c
+++ b/arch/x86_64/lib/memmove.c
@@ -3,12 +3,13 @@
3 */ 3 */
4#define _STRING_C 4#define _STRING_C
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/module.h>
6 7
7#undef memmove 8#undef memmove
8void *memmove(void * dest,const void *src,size_t count) 9void *memmove(void * dest,const void *src,size_t count)
9{ 10{
10 if (dest < src) { 11 if (dest < src) {
11 __inline_memcpy(dest,src,count); 12 return memcpy(dest,src,count);
12 } else { 13 } else {
13 char *p = (char *) dest + count; 14 char *p = (char *) dest + count;
14 char *s = (char *) src + count; 15 char *s = (char *) src + count;
@@ -17,3 +18,4 @@ void *memmove(void * dest,const void *src,size_t count)
17 } 18 }
18 return dest; 19 return dest;
19} 20}
21EXPORT_SYMBOL(memmove);
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
index 9bc2c295818e..893d43f838cc 100644
--- a/arch/x86_64/lib/usercopy.c
+++ b/arch/x86_64/lib/usercopy.c
@@ -5,6 +5,7 @@
5 * Copyright 1997 Linus Torvalds 5 * Copyright 1997 Linus Torvalds
6 * Copyright 2002 Andi Kleen <ak@suse.de> 6 * Copyright 2002 Andi Kleen <ak@suse.de>
7 */ 7 */
8#include <linux/module.h>
8#include <asm/uaccess.h> 9#include <asm/uaccess.h>
9 10
10/* 11/*
@@ -47,15 +48,17 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
47 __do_strncpy_from_user(dst, src, count, res); 48 __do_strncpy_from_user(dst, src, count, res);
48 return res; 49 return res;
49} 50}
51EXPORT_SYMBOL(__strncpy_from_user);
50 52
51long 53long
52strncpy_from_user(char *dst, const char __user *src, long count) 54strncpy_from_user(char *dst, const char __user *src, long count)
53{ 55{
54 long res = -EFAULT; 56 long res = -EFAULT;
55 if (access_ok(VERIFY_READ, src, 1)) 57 if (access_ok(VERIFY_READ, src, 1))
56 __do_strncpy_from_user(dst, src, count, res); 58 return __strncpy_from_user(dst, src, count);
57 return res; 59 return res;
58} 60}
61EXPORT_SYMBOL(strncpy_from_user);
59 62
60/* 63/*
61 * Zero Userspace 64 * Zero Userspace
@@ -94,7 +97,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
94 [zero] "r" (0UL), [eight] "r" (8UL)); 97 [zero] "r" (0UL), [eight] "r" (8UL));
95 return size; 98 return size;
96} 99}
97 100EXPORT_SYMBOL(__clear_user);
98 101
99unsigned long clear_user(void __user *to, unsigned long n) 102unsigned long clear_user(void __user *to, unsigned long n)
100{ 103{
@@ -102,6 +105,7 @@ unsigned long clear_user(void __user *to, unsigned long n)
102 return __clear_user(to, n); 105 return __clear_user(to, n);
103 return n; 106 return n;
104} 107}
108EXPORT_SYMBOL(clear_user);
105 109
106/* 110/*
107 * Return the size of a string (including the ending 0) 111 * Return the size of a string (including the ending 0)
@@ -125,6 +129,7 @@ long __strnlen_user(const char __user *s, long n)
125 s++; 129 s++;
126 } 130 }
127} 131}
132EXPORT_SYMBOL(__strnlen_user);
128 133
129long strnlen_user(const char __user *s, long n) 134long strnlen_user(const char __user *s, long n)
130{ 135{
@@ -132,6 +137,7 @@ long strnlen_user(const char __user *s, long n)
132 return 0; 137 return 0;
133 return __strnlen_user(s, n); 138 return __strnlen_user(s, n);
134} 139}
140EXPORT_SYMBOL(strnlen_user);
135 141
136long strlen_user(const char __user *s) 142long strlen_user(const char __user *s)
137{ 143{
@@ -147,6 +153,7 @@ long strlen_user(const char __user *s)
147 s++; 153 s++;
148 } 154 }
149} 155}
156EXPORT_SYMBOL(strlen_user);
150 157
151unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) 158unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
152{ 159{
@@ -155,3 +162,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
155 } 162 }
156 return len; 163 return len;
157} 164}
165EXPORT_SYMBOL(copy_in_user);
166
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 0803d3858af1..08dc696f54ee 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -195,7 +195,7 @@ void dump_pagetable(unsigned long address)
195 printk("PGD %lx ", pgd_val(*pgd)); 195 printk("PGD %lx ", pgd_val(*pgd));
196 if (!pgd_present(*pgd)) goto ret; 196 if (!pgd_present(*pgd)) goto ret;
197 197
198 pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address); 198 pud = pud_offset(pgd, address);
199 if (bad_address(pud)) goto bad; 199 if (bad_address(pud)) goto bad;
200 printk("PUD %lx ", pud_val(*pud)); 200 printk("PUD %lx ", pud_val(*pud));
201 if (!pud_present(*pud)) goto ret; 201 if (!pud_present(*pud)) goto ret;
@@ -445,8 +445,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
445 if (!(vma->vm_flags & VM_GROWSDOWN)) 445 if (!(vma->vm_flags & VM_GROWSDOWN))
446 goto bad_area; 446 goto bad_area;
447 if (error_code & 4) { 447 if (error_code & 4) {
448 // XXX: align red zone size with ABI 448 /* Allow userspace just enough access below the stack pointer
449 if (address + 128 < regs->rsp) 449 * to let the 'enter' instruction work.
450 */
451 if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
450 goto bad_area; 452 goto bad_area;
451 } 453 }
452 if (expand_stack(vma, address)) 454 if (expand_stack(vma, address))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4ba34e95d835..02add1d1dfa8 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -41,8 +41,6 @@
41#include <asm/proto.h> 41#include <asm/proto.h>
42#include <asm/smp.h> 42#include <asm/smp.h>
43#include <asm/sections.h> 43#include <asm/sections.h>
44#include <asm/dma-mapping.h>
45#include <asm/swiotlb.h>
46 44
47#ifndef Dprintk 45#ifndef Dprintk
48#define Dprintk(x...) 46#define Dprintk(x...)
@@ -90,8 +88,6 @@ void show_mem(void)
90 printk(KERN_INFO "%lu pages swap cached\n",cached); 88 printk(KERN_INFO "%lu pages swap cached\n",cached);
91} 89}
92 90
93/* References to section boundaries */
94
95int after_bootmem; 91int after_bootmem;
96 92
97static __init void *spp_getpage(void) 93static __init void *spp_getpage(void)
@@ -261,9 +257,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
261 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { 257 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
262 unsigned long entry; 258 unsigned long entry;
263 259
264 if (address > end) { 260 if (address >= end) {
265 for (; i < PTRS_PER_PMD; i++, pmd++) 261 if (!after_bootmem)
266 set_pmd(pmd, __pmd(0)); 262 for (; i < PTRS_PER_PMD; i++, pmd++)
263 set_pmd(pmd, __pmd(0));
267 break; 264 break;
268 } 265 }
269 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; 266 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -341,7 +338,8 @@ static void __init find_early_table_space(unsigned long end)
341 table_end = table_start; 338 table_end = table_start;
342 339
343 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", 340 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
344 end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); 341 end, table_start << PAGE_SHIFT,
342 (table_start << PAGE_SHIFT) + tables);
345} 343}
346 344
347/* Setup the direct mapping of the physical memory at PAGE_OFFSET. 345/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -372,7 +370,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
372 pud_t *pud; 370 pud_t *pud;
373 371
374 if (after_bootmem) 372 if (after_bootmem)
375 pud = pud_offset_k(pgd, start & PGDIR_MASK); 373 pud = pud_offset(pgd, start & PGDIR_MASK);
376 else 374 else
377 pud = alloc_low_page(&map, &pud_phys); 375 pud = alloc_low_page(&map, &pud_phys);
378 376
@@ -587,10 +585,7 @@ void __init mem_init(void)
587{ 585{
588 long codesize, reservedpages, datasize, initsize; 586 long codesize, reservedpages, datasize, initsize;
589 587
590#ifdef CONFIG_SWIOTLB 588 pci_iommu_alloc();
591 pci_swiotlb_init();
592#endif
593 no_iommu_init();
594 589
595 /* How many end-of-memory variables you have, grandma! */ 590 /* How many end-of-memory variables you have, grandma! */
596 max_low_pfn = end_pfn; 591 max_low_pfn = end_pfn;
@@ -644,20 +639,29 @@ void __init mem_init(void)
644#endif 639#endif
645} 640}
646 641
647void free_initmem(void) 642void free_init_pages(char *what, unsigned long begin, unsigned long end)
648{ 643{
649 unsigned long addr; 644 unsigned long addr;
650 645
651 addr = (unsigned long)(&__init_begin); 646 if (begin >= end)
652 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { 647 return;
648
649 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
650 for (addr = begin; addr < end; addr += PAGE_SIZE) {
653 ClearPageReserved(virt_to_page(addr)); 651 ClearPageReserved(virt_to_page(addr));
654 init_page_count(virt_to_page(addr)); 652 init_page_count(virt_to_page(addr));
655 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 653 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
656 free_page(addr); 654 free_page(addr);
657 totalram_pages++; 655 totalram_pages++;
658 } 656 }
657}
658
659void free_initmem(void)
660{
659 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); 661 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
660 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); 662 free_init_pages("unused kernel memory",
663 (unsigned long)(&__init_begin),
664 (unsigned long)(&__init_end));
661} 665}
662 666
663#ifdef CONFIG_DEBUG_RODATA 667#ifdef CONFIG_DEBUG_RODATA
@@ -686,15 +690,7 @@ void mark_rodata_ro(void)
686#ifdef CONFIG_BLK_DEV_INITRD 690#ifdef CONFIG_BLK_DEV_INITRD
687void free_initrd_mem(unsigned long start, unsigned long end) 691void free_initrd_mem(unsigned long start, unsigned long end)
688{ 692{
689 if (start >= end) 693 free_init_pages("initrd memory", start, end);
690 return;
691 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
692 for (; start < end; start += PAGE_SIZE) {
693 ClearPageReserved(virt_to_page(start));
694 init_page_count(virt_to_page(start));
695 free_page(start);
696 totalram_pages++;
697 }
698} 694}
699#endif 695#endif
700 696
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index ae207064201e..45d7d823c3b8 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -11,6 +11,7 @@
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/module.h>
14#include <asm/io.h> 15#include <asm/io.h>
15#include <asm/pgalloc.h> 16#include <asm/pgalloc.h>
16#include <asm/fixmap.h> 17#include <asm/fixmap.h>
@@ -219,6 +220,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
219 } 220 }
220 return (__force void __iomem *) (offset + (char *)addr); 221 return (__force void __iomem *) (offset + (char *)addr);
221} 222}
223EXPORT_SYMBOL(__ioremap);
222 224
223/** 225/**
224 * ioremap_nocache - map bus memory into CPU space 226 * ioremap_nocache - map bus memory into CPU space
@@ -246,6 +248,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
246{ 248{
247 return __ioremap(phys_addr, size, _PAGE_PCD); 249 return __ioremap(phys_addr, size, _PAGE_PCD);
248} 250}
251EXPORT_SYMBOL(ioremap_nocache);
249 252
250/** 253/**
251 * iounmap - Free a IO remapping 254 * iounmap - Free a IO remapping
@@ -291,3 +294,5 @@ void iounmap(volatile void __iomem *addr)
291 BUG_ON(p != o || o == NULL); 294 BUG_ON(p != o || o == NULL);
292 kfree(p); 295 kfree(p);
293} 296}
297EXPORT_SYMBOL(iounmap);
298
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a0..b50a7c7c47f8 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -2,6 +2,7 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <asm/mpspec.h> 3#include <asm/mpspec.h>
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5#include <asm/k8.h>
5 6
6/* 7/*
7 * This discovers the pcibus <-> node mapping on AMD K8. 8 * This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
18#define NR_LDT_BUS_NUMBER_REGISTERS 3 19#define NR_LDT_BUS_NUMBER_REGISTERS 3
19#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) 20#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
20#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) 21#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
21#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
22 22
23/** 23/**
24 * fill_mp_bus_to_cpumask() 24 * fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
28__init static int 28__init static int
29fill_mp_bus_to_cpumask(void) 29fill_mp_bus_to_cpumask(void)
30{ 30{
31 struct pci_dev *nb_dev = NULL; 31 int i, j, k;
32 int i, j;
33 u32 ldtbus, nid; 32 u32 ldtbus, nid;
34 static int lbnr[3] = { 33 static int lbnr[3] = {
35 LDT_BUS_NUMBER_REGISTER_0, 34 LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
37 LDT_BUS_NUMBER_REGISTER_2 36 LDT_BUS_NUMBER_REGISTER_2
38 }; 37 };
39 38
40 while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 39 cache_k8_northbridges();
41 PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { 40 for (k = 0; k < num_k8_northbridges; k++) {
41 struct pci_dev *nb_dev = k8_northbridges[k];
42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); 42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
43 43
44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { 44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {