aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86_64/boot-options.txt21
-rw-r--r--arch/i386/Kconfig13
-rw-r--r--arch/i386/boot/Makefile9
-rw-r--r--arch/i386/boot/compressed/misc.c32
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/i386/kernel/alternative.c118
-rw-r--r--arch/i386/kernel/apic.c16
-rw-r--r--arch/i386/kernel/apm.c6
-rw-r--r--arch/i386/kernel/cpu/amd.c16
-rw-r--r--arch/i386/kernel/cpu/intel.c6
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c113
-rw-r--r--arch/i386/kernel/crash.c7
-rw-r--r--arch/i386/kernel/entry.S263
-rw-r--r--arch/i386/kernel/io_apic.c49
-rw-r--r--arch/i386/kernel/irq.c2
-rw-r--r--arch/i386/kernel/nmi.c72
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/smp.c12
-rw-r--r--arch/i386/kernel/smpboot.c1
-rw-r--r--arch/i386/kernel/traps.c70
-rw-r--r--arch/i386/kernel/vmlinux.lds.S9
-rw-r--r--arch/i386/oprofile/op_model_athlon.c1
-rw-r--r--arch/i386/oprofile/op_model_p4.c1
-rw-r--r--arch/i386/oprofile/op_model_ppro.c1
-rw-r--r--arch/ia64/kernel/process.c4
-rw-r--r--arch/x86_64/Kconfig51
-rw-r--r--arch/x86_64/Kconfig.debug18
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile9
-rw-r--r--arch/x86_64/boot/compressed/misc.c46
-rw-r--r--arch/x86_64/boot/tools/build.c6
-rw-r--r--arch/x86_64/defconfig159
-rw-r--r--arch/x86_64/ia32/fpu32.c1
-rw-r--r--arch/x86_64/ia32/ia32_signal.c2
-rw-r--r--arch/x86_64/ia32/ia32entry.S11
-rw-r--r--arch/x86_64/ia32/ptrace32.c43
-rw-r--r--arch/x86_64/ia32/sys_ia32.c25
-rw-r--r--arch/x86_64/kernel/Makefile8
-rw-r--r--arch/x86_64/kernel/aperture.c26
-rw-r--r--arch/x86_64/kernel/apic.c32
-rw-r--r--arch/x86_64/kernel/crash.c4
-rw-r--r--arch/x86_64/kernel/e820.c2
-rw-r--r--arch/x86_64/kernel/entry.S113
-rw-r--r--arch/x86_64/kernel/genapic_flat.c30
-rw-r--r--arch/x86_64/kernel/head64.c2
-rw-r--r--arch/x86_64/kernel/i8259.c14
-rw-r--r--arch/x86_64/kernel/io_apic.c45
-rw-r--r--arch/x86_64/kernel/irq.c30
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c506
-rw-r--r--arch/x86_64/kernel/module.c38
-rw-r--r--arch/x86_64/kernel/nmi.c89
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1018
-rw-r--r--arch/x86_64/kernel/pci-dma.c55
-rw-r--r--arch/x86_64/kernel/pci-gart.c155
-rw-r--r--arch/x86_64/kernel/pci-nommu.c9
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c16
-rw-r--r--arch/x86_64/kernel/reboot.c1
-rw-r--r--arch/x86_64/kernel/setup.c180
-rw-r--r--arch/x86_64/kernel/setup64.c3
-rw-r--r--arch/x86_64/kernel/signal.c3
-rw-r--r--arch/x86_64/kernel/smp.c10
-rw-r--r--arch/x86_64/kernel/smpboot.c23
-rw-r--r--arch/x86_64/kernel/tce.c202
-rw-r--r--arch/x86_64/kernel/time.c87
-rw-r--r--arch/x86_64/kernel/traps.c83
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S29
-rw-r--r--arch/x86_64/kernel/vsyscall.c4
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c114
-rw-r--r--arch/x86_64/lib/csum-partial.c1
-rw-r--r--arch/x86_64/lib/csum-wrappers.c1
-rw-r--r--arch/x86_64/lib/delay.c5
-rw-r--r--arch/x86_64/lib/memmove.c4
-rw-r--r--arch/x86_64/lib/usercopy.c13
-rw-r--r--arch/x86_64/mm/fault.c8
-rw-r--r--arch/x86_64/mm/init.c48
-rw-r--r--arch/x86_64/mm/ioremap.c5
-rw-r--r--arch/x86_64/pci/k8-bus.c10
-rw-r--r--drivers/acpi/processor_idle.c12
-rw-r--r--drivers/char/agp/Kconfig4
-rw-r--r--drivers/char/agp/amd64-agp.c81
-rw-r--r--drivers/pci/msi-apic.c1
-rw-r--r--drivers/scsi/aacraid/comminit.c5
-rw-r--r--fs/compat.c16
-rw-r--r--include/asm-i386/alternative.h2
-rw-r--r--include/asm-i386/apic.h12
-rw-r--r--include/asm-i386/cpufeature.h1
-rw-r--r--include/asm-i386/dwarf2.h54
-rw-r--r--include/asm-i386/hw_irq.h2
-rw-r--r--include/asm-i386/intel_arch_perfmon.h19
-rw-r--r--include/asm-i386/k8.h1
-rw-r--r--include/asm-i386/local.h26
-rw-r--r--include/asm-i386/mach-default/mach_ipi.h7
-rw-r--r--include/asm-i386/nmi.h28
-rw-r--r--include/asm-i386/processor.h3
-rw-r--r--include/asm-i386/thread_info.h7
-rw-r--r--include/asm-i386/unwind.h98
-rw-r--r--include/asm-ia64/thread_info.h5
-rw-r--r--include/asm-x86_64/alternative.h146
-rw-r--r--include/asm-x86_64/apic.h26
-rw-r--r--include/asm-x86_64/atomic.h42
-rw-r--r--include/asm-x86_64/bitops.h7
-rw-r--r--include/asm-x86_64/calgary.h66
-rw-r--r--include/asm-x86_64/cpufeature.h3
-rw-r--r--include/asm-x86_64/dma-mapping.h17
-rw-r--r--include/asm-x86_64/dma.h2
-rw-r--r--include/asm-x86_64/gart-mapping.h16
-rw-r--r--include/asm-x86_64/hpet.h2
-rw-r--r--include/asm-x86_64/hw_irq.h2
-rw-r--r--include/asm-x86_64/ia32_unistd.h308
-rw-r--r--include/asm-x86_64/intel_arch_perfmon.h19
-rw-r--r--include/asm-x86_64/k8.h14
-rw-r--r--include/asm-x86_64/local.h26
-rw-r--r--include/asm-x86_64/mce.h13
-rw-r--r--include/asm-x86_64/mutex.h4
-rw-r--r--include/asm-x86_64/nmi.h30
-rw-r--r--include/asm-x86_64/pci.h4
-rw-r--r--include/asm-x86_64/pgtable.h6
-rw-r--r--include/asm-x86_64/processor.h5
-rw-r--r--include/asm-x86_64/proto.h15
-rw-r--r--include/asm-x86_64/rwlock.h8
-rw-r--r--include/asm-x86_64/semaphore.h8
-rw-r--r--include/asm-x86_64/smp.h2
-rw-r--r--include/asm-x86_64/spinlock.h10
-rw-r--r--include/asm-x86_64/string.h3
-rw-r--r--include/asm-x86_64/system.h86
-rw-r--r--include/asm-x86_64/tce.h47
-rw-r--r--include/asm-x86_64/thread_info.h19
-rw-r--r--include/asm-x86_64/topology.h8
-rw-r--r--include/asm-x86_64/unwind.h106
-rw-r--r--include/linux/bitmap.h5
-rw-r--r--include/linux/compat.h2
-rw-r--r--include/linux/kernel.h7
-rw-r--r--include/linux/module.h3
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/time.h11
-rw-r--r--include/linux/unwind.h127
-rw-r--r--init/Kconfig10
-rw-r--r--init/main.c2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/module.c16
-rw-r--r--kernel/sched.c9
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/unwind.c918
-rw-r--r--lib/Kconfig.debug12
147 files changed, 5319 insertions, 1648 deletions
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index f2cd6ef53ff3..6887d44d2661 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -205,6 +205,27 @@ IOMMU
205 pages Prereserve that many 128K pages for the software IO bounce buffering. 205 pages Prereserve that many 128K pages for the software IO bounce buffering.
206 force Force all IO through the software TLB. 206 force Force all IO through the software TLB.
207 207
208 calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
209 calgary=[translate_empty_slots]
210 calgary=[disable=<PCI bus number>]
211
212 64k,...,8M - Set the size of each PCI slot's translation table
213 when using the Calgary IOMMU. This is the size of the translation
214 table itself in main memory. The smallest table, 64k, covers an IO
215 space of 32MB; the largest, 8MB table, can cover an IO space of
216 4GB. Normally the kernel will make the right choice by itself.
217
218 translate_empty_slots - Enable translation even on slots that have
219 no devices attached to them, in case a device will be hotplugged
220 in the future.
221
222 disable=<PCI bus number> - Disable translation on a given PHB. For
223 example, the built-in graphics adapter resides on the first bridge
224 (PCI bus number 0); if translation (isolation) is enabled on this
225 bridge, X servers that access the hardware directly from user
226 space might stop working. Use this option if you have devices that
227 are accessed from userspace directly on some PCI host bridge.
228
208Debugging 229Debugging
209 230
210 oops=panic Always panic on oopses. Default is to just kill the process, 231 oops=panic Always panic on oopses. Default is to just kill the process,
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 374fb50608a0..f3eaf22f273d 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -328,6 +328,15 @@ config X86_MCE_P4THERMAL
328 Enabling this feature will cause a message to be printed when the P4 328 Enabling this feature will cause a message to be printed when the P4
329 enters thermal throttling. 329 enters thermal throttling.
330 330
331config VM86
332 default y
333 bool "Enable VM86 support" if EMBEDDED
334 help
335 This option is required by programs like DOSEMU to run 16-bit legacy
336 code on X86 processors. It also may be needed by software like
337 XFree86 to initialize some video cards via BIOS. Disabling this
338 option saves about 6k.
339
331config TOSHIBA 340config TOSHIBA
332 tristate "Toshiba Laptop support" 341 tristate "Toshiba Laptop support"
333 ---help--- 342 ---help---
@@ -1068,6 +1077,10 @@ config SCx200HR_TIMER
1068 processor goes idle (as is done by the scheduler). The 1077 processor goes idle (as is done by the scheduler). The
1069 other workaround is idle=poll boot option. 1078 other workaround is idle=poll boot option.
1070 1079
1080config K8_NB
1081 def_bool y
1082 depends on AGP_AMD64
1083
1071source "drivers/pcmcia/Kconfig" 1084source "drivers/pcmcia/Kconfig"
1072 1085
1073source "drivers/pci/hotplug/Kconfig" 1086source "drivers/pci/hotplug/Kconfig"
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e55476381b..e97946626064 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
109isoimage: $(BOOTIMAGE) 109isoimage: $(BOOTIMAGE)
110 -rm -rf $(obj)/isoimage 110 -rm -rf $(obj)/isoimage
111 mkdir $(obj)/isoimage 111 mkdir $(obj)/isoimage
112 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ 112 for i in lib lib64 share end ; do \
113 $(obj)/isoimage 113 if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
114 cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
115 break ; \
116 fi ; \
117 if [ $$i = end ] ; then exit 1 ; fi ; \
118 done
114 cp $(BOOTIMAGE) $(obj)/isoimage/linux 119 cp $(BOOTIMAGE) $(obj)/isoimage/linux
115 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 120 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
116 if [ -f '$(FDINITRD)' ] ; then \ 121 if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7492a5..b2ccd543410d 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
24 24
25#undef memset 25#undef memset
26#undef memcpy 26#undef memcpy
27
28/*
29 * Why do we do this? Don't ask me..
30 *
31 * Incomprehensible are the ways of bootloaders.
32 */
33static void* memset(void *, int, size_t);
34static void* memcpy(void *, __const void *, size_t);
35#define memzero(s, n) memset ((s), 0, (n)) 27#define memzero(s, n) memset ((s), 0, (n))
36 28
37typedef unsigned char uch; 29typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
93#endif 85#endif
94#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) 86#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
95 87
96extern char input_data[]; 88extern unsigned char input_data[];
97extern int input_len; 89extern int input_len;
98 90
99static long bytes_out = 0; 91static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
103static void *malloc(int size); 95static void *malloc(int size);
104static void free(void *where); 96static void free(void *where);
105 97
98static void *memset(void *s, int c, unsigned n);
99static void *memcpy(void *dest, const void *src, unsigned n);
100
106static void putstr(const char *); 101static void putstr(const char *);
107 102
108extern int end; 103extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
205 outb_p(0xff & (pos >> 1), vidport+1); 200 outb_p(0xff & (pos >> 1), vidport+1);
206} 201}
207 202
208static void* memset(void* s, int c, size_t n) 203static void* memset(void* s, int c, unsigned n)
209{ 204{
210 int i; 205 int i;
211 char *ss = (char*)s; 206 char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
214 return s; 209 return s;
215} 210}
216 211
217static void* memcpy(void* __dest, __const void* __src, 212static void* memcpy(void* dest, const void* src, unsigned n)
218 size_t __n)
219{ 213{
220 int i; 214 int i;
221 char *d = (char *)__dest, *s = (char *)__src; 215 char *d = (char *)dest, *s = (char *)src;
222 216
223 for (i=0;i<__n;i++) d[i] = s[i]; 217 for (i=0;i<n;i++) d[i] = s[i];
224 return __dest; 218 return dest;
225} 219}
226 220
227/* =========================================================================== 221/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
309#else 303#else
310 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); 304 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
311#endif 305#endif
312 output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ 306 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
313 free_mem_end_ptr = (long)real_mode; 307 free_mem_end_ptr = (long)real_mode;
314} 308}
315 309
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
324#ifdef STANDARD_MEMORY_BIOS_CALL 318#ifdef STANDARD_MEMORY_BIOS_CALL
325 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); 319 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
326#else 320#else
327 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 321 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
328 (3*1024))
329 error("Less than 4MB of memory");
330#endif 322#endif
331 mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; 323 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
332 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX 324 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
333 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; 325 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
334 low_buffer_size = low_buffer_end - LOW_BUFFER_START; 326 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 0fac85df64f1..5e70c2fb273a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
37obj-$(CONFIG_VM86) += vm86.o 37obj-$(CONFIG_VM86) += vm86.o
38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o 39obj-$(CONFIG_HPET_TIMER) += hpet.o
40obj-$(CONFIG_K8_NB) += k8.o
40 41
41EXTRA_AFLAGS := -traditional 42EXTRA_AFLAGS := -traditional
42 43
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
76$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ 77$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
77 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE 78 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
78 $(call if_changed,syscall) 79 $(call if_changed,syscall)
80
81k8-y += ../../x86_64/kernel/k8.o
82
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f99fb2a..50eb0e03777e 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
4#include <asm/alternative.h> 4#include <asm/alternative.h>
5#include <asm/sections.h> 5#include <asm/sections.h>
6 6
7#define DEBUG 0 7static int no_replacement = 0;
8#if DEBUG 8static int smp_alt_once = 0;
9# define DPRINTK(fmt, args...) printk(fmt, args) 9static int debug_alternative = 0;
10#else 10
11# define DPRINTK(fmt, args...) 11static int __init noreplacement_setup(char *s)
12#endif 12{
13 no_replacement = 1;
14 return 1;
15}
16static int __init bootonly(char *str)
17{
18 smp_alt_once = 1;
19 return 1;
20}
21static int __init debug_alt(char *str)
22{
23 debug_alternative = 1;
24 return 1;
25}
13 26
27__setup("noreplacement", noreplacement_setup);
28__setup("smp-alt-boot", bootonly);
29__setup("debug-alternative", debug_alt);
30
31#define DPRINTK(fmt, args...) if (debug_alternative) \
32 printk(KERN_DEBUG fmt, args)
33
34#ifdef GENERIC_NOP1
14/* Use inline assembly to define this because the nops are defined 35/* Use inline assembly to define this because the nops are defined
15 as inline assembly strings in the include files and we cannot 36 as inline assembly strings in the include files and we cannot
16 get them easily into strings. */ 37 get them easily into strings. */
17asm("\t.data\nintelnops: " 38asm("\t.data\nintelnops: "
18 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 39 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
19 GENERIC_NOP7 GENERIC_NOP8); 40 GENERIC_NOP7 GENERIC_NOP8);
20asm("\t.data\nk8nops: " 41extern unsigned char intelnops[];
21 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
22 K8_NOP7 K8_NOP8);
23asm("\t.data\nk7nops: "
24 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
25 K7_NOP7 K7_NOP8);
26
27extern unsigned char intelnops[], k8nops[], k7nops[];
28static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 42static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
29 NULL, 43 NULL,
30 intelnops, 44 intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
36 intelnops + 1 + 2 + 3 + 4 + 5 + 6, 50 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
37 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 51 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
38}; 52};
53#endif
54
55#ifdef K8_NOP1
56asm("\t.data\nk8nops: "
57 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
58 K8_NOP7 K8_NOP8);
59extern unsigned char k8nops[];
39static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 60static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
40 NULL, 61 NULL,
41 k8nops, 62 k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
47 k8nops + 1 + 2 + 3 + 4 + 5 + 6, 68 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
48 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 69 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
49}; 70};
71#endif
72
73#ifdef K7_NOP1
74asm("\t.data\nk7nops: "
75 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
76 K7_NOP7 K7_NOP8);
77extern unsigned char k7nops[];
50static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 78static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
51 NULL, 79 NULL,
52 k7nops, 80 k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
58 k7nops + 1 + 2 + 3 + 4 + 5 + 6, 86 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
59 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 87 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
60}; 88};
89#endif
90
91#ifdef CONFIG_X86_64
92
93extern char __vsyscall_0;
94static inline unsigned char** find_nop_table(void)
95{
96 return k8_nops;
97}
98
99#else /* CONFIG_X86_64 */
100
61static struct nop { 101static struct nop {
62 int cpuid; 102 int cpuid;
63 unsigned char **noptable; 103 unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
67 { -1, NULL } 107 { -1, NULL }
68}; 108};
69 109
70
71extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
72extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
73extern u8 *__smp_locks[], *__smp_locks_end[];
74
75extern u8 __smp_alt_begin[], __smp_alt_end[];
76
77
78static unsigned char** find_nop_table(void) 110static unsigned char** find_nop_table(void)
79{ 111{
80 unsigned char **noptable = intel_nops; 112 unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
89 return noptable; 121 return noptable;
90} 122}
91 123
124#endif /* CONFIG_X86_64 */
125
126extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
127extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
128extern u8 *__smp_locks[], *__smp_locks_end[];
129
130extern u8 __smp_alt_begin[], __smp_alt_end[];
131
92/* Replace instructions with better alternatives for this CPU type. 132/* Replace instructions with better alternatives for this CPU type.
93 This runs before SMP is initialized to avoid SMP problems with 133 This runs before SMP is initialized to avoid SMP problems with
94 self modifying code. This implies that asymmetric systems where 134 self modifying code. This implies that asymmetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
99{ 139{
100 unsigned char **noptable = find_nop_table(); 140 unsigned char **noptable = find_nop_table();
101 struct alt_instr *a; 141 struct alt_instr *a;
142 u8 *instr;
102 int diff, i, k; 143 int diff, i, k;
103 144
104 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 145 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
106 BUG_ON(a->replacementlen > a->instrlen); 147 BUG_ON(a->replacementlen > a->instrlen);
107 if (!boot_cpu_has(a->cpuid)) 148 if (!boot_cpu_has(a->cpuid))
108 continue; 149 continue;
109 memcpy(a->instr, a->replacement, a->replacementlen); 150 instr = a->instr;
151#ifdef CONFIG_X86_64
152 /* vsyscall code is not mapped yet. resolve it manually. */
153 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
154 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
155 DPRINTK("%s: vsyscall fixup: %p => %p\n",
156 __FUNCTION__, a->instr, instr);
157 }
158#endif
159 memcpy(instr, a->replacement, a->replacementlen);
110 diff = a->instrlen - a->replacementlen; 160 diff = a->instrlen - a->replacementlen;
111 /* Pad the rest with nops */ 161 /* Pad the rest with nops */
112 for (i = a->replacementlen; diff > 0; diff -= k, i += k) { 162 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
186static LIST_HEAD(smp_alt_modules); 236static LIST_HEAD(smp_alt_modules);
187static DEFINE_SPINLOCK(smp_alt); 237static DEFINE_SPINLOCK(smp_alt);
188 238
189static int smp_alt_once = 0;
190static int __init bootonly(char *str)
191{
192 smp_alt_once = 1;
193 return 1;
194}
195__setup("smp-alt-boot", bootonly);
196
197void alternatives_smp_module_add(struct module *mod, char *name, 239void alternatives_smp_module_add(struct module *mod, char *name,
198 void *locks, void *locks_end, 240 void *locks, void *locks_end,
199 void *text, void *text_end) 241 void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
201 struct smp_alt_module *smp; 243 struct smp_alt_module *smp;
202 unsigned long flags; 244 unsigned long flags;
203 245
246 if (no_replacement)
247 return;
248
204 if (smp_alt_once) { 249 if (smp_alt_once) {
205 if (boot_cpu_has(X86_FEATURE_UP)) 250 if (boot_cpu_has(X86_FEATURE_UP))
206 alternatives_smp_unlock(locks, locks_end, 251 alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
235 struct smp_alt_module *item; 280 struct smp_alt_module *item;
236 unsigned long flags; 281 unsigned long flags;
237 282
238 if (smp_alt_once) 283 if (no_replacement || smp_alt_once)
239 return; 284 return;
240 285
241 spin_lock_irqsave(&smp_alt, flags); 286 spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
256 struct smp_alt_module *mod; 301 struct smp_alt_module *mod;
257 unsigned long flags; 302 unsigned long flags;
258 303
259 if (smp_alt_once) 304 if (no_replacement || smp_alt_once)
260 return; 305 return;
261 BUG_ON(!smp && (num_online_cpus() > 1)); 306 BUG_ON(!smp && (num_online_cpus() > 1));
262 307
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
285 330
286void __init alternative_instructions(void) 331void __init alternative_instructions(void)
287{ 332{
333 if (no_replacement) {
334 printk(KERN_INFO "(SMP-)alternatives turned off\n");
335 free_init_pages("SMP alternatives",
336 (unsigned long)__smp_alt_begin,
337 (unsigned long)__smp_alt_end);
338 return;
339 }
288 apply_alternatives(__alt_instructions, __alt_instructions_end); 340 apply_alternatives(__alt_instructions, __alt_instructions_end);
289 341
290 /* switch to patch-once-at-boottime-only mode and free the 342 /* switch to patch-once-at-boottime-only mode and free the
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 5ab59c12335b..7ce09492fc0c 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
36#include <asm/arch_hooks.h> 36#include <asm/arch_hooks.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/i8253.h> 38#include <asm/i8253.h>
39#include <asm/nmi.h>
39 40
40#include <mach_apic.h> 41#include <mach_apic.h>
41#include <mach_apicdef.h> 42#include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
156 maxlvt = get_maxlvt(); 157 maxlvt = get_maxlvt();
157 158
158 /* 159 /*
159 * Masking an LVT entry on a P6 can trigger a local APIC error 160 * Masking an LVT entry can trigger a local APIC error
160 * if the vector is zero. Mask LVTERR first to prevent this. 161 * if the vector is zero. Mask LVTERR first to prevent this.
161 */ 162 */
162 if (maxlvt >= 3) { 163 if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
1117 unsigned long v; 1118 unsigned long v;
1118 1119
1119 v = apic_read(APIC_LVTT); 1120 v = apic_read(APIC_LVTT);
1120 apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); 1121 /*
1122 * When an illegal vector value (0-15) is written to an LVT
1123 * entry and delivery mode is Fixed, the APIC may signal an
1124 * illegal vector error, with out regard to whether the mask
1125 * bit is set or whether an interrupt is actually seen on input.
1126 *
1127 * Boot sequence might call this function when the LVTT has
1128 * '0' vector value. So make sure vector field is set to
1129 * valid value.
1130 */
1131 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1132 apic_write_around(APIC_LVTT, v);
1121 } 1133 }
1122} 1134}
1123 1135
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 9e819eb68229..7c5729d1fd06 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
764 int idled = 0; 764 int idled = 0;
765 int polling; 765 int polling;
766 766
767 polling = test_thread_flag(TIF_POLLING_NRFLAG); 767 polling = !!(current_thread_info()->status & TS_POLLING);
768 if (polling) { 768 if (polling) {
769 clear_thread_flag(TIF_POLLING_NRFLAG); 769 current_thread_info()->status &= ~TS_POLLING;
770 smp_mb__after_clear_bit(); 770 smp_mb__after_clear_bit();
771 } 771 }
772 if (!need_resched()) { 772 if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); 774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
775 } 775 }
776 if (polling) 776 if (polling)
777 set_thread_flag(TIF_POLLING_NRFLAG); 777 current_thread_info()->status |= TS_POLLING;
778 778
779 if (!idled) 779 if (!idled)
780 return 0; 780 return 0;
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a57048b..fd0457c9c827 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
224 224
225#ifdef CONFIG_X86_HT 225#ifdef CONFIG_X86_HT
226 /* 226 /*
227 * On an AMD dual core setup the lower bits of the APIC id 227 * On an AMD multi core setup the lower bits of the APIC id
228 * distinguish the cores. Assumes number of cores is a power 228 * distinguish the cores.
229 * of two.
230 */ 229 */
231 if (c->x86_max_cores > 1) { 230 if (c->x86_max_cores > 1) {
232 int cpu = smp_processor_id(); 231 int cpu = smp_processor_id();
233 unsigned bits = 0; 232 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
234 while ((1 << bits) < c->x86_max_cores) 233
235 bits++; 234 if (bits == 0) {
235 while ((1 << bits) < c->x86_max_cores)
236 bits++;
237 }
236 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); 238 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
237 phys_proc_id[cpu] >>= bits; 239 phys_proc_id[cpu] >>= bits;
238 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 240 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
240 } 242 }
241#endif 243#endif
242 244
245 if (cpuid_eax(0x80000000) >= 0x80000006)
246 num_cache_leaves = 3;
243} 247}
244 248
245static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 249static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29bb5a5..10afc645c540 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
122 122
123 select_idle_routine(c); 123 select_idle_routine(c);
124 l2 = init_intel_cacheinfo(c); 124 l2 = init_intel_cacheinfo(c);
125 if (c->cpuid_level > 9 ) {
126 unsigned eax = cpuid_eax(10);
127 /* Check for version and the number of counters */
128 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
129 set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
130 }
125 131
126 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 132 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
127 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 133 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6fa7e6..6c37b4fd8ce2 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen : CPUID4 emulation on AMD.
7 */ 8 */
8 9
9#include <linux/init.h> 10#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
130 cpumask_t shared_cpu_map; 131 cpumask_t shared_cpu_map;
131}; 132};
132 133
133static unsigned short num_cache_leaves; 134unsigned short num_cache_leaves;
135
136/* AMD doesn't have CPUID4. Emulate it here to report the same
137 information to the user. This makes some assumptions about the machine:
138 No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
139
140 In theory the TLBs could be reported as fake type (they are in "dummy").
141 Maybe later */
142union l1_cache {
143 struct {
144 unsigned line_size : 8;
145 unsigned lines_per_tag : 8;
146 unsigned assoc : 8;
147 unsigned size_in_kb : 8;
148 };
149 unsigned val;
150};
151
152union l2_cache {
153 struct {
154 unsigned line_size : 8;
155 unsigned lines_per_tag : 4;
156 unsigned assoc : 4;
157 unsigned size_in_kb : 16;
158 };
159 unsigned val;
160};
161
162static unsigned short assocs[] = {
163 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
164 [8] = 16,
165 [0xf] = 0xffff // ??
166 };
167static unsigned char levels[] = { 1, 1, 2 };
168static unsigned char types[] = { 1, 2, 3 };
169
170static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
171 union _cpuid4_leaf_ebx *ebx,
172 union _cpuid4_leaf_ecx *ecx)
173{
174 unsigned dummy;
175 unsigned line_size, lines_per_tag, assoc, size_in_kb;
176 union l1_cache l1i, l1d;
177 union l2_cache l2;
178
179 eax->full = 0;
180 ebx->full = 0;
181 ecx->full = 0;
182
183 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
184 cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
185
186 if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
187 return;
188
189 eax->split.is_self_initializing = 1;
190 eax->split.type = types[leaf];
191 eax->split.level = levels[leaf];
192 eax->split.num_threads_sharing = 0;
193 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
194
195 if (leaf <= 1) {
196 union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
197 assoc = l1->assoc;
198 line_size = l1->line_size;
199 lines_per_tag = l1->lines_per_tag;
200 size_in_kb = l1->size_in_kb;
201 } else {
202 assoc = l2.assoc;
203 line_size = l2.line_size;
204 lines_per_tag = l2.lines_per_tag;
205 /* cpu_data has errata corrections for K7 applied */
206 size_in_kb = current_cpu_data.x86_cache_size;
207 }
208
209 if (assoc == 0xf)
210 eax->split.is_fully_associative = 1;
211 ebx->split.coherency_line_size = line_size - 1;
212 ebx->split.ways_of_associativity = assocs[assoc] - 1;
213 ebx->split.physical_line_partition = lines_per_tag - 1;
214 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
215 (ebx->split.ways_of_associativity + 1) - 1;
216}
134 217
135static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 218static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
136{ 219{
137 unsigned int eax, ebx, ecx, edx; 220 union _cpuid4_leaf_eax eax;
138 union _cpuid4_leaf_eax cache_eax; 221 union _cpuid4_leaf_ebx ebx;
222 union _cpuid4_leaf_ecx ecx;
223 unsigned edx;
139 224
140 cpuid_count(4, index, &eax, &ebx, &ecx, &edx); 225 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
141 cache_eax.full = eax; 226 amd_cpuid4(index, &eax, &ebx, &ecx);
142 if (cache_eax.split.type == CACHE_TYPE_NULL) 227 else
228 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
229 if (eax.split.type == CACHE_TYPE_NULL)
143 return -EIO; /* better error ? */ 230 return -EIO; /* better error ? */
144 231
145 this_leaf->eax.full = eax; 232 this_leaf->eax = eax;
146 this_leaf->ebx.full = ebx; 233 this_leaf->ebx = ebx;
147 this_leaf->ecx.full = ecx; 234 this_leaf->ecx = ecx;
148 this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * 235 this_leaf->size = (ecx.split.number_of_sets + 1) *
149 (this_leaf->ebx.split.coherency_line_size + 1) * 236 (ebx.split.coherency_line_size + 1) *
150 (this_leaf->ebx.split.physical_line_partition + 1) * 237 (ebx.split.physical_line_partition + 1) *
151 (this_leaf->ebx.split.ways_of_associativity + 1); 238 (ebx.split.ways_of_associativity + 1);
152 return 0; 239 return 0;
153} 240}
154 241
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 21dc1bbb8067..0c88d3ec8c18 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
120 return 1; 120 return 1;
121} 121}
122 122
123/*
124 * By using the NMI code instead of a vector we just sneak thru the
125 * word generator coming out with just what we want. AND it does
126 * not matter if clustered_apic_mode is set or not.
127 */
128static void smp_send_nmi_allbutself(void) 123static void smp_send_nmi_allbutself(void)
129{ 124{
130 send_IPI_allbutself(APIC_DM_NMI); 125 send_IPI_allbutself(NMI_VECTOR);
131} 126}
132 127
133static void nmi_shootdown_cpus(void) 128static void nmi_shootdown_cpus(void)
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f153b9..e6e4506e749a 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/page.h> 49#include <asm/page.h>
50#include <asm/desc.h> 50#include <asm/desc.h>
51#include <asm/dwarf2.h>
51#include "irq_vectors.h" 52#include "irq_vectors.h"
52 53
53#define nr_syscalls ((syscall_table_size)/4) 54#define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000
85#define SAVE_ALL \ 86#define SAVE_ALL \
86 cld; \ 87 cld; \
87 pushl %es; \ 88 pushl %es; \
89 CFI_ADJUST_CFA_OFFSET 4;\
90 /*CFI_REL_OFFSET es, 0;*/\
88 pushl %ds; \ 91 pushl %ds; \
92 CFI_ADJUST_CFA_OFFSET 4;\
93 /*CFI_REL_OFFSET ds, 0;*/\
89 pushl %eax; \ 94 pushl %eax; \
95 CFI_ADJUST_CFA_OFFSET 4;\
96 CFI_REL_OFFSET eax, 0;\
90 pushl %ebp; \ 97 pushl %ebp; \
98 CFI_ADJUST_CFA_OFFSET 4;\
99 CFI_REL_OFFSET ebp, 0;\
91 pushl %edi; \ 100 pushl %edi; \
101 CFI_ADJUST_CFA_OFFSET 4;\
102 CFI_REL_OFFSET edi, 0;\
92 pushl %esi; \ 103 pushl %esi; \
104 CFI_ADJUST_CFA_OFFSET 4;\
105 CFI_REL_OFFSET esi, 0;\
93 pushl %edx; \ 106 pushl %edx; \
107 CFI_ADJUST_CFA_OFFSET 4;\
108 CFI_REL_OFFSET edx, 0;\
94 pushl %ecx; \ 109 pushl %ecx; \
110 CFI_ADJUST_CFA_OFFSET 4;\
111 CFI_REL_OFFSET ecx, 0;\
95 pushl %ebx; \ 112 pushl %ebx; \
113 CFI_ADJUST_CFA_OFFSET 4;\
114 CFI_REL_OFFSET ebx, 0;\
96 movl $(__USER_DS), %edx; \ 115 movl $(__USER_DS), %edx; \
97 movl %edx, %ds; \ 116 movl %edx, %ds; \
98 movl %edx, %es; 117 movl %edx, %es;
99 118
100#define RESTORE_INT_REGS \ 119#define RESTORE_INT_REGS \
101 popl %ebx; \ 120 popl %ebx; \
121 CFI_ADJUST_CFA_OFFSET -4;\
122 CFI_RESTORE ebx;\
102 popl %ecx; \ 123 popl %ecx; \
124 CFI_ADJUST_CFA_OFFSET -4;\
125 CFI_RESTORE ecx;\
103 popl %edx; \ 126 popl %edx; \
127 CFI_ADJUST_CFA_OFFSET -4;\
128 CFI_RESTORE edx;\
104 popl %esi; \ 129 popl %esi; \
130 CFI_ADJUST_CFA_OFFSET -4;\
131 CFI_RESTORE esi;\
105 popl %edi; \ 132 popl %edi; \
133 CFI_ADJUST_CFA_OFFSET -4;\
134 CFI_RESTORE edi;\
106 popl %ebp; \ 135 popl %ebp; \
107 popl %eax 136 CFI_ADJUST_CFA_OFFSET -4;\
137 CFI_RESTORE ebp;\
138 popl %eax; \
139 CFI_ADJUST_CFA_OFFSET -4;\
140 CFI_RESTORE eax
108 141
109#define RESTORE_REGS \ 142#define RESTORE_REGS \
110 RESTORE_INT_REGS; \ 143 RESTORE_INT_REGS; \
1111: popl %ds; \ 1441: popl %ds; \
145 CFI_ADJUST_CFA_OFFSET -4;\
146 /*CFI_RESTORE ds;*/\
1122: popl %es; \ 1472: popl %es; \
148 CFI_ADJUST_CFA_OFFSET -4;\
149 /*CFI_RESTORE es;*/\
113.section .fixup,"ax"; \ 150.section .fixup,"ax"; \
1143: movl $0,(%esp); \ 1513: movl $0,(%esp); \
115 jmp 1b; \ 152 jmp 1b; \
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000
122 .long 2b,4b; \ 159 .long 2b,4b; \
123.previous 160.previous
124 161
162#define RING0_INT_FRAME \
163 CFI_STARTPROC simple;\
164 CFI_DEF_CFA esp, 3*4;\
165 /*CFI_OFFSET cs, -2*4;*/\
166 CFI_OFFSET eip, -3*4
167
168#define RING0_EC_FRAME \
169 CFI_STARTPROC simple;\
170 CFI_DEF_CFA esp, 4*4;\
171 /*CFI_OFFSET cs, -2*4;*/\
172 CFI_OFFSET eip, -3*4
173
174#define RING0_PTREGS_FRAME \
175 CFI_STARTPROC simple;\
176 CFI_DEF_CFA esp, OLDESP-EBX;\
177 /*CFI_OFFSET cs, CS-OLDESP;*/\
178 CFI_OFFSET eip, EIP-OLDESP;\
179 /*CFI_OFFSET es, ES-OLDESP;*/\
180 /*CFI_OFFSET ds, DS-OLDESP;*/\
181 CFI_OFFSET eax, EAX-OLDESP;\
182 CFI_OFFSET ebp, EBP-OLDESP;\
183 CFI_OFFSET edi, EDI-OLDESP;\
184 CFI_OFFSET esi, ESI-OLDESP;\
185 CFI_OFFSET edx, EDX-OLDESP;\
186 CFI_OFFSET ecx, ECX-OLDESP;\
187 CFI_OFFSET ebx, EBX-OLDESP
125 188
126ENTRY(ret_from_fork) 189ENTRY(ret_from_fork)
190 CFI_STARTPROC
127 pushl %eax 191 pushl %eax
192 CFI_ADJUST_CFA_OFFSET 4 # push lowered %esp by 4, so the CFA is 4 bytes further away
128 call schedule_tail 193 call schedule_tail
129 GET_THREAD_INFO(%ebp) 194 GET_THREAD_INFO(%ebp)
130 popl %eax 195 popl %eax
196 CFI_ADJUST_CFA_OFFSET -4
131 jmp syscall_exit 197 jmp syscall_exit
198 CFI_ENDPROC
132 199
133/* 200/*
134 * Return to user mode is not as complex as all this looks, 201 * Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
139 206
140 # userspace resumption stub bypassing syscall exit tracing 207 # userspace resumption stub bypassing syscall exit tracing
141 ALIGN 208 ALIGN
209 RING0_PTREGS_FRAME
142ret_from_exception: 210ret_from_exception:
143 preempt_stop 211 preempt_stop
144ret_from_intr: 212ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
171 call preempt_schedule_irq 239 call preempt_schedule_irq
172 jmp need_resched 240 jmp need_resched
173#endif 241#endif
242 CFI_ENDPROC
174 243
175/* SYSENTER_RETURN points to after the "sysenter" instruction in 244/* SYSENTER_RETURN points to after the "sysenter" instruction in
176 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 245 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
177 246
178 # sysenter call handler stub 247 # sysenter call handler stub
179ENTRY(sysenter_entry) 248ENTRY(sysenter_entry)
249 CFI_STARTPROC simple
250 CFI_DEF_CFA esp, 0
251 CFI_REGISTER esp, ebp
180 movl TSS_sysenter_esp0(%esp),%esp 252 movl TSS_sysenter_esp0(%esp),%esp
181sysenter_past_esp: 253sysenter_past_esp:
182 sti 254 sti
183 pushl $(__USER_DS) 255 pushl $(__USER_DS)
256 CFI_ADJUST_CFA_OFFSET 4
257 /*CFI_REL_OFFSET ss, 0*/
184 pushl %ebp 258 pushl %ebp
259 CFI_ADJUST_CFA_OFFSET 4
260 CFI_REL_OFFSET esp, 0
185 pushfl 261 pushfl
262 CFI_ADJUST_CFA_OFFSET 4
186 pushl $(__USER_CS) 263 pushl $(__USER_CS)
264 CFI_ADJUST_CFA_OFFSET 4
265 /*CFI_REL_OFFSET cs, 0*/
187 pushl $SYSENTER_RETURN 266 pushl $SYSENTER_RETURN
267 CFI_ADJUST_CFA_OFFSET 4
268 CFI_REL_OFFSET eip, 0
188 269
189/* 270/*
190 * Load the potential sixth argument from user stack. 271 * Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
199.previous 280.previous
200 281
201 pushl %eax 282 pushl %eax
283 CFI_ADJUST_CFA_OFFSET 4
202 SAVE_ALL 284 SAVE_ALL
203 GET_THREAD_INFO(%ebp) 285 GET_THREAD_INFO(%ebp)
204 286
@@ -219,11 +301,14 @@ sysenter_past_esp:
219 xorl %ebp,%ebp 301 xorl %ebp,%ebp
220 sti 302 sti
221 sysexit 303 sysexit
304 CFI_ENDPROC
222 305
223 306
224 # system call handler stub 307 # system call handler stub
225ENTRY(system_call) 308ENTRY(system_call)
309 RING0_INT_FRAME # can't unwind into user space anyway
226 pushl %eax # save orig_eax 310 pushl %eax # save orig_eax
311 CFI_ADJUST_CFA_OFFSET 4
227 SAVE_ALL 312 SAVE_ALL
228 GET_THREAD_INFO(%ebp) 313 GET_THREAD_INFO(%ebp)
229 testl $TF_MASK,EFLAGS(%esp) 314 testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
256 movb CS(%esp), %al 341 movb CS(%esp), %al
257 andl $(VM_MASK | (4 << 8) | 3), %eax 342 andl $(VM_MASK | (4 << 8) | 3), %eax
258 cmpl $((4 << 8) | 3), %eax 343 cmpl $((4 << 8) | 3), %eax
344 CFI_REMEMBER_STATE
259 je ldt_ss # returning to user-space with LDT SS 345 je ldt_ss # returning to user-space with LDT SS
260restore_nocheck: 346restore_nocheck:
261 RESTORE_REGS 347 RESTORE_REGS
262 addl $4, %esp 348 addl $4, %esp
349 CFI_ADJUST_CFA_OFFSET -4
2631: iret 3501: iret
264.section .fixup,"ax" 351.section .fixup,"ax"
265iret_exc: 352iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
273 .long 1b,iret_exc 360 .long 1b,iret_exc
274.previous 361.previous
275 362
363 CFI_RESTORE_STATE
276ldt_ss: 364ldt_ss:
277 larl OLDSS(%esp), %eax 365 larl OLDSS(%esp), %eax
278 jnz restore_nocheck 366 jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
285 * CPUs, which we can try to work around to make 373 * CPUs, which we can try to work around to make
286 * dosemu and wine happy. */ 374 * dosemu and wine happy. */
287 subl $8, %esp # reserve space for switch16 pointer 375 subl $8, %esp # reserve space for switch16 pointer
376 CFI_ADJUST_CFA_OFFSET 8
288 cli 377 cli
289 movl %esp, %eax 378 movl %esp, %eax
290 /* Set up the 16bit stack frame with switch32 pointer on top, 379 /* Set up the 16bit stack frame with switch32 pointer on top,
291 * and a switch16 pointer on top of the current frame. */ 380 * and a switch16 pointer on top of the current frame. */
292 call setup_x86_bogus_stack 381 call setup_x86_bogus_stack
382 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
293 RESTORE_REGS 383 RESTORE_REGS
294 lss 20+4(%esp), %esp # switch to 16bit stack 384 lss 20+4(%esp), %esp # switch to 16bit stack
2951: iret 3851: iret
@@ -297,9 +387,11 @@ ldt_ss:
297 .align 4 387 .align 4
298 .long 1b,iret_exc 388 .long 1b,iret_exc
299.previous 389.previous
390 CFI_ENDPROC
300 391
301 # perform work that needs to be done immediately before resumption 392 # perform work that needs to be done immediately before resumption
302 ALIGN 393 ALIGN
394 RING0_PTREGS_FRAME # can't unwind into user space anyway
303work_pending: 395work_pending:
304 testb $_TIF_NEED_RESCHED, %cl 396 testb $_TIF_NEED_RESCHED, %cl
305 jz work_notifysig 397 jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and
329work_notifysig_v86: 421work_notifysig_v86:
330#ifdef CONFIG_VM86 422#ifdef CONFIG_VM86
331 pushl %ecx # save ti_flags for do_notify_resume 423 pushl %ecx # save ti_flags for do_notify_resume
424 CFI_ADJUST_CFA_OFFSET 4
332 call save_v86_state # %eax contains pt_regs pointer 425 call save_v86_state # %eax contains pt_regs pointer
333 popl %ecx 426 popl %ecx
427 CFI_ADJUST_CFA_OFFSET -4
334 movl %eax, %esp 428 movl %eax, %esp
335 xorl %edx, %edx 429 xorl %edx, %edx
336 call do_notify_resume 430 call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
363 movl $1, %edx 457 movl $1, %edx
364 call do_syscall_trace 458 call do_syscall_trace
365 jmp resume_userspace 459 jmp resume_userspace
460 CFI_ENDPROC
366 461
367 ALIGN 462 RING0_INT_FRAME # can't unwind into user space anyway
368syscall_fault: 463syscall_fault:
369 pushl %eax # save orig_eax 464 pushl %eax # save orig_eax
465 CFI_ADJUST_CFA_OFFSET 4
370 SAVE_ALL 466 SAVE_ALL
371 GET_THREAD_INFO(%ebp) 467 GET_THREAD_INFO(%ebp)
372 movl $-EFAULT,EAX(%esp) 468 movl $-EFAULT,EAX(%esp)
373 jmp resume_userspace 469 jmp resume_userspace
374 470
375 ALIGN
376syscall_badsys: 471syscall_badsys:
377 movl $-ENOSYS,EAX(%esp) 472 movl $-ENOSYS,EAX(%esp)
378 jmp resume_userspace 473 jmp resume_userspace
474 CFI_ENDPROC
379 475
380#define FIXUP_ESPFIX_STACK \ 476#define FIXUP_ESPFIX_STACK \
381 movl %esp, %eax; \ 477 movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
387 movl %eax, %esp; 483 movl %eax, %esp;
388#define UNWIND_ESPFIX_STACK \ 484#define UNWIND_ESPFIX_STACK \
389 pushl %eax; \ 485 pushl %eax; \
486 CFI_ADJUST_CFA_OFFSET 4; \
390 movl %ss, %eax; \ 487 movl %ss, %eax; \
391 /* see if on 16bit stack */ \ 488 /* see if on 16bit stack */ \
392 cmpw $__ESPFIX_SS, %ax; \ 489 cmpw $__ESPFIX_SS, %ax; \
393 jne 28f; \ 490 je 28f; \
394 movl $__KERNEL_DS, %edx; \ 49127: popl %eax; \
395 movl %edx, %ds; \ 492 CFI_ADJUST_CFA_OFFSET -4; \
396 movl %edx, %es; \ 493.section .fixup,"ax"; \
49428: movl $__KERNEL_DS, %eax; \
495 movl %eax, %ds; \
496 movl %eax, %es; \
397 /* switch to 32bit stack */ \ 497 /* switch to 32bit stack */ \
398 FIXUP_ESPFIX_STACK \ 498 FIXUP_ESPFIX_STACK; \
39928: popl %eax; 499 jmp 27b; \
500.previous
400 501
401/* 502/*
402 * Build the entry stubs and pointer table with 503 * Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
408 509
409vector=0 510vector=0
410ENTRY(irq_entries_start) 511ENTRY(irq_entries_start)
512 RING0_INT_FRAME
411.rept NR_IRQS 513.rept NR_IRQS
412 ALIGN 514 ALIGN
515 .if vector
516 CFI_ADJUST_CFA_OFFSET -4
517 .endif
4131: pushl $vector-256 5181: pushl $vector-256
519 CFI_ADJUST_CFA_OFFSET 4
414 jmp common_interrupt 520 jmp common_interrupt
415.data 521.data
416 .long 1b 522 .long 1b
@@ -424,60 +530,99 @@ common_interrupt:
424 movl %esp,%eax 530 movl %esp,%eax
425 call do_IRQ 531 call do_IRQ
426 jmp ret_from_intr 532 jmp ret_from_intr
533 CFI_ENDPROC
427 534
428#define BUILD_INTERRUPT(name, nr) \ 535#define BUILD_INTERRUPT(name, nr) \
429ENTRY(name) \ 536ENTRY(name) \
537 RING0_INT_FRAME; \
430 pushl $nr-256; \ 538 pushl $nr-256; \
431 SAVE_ALL \ 539 CFI_ADJUST_CFA_OFFSET 4; \
540 SAVE_ALL; \
432 movl %esp,%eax; \ 541 movl %esp,%eax; \
433 call smp_/**/name; \ 542 call smp_/**/name; \
434 jmp ret_from_intr; 543 jmp ret_from_intr; \
544 CFI_ENDPROC
435 545
436/* The include is where all of the SMP etc. interrupts come from */ 546/* The include is where all of the SMP etc. interrupts come from */
437#include "entry_arch.h" 547#include "entry_arch.h"
438 548
439ENTRY(divide_error) 549ENTRY(divide_error)
550 RING0_INT_FRAME
440 pushl $0 # no error code 551 pushl $0 # no error code
552 CFI_ADJUST_CFA_OFFSET 4
441 pushl $do_divide_error 553 pushl $do_divide_error
554 CFI_ADJUST_CFA_OFFSET 4
442 ALIGN 555 ALIGN
443error_code: 556error_code:
444 pushl %ds 557 pushl %ds
558 CFI_ADJUST_CFA_OFFSET 4
559 /*CFI_REL_OFFSET ds, 0*/
445 pushl %eax 560 pushl %eax
561 CFI_ADJUST_CFA_OFFSET 4
562 CFI_REL_OFFSET eax, 0
446 xorl %eax, %eax 563 xorl %eax, %eax
447 pushl %ebp 564 pushl %ebp
565 CFI_ADJUST_CFA_OFFSET 4
566 CFI_REL_OFFSET ebp, 0
448 pushl %edi 567 pushl %edi
568 CFI_ADJUST_CFA_OFFSET 4
569 CFI_REL_OFFSET edi, 0
449 pushl %esi 570 pushl %esi
571 CFI_ADJUST_CFA_OFFSET 4
572 CFI_REL_OFFSET esi, 0
450 pushl %edx 573 pushl %edx
574 CFI_ADJUST_CFA_OFFSET 4
575 CFI_REL_OFFSET edx, 0
451 decl %eax # eax = -1 576 decl %eax # eax = -1
452 pushl %ecx 577 pushl %ecx
578 CFI_ADJUST_CFA_OFFSET 4
579 CFI_REL_OFFSET ecx, 0
453 pushl %ebx 580 pushl %ebx
581 CFI_ADJUST_CFA_OFFSET 4
582 CFI_REL_OFFSET ebx, 0
454 cld 583 cld
455 pushl %es 584 pushl %es
585 CFI_ADJUST_CFA_OFFSET 4
586 /*CFI_REL_OFFSET es, 0*/
456 UNWIND_ESPFIX_STACK 587 UNWIND_ESPFIX_STACK
457 popl %ecx 588 popl %ecx
589 CFI_ADJUST_CFA_OFFSET -4
590 /*CFI_REGISTER es, ecx*/
458 movl ES(%esp), %edi # get the function address 591 movl ES(%esp), %edi # get the function address
459 movl ORIG_EAX(%esp), %edx # get the error code 592 movl ORIG_EAX(%esp), %edx # get the error code
460 movl %eax, ORIG_EAX(%esp) 593 movl %eax, ORIG_EAX(%esp)
461 movl %ecx, ES(%esp) 594 movl %ecx, ES(%esp)
595 /*CFI_REL_OFFSET es, ES*/
462 movl $(__USER_DS), %ecx 596 movl $(__USER_DS), %ecx
463 movl %ecx, %ds 597 movl %ecx, %ds
464 movl %ecx, %es 598 movl %ecx, %es
465 movl %esp,%eax # pt_regs pointer 599 movl %esp,%eax # pt_regs pointer
466 call *%edi 600 call *%edi
467 jmp ret_from_exception 601 jmp ret_from_exception
602 CFI_ENDPROC
468 603
469ENTRY(coprocessor_error) 604ENTRY(coprocessor_error)
605 RING0_INT_FRAME
470 pushl $0 606 pushl $0
607 CFI_ADJUST_CFA_OFFSET 4
471 pushl $do_coprocessor_error 608 pushl $do_coprocessor_error
609 CFI_ADJUST_CFA_OFFSET 4
472 jmp error_code 610 jmp error_code
611 CFI_ENDPROC
473 612
474ENTRY(simd_coprocessor_error) 613ENTRY(simd_coprocessor_error)
614 RING0_INT_FRAME
475 pushl $0 615 pushl $0
616 CFI_ADJUST_CFA_OFFSET 4
476 pushl $do_simd_coprocessor_error 617 pushl $do_simd_coprocessor_error
618 CFI_ADJUST_CFA_OFFSET 4
477 jmp error_code 619 jmp error_code
620 CFI_ENDPROC
478 621
479ENTRY(device_not_available) 622ENTRY(device_not_available)
623 RING0_INT_FRAME
480 pushl $-1 # mark this as an int 624 pushl $-1 # mark this as an int
625 CFI_ADJUST_CFA_OFFSET 4
481 SAVE_ALL 626 SAVE_ALL
482 movl %cr0, %eax 627 movl %cr0, %eax
483 testl $0x4, %eax # EM (math emulation bit) 628 testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
487 jmp ret_from_exception 632 jmp ret_from_exception
488device_not_available_emulate: 633device_not_available_emulate:
489 pushl $0 # temporary storage for ORIG_EIP 634 pushl $0 # temporary storage for ORIG_EIP
635 CFI_ADJUST_CFA_OFFSET 4
490 call math_emulate 636 call math_emulate
491 addl $4, %esp 637 addl $4, %esp
638 CFI_ADJUST_CFA_OFFSET -4
492 jmp ret_from_exception 639 jmp ret_from_exception
640 CFI_ENDPROC
493 641
494/* 642/*
495 * Debug traps and NMI can happen at the one SYSENTER instruction 643 * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label: \
514 pushl $sysenter_past_esp 662 pushl $sysenter_past_esp
515 663
516KPROBE_ENTRY(debug) 664KPROBE_ENTRY(debug)
665 RING0_INT_FRAME
517 cmpl $sysenter_entry,(%esp) 666 cmpl $sysenter_entry,(%esp)
518 jne debug_stack_correct 667 jne debug_stack_correct
519 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 668 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
520debug_stack_correct: 669debug_stack_correct:
521 pushl $-1 # mark this as an int 670 pushl $-1 # mark this as an int
671 CFI_ADJUST_CFA_OFFSET 4
522 SAVE_ALL 672 SAVE_ALL
523 xorl %edx,%edx # error code 0 673 xorl %edx,%edx # error code 0
524 movl %esp,%eax # pt_regs pointer 674 movl %esp,%eax # pt_regs pointer
525 call do_debug 675 call do_debug
526 jmp ret_from_exception 676 jmp ret_from_exception
677 CFI_ENDPROC
527 .previous .text 678 .previous .text
528/* 679/*
529 * NMI is doubly nasty. It can happen _while_ we're handling 680 * NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
534 * fault happened on the sysenter path. 685 * fault happened on the sysenter path.
535 */ 686 */
536ENTRY(nmi) 687ENTRY(nmi)
688 RING0_INT_FRAME
537 pushl %eax 689 pushl %eax
690 CFI_ADJUST_CFA_OFFSET 4
538 movl %ss, %eax 691 movl %ss, %eax
539 cmpw $__ESPFIX_SS, %ax 692 cmpw $__ESPFIX_SS, %ax
540 popl %eax 693 popl %eax
694 CFI_ADJUST_CFA_OFFSET -4
541 je nmi_16bit_stack 695 je nmi_16bit_stack
542 cmpl $sysenter_entry,(%esp) 696 cmpl $sysenter_entry,(%esp)
543 je nmi_stack_fixup 697 je nmi_stack_fixup
544 pushl %eax 698 pushl %eax
699 CFI_ADJUST_CFA_OFFSET 4
545 movl %esp,%eax 700 movl %esp,%eax
546 /* Do not access memory above the end of our stack page, 701 /* Do not access memory above the end of our stack page,
547 * it might not exist. 702 * it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
549 andl $(THREAD_SIZE-1),%eax 704 andl $(THREAD_SIZE-1),%eax
550 cmpl $(THREAD_SIZE-20),%eax 705 cmpl $(THREAD_SIZE-20),%eax
551 popl %eax 706 popl %eax
707 CFI_ADJUST_CFA_OFFSET -4
552 jae nmi_stack_correct 708 jae nmi_stack_correct
553 cmpl $sysenter_entry,12(%esp) 709 cmpl $sysenter_entry,12(%esp)
554 je nmi_debug_stack_check 710 je nmi_debug_stack_check
555nmi_stack_correct: 711nmi_stack_correct:
556 pushl %eax 712 pushl %eax
713 CFI_ADJUST_CFA_OFFSET 4
557 SAVE_ALL 714 SAVE_ALL
558 xorl %edx,%edx # zero error code 715 xorl %edx,%edx # zero error code
559 movl %esp,%eax # pt_regs pointer 716 movl %esp,%eax # pt_regs pointer
560 call do_nmi 717 call do_nmi
561 jmp restore_all 718 jmp restore_all
719 CFI_ENDPROC
562 720
563nmi_stack_fixup: 721nmi_stack_fixup:
564 FIX_STACK(12,nmi_stack_correct, 1) 722 FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
574 jmp nmi_stack_correct 732 jmp nmi_stack_correct
575 733
576nmi_16bit_stack: 734nmi_16bit_stack:
735 RING0_INT_FRAME
577 /* create the pointer to lss back */ 736 /* create the pointer to lss back */
578 pushl %ss 737 pushl %ss
738 CFI_ADJUST_CFA_OFFSET 4
579 pushl %esp 739 pushl %esp
740 CFI_ADJUST_CFA_OFFSET 4
580 movzwl %sp, %esp 741 movzwl %sp, %esp
581 addw $4, (%esp) 742 addw $4, (%esp)
582 /* copy the iret frame of 12 bytes */ 743 /* copy the iret frame of 12 bytes */
583 .rept 3 744 .rept 3
584 pushl 16(%esp) 745 pushl 16(%esp)
746 CFI_ADJUST_CFA_OFFSET 4
585 .endr 747 .endr
586 pushl %eax 748 pushl %eax
749 CFI_ADJUST_CFA_OFFSET 4
587 SAVE_ALL 750 SAVE_ALL
588 FIXUP_ESPFIX_STACK # %eax == %esp 751 FIXUP_ESPFIX_STACK # %eax == %esp
752 CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
589 xorl %edx,%edx # zero error code 753 xorl %edx,%edx # zero error code
590 call do_nmi 754 call do_nmi
591 RESTORE_REGS 755 RESTORE_REGS
592 lss 12+4(%esp), %esp # back to 16bit stack 756 lss 12+4(%esp), %esp # back to 16bit stack
5931: iret 7571: iret
758 CFI_ENDPROC
594.section __ex_table,"a" 759.section __ex_table,"a"
595 .align 4 760 .align 4
596 .long 1b,iret_exc 761 .long 1b,iret_exc
597.previous 762.previous
598 763
599KPROBE_ENTRY(int3) 764KPROBE_ENTRY(int3)
765 RING0_INT_FRAME
600 pushl $-1 # mark this as an int 766 pushl $-1 # mark this as an int
767 CFI_ADJUST_CFA_OFFSET 4
601 SAVE_ALL 768 SAVE_ALL
602 xorl %edx,%edx # zero error code 769 xorl %edx,%edx # zero error code
603 movl %esp,%eax # pt_regs pointer 770 movl %esp,%eax # pt_regs pointer
604 call do_int3 771 call do_int3
605 jmp ret_from_exception 772 jmp ret_from_exception
773 CFI_ENDPROC
606 .previous .text 774 .previous .text
607 775
608ENTRY(overflow) 776ENTRY(overflow)
777 RING0_INT_FRAME
609 pushl $0 778 pushl $0
779 CFI_ADJUST_CFA_OFFSET 4
610 pushl $do_overflow 780 pushl $do_overflow
781 CFI_ADJUST_CFA_OFFSET 4
611 jmp error_code 782 jmp error_code
783 CFI_ENDPROC
612 784
613ENTRY(bounds) 785ENTRY(bounds)
786 RING0_INT_FRAME
614 pushl $0 787 pushl $0
788 CFI_ADJUST_CFA_OFFSET 4
615 pushl $do_bounds 789 pushl $do_bounds
790 CFI_ADJUST_CFA_OFFSET 4
616 jmp error_code 791 jmp error_code
792 CFI_ENDPROC
617 793
618ENTRY(invalid_op) 794ENTRY(invalid_op)
795 RING0_INT_FRAME
619 pushl $0 796 pushl $0
797 CFI_ADJUST_CFA_OFFSET 4
620 pushl $do_invalid_op 798 pushl $do_invalid_op
799 CFI_ADJUST_CFA_OFFSET 4
621 jmp error_code 800 jmp error_code
801 CFI_ENDPROC
622 802
623ENTRY(coprocessor_segment_overrun) 803ENTRY(coprocessor_segment_overrun)
804 RING0_INT_FRAME
624 pushl $0 805 pushl $0
806 CFI_ADJUST_CFA_OFFSET 4
625 pushl $do_coprocessor_segment_overrun 807 pushl $do_coprocessor_segment_overrun
808 CFI_ADJUST_CFA_OFFSET 4
626 jmp error_code 809 jmp error_code
810 CFI_ENDPROC
627 811
628ENTRY(invalid_TSS) 812ENTRY(invalid_TSS)
813 RING0_EC_FRAME
629 pushl $do_invalid_TSS 814 pushl $do_invalid_TSS
815 CFI_ADJUST_CFA_OFFSET 4
630 jmp error_code 816 jmp error_code
817 CFI_ENDPROC
631 818
632ENTRY(segment_not_present) 819ENTRY(segment_not_present)
820 RING0_EC_FRAME
633 pushl $do_segment_not_present 821 pushl $do_segment_not_present
822 CFI_ADJUST_CFA_OFFSET 4
634 jmp error_code 823 jmp error_code
824 CFI_ENDPROC
635 825
636ENTRY(stack_segment) 826ENTRY(stack_segment)
827 RING0_EC_FRAME
637 pushl $do_stack_segment 828 pushl $do_stack_segment
829 CFI_ADJUST_CFA_OFFSET 4
638 jmp error_code 830 jmp error_code
831 CFI_ENDPROC
639 832
640KPROBE_ENTRY(general_protection) 833KPROBE_ENTRY(general_protection)
834 RING0_EC_FRAME
641 pushl $do_general_protection 835 pushl $do_general_protection
836 CFI_ADJUST_CFA_OFFSET 4
642 jmp error_code 837 jmp error_code
838 CFI_ENDPROC
643 .previous .text 839 .previous .text
644 840
645ENTRY(alignment_check) 841ENTRY(alignment_check)
842 RING0_EC_FRAME
646 pushl $do_alignment_check 843 pushl $do_alignment_check
844 CFI_ADJUST_CFA_OFFSET 4
647 jmp error_code 845 jmp error_code
846 CFI_ENDPROC
648 847
649KPROBE_ENTRY(page_fault) 848KPROBE_ENTRY(page_fault)
849 RING0_EC_FRAME
650 pushl $do_page_fault 850 pushl $do_page_fault
851 CFI_ADJUST_CFA_OFFSET 4
651 jmp error_code 852 jmp error_code
853 CFI_ENDPROC
652 .previous .text 854 .previous .text
653 855
654#ifdef CONFIG_X86_MCE 856#ifdef CONFIG_X86_MCE
655ENTRY(machine_check) 857ENTRY(machine_check)
858 RING0_INT_FRAME
656 pushl $0 859 pushl $0
860 CFI_ADJUST_CFA_OFFSET 4
657 pushl machine_check_vector 861 pushl machine_check_vector
862 CFI_ADJUST_CFA_OFFSET 4
658 jmp error_code 863 jmp error_code
864 CFI_ENDPROC
659#endif 865#endif
660 866
661ENTRY(spurious_interrupt_bug) 867ENTRY(spurious_interrupt_bug)
868 RING0_INT_FRAME
662 pushl $0 869 pushl $0
870 CFI_ADJUST_CFA_OFFSET 4
663 pushl $do_spurious_interrupt_bug 871 pushl $do_spurious_interrupt_bug
872 CFI_ADJUST_CFA_OFFSET 4
664 jmp error_code 873 jmp error_code
874 CFI_ENDPROC
875
876#ifdef CONFIG_STACK_UNWIND
877ENTRY(arch_unwind_init_running)
878 CFI_STARTPROC
879 movl 4(%esp), %edx
880 movl (%esp), %ecx
881 leal 4(%esp), %eax
882 movl %ebx, EBX(%edx)
883 xorl %ebx, %ebx
884 movl %ebx, ECX(%edx)
885 movl %ebx, EDX(%edx)
886 movl %esi, ESI(%edx)
887 movl %edi, EDI(%edx)
888 movl %ebp, EBP(%edx)
889 movl %ebx, EAX(%edx)
890 movl $__USER_DS, DS(%edx)
891 movl $__USER_DS, ES(%edx)
892 movl %ebx, ORIG_EAX(%edx)
893 movl %ecx, EIP(%edx)
894 movl 12(%esp), %ecx
895 movl $__KERNEL_CS, CS(%edx)
896 movl %ebx, EFLAGS(%edx)
897 movl %eax, OLDESP(%edx)
898 movl 8(%esp), %eax
899 movl %ecx, 8(%esp)
900 movl EBX(%edx), %ebx
901 movl $__KERNEL_DS, OLDSS(%edx)
902 jmpl *%eax
903 CFI_ENDPROC
904ENDPROC(arch_unwind_init_running)
905#endif
665 906
666.section .rodata,"a" 907.section .rodata,"a"
667#include "syscall_table.S" 908#include "syscall_table.S"
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index a62df3e764c5..72ae414e4d49 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40#include <asm/i8259.h> 40#include <asm/i8259.h>
41#include <asm/nmi.h>
41 42
42#include <mach_apic.h> 43#include <mach_apic.h>
43 44
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
50static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 51static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
51 52
52static DEFINE_SPINLOCK(ioapic_lock); 53static DEFINE_SPINLOCK(ioapic_lock);
54static DEFINE_SPINLOCK(vector_lock);
53 55
54int timer_over_8254 __initdata = 1; 56int timer_over_8254 __initdata = 1;
55 57
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1161int assign_irq_vector(int irq) 1163int assign_irq_vector(int irq)
1162{ 1164{
1163 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 1165 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
1166 unsigned long flags;
1167 int vector;
1168
1169 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1164 1170
1165 BUG_ON(irq >= NR_IRQ_VECTORS); 1171 spin_lock_irqsave(&vector_lock, flags);
1166 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 1172
1173 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1174 spin_unlock_irqrestore(&vector_lock, flags);
1167 return IO_APIC_VECTOR(irq); 1175 return IO_APIC_VECTOR(irq);
1176 }
1168next: 1177next:
1169 current_vector += 8; 1178 current_vector += 8;
1170 if (current_vector == SYSCALL_VECTOR) 1179 if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
1172 1181
1173 if (current_vector >= FIRST_SYSTEM_VECTOR) { 1182 if (current_vector >= FIRST_SYSTEM_VECTOR) {
1174 offset++; 1183 offset++;
1175 if (!(offset%8)) 1184 if (!(offset%8)) {
1185 spin_unlock_irqrestore(&vector_lock, flags);
1176 return -ENOSPC; 1186 return -ENOSPC;
1187 }
1177 current_vector = FIRST_DEVICE_VECTOR + offset; 1188 current_vector = FIRST_DEVICE_VECTOR + offset;
1178 } 1189 }
1179 1190
1180 vector_irq[current_vector] = irq; 1191 vector = current_vector;
1192 vector_irq[vector] = irq;
1181 if (irq != AUTO_ASSIGN) 1193 if (irq != AUTO_ASSIGN)
1182 IO_APIC_VECTOR(irq) = current_vector; 1194 IO_APIC_VECTOR(irq) = vector;
1183 1195
1184 return current_vector; 1196 spin_unlock_irqrestore(&vector_lock, flags);
1197
1198 return vector;
1185} 1199}
1186 1200
1187static struct hw_interrupt_type ioapic_level_type; 1201static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
1193 1207
1194static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 1208static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1195{ 1209{
1196 if (use_pci_vector() && !platform_legacy_irq(irq)) { 1210 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1197 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1211
1198 trigger == IOAPIC_LEVEL) 1212 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1199 irq_desc[vector].handler = &ioapic_level_type; 1213 trigger == IOAPIC_LEVEL)
1200 else 1214 irq_desc[idx].handler = &ioapic_level_type;
1201 irq_desc[vector].handler = &ioapic_edge_type; 1215 else
1202 set_intr_gate(vector, interrupt[vector]); 1216 irq_desc[idx].handler = &ioapic_edge_type;
1203 } else { 1217 set_intr_gate(vector, interrupt[idx]);
1204 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1205 trigger == IOAPIC_LEVEL)
1206 irq_desc[irq].handler = &ioapic_level_type;
1207 else
1208 irq_desc[irq].handler = &ioapic_edge_type;
1209 set_intr_gate(vector, interrupt[irq]);
1210 }
1211} 1218}
1212 1219
1213static void __init setup_IO_APIC_irqs(void) 1220static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 49ce4c31b713..061533e0cb5e 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
227 if (i == 0) { 227 if (i == 0) {
228 seq_printf(p, " "); 228 seq_printf(p, " ");
229 for_each_online_cpu(j) 229 for_each_online_cpu(j)
230 seq_printf(p, "CPU%d ",j); 230 seq_printf(p, "CPU%-8d",j);
231 seq_putc(p, '\n'); 231 seq_putc(p, '\n');
232 } 232 }
233 233
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498ec745..a76e93146585 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
14 */ 14 */
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/mm.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/bootmem.h>
20#include <linux/smp_lock.h>
21#include <linux/interrupt.h> 18#include <linux/interrupt.h>
22#include <linux/mc146818rtc.h>
23#include <linux/kernel_stat.h>
24#include <linux/module.h> 19#include <linux/module.h>
25#include <linux/nmi.h> 20#include <linux/nmi.h>
26#include <linux/sysdev.h> 21#include <linux/sysdev.h>
27#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/div64.h>
31#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/intel_arch_perfmon.h>
32 28
33#include "mach_traps.h" 29#include "mach_traps.h"
34 30
@@ -100,6 +96,9 @@ int nmi_active;
100 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ 96 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
101 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) 97 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
102 98
99#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
100#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
101
103#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
104/* The performance counters used by NMI_LOCAL_APIC don't trigger when 103/* The performance counters used by NMI_LOCAL_APIC don't trigger when
105 * the CPU is idle. To make sure the NMI watchdog really ticks on all 104 * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
212 211
213__setup("nmi_watchdog=", setup_nmi_watchdog); 212__setup("nmi_watchdog=", setup_nmi_watchdog);
214 213
214static void disable_intel_arch_watchdog(void);
215
215static void disable_lapic_nmi_watchdog(void) 216static void disable_lapic_nmi_watchdog(void)
216{ 217{
217 if (nmi_active <= 0) 218 if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
221 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 222 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
222 break; 223 break;
223 case X86_VENDOR_INTEL: 224 case X86_VENDOR_INTEL:
225 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
226 disable_intel_arch_watchdog();
227 break;
228 }
224 switch (boot_cpu_data.x86) { 229 switch (boot_cpu_data.x86) {
225 case 6: 230 case 6:
226 if (boot_cpu_data.x86_model > 0xd) 231 if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
449 return 1; 454 return 1;
450} 455}
451 456
457static void disable_intel_arch_watchdog(void)
458{
459 unsigned ebx;
460
461 /*
462 * Check whether the Architectural PerfMon supports
463 * Unhalted Core Cycles Event or not.
464 * NOTE: Corresponding bit = 0 in ebx indicates event present.
465 */
466 ebx = cpuid_ebx(10);
467 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
468 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
469}
470
471static int setup_intel_arch_watchdog(void)
472{
473 unsigned int evntsel;
474 unsigned ebx;
475
476 /*
477 * Check whether the Architectural PerfMon supports
478 * Unhalted Core Cycles Event or not.
479 * NOTE: Corresponding bit = 0 in ebx indicates event present.
480 */
481 ebx = cpuid_ebx(10);
482 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
483 return 0;
484
485 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
486
487 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
488 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
489
490 evntsel = ARCH_PERFMON_EVENTSEL_INT
491 | ARCH_PERFMON_EVENTSEL_OS
492 | ARCH_PERFMON_EVENTSEL_USR
493 | ARCH_PERFMON_NMI_EVENT_SEL
494 | ARCH_PERFMON_NMI_EVENT_UMASK;
495
496 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
497 write_watchdog_counter("INTEL_ARCH_PERFCTR0");
498 apic_write(APIC_LVTPC, APIC_DM_NMI);
499 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
500 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
501 return 1;
502}
503
452void setup_apic_nmi_watchdog (void) 504void setup_apic_nmi_watchdog (void)
453{ 505{
454 switch (boot_cpu_data.x86_vendor) { 506 switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
458 setup_k7_watchdog(); 510 setup_k7_watchdog();
459 break; 511 break;
460 case X86_VENDOR_INTEL: 512 case X86_VENDOR_INTEL:
513 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
514 if (!setup_intel_arch_watchdog())
515 return;
516 break;
517 }
461 switch (boot_cpu_data.x86) { 518 switch (boot_cpu_data.x86) {
462 case 6: 519 case 6:
463 if (boot_cpu_data.x86_model > 0xd) 520 if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
561 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 618 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
562 apic_write(APIC_LVTPC, APIC_DM_NMI); 619 apic_write(APIC_LVTPC, APIC_DM_NMI);
563 } 620 }
564 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { 621 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
622 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
565 /* Only P6 based Pentium M need to re-unmask 623 /* Only P6 based Pentium M need to re-unmask
566 * the apic vector but it doesn't hurt 624 * the apic vector but it doesn't hurt
567 * other P6 variant */ 625 * other P6 variant */
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afea46d1..6946b06e2784 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
102 local_irq_enable(); 102 local_irq_enable();
103 103
104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) { 104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
105 clear_thread_flag(TIF_POLLING_NRFLAG); 105 current_thread_info()->status &= ~TS_POLLING;
106 smp_mb__after_clear_bit(); 106 smp_mb__after_clear_bit();
107 while (!need_resched()) { 107 while (!need_resched()) {
108 local_irq_disable(); 108 local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
111 else 111 else
112 local_irq_enable(); 112 local_irq_enable();
113 } 113 }
114 set_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status |= TS_POLLING;
115 } else { 115 } else {
116 while (!need_resched()) 116 while (!need_resched())
117 cpu_relax(); 117 cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
174{ 174{
175 int cpu = smp_processor_id(); 175 int cpu = smp_processor_id();
176 176
177 set_thread_flag(TIF_POLLING_NRFLAG); 177 current_thread_info()->status |= TS_POLLING;
178 178
179 /* endless idle loop with no priority at all */ 179 /* endless idle loop with no priority at all */
180 while (1) { 180 while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
312 cr3 = read_cr3(); 312 cr3 = read_cr3();
313 cr4 = read_cr4_safe(); 313 cr4 = read_cr4_safe();
314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
315 show_trace(NULL, &regs->esp); 315 show_trace(NULL, regs, &regs->esp);
316} 316}
317 317
318/* 318/*
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e9643a58..c10789d7a9d3 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
114 114
115static inline int __prepare_ICR (unsigned int shortcut, int vector) 115static inline int __prepare_ICR (unsigned int shortcut, int vector)
116{ 116{
117 return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; 117 unsigned int icr = shortcut | APIC_DEST_LOGICAL;
118
119 switch (vector) {
120 default:
121 icr |= APIC_DM_FIXED | vector;
122 break;
123 case NMI_VECTOR:
124 icr |= APIC_DM_NMI;
125 break;
126 }
127 return icr;
118} 128}
119 129
120static inline int __prepare_ICR2 (unsigned int mask) 130static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bd0ca5c9f053..bce5470ecb42 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/desc.h> 53#include <asm/desc.h>
54#include <asm/arch_hooks.h> 54#include <asm/arch_hooks.h>
55#include <asm/nmi.h>
55 56
56#include <mach_apic.h> 57#include <mach_apic.h>
57#include <mach_wakecpu.h> 58#include <mach_wakecpu.h>
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index dcc14477af1f..78464097470a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
28#include <linux/utsname.h> 28#include <linux/utsname.h>
29#include <linux/kprobes.h> 29#include <linux/kprobes.h>
30#include <linux/kexec.h> 30#include <linux/kexec.h>
31#include <linux/unwind.h>
31 32
32#ifdef CONFIG_EISA 33#ifdef CONFIG_EISA
33#include <linux/ioport.h> 34#include <linux/ioport.h>
@@ -47,7 +48,7 @@
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/i387.h> 49#include <asm/i387.h>
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50 51#include <asm/unwind.h>
51#include <asm/smp.h> 52#include <asm/smp.h>
52#include <asm/arch_hooks.h> 53#include <asm/arch_hooks.h>
53#include <asm/kdebug.h> 54#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
92asmlinkage void machine_check(void); 93asmlinkage void machine_check(void);
93 94
94static int kstack_depth_to_print = 24; 95static int kstack_depth_to_print = 24;
96static int call_trace = 1;
95ATOMIC_NOTIFIER_HEAD(i386die_chain); 97ATOMIC_NOTIFIER_HEAD(i386die_chain);
96 98
97int register_die_notifier(struct notifier_block *nb) 99int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
170 return ebp; 172 return ebp;
171} 173}
172 174
173static void show_trace_log_lvl(struct task_struct *task, 175static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
176{
177 int n = 0;
178 int printed = 0; /* nr of entries already printed on current line */
179
180 while (unwind(info) == 0 && UNW_PC(info)) {
181 ++n;
182 printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
183 if (arch_unw_user_mode(info))
184 break;
185 }
186 if (printed)
187 printk("\n");
188 return n;
189}
190
191static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *stack, char *log_lvl) 192 unsigned long *stack, char *log_lvl)
175{ 193{
176 unsigned long ebp; 194 unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
178 if (!task) 196 if (!task)
179 task = current; 197 task = current;
180 198
199 if (call_trace >= 0) {
200 int unw_ret = 0;
201 struct unwind_frame_info info;
202
203 if (regs) {
204 if (unwind_init_frame_info(&info, task, regs) == 0)
205 unw_ret = show_trace_unwind(&info, log_lvl);
206 } else if (task == current)
207 unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
208 else {
209 if (unwind_init_blocked(&info, task) == 0)
210 unw_ret = show_trace_unwind(&info, log_lvl);
211 }
212 if (unw_ret > 0) {
213 if (call_trace > 0)
214 return;
215 printk("%sLegacy call trace:\n", log_lvl);
216 }
217 }
218
181 if (task == current) { 219 if (task == current) {
182 /* Grab ebp right from our regs */ 220 /* Grab ebp right from our regs */
183 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 221 asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
198 } 236 }
199} 237}
200 238
201void show_trace(struct task_struct *task, unsigned long * stack) 239void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
202{ 240{
203 show_trace_log_lvl(task, stack, ""); 241 show_trace_log_lvl(task, regs, stack, "");
204} 242}
205 243
206static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, 244static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
207 char *log_lvl) 245 unsigned long *esp, char *log_lvl)
208{ 246{
209 unsigned long *stack; 247 unsigned long *stack;
210 int i; 248 int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
225 printk("%08lx ", *stack++); 263 printk("%08lx ", *stack++);
226 } 264 }
227 printk("\n%sCall Trace:\n", log_lvl); 265 printk("\n%sCall Trace:\n", log_lvl);
228 show_trace_log_lvl(task, esp, log_lvl); 266 show_trace_log_lvl(task, regs, esp, log_lvl);
229} 267}
230 268
231void show_stack(struct task_struct *task, unsigned long *esp) 269void show_stack(struct task_struct *task, unsigned long *esp)
232{ 270{
233 printk(" "); 271 printk(" ");
234 show_stack_log_lvl(task, esp, ""); 272 show_stack_log_lvl(task, NULL, esp, "");
235} 273}
236 274
237/* 275/*
@@ -241,7 +279,7 @@ void dump_stack(void)
241{ 279{
242 unsigned long stack; 280 unsigned long stack;
243 281
244 show_trace(current, &stack); 282 show_trace(current, NULL, &stack);
245} 283}
246 284
247EXPORT_SYMBOL(dump_stack); 285EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
285 u8 __user *eip; 323 u8 __user *eip;
286 324
287 printk("\n" KERN_EMERG "Stack: "); 325 printk("\n" KERN_EMERG "Stack: ");
288 show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); 326 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
289 327
290 printk(KERN_EMERG "Code: "); 328 printk(KERN_EMERG "Code: ");
291 329
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
1215 return 1; 1253 return 1;
1216} 1254}
1217__setup("kstack=", kstack_setup); 1255__setup("kstack=", kstack_setup);
1256
1257static int __init call_trace_setup(char *s)
1258{
1259 if (strcmp(s, "old") == 0)
1260 call_trace = -1;
1261 else if (strcmp(s, "both") == 0)
1262 call_trace = 0;
1263 else if (strcmp(s, "new") == 0)
1264 call_trace = 1;
1265 return 1;
1266}
1267__setup("call_trace=", call_trace_setup);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7512f39c9f25..2d4f1386e2b1 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -71,6 +71,15 @@ SECTIONS
71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } 71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
72 _edata = .; /* End of data section */ 72 _edata = .; /* End of data section */
73 73
74#ifdef CONFIG_STACK_UNWIND
75 . = ALIGN(4);
76 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
77 __start_unwind = .;
78 *(.eh_frame)
79 __end_unwind = .;
80 }
81#endif
82
74 . = ALIGN(THREAD_SIZE); /* init_task */ 83 . = ALIGN(THREAD_SIZE); /* init_task */
75 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 84 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
76 *(.data.init_task) 85 *(.data.init_task)
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72a5036..693bdea4a52b 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
13#include <linux/oprofile.h> 13#include <linux/oprofile.h>
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/nmi.h>
16 17
17#include "op_x86_model.h" 18#include "op_x86_model.h"
18#include "op_counter.h" 19#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066035c2..7c61d357b82b 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/fixmap.h> 15#include <asm/fixmap.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015fc044..5c3ab4b027ad 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 355d57970ba3..b045c279136c 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -272,9 +272,9 @@ cpu_idle (void)
272 /* endless idle loop with no priority at all */ 272 /* endless idle loop with no priority at all */
273 while (1) { 273 while (1) {
274 if (can_do_pal_halt) 274 if (can_do_pal_halt)
275 clear_thread_flag(TIF_POLLING_NRFLAG); 275 current_thread_info()->status &= ~TS_POLLING;
276 else 276 else
277 set_thread_flag(TIF_POLLING_NRFLAG); 277 current_thread_info()->status |= TS_POLLING;
278 278
279 if (!need_resched()) { 279 if (!need_resched()) {
280 void (*idle)(void); 280 void (*idle)(void);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index af44130f0d65..ccc4a7fb97a3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -386,24 +386,45 @@ config HPET_EMULATE_RTC
386 bool "Provide RTC interrupt" 386 bool "Provide RTC interrupt"
387 depends on HPET_TIMER && RTC=y 387 depends on HPET_TIMER && RTC=y
388 388
389config GART_IOMMU 389# Mark as embedded because too many people got it wrong.
390 bool "K8 GART IOMMU support" 390# The code disables itself when not needed.
391config IOMMU
392 bool "IOMMU support" if EMBEDDED
391 default y 393 default y
392 select SWIOTLB 394 select SWIOTLB
393 select AGP 395 select AGP
394 depends on PCI 396 depends on PCI
395 help 397 help
396 Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors 398 Support for full DMA access of devices with 32bit memory access only
397 and for the bounce buffering software IOMMU. 399 on systems with more than 3GB. This is usually needed for USB,
398 Needed to run systems with more than 3GB of memory properly with 400 sound, many IDE/SATA chipsets and some other devices.
399 32-bit PCI devices that do not support DAC (Double Address Cycle). 401 Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
400 The IOMMU can be turned off at runtime with the iommu=off parameter. 402 based IOMMU and a software bounce buffer based IOMMU used on Intel
401 Normally the kernel will take the right choice by itself. 403 systems and as fallback.
402 This option includes a driver for the AMD Opteron/Athlon64 IOMMU 404 The code is only active when needed (enough memory and limited
403 northbridge and a software emulation used on other systems without 405 device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
404 hardware IOMMU. If unsure, say Y. 406 too.
405 407
406# need this always selected by GART_IOMMU for the VIA workaround 408config CALGARY_IOMMU
409 bool "IBM Calgary IOMMU support"
410 default y
411 select SWIOTLB
412 depends on PCI && EXPERIMENTAL
413 help
414 Support for hardware IOMMUs in IBM's xSeries x366 and x460
415 systems. Needed to run systems with more than 3GB of memory
416 properly with 32-bit PCI devices that do not support DAC
417 (Double Address Cycle). Calgary also supports bus level
418 isolation, where all DMAs pass through the IOMMU. This
419 prevents them from going anywhere except their intended
420 destination. This catches hard-to-find kernel bugs and
421 mis-behaving drivers and devices that do not use the DMA-API
422 properly to set up their DMA buffers. The IOMMU can be
423 turned off at boot time with the iommu=off parameter.
424 Normally the kernel will make the right choice by itself.
425 If unsure, say Y.
426
427# need this always selected by IOMMU for the VIA workaround
407config SWIOTLB 428config SWIOTLB
408 bool 429 bool
409 430
@@ -501,6 +522,10 @@ config REORDER
501 optimal TLB usage. If you have pretty much any version of binutils, 522 optimal TLB usage. If you have pretty much any version of binutils,
502 this can increase your kernel build time by roughly one minute. 523 this can increase your kernel build time by roughly one minute.
503 524
525config K8_NB
526 def_bool y
527 depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
528
504endmenu 529endmenu
505 530
506# 531#
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index ea31b4c62105..1d92ab56c0f9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -13,7 +13,7 @@ config DEBUG_RODATA
13 If in doubt, say "N". 13 If in doubt, say "N".
14 14
15config IOMMU_DEBUG 15config IOMMU_DEBUG
16 depends on GART_IOMMU && DEBUG_KERNEL 16 depends on IOMMU && DEBUG_KERNEL
17 bool "Enable IOMMU debugging" 17 bool "Enable IOMMU debugging"
18 help 18 help
19 Force the IOMMU to on even when you have less than 4GB of 19 Force the IOMMU to on even when you have less than 4GB of
@@ -35,6 +35,22 @@ config IOMMU_LEAK
35 Add a simple leak tracer to the IOMMU code. This is useful when you 35 Add a simple leak tracer to the IOMMU code. This is useful when you
36 are debugging a buggy device driver that leaks IOMMU mappings. 36 are debugging a buggy device driver that leaks IOMMU mappings.
37 37
38config DEBUG_STACKOVERFLOW
39 bool "Check for stack overflows"
40 depends on DEBUG_KERNEL
41 help
42 This option will cause messages to be printed if free stack space
43 drops below a certain limit.
44
45config DEBUG_STACK_USAGE
46 bool "Stack utilization instrumentation"
47 depends on DEBUG_KERNEL
48 help
49 Enables the display of the minimum amount of free stack which each
50 task has ever had available in the sysrq-T and sysrq-P debug output.
51
52 This option will slow down process creation somewhat.
53
38#config X86_REMOTE_DEBUG 54#config X86_REMOTE_DEBUG
39# bool "kgdb debugging stub" 55# bool "kgdb debugging stub"
40 56
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index e573e2ab5510..431bb4bc36cd 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -27,6 +27,7 @@ LDFLAGS_vmlinux :=
27CHECKFLAGS += -D__x86_64__ -m64 27CHECKFLAGS += -D__x86_64__ -m64
28 28
29cflags-y := 29cflags-y :=
30cflags-kernel-y :=
30cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) 31cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
31cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) 32cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
32cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) 33cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
@@ -35,7 +36,7 @@ cflags-y += -m64
35cflags-y += -mno-red-zone 36cflags-y += -mno-red-zone
36cflags-y += -mcmodel=kernel 37cflags-y += -mcmodel=kernel
37cflags-y += -pipe 38cflags-y += -pipe
38cflags-$(CONFIG_REORDER) += -ffunction-sections 39cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
39# this makes reading assembly source easier, but produces worse code 40# this makes reading assembly source easier, but produces worse code
40# actually it makes the kernel smaller too. 41# actually it makes the kernel smaller too.
41cflags-y += -fno-reorder-blocks 42cflags-y += -fno-reorder-blocks
@@ -55,6 +56,7 @@ cflags-y += $(call cc-option,-funit-at-a-time)
55cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 56cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
56 57
57CFLAGS += $(cflags-y) 58CFLAGS += $(cflags-y)
59CFLAGS_KERNEL += $(cflags-kernel-y)
58AFLAGS += -m64 60AFLAGS += -m64
59 61
60head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o 62head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index 43ee6c50c277..deb063e7762d 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -107,8 +107,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
107isoimage: $(BOOTIMAGE) 107isoimage: $(BOOTIMAGE)
108 -rm -rf $(obj)/isoimage 108 -rm -rf $(obj)/isoimage
109 mkdir $(obj)/isoimage 109 mkdir $(obj)/isoimage
110 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ 110 for i in lib lib64 share end ; do \
111 $(obj)/isoimage 111 if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
112 cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
113 break ; \
114 fi ; \
115 if [ $$i = end ] ; then exit 1 ; fi ; \
116 done
112 cp $(BOOTIMAGE) $(obj)/isoimage/linux 117 cp $(BOOTIMAGE) $(obj)/isoimage/linux
113 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 118 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
114 if [ -f '$(FDINITRD)' ] ; then \ 119 if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index cf4b88c416dc..3755b2e394d0 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -77,11 +77,11 @@ static void gzip_release(void **);
77 */ 77 */
78static unsigned char *real_mode; /* Pointer to real-mode data */ 78static unsigned char *real_mode; /* Pointer to real-mode data */
79 79
80#define EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) 80#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
81#ifndef STANDARD_MEMORY_BIOS_CALL 81#ifndef STANDARD_MEMORY_BIOS_CALL
82#define ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) 82#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
83#endif 83#endif
84#define SCREEN_INFO (*(struct screen_info *)(real_mode+0)) 84#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
85 85
86extern unsigned char input_data[]; 86extern unsigned char input_data[];
87extern int input_len; 87extern int input_len;
@@ -92,9 +92,9 @@ static unsigned long output_ptr = 0;
92 92
93static void *malloc(int size); 93static void *malloc(int size);
94static void free(void *where); 94static void free(void *where);
95 95
96void* memset(void* s, int c, unsigned n); 96static void *memset(void *s, int c, unsigned n);
97void* memcpy(void* dest, const void* src, unsigned n); 97static void *memcpy(void *dest, const void *src, unsigned n);
98 98
99static void putstr(const char *); 99static void putstr(const char *);
100 100
@@ -162,8 +162,8 @@ static void putstr(const char *s)
162 int x,y,pos; 162 int x,y,pos;
163 char c; 163 char c;
164 164
165 x = SCREEN_INFO.orig_x; 165 x = RM_SCREEN_INFO.orig_x;
166 y = SCREEN_INFO.orig_y; 166 y = RM_SCREEN_INFO.orig_y;
167 167
168 while ( ( c = *s++ ) != '\0' ) { 168 while ( ( c = *s++ ) != '\0' ) {
169 if ( c == '\n' ) { 169 if ( c == '\n' ) {
@@ -184,8 +184,8 @@ static void putstr(const char *s)
184 } 184 }
185 } 185 }
186 186
187 SCREEN_INFO.orig_x = x; 187 RM_SCREEN_INFO.orig_x = x;
188 SCREEN_INFO.orig_y = y; 188 RM_SCREEN_INFO.orig_y = y;
189 189
190 pos = (x + cols * y) * 2; /* Update cursor position */ 190 pos = (x + cols * y) * 2; /* Update cursor position */
191 outb_p(14, vidport); 191 outb_p(14, vidport);
@@ -194,7 +194,7 @@ static void putstr(const char *s)
194 outb_p(0xff & (pos >> 1), vidport+1); 194 outb_p(0xff & (pos >> 1), vidport+1);
195} 195}
196 196
197void* memset(void* s, int c, unsigned n) 197static void* memset(void* s, int c, unsigned n)
198{ 198{
199 int i; 199 int i;
200 char *ss = (char*)s; 200 char *ss = (char*)s;
@@ -203,7 +203,7 @@ void* memset(void* s, int c, unsigned n)
203 return s; 203 return s;
204} 204}
205 205
206void* memcpy(void* dest, const void* src, unsigned n) 206static void* memcpy(void* dest, const void* src, unsigned n)
207{ 207{
208 int i; 208 int i;
209 char *d = (char *)dest, *s = (char *)src; 209 char *d = (char *)dest, *s = (char *)src;
@@ -278,15 +278,15 @@ static void error(char *x)
278 putstr(x); 278 putstr(x);
279 putstr("\n\n -- System halted"); 279 putstr("\n\n -- System halted");
280 280
281 while(1); 281 while(1); /* Halt */
282} 282}
283 283
284void setup_normal_output_buffer(void) 284static void setup_normal_output_buffer(void)
285{ 285{
286#ifdef STANDARD_MEMORY_BIOS_CALL 286#ifdef STANDARD_MEMORY_BIOS_CALL
287 if (EXT_MEM_K < 1024) error("Less than 2MB of memory"); 287 if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
288#else 288#else
289 if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory"); 289 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
290#endif 290#endif
291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ 291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
292 free_mem_end_ptr = (long)real_mode; 292 free_mem_end_ptr = (long)real_mode;
@@ -297,13 +297,13 @@ struct moveparams {
297 uch *high_buffer_start; int hcount; 297 uch *high_buffer_start; int hcount;
298}; 298};
299 299
300void setup_output_buffer_if_we_run_high(struct moveparams *mv) 300static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
301{ 301{
302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); 302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
303#ifdef STANDARD_MEMORY_BIOS_CALL 303#ifdef STANDARD_MEMORY_BIOS_CALL
304 if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); 304 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
305#else 305#else
306 if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); 306 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
307#endif 307#endif
308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; 308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX 309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
@@ -319,7 +319,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
319 mv->high_buffer_start = high_buffer_start; 319 mv->high_buffer_start = high_buffer_start;
320} 320}
321 321
322void close_output_buffer_if_we_run_high(struct moveparams *mv) 322static void close_output_buffer_if_we_run_high(struct moveparams *mv)
323{ 323{
324 if (bytes_out > low_buffer_size) { 324 if (bytes_out > low_buffer_size) {
325 mv->lcount = low_buffer_size; 325 mv->lcount = low_buffer_size;
@@ -335,7 +335,7 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
335{ 335{
336 real_mode = rmode; 336 real_mode = rmode;
337 337
338 if (SCREEN_INFO.orig_video_mode == 7) { 338 if (RM_SCREEN_INFO.orig_video_mode == 7) {
339 vidmem = (char *) 0xb0000; 339 vidmem = (char *) 0xb0000;
340 vidport = 0x3b4; 340 vidport = 0x3b4;
341 } else { 341 } else {
@@ -343,8 +343,8 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
343 vidport = 0x3d4; 343 vidport = 0x3d4;
344 } 344 }
345 345
346 lines = SCREEN_INFO.orig_video_lines; 346 lines = RM_SCREEN_INFO.orig_video_lines;
347 cols = SCREEN_INFO.orig_video_cols; 347 cols = RM_SCREEN_INFO.orig_video_cols;
348 348
349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); 349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
350 else setup_output_buffer_if_we_run_high(mv); 350 else setup_output_buffer_if_we_run_high(mv);
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index c44f5e2ec100..eae86691709a 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -149,10 +149,8 @@ int main(int argc, char ** argv)
149 sz = sb.st_size; 149 sz = sb.st_size;
150 fprintf (stderr, "System is %d kB\n", sz/1024); 150 fprintf (stderr, "System is %d kB\n", sz/1024);
151 sys_size = (sz + 15) / 16; 151 sys_size = (sz + 15) / 16;
152 /* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */ 152 if (!is_big_kernel && sys_size > DEF_SYSSIZE)
153 if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE)) 153 die("System is too big. Try using bzImage or modules.");
154 die("System is too big. Try using %smodules.",
155 is_big_kernel ? "" : "bzImage or ");
156 while (sz > 0) { 154 while (sz > 0) {
157 int l, n; 155 int l, n;
158 156
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 69db0c0721d1..e69d403949c8 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.17-rc1-git11 3# Linux kernel version: 2.6.17-git6
4# Sun Apr 16 07:22:36 2006 4# Sat Jun 24 00:52:28 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -42,7 +42,6 @@ CONFIG_IKCONFIG_PROC=y
42# CONFIG_RELAY is not set 42# CONFIG_RELAY is not set
43CONFIG_INITRAMFS_SOURCE="" 43CONFIG_INITRAMFS_SOURCE=""
44CONFIG_UID16=y 44CONFIG_UID16=y
45CONFIG_VM86=y
46CONFIG_CC_OPTIMIZE_FOR_SIZE=y 45CONFIG_CC_OPTIMIZE_FOR_SIZE=y
47# CONFIG_EMBEDDED is not set 46# CONFIG_EMBEDDED is not set
48CONFIG_KALLSYMS=y 47CONFIG_KALLSYMS=y
@@ -57,7 +56,6 @@ CONFIG_FUTEX=y
57CONFIG_EPOLL=y 56CONFIG_EPOLL=y
58CONFIG_SHMEM=y 57CONFIG_SHMEM=y
59CONFIG_SLAB=y 58CONFIG_SLAB=y
60CONFIG_DOUBLEFAULT=y
61# CONFIG_TINY_SHMEM is not set 59# CONFIG_TINY_SHMEM is not set
62CONFIG_BASE_SMALL=0 60CONFIG_BASE_SMALL=0
63# CONFIG_SLOB is not set 61# CONFIG_SLOB is not set
@@ -144,7 +142,8 @@ CONFIG_NR_CPUS=32
144CONFIG_HOTPLUG_CPU=y 142CONFIG_HOTPLUG_CPU=y
145CONFIG_HPET_TIMER=y 143CONFIG_HPET_TIMER=y
146CONFIG_HPET_EMULATE_RTC=y 144CONFIG_HPET_EMULATE_RTC=y
147CONFIG_GART_IOMMU=y 145CONFIG_IOMMU=y
146# CONFIG_CALGARY_IOMMU is not set
148CONFIG_SWIOTLB=y 147CONFIG_SWIOTLB=y
149CONFIG_X86_MCE=y 148CONFIG_X86_MCE=y
150CONFIG_X86_MCE_INTEL=y 149CONFIG_X86_MCE_INTEL=y
@@ -158,6 +157,7 @@ CONFIG_HZ_250=y
158# CONFIG_HZ_1000 is not set 157# CONFIG_HZ_1000 is not set
159CONFIG_HZ=250 158CONFIG_HZ=250
160# CONFIG_REORDER is not set 159# CONFIG_REORDER is not set
160CONFIG_K8_NB=y
161CONFIG_GENERIC_HARDIRQS=y 161CONFIG_GENERIC_HARDIRQS=y
162CONFIG_GENERIC_IRQ_PROBE=y 162CONFIG_GENERIC_IRQ_PROBE=y
163CONFIG_ISA_DMA_API=y 163CONFIG_ISA_DMA_API=y
@@ -293,6 +293,8 @@ CONFIG_IP_PNP_DHCP=y
293# CONFIG_INET_IPCOMP is not set 293# CONFIG_INET_IPCOMP is not set
294# CONFIG_INET_XFRM_TUNNEL is not set 294# CONFIG_INET_XFRM_TUNNEL is not set
295# CONFIG_INET_TUNNEL is not set 295# CONFIG_INET_TUNNEL is not set
296# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
297# CONFIG_INET_XFRM_MODE_TUNNEL is not set
296CONFIG_INET_DIAG=y 298CONFIG_INET_DIAG=y
297CONFIG_INET_TCP_DIAG=y 299CONFIG_INET_TCP_DIAG=y
298# CONFIG_TCP_CONG_ADVANCED is not set 300# CONFIG_TCP_CONG_ADVANCED is not set
@@ -305,7 +307,10 @@ CONFIG_IPV6=y
305# CONFIG_INET6_IPCOMP is not set 307# CONFIG_INET6_IPCOMP is not set
306# CONFIG_INET6_XFRM_TUNNEL is not set 308# CONFIG_INET6_XFRM_TUNNEL is not set
307# CONFIG_INET6_TUNNEL is not set 309# CONFIG_INET6_TUNNEL is not set
310# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
311# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
308# CONFIG_IPV6_TUNNEL is not set 312# CONFIG_IPV6_TUNNEL is not set
313# CONFIG_NETWORK_SECMARK is not set
309# CONFIG_NETFILTER is not set 314# CONFIG_NETFILTER is not set
310 315
311# 316#
@@ -344,6 +349,7 @@ CONFIG_IPV6=y
344# Network testing 349# Network testing
345# 350#
346# CONFIG_NET_PKTGEN is not set 351# CONFIG_NET_PKTGEN is not set
352# CONFIG_NET_TCPPROBE is not set
347# CONFIG_HAMRADIO is not set 353# CONFIG_HAMRADIO is not set
348# CONFIG_IRDA is not set 354# CONFIG_IRDA is not set
349# CONFIG_BT is not set 355# CONFIG_BT is not set
@@ -360,6 +366,7 @@ CONFIG_STANDALONE=y
360CONFIG_PREVENT_FIRMWARE_BUILD=y 366CONFIG_PREVENT_FIRMWARE_BUILD=y
361CONFIG_FW_LOADER=y 367CONFIG_FW_LOADER=y
362# CONFIG_DEBUG_DRIVER is not set 368# CONFIG_DEBUG_DRIVER is not set
369# CONFIG_SYS_HYPERVISOR is not set
363 370
364# 371#
365# Connector - unified userspace <-> kernelspace linker 372# Connector - unified userspace <-> kernelspace linker
@@ -526,6 +533,7 @@ CONFIG_SCSI_ATA_PIIX=y
526# CONFIG_SCSI_SATA_MV is not set 533# CONFIG_SCSI_SATA_MV is not set
527CONFIG_SCSI_SATA_NV=y 534CONFIG_SCSI_SATA_NV=y
528# CONFIG_SCSI_PDC_ADMA is not set 535# CONFIG_SCSI_PDC_ADMA is not set
536# CONFIG_SCSI_HPTIOP is not set
529# CONFIG_SCSI_SATA_QSTOR is not set 537# CONFIG_SCSI_SATA_QSTOR is not set
530# CONFIG_SCSI_SATA_PROMISE is not set 538# CONFIG_SCSI_SATA_PROMISE is not set
531# CONFIG_SCSI_SATA_SX4 is not set 539# CONFIG_SCSI_SATA_SX4 is not set
@@ -591,10 +599,7 @@ CONFIG_IEEE1394=y
591# 599#
592# Device Drivers 600# Device Drivers
593# 601#
594 602# CONFIG_IEEE1394_PCILYNX is not set
595#
596# Texas Instruments PCILynx requires I2C
597#
598CONFIG_IEEE1394_OHCI1394=y 603CONFIG_IEEE1394_OHCI1394=y
599 604
600# 605#
@@ -645,7 +650,16 @@ CONFIG_VORTEX=y
645# 650#
646# Tulip family network device support 651# Tulip family network device support
647# 652#
648# CONFIG_NET_TULIP is not set 653CONFIG_NET_TULIP=y
654# CONFIG_DE2104X is not set
655CONFIG_TULIP=y
656# CONFIG_TULIP_MWI is not set
657# CONFIG_TULIP_MMIO is not set
658# CONFIG_TULIP_NAPI is not set
659# CONFIG_DE4X5 is not set
660# CONFIG_WINBOND_840 is not set
661# CONFIG_DM9102 is not set
662# CONFIG_ULI526X is not set
649# CONFIG_HP100 is not set 663# CONFIG_HP100 is not set
650CONFIG_NET_PCI=y 664CONFIG_NET_PCI=y
651# CONFIG_PCNET32 is not set 665# CONFIG_PCNET32 is not set
@@ -697,6 +711,7 @@ CONFIG_TIGON3=y
697# CONFIG_IXGB is not set 711# CONFIG_IXGB is not set
698CONFIG_S2IO=m 712CONFIG_S2IO=m
699# CONFIG_S2IO_NAPI is not set 713# CONFIG_S2IO_NAPI is not set
714# CONFIG_MYRI10GE is not set
700 715
701# 716#
702# Token Ring devices 717# Token Ring devices
@@ -887,7 +902,56 @@ CONFIG_HPET_MMAP=y
887# 902#
888# I2C support 903# I2C support
889# 904#
890# CONFIG_I2C is not set 905CONFIG_I2C=m
906CONFIG_I2C_CHARDEV=m
907
908#
909# I2C Algorithms
910#
911# CONFIG_I2C_ALGOBIT is not set
912# CONFIG_I2C_ALGOPCF is not set
913# CONFIG_I2C_ALGOPCA is not set
914
915#
916# I2C Hardware Bus support
917#
918# CONFIG_I2C_ALI1535 is not set
919# CONFIG_I2C_ALI1563 is not set
920# CONFIG_I2C_ALI15X3 is not set
921# CONFIG_I2C_AMD756 is not set
922# CONFIG_I2C_AMD8111 is not set
923# CONFIG_I2C_I801 is not set
924# CONFIG_I2C_I810 is not set
925# CONFIG_I2C_PIIX4 is not set
926CONFIG_I2C_ISA=m
927# CONFIG_I2C_NFORCE2 is not set
928# CONFIG_I2C_OCORES is not set
929# CONFIG_I2C_PARPORT_LIGHT is not set
930# CONFIG_I2C_PROSAVAGE is not set
931# CONFIG_I2C_SAVAGE4 is not set
932# CONFIG_I2C_SIS5595 is not set
933# CONFIG_I2C_SIS630 is not set
934# CONFIG_I2C_SIS96X is not set
935# CONFIG_I2C_STUB is not set
936# CONFIG_I2C_VIA is not set
937# CONFIG_I2C_VIAPRO is not set
938# CONFIG_I2C_VOODOO3 is not set
939# CONFIG_I2C_PCA_ISA is not set
940
941#
942# Miscellaneous I2C Chip support
943#
944# CONFIG_SENSORS_DS1337 is not set
945# CONFIG_SENSORS_DS1374 is not set
946# CONFIG_SENSORS_EEPROM is not set
947# CONFIG_SENSORS_PCF8574 is not set
948# CONFIG_SENSORS_PCA9539 is not set
949# CONFIG_SENSORS_PCF8591 is not set
950# CONFIG_SENSORS_MAX6875 is not set
951# CONFIG_I2C_DEBUG_CORE is not set
952# CONFIG_I2C_DEBUG_ALGO is not set
953# CONFIG_I2C_DEBUG_BUS is not set
954# CONFIG_I2C_DEBUG_CHIP is not set
891 955
892# 956#
893# SPI support 957# SPI support
@@ -898,14 +962,51 @@ CONFIG_HPET_MMAP=y
898# 962#
899# Dallas's 1-wire bus 963# Dallas's 1-wire bus
900# 964#
901# CONFIG_W1 is not set
902 965
903# 966#
904# Hardware Monitoring support 967# Hardware Monitoring support
905# 968#
906CONFIG_HWMON=y 969CONFIG_HWMON=y
907# CONFIG_HWMON_VID is not set 970# CONFIG_HWMON_VID is not set
971# CONFIG_SENSORS_ABITUGURU is not set
972# CONFIG_SENSORS_ADM1021 is not set
973# CONFIG_SENSORS_ADM1025 is not set
974# CONFIG_SENSORS_ADM1026 is not set
975# CONFIG_SENSORS_ADM1031 is not set
976# CONFIG_SENSORS_ADM9240 is not set
977# CONFIG_SENSORS_ASB100 is not set
978# CONFIG_SENSORS_ATXP1 is not set
979# CONFIG_SENSORS_DS1621 is not set
908# CONFIG_SENSORS_F71805F is not set 980# CONFIG_SENSORS_F71805F is not set
981# CONFIG_SENSORS_FSCHER is not set
982# CONFIG_SENSORS_FSCPOS is not set
983# CONFIG_SENSORS_GL518SM is not set
984# CONFIG_SENSORS_GL520SM is not set
985# CONFIG_SENSORS_IT87 is not set
986# CONFIG_SENSORS_LM63 is not set
987# CONFIG_SENSORS_LM75 is not set
988# CONFIG_SENSORS_LM77 is not set
989# CONFIG_SENSORS_LM78 is not set
990# CONFIG_SENSORS_LM80 is not set
991# CONFIG_SENSORS_LM83 is not set
992# CONFIG_SENSORS_LM85 is not set
993# CONFIG_SENSORS_LM87 is not set
994# CONFIG_SENSORS_LM90 is not set
995# CONFIG_SENSORS_LM92 is not set
996# CONFIG_SENSORS_MAX1619 is not set
997# CONFIG_SENSORS_PC87360 is not set
998# CONFIG_SENSORS_SIS5595 is not set
999# CONFIG_SENSORS_SMSC47M1 is not set
1000# CONFIG_SENSORS_SMSC47M192 is not set
1001CONFIG_SENSORS_SMSC47B397=m
1002# CONFIG_SENSORS_VIA686A is not set
1003# CONFIG_SENSORS_VT8231 is not set
1004# CONFIG_SENSORS_W83781D is not set
1005# CONFIG_SENSORS_W83791D is not set
1006# CONFIG_SENSORS_W83792D is not set
1007# CONFIG_SENSORS_W83L785TS is not set
1008# CONFIG_SENSORS_W83627HF is not set
1009# CONFIG_SENSORS_W83627EHF is not set
909# CONFIG_SENSORS_HDAPS is not set 1010# CONFIG_SENSORS_HDAPS is not set
910# CONFIG_HWMON_DEBUG_CHIP is not set 1011# CONFIG_HWMON_DEBUG_CHIP is not set
911 1012
@@ -918,6 +1019,7 @@ CONFIG_HWMON=y
918# Multimedia devices 1019# Multimedia devices
919# 1020#
920# CONFIG_VIDEO_DEV is not set 1021# CONFIG_VIDEO_DEV is not set
1022CONFIG_VIDEO_V4L2=y
921 1023
922# 1024#
923# Digital Video Broadcasting Devices 1025# Digital Video Broadcasting Devices
@@ -953,28 +1055,17 @@ CONFIG_SOUND=y
953# Open Sound System 1055# Open Sound System
954# 1056#
955CONFIG_SOUND_PRIME=y 1057CONFIG_SOUND_PRIME=y
956CONFIG_OBSOLETE_OSS_DRIVER=y
957# CONFIG_SOUND_BT878 is not set 1058# CONFIG_SOUND_BT878 is not set
958# CONFIG_SOUND_CMPCI is not set
959# CONFIG_SOUND_EMU10K1 is not set 1059# CONFIG_SOUND_EMU10K1 is not set
960# CONFIG_SOUND_FUSION is not set 1060# CONFIG_SOUND_FUSION is not set
961# CONFIG_SOUND_CS4281 is not set
962# CONFIG_SOUND_ES1370 is not set
963# CONFIG_SOUND_ES1371 is not set 1061# CONFIG_SOUND_ES1371 is not set
964# CONFIG_SOUND_ESSSOLO1 is not set
965# CONFIG_SOUND_MAESTRO is not set
966# CONFIG_SOUND_MAESTRO3 is not set
967CONFIG_SOUND_ICH=y 1062CONFIG_SOUND_ICH=y
968# CONFIG_SOUND_SONICVIBES is not set
969# CONFIG_SOUND_TRIDENT is not set 1063# CONFIG_SOUND_TRIDENT is not set
970# CONFIG_SOUND_MSNDCLAS is not set 1064# CONFIG_SOUND_MSNDCLAS is not set
971# CONFIG_SOUND_MSNDPIN is not set 1065# CONFIG_SOUND_MSNDPIN is not set
972# CONFIG_SOUND_VIA82CXXX is not set 1066# CONFIG_SOUND_VIA82CXXX is not set
973# CONFIG_SOUND_OSS is not set 1067# CONFIG_SOUND_OSS is not set
974# CONFIG_SOUND_ALI5455 is not set 1068# CONFIG_SOUND_TVMIXER is not set
975# CONFIG_SOUND_FORTE is not set
976# CONFIG_SOUND_RME96XX is not set
977# CONFIG_SOUND_AD1980 is not set
978 1069
979# 1070#
980# USB support 1071# USB support
@@ -1000,6 +1091,7 @@ CONFIG_USB_DEVICEFS=y
1000CONFIG_USB_EHCI_HCD=y 1091CONFIG_USB_EHCI_HCD=y
1001# CONFIG_USB_EHCI_SPLIT_ISO is not set 1092# CONFIG_USB_EHCI_SPLIT_ISO is not set
1002# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1093# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1094# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1003# CONFIG_USB_ISP116X_HCD is not set 1095# CONFIG_USB_ISP116X_HCD is not set
1004CONFIG_USB_OHCI_HCD=y 1096CONFIG_USB_OHCI_HCD=y
1005# CONFIG_USB_OHCI_BIG_ENDIAN is not set 1097# CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1089,10 +1181,12 @@ CONFIG_USB_MON=y
1089# CONFIG_USB_LEGOTOWER is not set 1181# CONFIG_USB_LEGOTOWER is not set
1090# CONFIG_USB_LCD is not set 1182# CONFIG_USB_LCD is not set
1091# CONFIG_USB_LED is not set 1183# CONFIG_USB_LED is not set
1184# CONFIG_USB_CY7C63 is not set
1092# CONFIG_USB_CYTHERM is not set 1185# CONFIG_USB_CYTHERM is not set
1093# CONFIG_USB_PHIDGETKIT is not set 1186# CONFIG_USB_PHIDGETKIT is not set
1094# CONFIG_USB_PHIDGETSERVO is not set 1187# CONFIG_USB_PHIDGETSERVO is not set
1095# CONFIG_USB_IDMOUSE is not set 1188# CONFIG_USB_IDMOUSE is not set
1189# CONFIG_USB_APPLEDISPLAY is not set
1096# CONFIG_USB_SISUSBVGA is not set 1190# CONFIG_USB_SISUSBVGA is not set
1097# CONFIG_USB_LD is not set 1191# CONFIG_USB_LD is not set
1098# CONFIG_USB_TEST is not set 1192# CONFIG_USB_TEST is not set
@@ -1141,6 +1235,19 @@ CONFIG_USB_MON=y
1141# CONFIG_RTC_CLASS is not set 1235# CONFIG_RTC_CLASS is not set
1142 1236
1143# 1237#
1238# DMA Engine support
1239#
1240# CONFIG_DMA_ENGINE is not set
1241
1242#
1243# DMA Clients
1244#
1245
1246#
1247# DMA Devices
1248#
1249
1250#
1144# Firmware Drivers 1251# Firmware Drivers
1145# 1252#
1146# CONFIG_EDD is not set 1253# CONFIG_EDD is not set
@@ -1175,6 +1282,7 @@ CONFIG_FS_POSIX_ACL=y
1175# CONFIG_MINIX_FS is not set 1282# CONFIG_MINIX_FS is not set
1176# CONFIG_ROMFS_FS is not set 1283# CONFIG_ROMFS_FS is not set
1177CONFIG_INOTIFY=y 1284CONFIG_INOTIFY=y
1285CONFIG_INOTIFY_USER=y
1178# CONFIG_QUOTA is not set 1286# CONFIG_QUOTA is not set
1179CONFIG_DNOTIFY=y 1287CONFIG_DNOTIFY=y
1180CONFIG_AUTOFS_FS=y 1288CONFIG_AUTOFS_FS=y
@@ -1331,7 +1439,8 @@ CONFIG_DETECT_SOFTLOCKUP=y
1331CONFIG_DEBUG_FS=y 1439CONFIG_DEBUG_FS=y
1332# CONFIG_DEBUG_VM is not set 1440# CONFIG_DEBUG_VM is not set
1333# CONFIG_FRAME_POINTER is not set 1441# CONFIG_FRAME_POINTER is not set
1334# CONFIG_UNWIND_INFO is not set 1442CONFIG_UNWIND_INFO=y
1443CONFIG_STACK_UNWIND=y
1335# CONFIG_FORCED_INLINING is not set 1444# CONFIG_FORCED_INLINING is not set
1336# CONFIG_RCU_TORTURE_TEST is not set 1445# CONFIG_RCU_TORTURE_TEST is not set
1337# CONFIG_DEBUG_RODATA is not set 1446# CONFIG_DEBUG_RODATA is not set
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
index 1c23095f1813..2c8209a3605a 100644
--- a/arch/x86_64/ia32/fpu32.c
+++ b/arch/x86_64/ia32/fpu32.c
@@ -2,7 +2,6 @@
2 * Copyright 2002 Andi Kleen, SuSE Labs. 2 * Copyright 2002 Andi Kleen, SuSE Labs.
3 * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. 3 * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
4 * This is used for ptrace, signals and coredumps in 32bit emulation. 4 * This is used for ptrace, signals and coredumps in 32bit emulation.
5 * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
6 */ 5 */
7 6
8#include <linux/sched.h> 7#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index e0a92439f634..25e5ca22204c 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -6,8 +6,6 @@
6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
8 * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen 8 * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
9 *
10 * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
11 */ 9 */
12 10
13#include <linux/sched.h> 11#include <linux/sched.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 4ec594ab1a98..c536fa98ea37 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -155,6 +155,7 @@ sysenter_tracesys:
155 .previous 155 .previous
156 jmp sysenter_do_call 156 jmp sysenter_do_call
157 CFI_ENDPROC 157 CFI_ENDPROC
158ENDPROC(ia32_sysenter_target)
158 159
159/* 160/*
160 * 32bit SYSCALL instruction entry. 161 * 32bit SYSCALL instruction entry.
@@ -178,7 +179,7 @@ sysenter_tracesys:
178 */ 179 */
179ENTRY(ia32_cstar_target) 180ENTRY(ia32_cstar_target)
180 CFI_STARTPROC32 simple 181 CFI_STARTPROC32 simple
181 CFI_DEF_CFA rsp,0 182 CFI_DEF_CFA rsp,PDA_STACKOFFSET
182 CFI_REGISTER rip,rcx 183 CFI_REGISTER rip,rcx
183 /*CFI_REGISTER rflags,r11*/ 184 /*CFI_REGISTER rflags,r11*/
184 swapgs 185 swapgs
@@ -249,6 +250,7 @@ cstar_tracesys:
249 .quad 1b,ia32_badarg 250 .quad 1b,ia32_badarg
250 .previous 251 .previous
251 jmp cstar_do_call 252 jmp cstar_do_call
253END(ia32_cstar_target)
252 254
253ia32_badarg: 255ia32_badarg:
254 movq $-EFAULT,%rax 256 movq $-EFAULT,%rax
@@ -314,16 +316,13 @@ ia32_tracesys:
314 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 316 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
315 RESTORE_REST 317 RESTORE_REST
316 jmp ia32_do_syscall 318 jmp ia32_do_syscall
319END(ia32_syscall)
317 320
318ia32_badsys: 321ia32_badsys:
319 movq $0,ORIG_RAX-ARGOFFSET(%rsp) 322 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
320 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 323 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
321 jmp int_ret_from_sys_call 324 jmp int_ret_from_sys_call
322 325
323ni_syscall:
324 movq %rax,%rdi
325 jmp sys32_ni_syscall
326
327quiet_ni_syscall: 326quiet_ni_syscall:
328 movq $-ENOSYS,%rax 327 movq $-ENOSYS,%rax
329 ret 328 ret
@@ -370,10 +369,10 @@ ENTRY(ia32_ptregs_common)
370 RESTORE_REST 369 RESTORE_REST
371 jmp ia32_sysret /* misbalances the return cache */ 370 jmp ia32_sysret /* misbalances the return cache */
372 CFI_ENDPROC 371 CFI_ENDPROC
372END(ia32_ptregs_common)
373 373
374 .section .rodata,"a" 374 .section .rodata,"a"
375 .align 8 375 .align 8
376 .globl ia32_sys_call_table
377ia32_sys_call_table: 376ia32_sys_call_table:
378 .quad sys_restart_syscall 377 .quad sys_restart_syscall
379 .quad sys_exit 378 .quad sys_exit
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 23a4515a73b4..a590b7a0d92d 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -7,8 +7,6 @@
7 * 7 *
8 * This allows to access 64bit processes too; but there is no way to see the extended 8 * This allows to access 64bit processes too; but there is no way to see the extended
9 * register contents. 9 * register contents.
10 *
11 * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
12 */ 10 */
13 11
14#include <linux/kernel.h> 12#include <linux/kernel.h>
@@ -27,6 +25,7 @@
27#include <asm/debugreg.h> 25#include <asm/debugreg.h>
28#include <asm/i387.h> 26#include <asm/i387.h>
29#include <asm/fpu32.h> 27#include <asm/fpu32.h>
28#include <asm/ia32.h>
30 29
31/* 30/*
32 * Determines which flags the user has access to [1 = access, 0 = no access]. 31 * Determines which flags the user has access to [1 = access, 0 = no access].
@@ -199,6 +198,24 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
199 198
200#undef R32 199#undef R32
201 200
201static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
202{
203 int ret;
204 compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data);
205 siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t));
206 if (request == PTRACE_SETSIGINFO) {
207 ret = copy_siginfo_from_user32(si, si32);
208 if (ret)
209 return ret;
210 }
211 ret = sys_ptrace(request, pid, addr, (unsigned long)si);
212 if (ret)
213 return ret;
214 if (request == PTRACE_GETSIGINFO)
215 ret = copy_siginfo_to_user32(si32, si);
216 return ret;
217}
218
202asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) 219asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
203{ 220{
204 struct task_struct *child; 221 struct task_struct *child;
@@ -208,9 +225,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
208 __u32 val; 225 __u32 val;
209 226
210 switch (request) { 227 switch (request) {
211 default: 228 case PTRACE_TRACEME:
229 case PTRACE_ATTACH:
230 case PTRACE_KILL:
231 case PTRACE_CONT:
232 case PTRACE_SINGLESTEP:
233 case PTRACE_DETACH:
234 case PTRACE_SYSCALL:
235 case PTRACE_SETOPTIONS:
212 return sys_ptrace(request, pid, addr, data); 236 return sys_ptrace(request, pid, addr, data);
213 237
238 default:
239 return -EINVAL;
240
214 case PTRACE_PEEKTEXT: 241 case PTRACE_PEEKTEXT:
215 case PTRACE_PEEKDATA: 242 case PTRACE_PEEKDATA:
216 case PTRACE_POKEDATA: 243 case PTRACE_POKEDATA:
@@ -225,10 +252,11 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
225 case PTRACE_GETFPXREGS: 252 case PTRACE_GETFPXREGS:
226 case PTRACE_GETEVENTMSG: 253 case PTRACE_GETEVENTMSG:
227 break; 254 break;
228 }
229 255
230 if (request == PTRACE_TRACEME) 256 case PTRACE_SETSIGINFO:
231 return ptrace_traceme(); 257 case PTRACE_GETSIGINFO:
258 return ptrace32_siginfo(request, pid, addr, data);
259 }
232 260
233 child = ptrace_get_task_struct(pid); 261 child = ptrace_get_task_struct(pid);
234 if (IS_ERR(child)) 262 if (IS_ERR(child))
@@ -349,8 +377,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
349 break; 377 break;
350 378
351 default: 379 default:
352 ret = -EINVAL; 380 BUG();
353 break;
354 } 381 }
355 382
356 out: 383 out:
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index f182b20858e2..dc88154c412b 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -508,19 +508,6 @@ sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
508 return compat_sys_wait4(pid, stat_addr, options, NULL); 508 return compat_sys_wait4(pid, stat_addr, options, NULL);
509} 509}
510 510
511int sys32_ni_syscall(int call)
512{
513 struct task_struct *me = current;
514 static char lastcomm[sizeof(me->comm)];
515
516 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
517 printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
518 call, me->comm);
519 strncpy(lastcomm, me->comm, sizeof(lastcomm));
520 }
521 return -ENOSYS;
522}
523
524/* 32-bit timeval and related flotsam. */ 511/* 32-bit timeval and related flotsam. */
525 512
526asmlinkage long 513asmlinkage long
@@ -916,7 +903,7 @@ long sys32_vm86_warning(void)
916 struct task_struct *me = current; 903 struct task_struct *me = current;
917 static char lastcomm[sizeof(me->comm)]; 904 static char lastcomm[sizeof(me->comm)];
918 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { 905 if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
919 printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", 906 compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
920 me->comm); 907 me->comm);
921 strncpy(lastcomm, me->comm, sizeof(lastcomm)); 908 strncpy(lastcomm, me->comm, sizeof(lastcomm));
922 } 909 }
@@ -929,13 +916,3 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
929 return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); 916 return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
930} 917}
931 918
932static int __init ia32_init (void)
933{
934 printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
935 return 0;
936}
937
938__initcall(ia32_init);
939
940extern unsigned long ia32_sys_call_table[];
941EXPORT_SYMBOL(ia32_sys_call_table);
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 059c88313f4e..aeb9c560be88 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_X86_MCE) += mce.o 13obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM) += suspend.o
28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o 28obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
29obj-$(CONFIG_CPU_FREQ) += cpufreq/ 29obj-$(CONFIG_CPU_FREQ) += cpufreq/
30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
31obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o 31obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
32obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
32obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 33obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 35obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
35obj-$(CONFIG_X86_VSMP) += vsmp.o 36obj-$(CONFIG_X86_VSMP) += vsmp.o
37obj-$(CONFIG_K8_NB) += k8.o
36 38
37obj-$(CONFIG_MODULES) += module.o 39obj-$(CONFIG_MODULES) += module.o
38 40
@@ -49,3 +51,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
49quirks-y += ../../i386/kernel/quirks.o 51quirks-y += ../../i386/kernel/quirks.o
50i8237-y += ../../i386/kernel/i8237.o 52i8237-y += ../../i386/kernel/i8237.o
51msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 53msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
54alternative-y += ../../i386/kernel/alternative.o
55
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed675..a195ef06ec55 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -8,7 +8,6 @@
8 * because only the bootmem allocator can allocate 32+MB. 8 * because only the bootmem allocator can allocate 32+MB.
9 * 9 *
10 * Copyright 2002 Andi Kleen, SuSE Labs. 10 * Copyright 2002 Andi Kleen, SuSE Labs.
11 * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
12 */ 11 */
13#include <linux/config.h> 12#include <linux/config.h>
14#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -24,6 +23,7 @@
24#include <asm/proto.h> 23#include <asm/proto.h>
25#include <asm/pci-direct.h> 24#include <asm/pci-direct.h>
26#include <asm/dma.h> 25#include <asm/dma.h>
26#include <asm/k8.h>
27 27
28int iommu_aperture; 28int iommu_aperture;
29int iommu_aperture_disabled __initdata = 0; 29int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
37/* This code runs before the PCI subsystem is initialized, so just 37/* This code runs before the PCI subsystem is initialized, so just
38 access the northbridge directly. */ 38 access the northbridge directly. */
39 39
40#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
41
42static u32 __init allocate_aperture(void) 40static u32 __init allocate_aperture(void)
43{ 41{
44 pg_data_t *nd0 = NODE_DATA(0); 42 pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
68 return (u32)__pa(p); 66 return (u32)__pa(p);
69} 67}
70 68
71static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 69static int __init aperture_valid(u64 aper_base, u32 aper_size)
72{ 70{
73 if (!aper_base) 71 if (!aper_base)
74 return 0; 72 return 0;
75 if (aper_size < 64*1024*1024) { 73 if (aper_size < 64*1024*1024) {
76 printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 74 printk("Aperture too small (%d MB)\n", aper_size>>20);
77 return 0; 75 return 0;
78 } 76 }
79 if (aper_base + aper_size >= 0xffffffff) { 77 if (aper_base + aper_size >= 0xffffffff) {
80 printk("Aperture from %s beyond 4GB. Ignoring.\n",name); 78 printk("Aperture beyond 4GB. Ignoring.\n");
81 return 0; 79 return 0;
82 } 80 }
83 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { 81 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
84 printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); 82 printk("Aperture pointing to e820 RAM. Ignoring.\n");
85 return 0; 83 return 0;
86 } 84 }
87 return 1; 85 return 1;
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
140 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 138 printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
141 aper, 32 << *order, apsizereg); 139 aper, 32 << *order, apsizereg);
142 140
143 if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) 141 if (!aperture_valid(aper, (32*1024*1024) << *order))
144 return 0; 142 return 0;
145 return (u32)aper; 143 return (u32)aper;
146} 144}
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
208 206
209 fix = 0; 207 fix = 0;
210 for (num = 24; num < 32; num++) { 208 for (num = 24; num < 32; num++) {
211 char name[30]; 209 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
212 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 210 continue;
213 continue;
214 211
212 iommu_detected = 1;
215 iommu_aperture = 1; 213 iommu_aperture = 1;
216 214
217 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 215 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
222 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 220 printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
223 aper_base, aper_size>>20); 221 aper_base, aper_size>>20);
224 222
225 sprintf(name, "northbridge cpu %d", num-24); 223 if (!aperture_valid(aper_base, aper_size)) {
226
227 if (!aperture_valid(name, aper_base, aper_size)) {
228 fix = 1; 224 fix = 1;
229 break; 225 break;
230 } 226 }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
273 269
274 /* Fix up the north bridges */ 270 /* Fix up the north bridges */
275 for (num = 24; num < 32; num++) { 271 for (num = 24; num < 32; num++) {
276 if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 272 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
277 continue; 273 continue;
278 274
279 /* Don't enable translation yet. That is done later. 275 /* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 29ef99001e05..b2ead91df218 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -100,7 +100,7 @@ void clear_local_APIC(void)
100 maxlvt = get_maxlvt(); 100 maxlvt = get_maxlvt();
101 101
102 /* 102 /*
103 * Masking an LVT entry on a P6 can trigger a local APIC error 103 * Masking an LVT entry can trigger a local APIC error
104 * if the vector is zero. Mask LVTERR first to prevent this. 104 * if the vector is zero. Mask LVTERR first to prevent this.
105 */ 105 */
106 if (maxlvt >= 3) { 106 if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
851 unsigned long v; 851 unsigned long v;
852 852
853 v = apic_read(APIC_LVTT); 853 v = apic_read(APIC_LVTT);
854 apic_write(APIC_LVTT, v | APIC_LVT_MASKED); 854 /*
855 * When an illegal vector value (0-15) is written to an LVT
856 * entry and delivery mode is Fixed, the APIC may signal an
857 * illegal vector error, with out regard to whether the mask
858 * bit is set or whether an interrupt is actually seen on input.
859 *
860 * Boot sequence might call this function when the LVTT has
861 * '0' vector value. So make sure vector field is set to
862 * valid value.
863 */
864 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
865 apic_write(APIC_LVTT, v);
855 } 866 }
856} 867}
857 868
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
909 return -EINVAL; 920 return -EINVAL;
910} 921}
911 922
912#ifdef CONFIG_X86_MCE_AMD 923void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
913void setup_threshold_lvt(unsigned long lvt_off) 924 unsigned char msg_type, unsigned char mask)
914{ 925{
915 unsigned int v = 0; 926 unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
916 unsigned long reg = (lvt_off << 4) + 0x500; 927 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
917 v |= THRESHOLD_APIC_VECTOR;
918 apic_write(reg, v); 928 apic_write(reg, v);
919} 929}
920#endif /* CONFIG_X86_MCE_AMD */
921 930
922#undef APIC_DIVISOR 931#undef APIC_DIVISOR
923 932
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
983} 992}
984 993
985/* 994/*
986 * oem_force_hpet_timer -- force HPET mode for some boxes. 995 * apic_is_clustered_box() -- Check if we can expect good TSC
987 * 996 *
988 * Thus far, the major user of this is IBM's Summit2 series: 997 * Thus far, the major user of this is IBM's Summit2 series:
989 * 998 *
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
991 * multi-chassis. Use available data to take a good guess. 1000 * multi-chassis. Use available data to take a good guess.
992 * If in doubt, go HPET. 1001 * If in doubt, go HPET.
993 */ 1002 */
994__cpuinit int oem_force_hpet_timer(void) 1003__cpuinit int apic_is_clustered_box(void)
995{ 1004{
996 int i, clusters, zeros; 1005 int i, clusters, zeros;
997 unsigned id; 1006 unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
1022 } 1031 }
1023 1032
1024 /* 1033 /*
1025 * If clusters > 2, then should be multi-chassis. Return 1 for HPET. 1034 * If clusters > 2, then should be multi-chassis.
1026 * Else return 0 to use TSC.
1027 * May have to revisit this when multi-core + hyperthreaded CPUs come 1035 * May have to revisit this when multi-core + hyperthreaded CPUs come
1028 * out, but AFAIK this will work even for them. 1036 * out, but AFAIK this will work even for them.
1029 */ 1037 */
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b729e39..8ca04912b1cc 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
111 atomic_dec(&waiting_for_crash_ipi); 111 atomic_dec(&waiting_for_crash_ipi);
112 /* Assume hlt works */ 112 /* Assume hlt works */
113 for(;;) 113 for(;;)
114 asm("hlt"); 114 halt();
115 115
116 return 1; 116 return 1;
117} 117}
118 118
119static void smp_send_nmi_allbutself(void) 119static void smp_send_nmi_allbutself(void)
120{ 120{
121 send_IPI_allbutself(APIC_DM_NMI); 121 send_IPI_allbutself(NMI_VECTOR);
122} 122}
123 123
124/* 124/*
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 1ef6028f721e..9e94d834624b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -1,7 +1,6 @@
1/* 1/*
2 * Handle the memory map. 2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over. 3 * The functions here do the job until bootmem takes over.
4 * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
5 * 4 *
6 * Getting sanitize_e820_map() in sync with i386 version by applying change: 5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
7 * - Provisions for empty E820 memory regions (reported by certain BIOSes). 6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
621} 620}
622 621
623unsigned long pci_mem_start = 0xaeedbabe; 622unsigned long pci_mem_start = 0xaeedbabe;
623EXPORT_SYMBOL(pci_mem_start);
624 624
625/* 625/*
626 * Search for the biggest gap in the low 32 bits of the e820 626 * Search for the biggest gap in the low 32 bits of the e820
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 586b34c00c48..7290e72b9a34 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -154,6 +154,7 @@ rff_trace:
154 GET_THREAD_INFO(%rcx) 154 GET_THREAD_INFO(%rcx)
155 jmp rff_action 155 jmp rff_action
156 CFI_ENDPROC 156 CFI_ENDPROC
157END(ret_from_fork)
157 158
158/* 159/*
159 * System call entry. Upto 6 arguments in registers are supported. 160 * System call entry. Upto 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
188 189
189ENTRY(system_call) 190ENTRY(system_call)
190 CFI_STARTPROC simple 191 CFI_STARTPROC simple
191 CFI_DEF_CFA rsp,0 192 CFI_DEF_CFA rsp,PDA_STACKOFFSET
192 CFI_REGISTER rip,rcx 193 CFI_REGISTER rip,rcx
193 /*CFI_REGISTER rflags,r11*/ 194 /*CFI_REGISTER rflags,r11*/
194 swapgs 195 swapgs
@@ -285,6 +286,7 @@ tracesys:
285 /* Use IRET because user could have changed frame */ 286 /* Use IRET because user could have changed frame */
286 jmp int_ret_from_sys_call 287 jmp int_ret_from_sys_call
287 CFI_ENDPROC 288 CFI_ENDPROC
289END(system_call)
288 290
289/* 291/*
290 * Syscall return path ending with IRET. 292 * Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
364 cli 366 cli
365 jmp int_with_check 367 jmp int_with_check
366 CFI_ENDPROC 368 CFI_ENDPROC
369END(int_ret_from_sys_call)
367 370
368/* 371/*
369 * Certain special system calls that need to save a complete full stack frame. 372 * Certain special system calls that need to save a complete full stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
375 leaq \func(%rip),%rax 378 leaq \func(%rip),%rax
376 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 379 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
377 jmp ptregscall_common 380 jmp ptregscall_common
381END(\label)
378 .endm 382 .endm
379 383
380 CFI_STARTPROC 384 CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
404 CFI_REL_OFFSET rip, 0 408 CFI_REL_OFFSET rip, 0
405 ret 409 ret
406 CFI_ENDPROC 410 CFI_ENDPROC
411END(ptregscall_common)
407 412
408ENTRY(stub_execve) 413ENTRY(stub_execve)
409 CFI_STARTPROC 414 CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
418 RESTORE_REST 423 RESTORE_REST
419 jmp int_ret_from_sys_call 424 jmp int_ret_from_sys_call
420 CFI_ENDPROC 425 CFI_ENDPROC
426END(stub_execve)
421 427
422/* 428/*
423 * sigreturn is special because it needs to restore all registers on return. 429 * sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
435 RESTORE_REST 441 RESTORE_REST
436 jmp int_ret_from_sys_call 442 jmp int_ret_from_sys_call
437 CFI_ENDPROC 443 CFI_ENDPROC
444END(stub_rt_sigreturn)
438 445
439/* 446/*
440 * initial frame state for interrupts and exceptions 447 * initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
466/* 0(%rsp): interrupt number */ 473/* 0(%rsp): interrupt number */
467 .macro interrupt func 474 .macro interrupt func
468 cld 475 cld
469#ifdef CONFIG_DEBUG_INFO
470 SAVE_ALL
471 movq %rsp,%rdi
472 /*
473 * Setup a stack frame pointer. This allows gdb to trace
474 * back to the original stack.
475 */
476 movq %rsp,%rbp
477 CFI_DEF_CFA_REGISTER rbp
478#else
479 SAVE_ARGS 476 SAVE_ARGS
480 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 477 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
481#endif 478 pushq %rbp
479 CFI_ADJUST_CFA_OFFSET 8
480 CFI_REL_OFFSET rbp, 0
481 movq %rsp,%rbp
482 CFI_DEF_CFA_REGISTER rbp
482 testl $3,CS(%rdi) 483 testl $3,CS(%rdi)
483 je 1f 484 je 1f
484 swapgs 485 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 4861: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 movq %gs:pda_irqstackptr,%rax 487 cmoveq %gs:pda_irqstackptr,%rsp
487 cmoveq %rax,%rsp /*todo This needs CFI annotation! */
488 pushq %rdi # save old stack
489#ifndef CONFIG_DEBUG_INFO
490 CFI_ADJUST_CFA_OFFSET 8
491#endif
492 call \func 488 call \func
493 .endm 489 .endm
494 490
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
497 interrupt do_IRQ 493 interrupt do_IRQ
498 /* 0(%rsp): oldrsp-ARGOFFSET */ 494 /* 0(%rsp): oldrsp-ARGOFFSET */
499ret_from_intr: 495ret_from_intr:
500 popq %rdi
501#ifndef CONFIG_DEBUG_INFO
502 CFI_ADJUST_CFA_OFFSET -8
503#endif
504 cli 496 cli
505 decl %gs:pda_irqcount 497 decl %gs:pda_irqcount
506#ifdef CONFIG_DEBUG_INFO 498 leaveq
507 movq RBP(%rdi),%rbp
508 CFI_DEF_CFA_REGISTER rsp 499 CFI_DEF_CFA_REGISTER rsp
509#endif 500 CFI_ADJUST_CFA_OFFSET -8
510 leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
511exit_intr: 501exit_intr:
512 GET_THREAD_INFO(%rcx) 502 GET_THREAD_INFO(%rcx)
513 testl $3,CS-ARGOFFSET(%rsp) 503 testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
589 call preempt_schedule_irq 579 call preempt_schedule_irq
590 jmp exit_intr 580 jmp exit_intr
591#endif 581#endif
582
592 CFI_ENDPROC 583 CFI_ENDPROC
584END(common_interrupt)
593 585
594/* 586/*
595 * APIC interrupts. 587 * APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
605 597
606ENTRY(thermal_interrupt) 598ENTRY(thermal_interrupt)
607 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 599 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
600END(thermal_interrupt)
608 601
609ENTRY(threshold_interrupt) 602ENTRY(threshold_interrupt)
610 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 603 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
604END(threshold_interrupt)
611 605
612#ifdef CONFIG_SMP 606#ifdef CONFIG_SMP
613ENTRY(reschedule_interrupt) 607ENTRY(reschedule_interrupt)
614 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 608 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
609END(reschedule_interrupt)
615 610
616 .macro INVALIDATE_ENTRY num 611 .macro INVALIDATE_ENTRY num
617ENTRY(invalidate_interrupt\num) 612ENTRY(invalidate_interrupt\num)
618 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 613 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
614END(invalidate_interrupt\num)
619 .endm 615 .endm
620 616
621 INVALIDATE_ENTRY 0 617 INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
629 625
630ENTRY(call_function_interrupt) 626ENTRY(call_function_interrupt)
631 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 627 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
628END(call_function_interrupt)
632#endif 629#endif
633 630
634#ifdef CONFIG_X86_LOCAL_APIC 631#ifdef CONFIG_X86_LOCAL_APIC
635ENTRY(apic_timer_interrupt) 632ENTRY(apic_timer_interrupt)
636 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 633 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
634END(apic_timer_interrupt)
637 635
638ENTRY(error_interrupt) 636ENTRY(error_interrupt)
639 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 637 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
638END(error_interrupt)
640 639
641ENTRY(spurious_interrupt) 640ENTRY(spurious_interrupt)
642 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 641 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
642END(spurious_interrupt)
643#endif 643#endif
644 644
645/* 645/*
@@ -777,6 +777,7 @@ error_kernelspace:
777 cmpq $gs_change,RIP(%rsp) 777 cmpq $gs_change,RIP(%rsp)
778 je error_swapgs 778 je error_swapgs
779 jmp error_sti 779 jmp error_sti
780END(error_entry)
780 781
781 /* Reload gs selector with exception handling */ 782 /* Reload gs selector with exception handling */
782 /* edi: new selector */ 783 /* edi: new selector */
@@ -794,6 +795,7 @@ gs_change:
794 CFI_ADJUST_CFA_OFFSET -8 795 CFI_ADJUST_CFA_OFFSET -8
795 ret 796 ret
796 CFI_ENDPROC 797 CFI_ENDPROC
798ENDPROC(load_gs_index)
797 799
798 .section __ex_table,"a" 800 .section __ex_table,"a"
799 .align 8 801 .align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
847 UNFAKE_STACK_FRAME 849 UNFAKE_STACK_FRAME
848 ret 850 ret
849 CFI_ENDPROC 851 CFI_ENDPROC
850 852ENDPROC(kernel_thread)
851 853
852child_rip: 854child_rip:
853 /* 855 /*
@@ -860,6 +862,7 @@ child_rip:
860 # exit 862 # exit
861 xorl %edi, %edi 863 xorl %edi, %edi
862 call do_exit 864 call do_exit
865ENDPROC(child_rip)
863 866
864/* 867/*
865 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 868 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
889 UNFAKE_STACK_FRAME 892 UNFAKE_STACK_FRAME
890 ret 893 ret
891 CFI_ENDPROC 894 CFI_ENDPROC
895ENDPROC(execve)
892 896
893KPROBE_ENTRY(page_fault) 897KPROBE_ENTRY(page_fault)
894 errorentry do_page_fault 898 errorentry do_page_fault
899END(page_fault)
895 .previous .text 900 .previous .text
896 901
897ENTRY(coprocessor_error) 902ENTRY(coprocessor_error)
898 zeroentry do_coprocessor_error 903 zeroentry do_coprocessor_error
904END(coprocessor_error)
899 905
900ENTRY(simd_coprocessor_error) 906ENTRY(simd_coprocessor_error)
901 zeroentry do_simd_coprocessor_error 907 zeroentry do_simd_coprocessor_error
908END(simd_coprocessor_error)
902 909
903ENTRY(device_not_available) 910ENTRY(device_not_available)
904 zeroentry math_state_restore 911 zeroentry math_state_restore
912END(device_not_available)
905 913
906 /* runs on exception stack */ 914 /* runs on exception stack */
907KPROBE_ENTRY(debug) 915KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
911 paranoidentry do_debug, DEBUG_STACK 919 paranoidentry do_debug, DEBUG_STACK
912 jmp paranoid_exit 920 jmp paranoid_exit
913 CFI_ENDPROC 921 CFI_ENDPROC
922END(debug)
914 .previous .text 923 .previous .text
915 924
916 /* runs on exception stack */ 925 /* runs on exception stack */
@@ -961,6 +970,7 @@ paranoid_schedule:
961 cli 970 cli
962 jmp paranoid_userspace 971 jmp paranoid_userspace
963 CFI_ENDPROC 972 CFI_ENDPROC
973END(nmi)
964 .previous .text 974 .previous .text
965 975
966KPROBE_ENTRY(int3) 976KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
970 paranoidentry do_int3, DEBUG_STACK 980 paranoidentry do_int3, DEBUG_STACK
971 jmp paranoid_exit 981 jmp paranoid_exit
972 CFI_ENDPROC 982 CFI_ENDPROC
983END(int3)
973 .previous .text 984 .previous .text
974 985
975ENTRY(overflow) 986ENTRY(overflow)
976 zeroentry do_overflow 987 zeroentry do_overflow
988END(overflow)
977 989
978ENTRY(bounds) 990ENTRY(bounds)
979 zeroentry do_bounds 991 zeroentry do_bounds
992END(bounds)
980 993
981ENTRY(invalid_op) 994ENTRY(invalid_op)
982 zeroentry do_invalid_op 995 zeroentry do_invalid_op
996END(invalid_op)
983 997
984ENTRY(coprocessor_segment_overrun) 998ENTRY(coprocessor_segment_overrun)
985 zeroentry do_coprocessor_segment_overrun 999 zeroentry do_coprocessor_segment_overrun
1000END(coprocessor_segment_overrun)
986 1001
987ENTRY(reserved) 1002ENTRY(reserved)
988 zeroentry do_reserved 1003 zeroentry do_reserved
1004END(reserved)
989 1005
990 /* runs on exception stack */ 1006 /* runs on exception stack */
991ENTRY(double_fault) 1007ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
993 paranoidentry do_double_fault 1009 paranoidentry do_double_fault
994 jmp paranoid_exit 1010 jmp paranoid_exit
995 CFI_ENDPROC 1011 CFI_ENDPROC
1012END(double_fault)
996 1013
997ENTRY(invalid_TSS) 1014ENTRY(invalid_TSS)
998 errorentry do_invalid_TSS 1015 errorentry do_invalid_TSS
1016END(invalid_TSS)
999 1017
1000ENTRY(segment_not_present) 1018ENTRY(segment_not_present)
1001 errorentry do_segment_not_present 1019 errorentry do_segment_not_present
1020END(segment_not_present)
1002 1021
1003 /* runs on exception stack */ 1022 /* runs on exception stack */
1004ENTRY(stack_segment) 1023ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
1006 paranoidentry do_stack_segment 1025 paranoidentry do_stack_segment
1007 jmp paranoid_exit 1026 jmp paranoid_exit
1008 CFI_ENDPROC 1027 CFI_ENDPROC
1028END(stack_segment)
1009 1029
1010KPROBE_ENTRY(general_protection) 1030KPROBE_ENTRY(general_protection)
1011 errorentry do_general_protection 1031 errorentry do_general_protection
1032END(general_protection)
1012 .previous .text 1033 .previous .text
1013 1034
1014ENTRY(alignment_check) 1035ENTRY(alignment_check)
1015 errorentry do_alignment_check 1036 errorentry do_alignment_check
1037END(alignment_check)
1016 1038
1017ENTRY(divide_error) 1039ENTRY(divide_error)
1018 zeroentry do_divide_error 1040 zeroentry do_divide_error
1041END(divide_error)
1019 1042
1020ENTRY(spurious_interrupt_bug) 1043ENTRY(spurious_interrupt_bug)
1021 zeroentry do_spurious_interrupt_bug 1044 zeroentry do_spurious_interrupt_bug
1045END(spurious_interrupt_bug)
1022 1046
1023#ifdef CONFIG_X86_MCE 1047#ifdef CONFIG_X86_MCE
1024 /* runs on exception stack */ 1048 /* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
1029 paranoidentry do_machine_check 1053 paranoidentry do_machine_check
1030 jmp paranoid_exit 1054 jmp paranoid_exit
1031 CFI_ENDPROC 1055 CFI_ENDPROC
1056END(machine_check)
1032#endif 1057#endif
1033 1058
1034ENTRY(call_softirq) 1059ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
1046 decl %gs:pda_irqcount 1071 decl %gs:pda_irqcount
1047 ret 1072 ret
1048 CFI_ENDPROC 1073 CFI_ENDPROC
1074ENDPROC(call_softirq)
1075
1076#ifdef CONFIG_STACK_UNWIND
/*
 * arch_unwind_init_running: record the caller's register state into the
 * register frame addressed by %rdi, then jump to a callback.
 *
 * In:  %rdi - base of the frame that the R15/RIP/RSP/... offsets index
 *      %rsi - address that is jumped to at the end (see xchgq below)
 *      %rdx - value handed on to that target in %rsi
 * NOTE(review): under the usual x86-64 C calling convention these are
 * arguments 1-3 - confirm against the C prototype.
 *
 * Only %r12-%r15, %rbp and %rbx are stored with their live values; every
 * other general-purpose slot, ORIG_RAX and EFLAGS are written as zero.
 */
1077ENTRY(arch_unwind_init_running)
1078 CFI_STARTPROC
1079 movq %r15, R15(%rdi)
1080 movq %r14, R14(%rdi)
/* Swap %rsi and %rdx: the jump target ends up in %rdx, the old %rdx in %rsi. */
1081 xchgq %rsi, %rdx
1082 movq %r13, R13(%rdi)
1083 movq %r12, R12(%rdi)
/* %rax = 0; it is the source for all zero-filled slots below. */
1084 xorl %eax, %eax
1085 movq %rbp, RBP(%rdi)
1086 movq %rbx, RBX(%rdi)
/* Top of stack on entry - presumably the return address pushed by the caller's call. */
1087 movq (%rsp), %rcx
1088 movq %rax, R11(%rdi)
1089 movq %rax, R10(%rdi)
1090 movq %rax, R9(%rdi)
1091 movq %rax, R8(%rdi)
1092 movq %rax, RAX(%rdi)
1093 movq %rax, RCX(%rdi)
1094 movq %rax, RDX(%rdi)
1095 movq %rax, RSI(%rdi)
1096 movq %rax, RDI(%rdi)
1097 movq %rax, ORIG_RAX(%rdi)
/* Saved RIP is the value loaded from (%rsp) above. */
1098 movq %rcx, RIP(%rdi)
/* Saved RSP is %rsp + 8, i.e. the stack pointer with the top word skipped. */
1099 leaq 8(%rsp), %rcx
1100 movq $__KERNEL_CS, CS(%rdi)
1101 movq %rax, EFLAGS(%rdi)
1102 movq %rcx, RSP(%rdi)
1103 movq $__KERNEL_DS, SS(%rdi)
/* Jump (not call) to the address originally passed in %rsi; %rdi is unchanged. */
1104 jmpq *%rdx
1105 CFI_ENDPROC
1106ENDPROC(arch_unwind_init_running)
1107#endif
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 1a2ab825be98..21c7066e236a 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
78 78
79static void flat_send_IPI_allbutself(int vector) 79static void flat_send_IPI_allbutself(int vector)
80{ 80{
81#ifndef CONFIG_HOTPLUG_CPU 81#ifdef CONFIG_HOTPLUG_CPU
82 if (((num_online_cpus()) - 1) >= 1) 82 int hotplug = 1;
83 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
84#else 83#else
85 cpumask_t allbutme = cpu_online_map; 84 int hotplug = 0;
85#endif
86 if (hotplug || vector == NMI_VECTOR) {
87 cpumask_t allbutme = cpu_online_map;
86 88
87 cpu_clear(smp_processor_id(), allbutme); 89 cpu_clear(smp_processor_id(), allbutme);
88 90
89 if (!cpus_empty(allbutme)) 91 if (!cpus_empty(allbutme))
90 flat_send_IPI_mask(allbutme, vector); 92 flat_send_IPI_mask(allbutme, vector);
91#endif 93 } else if (num_online_cpus() > 1) {
94 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
95 }
92} 96}
93 97
94static void flat_send_IPI_all(int vector) 98static void flat_send_IPI_all(int vector)
95{ 99{
96 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 100 if (vector == NMI_VECTOR)
101 flat_send_IPI_mask(cpu_online_map, vector);
102 else
103 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
97} 104}
98 105
99static int flat_apic_id_registered(void) 106static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
108 115
109static unsigned int phys_pkg_id(int index_msb) 116static unsigned int phys_pkg_id(int index_msb)
110{ 117{
111 u32 ebx; 118 return hard_smp_processor_id() >> index_msb;
112
113 ebx = cpuid_ebx(1);
114 return ((ebx >> 24) & 0xFF) >> index_msb;
115} 119}
116 120
117struct genapic apic_flat = { 121struct genapic apic_flat = {
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cea20a66c150..e6a71c9556d9 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -2,8 +2,6 @@
2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code 2 * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 *
6 * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
7 */ 5 */
8 6
9#include <linux/init.h> 7#include <linux/init.h>
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 5ecd34ab8c2b..9b1a4e147321 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -44,11 +44,11 @@
44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 44 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
45 BI(x,c) BI(x,d) BI(x,e) BI(x,f) 45 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
46 46
47#define BUILD_14_IRQS(x) \ 47#define BUILD_15_IRQS(x) \
48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ 48 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ 49 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ 50 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
51 BI(x,c) BI(x,d) 51 BI(x,c) BI(x,d) BI(x,e)
52 52
53/* 53/*
54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 54 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) 73BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
74 74
75#ifdef CONFIG_PCI_MSI 75#ifdef CONFIG_PCI_MSI
76 BUILD_14_IRQS(0xe) 76 BUILD_15_IRQS(0xe)
77#endif 77#endif
78 78
79#endif 79#endif
80 80
81#undef BUILD_16_IRQS 81#undef BUILD_16_IRQS
82#undef BUILD_14_IRQS 82#undef BUILD_15_IRQS
83#undef BI 83#undef BI
84 84
85 85
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 92 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) 93 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
94 94
95#define IRQLIST_14(x) \ 95#define IRQLIST_15(x) \
96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ 96 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ 97 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ 98 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
99 IRQ(x,c), IRQ(x,d) 99 IRQ(x,c), IRQ(x,d), IRQ(x,e)
100 100
101void (*interrupt[NR_IRQS])(void) = { 101void (*interrupt[NR_IRQS])(void) = {
102 IRQLIST_16(0x0), 102 IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
108 IRQLIST_16(0xc), IRQLIST_16(0xd) 108 IRQLIST_16(0xc), IRQLIST_16(0xd)
109 109
110#ifdef CONFIG_PCI_MSI 110#ifdef CONFIG_PCI_MSI
111 , IRQLIST_14(0xe) 111 , IRQLIST_15(0xe)
112#endif 112#endif
113 113
114#endif 114#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9cc7031b7151..c768d8a036d0 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -41,6 +41,7 @@
41#include <asm/mach_apic.h> 41#include <asm/mach_apic.h>
42#include <asm/acpi.h> 42#include <asm/acpi.h>
43#include <asm/dma.h> 43#include <asm/dma.h>
44#include <asm/nmi.h>
44 45
45#define __apicdebuginit __init 46#define __apicdebuginit __init
46 47
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
56static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
57 58
58static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock);
59 61
60/* 62/*
61 * # of IRQ routing registers 63 * # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
317 vendor &= 0xffff; 319 vendor &= 0xffff;
318 switch (vendor) { 320 switch (vendor) {
319 case PCI_VENDOR_ID_VIA: 321 case PCI_VENDOR_ID_VIA:
320#ifdef CONFIG_GART_IOMMU 322#ifdef CONFIG_IOMMU
321 if ((end_pfn > MAX_DMA32_PFN || 323 if ((end_pfn > MAX_DMA32_PFN ||
322 force_iommu) && 324 force_iommu) &&
323 !iommu_aperture_allowed) { 325 !iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
834int assign_irq_vector(int irq) 836int assign_irq_vector(int irq)
835{ 837{
836 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 838 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
839 unsigned long flags;
840 int vector;
837 841
838 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); 842 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
839 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 843
844 spin_lock_irqsave(&vector_lock, flags);
845
846 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
847 spin_unlock_irqrestore(&vector_lock, flags);
840 return IO_APIC_VECTOR(irq); 848 return IO_APIC_VECTOR(irq);
849 }
841next: 850next:
842 current_vector += 8; 851 current_vector += 8;
843 if (current_vector == IA32_SYSCALL_VECTOR) 852 if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
849 current_vector = FIRST_DEVICE_VECTOR + offset; 858 current_vector = FIRST_DEVICE_VECTOR + offset;
850 } 859 }
851 860
852 vector_irq[current_vector] = irq; 861 vector = current_vector;
862 vector_irq[vector] = irq;
853 if (irq != AUTO_ASSIGN) 863 if (irq != AUTO_ASSIGN)
854 IO_APIC_VECTOR(irq) = current_vector; 864 IO_APIC_VECTOR(irq) = vector;
865
866 spin_unlock_irqrestore(&vector_lock, flags);
855 867
856 return current_vector; 868 return vector;
857} 869}
858 870
859extern void (*interrupt[NR_IRQS])(void); 871extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
866 878
867static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 879static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
868{ 880{
869 if (use_pci_vector() && !platform_legacy_irq(irq)) { 881 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
870 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 882
871 trigger == IOAPIC_LEVEL) 883 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
872 irq_desc[vector].handler = &ioapic_level_type; 884 trigger == IOAPIC_LEVEL)
873 else 885 irq_desc[idx].handler = &ioapic_level_type;
874 irq_desc[vector].handler = &ioapic_edge_type; 886 else
875 set_intr_gate(vector, interrupt[vector]); 887 irq_desc[idx].handler = &ioapic_edge_type;
876 } else { 888 set_intr_gate(vector, interrupt[idx]);
877 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
878 trigger == IOAPIC_LEVEL)
879 irq_desc[irq].handler = &ioapic_level_type;
880 else
881 irq_desc[irq].handler = &ioapic_edge_type;
882 set_intr_gate(vector, interrupt[irq]);
883 }
884} 889}
885 890
886static void __init setup_IO_APIC_irqs(void) 891static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index d8bd0b345b1e..59518d4d4358 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
26#endif 26#endif
27#endif 27#endif
28 28
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
 * Probabilistic stack overflow check.
 *
 * The interrupted stack pointer is only examined when it lies inside the
 * current task's thread stack; anything else runs on the big interrupt
 * stacks.  A reliable check would be too expensive, so the stack is merely
 * sampled from interrupts.  A warning is printed when less than 128 bytes
 * remain above struct thread_info, at most once every 60 seconds.
 */
static inline void stack_overflow_check(struct pt_regs *regs)
{
	static unsigned long warned = -60*HZ;
	u64 curbase = (u64) current->thread_info;

	/* Not on the current task's stack: nothing to check. */
	if (regs->rsp < curbase || regs->rsp > curbase + THREAD_SIZE)
		return;

	/* Still comfortably above thread_info plus the safety margin. */
	if (regs->rsp >= curbase + sizeof(struct thread_info) + 128)
		return;

	/* Rate limit: stay quiet until 60 seconds after the last report. */
	if (!time_after(jiffies, warned + 60*HZ))
		return;

	printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
		current->comm, curbase, regs->rsp);
	show_stack(NULL,NULL);
	warned = jiffies;
}
#endif
52
29/* 53/*
30 * Generic, controller-independent functions: 54 * Generic, controller-independent functions:
31 */ 55 */
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
39 if (i == 0) { 63 if (i == 0) {
40 seq_printf(p, " "); 64 seq_printf(p, " ");
41 for_each_online_cpu(j) 65 for_each_online_cpu(j)
42 seq_printf(p, "CPU%d ",j); 66 seq_printf(p, "CPU%-8d",j);
43 seq_putc(p, '\n'); 67 seq_putc(p, '\n');
44 } 68 }
45 69
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
96 120
97 exit_idle(); 121 exit_idle();
98 irq_enter(); 122 irq_enter();
99 123#ifdef CONFIG_DEBUG_STACKOVERFLOW
124 stack_overflow_check(regs);
125#endif
100 __do_IRQ(irq, regs); 126 __do_IRQ(irq, regs);
101 irq_exit(); 127 irq_exit();
102 128
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 000000000000..6416682d33d0
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
1/*
2 * Shared support code for AMD K8 northbridges and derivatives.
3 * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
4 */
5#include <linux/gfp.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <asm/k8.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15
16static u32 *flush_words;
17
18struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
21 {}
22};
23EXPORT_SYMBOL(k8_nb_ids);
24
25struct pci_dev **k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges);
27
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
29{
30 do {
31 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
32 if (!dev)
33 break;
34 } while (!pci_match_id(&k8_nb_ids[0], dev));
35 return dev;
36}
37
38int cache_k8_northbridges(void)
39{
40 int i;
41 struct pci_dev *dev;
42 if (num_k8_northbridges)
43 return 0;
44
45 num_k8_northbridges = 0;
46 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++;
49
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
51 GFP_KERNEL);
52 if (!k8_northbridges)
53 return -ENOMEM;
54
55 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
56 if (!flush_words) {
57 kfree(k8_northbridges);
58 return -ENOMEM;
59 }
60
61 dev = NULL;
62 i = 0;
63 while ((dev = next_k8_northbridge(dev)) != NULL) {
64 k8_northbridges[i++] = dev;
65 pci_read_config_dword(dev, 0x9c, &flush_words[i]);
66 }
67 k8_northbridges[i] = NULL;
68 return 0;
69}
70EXPORT_SYMBOL_GPL(cache_k8_northbridges);
71
72/* Ignores subdevice/subvendor but as far as I can figure out
73 they're useless anyways */
74int __init early_is_k8_nb(u32 device)
75{
76 struct pci_device_id *id;
77 u32 vendor = device & 0xffff;
78 device >>= 16;
79 for (id = k8_nb_ids; id->vendor; id++)
80 if (vendor == id->vendor && device == id->device)
81 return 1;
82 return 0;
83}
84
85void k8_flush_garts(void)
86{
87 int flushed, i;
88 unsigned long flags;
89 static DEFINE_SPINLOCK(gart_lock);
90
91 /* Avoid races between AGP and IOMMU. In theory it's not needed
92 but I'm not sure if the hardware won't lose flush requests
93 when another is pending. This whole thing is so expensive anyways
94 that it doesn't matter to serialize more. -AK */
95 spin_lock_irqsave(&gart_lock, flags);
96 flushed = 0;
97 for (i = 0; i < num_k8_northbridges; i++) {
98 pci_write_config_dword(k8_northbridges[i], 0x9c,
99 flush_words[i]|1);
100 flushed++;
101 }
102 for (i = 0; i < num_k8_northbridges; i++) {
103 u32 w;
104 /* Make sure the hardware actually executed the flush*/
105 for (;;) {
106 pci_read_config_dword(k8_northbridges[i],
107 0x9c, &w);
108 if (!(w & 1))
109 break;
110 cpu_relax();
111 }
112 }
113 spin_unlock_irqrestore(&gart_lock, flags);
114 if (!flushed)
115 printk("nothing to flush?\n");
116}
117EXPORT_SYMBOL_GPL(k8_flush_garts);
118
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index c69fc43cee7b..acd5816b1a6f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
562 set_kset_name("machinecheck"), 562 set_kset_name("machinecheck"),
563}; 563};
564 564
565static DEFINE_PER_CPU(struct sys_device, device_mce); 565DEFINE_PER_CPU(struct sys_device, device_mce);
566 566
567/* Why are there no generic functions for this? */ 567/* Why are there no generic functions for this? */
568#define ACCESSOR(name, var, start) \ 568#define ACCESSOR(name, var, start) \
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d13b241ad094..335200aa2737 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * (c) 2005 Advanced Micro Devices, Inc. 2 * (c) 2005, 2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
8 * 8 *
9 * Support : jacob.shin@amd.com 9 * Support : jacob.shin@amd.com
10 * 10 *
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. 11 * April 2006
12 * MC4_MISC0 exists per physical processor. 12 * - added support for AMD Family 0x10 processors
13 * 13 *
14 * All MC4_MISCi registers are shared between multi-cores
14 */ 15 */
15 16
16#include <linux/cpu.h> 17#include <linux/cpu.h>
@@ -29,32 +30,45 @@
29#include <asm/percpu.h> 30#include <asm/percpu.h>
30#include <asm/idle.h> 31#include <asm/idle.h>
31 32
32#define PFX "mce_threshold: " 33#define PFX "mce_threshold: "
33#define VERSION "version 1.00.9" 34#define VERSION "version 1.1.1"
34#define NR_BANKS 5 35#define NR_BANKS 6
35#define THRESHOLD_MAX 0xFFF 36#define NR_BLOCKS 9
36#define INT_TYPE_APIC 0x00020000 37#define THRESHOLD_MAX 0xFFF
37#define MASK_VALID_HI 0x80000000 38#define INT_TYPE_APIC 0x00020000
38#define MASK_LVTOFF_HI 0x00F00000 39#define MASK_VALID_HI 0x80000000
39#define MASK_COUNT_EN_HI 0x00080000 40#define MASK_LVTOFF_HI 0x00F00000
40#define MASK_INT_TYPE_HI 0x00060000 41#define MASK_COUNT_EN_HI 0x00080000
41#define MASK_OVERFLOW_HI 0x00010000 42#define MASK_INT_TYPE_HI 0x00060000
43#define MASK_OVERFLOW_HI 0x00010000
42#define MASK_ERR_COUNT_HI 0x00000FFF 44#define MASK_ERR_COUNT_HI 0x00000FFF
43#define MASK_OVERFLOW 0x0001000000000000L 45#define MASK_BLKPTR_LO 0xFF000000
46#define MCG_XBLK_ADDR 0xC0000400
44 47
45struct threshold_bank { 48struct threshold_block {
49 unsigned int block;
50 unsigned int bank;
46 unsigned int cpu; 51 unsigned int cpu;
47 u8 bank; 52 u32 address;
48 u8 interrupt_enable; 53 u16 interrupt_enable;
49 u16 threshold_limit; 54 u16 threshold_limit;
50 struct kobject kobj; 55 struct kobject kobj;
56 struct list_head miscj;
51}; 57};
52 58
53static struct threshold_bank threshold_defaults = { 59/* defaults used early on boot */
60static struct threshold_block threshold_defaults = {
54 .interrupt_enable = 0, 61 .interrupt_enable = 0,
55 .threshold_limit = THRESHOLD_MAX, 62 .threshold_limit = THRESHOLD_MAX,
56}; 63};
57 64
65struct threshold_bank {
66 struct kobject kobj;
67 struct threshold_block *blocks;
68 cpumask_t cpus;
69};
70static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
71
58#ifdef CONFIG_SMP 72#ifdef CONFIG_SMP
59static unsigned char shared_bank[NR_BANKS] = { 73static unsigned char shared_bank[NR_BANKS] = {
60 0, 0, 0, 0, 1 74 0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
68 */ 82 */
69 83
70/* must be called with correct cpu affinity */ 84/* must be called with correct cpu affinity */
71static void threshold_restart_bank(struct threshold_bank *b, 85static void threshold_restart_bank(struct threshold_block *b,
72 int reset, u16 old_limit) 86 int reset, u16 old_limit)
73{ 87{
74 u32 mci_misc_hi, mci_misc_lo; 88 u32 mci_misc_hi, mci_misc_lo;
75 89
76 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 90 rdmsr(b->address, mci_misc_lo, mci_misc_hi);
77 91
78 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 92 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
79 reset = 1; /* limit cannot be lower than err count */ 93 reset = 1; /* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
94 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 108 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
95 109
96 mci_misc_hi |= MASK_COUNT_EN_HI; 110 mci_misc_hi |= MASK_COUNT_EN_HI;
97 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 111 wrmsr(b->address, mci_misc_lo, mci_misc_hi);
98} 112}
99 113
114/* cpu init entry point, called from mce.c with preempt off */
100void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 115void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
101{ 116{
102 int bank; 117 unsigned int bank, block;
103 u32 mci_misc_lo, mci_misc_hi;
104 unsigned int cpu = smp_processor_id(); 118 unsigned int cpu = smp_processor_id();
119 u32 low = 0, high = 0, address = 0;
105 120
106 for (bank = 0; bank < NR_BANKS; ++bank) { 121 for (bank = 0; bank < NR_BANKS; ++bank) {
107 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); 122 for (block = 0; block < NR_BLOCKS; ++block) {
123 if (block == 0)
124 address = MSR_IA32_MC0_MISC + bank * 4;
125 else if (block == 1)
126 address = MCG_XBLK_ADDR
127 + ((low & MASK_BLKPTR_LO) >> 21);
128 else
129 ++address;
130
131 if (rdmsr_safe(address, &low, &high))
132 continue;
108 133
109 /* !valid, !counter present, bios locked */ 134 if (!(high & MASK_VALID_HI)) {
110 if (!(mci_misc_hi & MASK_VALID_HI) || 135 if (block)
111 !(mci_misc_hi & MASK_VALID_HI >> 1) || 136 continue;
112 (mci_misc_hi & MASK_VALID_HI >> 2)) 137 else
113 continue; 138 break;
139 }
114 140
115 per_cpu(bank_map, cpu) |= (1 << bank); 141 if (!(high & MASK_VALID_HI >> 1) ||
142 (high & MASK_VALID_HI >> 2))
143 continue;
116 144
145 if (!block)
146 per_cpu(bank_map, cpu) |= (1 << bank);
117#ifdef CONFIG_SMP 147#ifdef CONFIG_SMP
118 if (shared_bank[bank] && cpu_core_id[cpu]) 148 if (shared_bank[bank] && c->cpu_core_id)
119 continue; 149 break;
120#endif 150#endif
151 high &= ~MASK_LVTOFF_HI;
152 high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
153 wrmsr(address, low, high);
121 154
122 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); 155 setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
123 threshold_defaults.cpu = cpu; 156 THRESHOLD_APIC_VECTOR,
124 threshold_defaults.bank = bank; 157 K8_APIC_EXT_INT_MSG_FIX, 0);
125 threshold_restart_bank(&threshold_defaults, 0, 0); 158
159 threshold_defaults.address = address;
160 threshold_restart_bank(&threshold_defaults, 0, 0);
161 }
126 } 162 }
127} 163}
128 164
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
137 */ 173 */
138asmlinkage void mce_threshold_interrupt(void) 174asmlinkage void mce_threshold_interrupt(void)
139{ 175{
140 int bank; 176 unsigned int bank, block;
141 struct mce m; 177 struct mce m;
178 u32 low = 0, high = 0, address = 0;
142 179
143 ack_APIC_irq(); 180 ack_APIC_irq();
144 exit_idle(); 181 exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
150 187
151 /* assume first bank caused it */ 188 /* assume first bank caused it */
152 for (bank = 0; bank < NR_BANKS; ++bank) { 189 for (bank = 0; bank < NR_BANKS; ++bank) {
153 m.bank = MCE_THRESHOLD_BASE + bank; 190 for (block = 0; block < NR_BLOCKS; ++block) {
154 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); 191 if (block == 0)
192 address = MSR_IA32_MC0_MISC + bank * 4;
193 else if (block == 1)
194 address = MCG_XBLK_ADDR
195 + ((low & MASK_BLKPTR_LO) >> 21);
196 else
197 ++address;
198
199 if (rdmsr_safe(address, &low, &high))
200 continue;
155 201
156 if (m.misc & MASK_OVERFLOW) { 202 if (!(high & MASK_VALID_HI)) {
157 mce_log(&m); 203 if (block)
158 goto out; 204 continue;
205 else
206 break;
207 }
208
209 if (!(high & MASK_VALID_HI >> 1) ||
210 (high & MASK_VALID_HI >> 2))
211 continue;
212
213 if (high & MASK_OVERFLOW_HI) {
214 rdmsrl(address, m.misc);
215 rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
216 m.status);
217 m.bank = K8_MCE_THRESHOLD_BASE
218 + bank * NR_BLOCKS
219 + block;
220 mce_log(&m);
221 goto out;
222 }
159 } 223 }
160 } 224 }
161 out: 225out:
162 irq_exit(); 226 irq_exit();
163} 227}
164 228
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
166 * Sysfs Interface 230 * Sysfs Interface
167 */ 231 */
168 232
169static struct sysdev_class threshold_sysclass = {
170 set_kset_name("threshold"),
171};
172
173static DEFINE_PER_CPU(struct sys_device, device_threshold);
174
175struct threshold_attr { 233struct threshold_attr {
176 struct attribute attr; 234 struct attribute attr;
177 ssize_t(*show) (struct threshold_bank *, char *); 235 ssize_t(*show) (struct threshold_block *, char *);
178 ssize_t(*store) (struct threshold_bank *, const char *, size_t count); 236 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
179}; 237};
180 238
181static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
182
183static cpumask_t affinity_set(unsigned int cpu) 239static cpumask_t affinity_set(unsigned int cpu)
184{ 240{
185 cpumask_t oldmask = current->cpus_allowed; 241 cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
194 set_cpus_allowed(current, oldmask); 250 set_cpus_allowed(current, oldmask);
195} 251}
196 252
197#define SHOW_FIELDS(name) \ 253#define SHOW_FIELDS(name) \
198 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ 254static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
199 { \ 255{ \
200 return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 256 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
201 } 257}
202SHOW_FIELDS(interrupt_enable) 258SHOW_FIELDS(interrupt_enable)
203SHOW_FIELDS(threshold_limit) 259SHOW_FIELDS(threshold_limit)
204 260
205static ssize_t store_interrupt_enable(struct threshold_bank *b, 261static ssize_t store_interrupt_enable(struct threshold_block *b,
206 const char *buf, size_t count) 262 const char *buf, size_t count)
207{ 263{
208 char *end; 264 char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
219 return end - buf; 275 return end - buf;
220} 276}
221 277
222static ssize_t store_threshold_limit(struct threshold_bank *b, 278static ssize_t store_threshold_limit(struct threshold_block *b,
223 const char *buf, size_t count) 279 const char *buf, size_t count)
224{ 280{
225 char *end; 281 char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
242 return end - buf; 298 return end - buf;
243} 299}
244 300
245static ssize_t show_error_count(struct threshold_bank *b, char *buf) 301static ssize_t show_error_count(struct threshold_block *b, char *buf)
246{ 302{
247 u32 high, low; 303 u32 high, low;
248 cpumask_t oldmask; 304 cpumask_t oldmask;
249 oldmask = affinity_set(b->cpu); 305 oldmask = affinity_set(b->cpu);
250 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ 306 rdmsr(b->address, low, high);
251 affinity_restore(oldmask); 307 affinity_restore(oldmask);
252 return sprintf(buf, "%x\n", 308 return sprintf(buf, "%x\n",
253 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 309 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
254} 310}
255 311
256static ssize_t store_error_count(struct threshold_bank *b, 312static ssize_t store_error_count(struct threshold_block *b,
257 const char *buf, size_t count) 313 const char *buf, size_t count)
258{ 314{
259 cpumask_t oldmask; 315 cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
269 .store = _store, \ 325 .store = _store, \
270}; 326};
271 327
272#define ATTR_FIELDS(name) \ 328#define RW_ATTR(name) \
273 static struct threshold_attr name = \ 329static struct threshold_attr name = \
274 THRESHOLD_ATTR(name, 0644, show_## name, store_## name) 330 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
275 331
276ATTR_FIELDS(interrupt_enable); 332RW_ATTR(interrupt_enable);
277ATTR_FIELDS(threshold_limit); 333RW_ATTR(threshold_limit);
278ATTR_FIELDS(error_count); 334RW_ATTR(error_count);
279 335
280static struct attribute *default_attrs[] = { 336static struct attribute *default_attrs[] = {
281 &interrupt_enable.attr, 337 &interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
284 NULL 340 NULL
285}; 341};
286 342
287#define to_bank(k) container_of(k,struct threshold_bank,kobj) 343#define to_block(k) container_of(k, struct threshold_block, kobj)
288#define to_attr(a) container_of(a,struct threshold_attr,attr) 344#define to_attr(a) container_of(a, struct threshold_attr, attr)
289 345
290static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 346static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
291{ 347{
292 struct threshold_bank *b = to_bank(kobj); 348 struct threshold_block *b = to_block(kobj);
293 struct threshold_attr *a = to_attr(attr); 349 struct threshold_attr *a = to_attr(attr);
294 ssize_t ret; 350 ssize_t ret;
295 ret = a->show ? a->show(b, buf) : -EIO; 351 ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
299static ssize_t store(struct kobject *kobj, struct attribute *attr, 355static ssize_t store(struct kobject *kobj, struct attribute *attr,
300 const char *buf, size_t count) 356 const char *buf, size_t count)
301{ 357{
302 struct threshold_bank *b = to_bank(kobj); 358 struct threshold_block *b = to_block(kobj);
303 struct threshold_attr *a = to_attr(attr); 359 struct threshold_attr *a = to_attr(attr);
304 ssize_t ret; 360 ssize_t ret;
305 ret = a->store ? a->store(b, buf, count) : -EIO; 361 ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
316 .default_attrs = default_attrs, 372 .default_attrs = default_attrs,
317}; 373};
318 374
375static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
376 unsigned int bank,
377 unsigned int block,
378 u32 address)
379{
380 int err;
381 u32 low, high;
382 struct threshold_block *b = NULL;
383
384 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
385 return 0;
386
387 if (rdmsr_safe(address, &low, &high))
388 goto recurse;
389
390 if (!(high & MASK_VALID_HI)) {
391 if (block)
392 goto recurse;
393 else
394 return 0;
395 }
396
397 if (!(high & MASK_VALID_HI >> 1) ||
398 (high & MASK_VALID_HI >> 2))
399 goto recurse;
400
401 b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
402 if (!b)
403 return -ENOMEM;
404 memset(b, 0, sizeof(struct threshold_block));
405
406 b->block = block;
407 b->bank = bank;
408 b->cpu = cpu;
409 b->address = address;
410 b->interrupt_enable = 0;
411 b->threshold_limit = THRESHOLD_MAX;
412
413 INIT_LIST_HEAD(&b->miscj);
414
415 if (per_cpu(threshold_banks, cpu)[bank]->blocks)
416 list_add(&b->miscj,
417 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
418 else
419 per_cpu(threshold_banks, cpu)[bank]->blocks = b;
420
421 kobject_set_name(&b->kobj, "misc%i", block);
422 b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
423 b->kobj.ktype = &threshold_ktype;
424 err = kobject_register(&b->kobj);
425 if (err)
426 goto out_free;
427recurse:
428 if (!block) {
429 address = (low & MASK_BLKPTR_LO) >> 21;
430 if (!address)
431 return 0;
432 address += MCG_XBLK_ADDR;
433 } else
434 ++address;
435
436 err = allocate_threshold_blocks(cpu, bank, ++block, address);
437 if (err)
438 goto out_free;
439
440 return err;
441
442out_free:
443 if (b) {
444 kobject_unregister(&b->kobj);
445 kfree(b);
446 }
447 return err;
448}
449
319/* symlinks sibling shared banks to first core. first core owns dir/files. */ 450/* symlinks sibling shared banks to first core. first core owns dir/files. */
320static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) 451static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
321{ 452{
322 int err = 0; 453 int i, err = 0;
323 struct threshold_bank *b = NULL; 454 struct threshold_bank *b = NULL;
455 cpumask_t oldmask = CPU_MASK_NONE;
456 char name[32];
457
458 sprintf(name, "threshold_bank%i", bank);
324 459
325#ifdef CONFIG_SMP 460#ifdef CONFIG_SMP
326 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ 461 if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
327 char name[16]; 462 i = first_cpu(cpu_core_map[cpu]);
328 unsigned lcpu = first_cpu(cpu_core_map[cpu]); 463
329 if (cpu_core_id[lcpu]) 464 /* first core not up yet */
330 goto out; /* first core not up yet */ 465 if (cpu_data[i].cpu_core_id)
466 goto out;
467
468 /* already linked */
469 if (per_cpu(threshold_banks, cpu)[bank])
470 goto out;
471
472 b = per_cpu(threshold_banks, i)[bank];
331 473
332 b = per_cpu(threshold_banks, lcpu)[bank];
333 if (!b) 474 if (!b)
334 goto out; 475 goto out;
335 sprintf(name, "bank%i", bank); 476
336 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, 477 err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
337 &b->kobj, name); 478 &b->kobj, name);
338 if (err) 479 if (err)
339 goto out; 480 goto out;
481
482 b->cpus = cpu_core_map[cpu];
340 per_cpu(threshold_banks, cpu)[bank] = b; 483 per_cpu(threshold_banks, cpu)[bank] = b;
341 goto out; 484 goto out;
342 } 485 }
343#endif 486#endif
344 487
345 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); 488 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
346 if (!b) { 489 if (!b) {
347 err = -ENOMEM; 490 err = -ENOMEM;
348 goto out; 491 goto out;
349 } 492 }
350 memset(b, 0, sizeof(struct threshold_bank)); 493 memset(b, 0, sizeof(struct threshold_bank));
351 494
352 b->cpu = cpu; 495 kobject_set_name(&b->kobj, "threshold_bank%i", bank);
353 b->bank = bank; 496 b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
354 b->interrupt_enable = 0; 497#ifndef CONFIG_SMP
355 b->threshold_limit = THRESHOLD_MAX; 498 b->cpus = CPU_MASK_ALL;
356 kobject_set_name(&b->kobj, "bank%i", bank); 499#else
357 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; 500 b->cpus = cpu_core_map[cpu];
358 b->kobj.ktype = &threshold_ktype; 501#endif
359
360 err = kobject_register(&b->kobj); 502 err = kobject_register(&b->kobj);
361 if (err) { 503 if (err)
362 kfree(b); 504 goto out_free;
363 goto out; 505
364 }
365 per_cpu(threshold_banks, cpu)[bank] = b; 506 per_cpu(threshold_banks, cpu)[bank] = b;
366 out: 507
508 oldmask = affinity_set(cpu);
509 err = allocate_threshold_blocks(cpu, bank, 0,
510 MSR_IA32_MC0_MISC + bank * 4);
511 affinity_restore(oldmask);
512
513 if (err)
514 goto out_free;
515
516 for_each_cpu_mask(i, b->cpus) {
517 if (i == cpu)
518 continue;
519
520 err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
521 &b->kobj, name);
522 if (err)
523 goto out;
524
525 per_cpu(threshold_banks, i)[bank] = b;
526 }
527
528 goto out;
529
530out_free:
531 per_cpu(threshold_banks, cpu)[bank] = NULL;
532 kfree(b);
533out:
367 return err; 534 return err;
368} 535}
369 536
370/* create dir/files for all valid threshold banks */ 537/* create dir/files for all valid threshold banks */
371static __cpuinit int threshold_create_device(unsigned int cpu) 538static __cpuinit int threshold_create_device(unsigned int cpu)
372{ 539{
373 int bank; 540 unsigned int bank;
374 int err = 0; 541 int err = 0;
375 542
376 per_cpu(device_threshold, cpu).id = cpu;
377 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
378 err = sysdev_register(&per_cpu(device_threshold, cpu));
379 if (err)
380 goto out;
381
382 for (bank = 0; bank < NR_BANKS; ++bank) { 543 for (bank = 0; bank < NR_BANKS; ++bank) {
383 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 544 if (!(per_cpu(bank_map, cpu) & 1 << bank))
384 continue; 545 continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
386 if (err) 547 if (err)
387 goto out; 548 goto out;
388 } 549 }
389 out: 550out:
390 return err; 551 return err;
391} 552}
392 553
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
397 * of shared sysfs dir/files, and rest of the cores will be symlinked to it. 558 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
398 */ 559 */
399 560
400/* cpu hotplug call removes all symlinks before first core dies */ 561static __cpuinit void deallocate_threshold_block(unsigned int cpu,
562 unsigned int bank)
563{
564 struct threshold_block *pos = NULL;
565 struct threshold_block *tmp = NULL;
566 struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
567
568 if (!head)
569 return;
570
571 list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
572 kobject_unregister(&pos->kobj);
573 list_del(&pos->miscj);
574 kfree(pos);
575 }
576
577 kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
578 per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
579}
580
401static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) 581static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
402{ 582{
583 int i = 0;
403 struct threshold_bank *b; 584 struct threshold_bank *b;
404 char name[16]; 585 char name[32];
405 586
406 b = per_cpu(threshold_banks, cpu)[bank]; 587 b = per_cpu(threshold_banks, cpu)[bank];
588
407 if (!b) 589 if (!b)
408 return; 590 return;
409 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { 591
410 sprintf(name, "bank%i", bank); 592 if (!b->blocks)
411 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); 593 goto free_out;
412 per_cpu(threshold_banks, cpu)[bank] = NULL; 594
413 } else { 595 sprintf(name, "threshold_bank%i", bank);
414 kobject_unregister(&b->kobj); 596
415 kfree(per_cpu(threshold_banks, cpu)[bank]); 597 /* sibling symlink */
598 if (shared_bank[bank] && b->blocks->cpu != cpu) {
599 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
600 per_cpu(threshold_banks, i)[bank] = NULL;
601 return;
602 }
603
604 /* remove all sibling symlinks before unregistering */
605 for_each_cpu_mask(i, b->cpus) {
606 if (i == cpu)
607 continue;
608
609 sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
610 per_cpu(threshold_banks, i)[bank] = NULL;
416 } 611 }
612
613 deallocate_threshold_block(cpu, bank);
614
615free_out:
616 kobject_unregister(&b->kobj);
617 kfree(b);
618 per_cpu(threshold_banks, cpu)[bank] = NULL;
417} 619}
418 620
419static __cpuinit void threshold_remove_device(unsigned int cpu) 621static __cpuinit void threshold_remove_device(unsigned int cpu)
420{ 622{
421 int bank; 623 unsigned int bank;
422 624
423 for (bank = 0; bank < NR_BANKS; ++bank) { 625 for (bank = 0; bank < NR_BANKS; ++bank) {
424 if (!(per_cpu(bank_map, cpu) & 1 << bank)) 626 if (!(per_cpu(bank_map, cpu) & 1 << bank))
425 continue; 627 continue;
426 threshold_remove_bank(cpu, bank); 628 threshold_remove_bank(cpu, bank);
427 } 629 }
428 sysdev_unregister(&per_cpu(device_threshold, cpu));
429} 630}
430 631
431/* link all existing siblings when first core comes up */
432static __cpuinit int threshold_create_symlinks(unsigned int cpu)
433{
434 int bank, err = 0;
435 unsigned int lcpu = 0;
436
437 if (cpu_core_id[cpu])
438 return 0;
439 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
440 if (lcpu == cpu)
441 continue;
442 for (bank = 0; bank < NR_BANKS; ++bank) {
443 if (!(per_cpu(bank_map, cpu) & 1 << bank))
444 continue;
445 if (!shared_bank[bank])
446 continue;
447 err = threshold_create_bank(lcpu, bank);
448 }
449 }
450 return err;
451}
452
453/* remove all symlinks before first core dies. */
454static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
455{
456 int bank;
457 unsigned int lcpu = 0;
458 if (cpu_core_id[cpu])
459 return;
460 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
461 if (lcpu == cpu)
462 continue;
463 for (bank = 0; bank < NR_BANKS; ++bank) {
464 if (!(per_cpu(bank_map, cpu) & 1 << bank))
465 continue;
466 if (!shared_bank[bank])
467 continue;
468 threshold_remove_bank(lcpu, bank);
469 }
470 }
471}
472#else /* !CONFIG_HOTPLUG_CPU */ 632#else /* !CONFIG_HOTPLUG_CPU */
473static __cpuinit void threshold_create_symlinks(unsigned int cpu)
474{
475}
476static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
477{
478}
479static void threshold_remove_device(unsigned int cpu) 633static void threshold_remove_device(unsigned int cpu)
480{ 634{
481} 635}
482#endif 636#endif
483 637
484/* get notified when a cpu comes on/off */ 638/* get notified when a cpu comes on/off */
485static int threshold_cpu_callback(struct notifier_block *nfb, 639static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu) 640 unsigned long action, void *hcpu)
487{ 641{
488 /* cpu was unsigned int to begin with */ 642 /* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
494 switch (action) { 648 switch (action) {
495 case CPU_ONLINE: 649 case CPU_ONLINE:
496 threshold_create_device(cpu); 650 threshold_create_device(cpu);
497 threshold_create_symlinks(cpu);
498 break;
499 case CPU_DOWN_PREPARE:
500 threshold_remove_symlinks(cpu);
501 break;
502 case CPU_DOWN_FAILED:
503 threshold_create_symlinks(cpu);
504 break; 651 break;
505 case CPU_DEAD: 652 case CPU_DEAD:
506 threshold_remove_device(cpu); 653 threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
512 return NOTIFY_OK; 659 return NOTIFY_OK;
513} 660}
514 661
515static struct notifier_block threshold_cpu_notifier = { 662static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
516 .notifier_call = threshold_cpu_callback, 663 .notifier_call = threshold_cpu_callback,
517}; 664};
518 665
519static __init int threshold_init_device(void) 666static __init int threshold_init_device(void)
520{ 667{
521 int err; 668 unsigned lcpu = 0;
522 int lcpu = 0;
523
524 err = sysdev_class_register(&threshold_sysclass);
525 if (err)
526 goto out;
527 669
528 /* to hit CPUs online before the notifier is up */ 670 /* to hit CPUs online before the notifier is up */
529 for_each_online_cpu(lcpu) { 671 for_each_online_cpu(lcpu) {
530 err = threshold_create_device(lcpu); 672 int err = threshold_create_device(lcpu);
531 if (err) 673 if (err)
532 goto out; 674 return err;
533 } 675 }
534 register_cpu_notifier(&threshold_cpu_notifier); 676 register_cpu_notifier(&threshold_cpu_notifier);
535 677 return 0;
536 out:
537 return err;
538} 678}
539 679
540device_initcall(threshold_init_device); 680device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index bac195c74bcc..9d0958ff547f 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
145 return -ENOSYS; 145 return -ENOSYS;
146} 146}
147 147
148extern void apply_alternatives(void *start, void *end);
149
150int module_finalize(const Elf_Ehdr *hdr, 148int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 149 const Elf_Shdr *sechdrs,
152 struct module *me) 150 struct module *me)
153{ 151{
154 const Elf_Shdr *s; 152 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
155 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 153 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
156 154
157 /* look for .altinstructions to patch */ 155 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
158 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 156 if (!strcmp(".text", secstrings + s->sh_name))
159 void *seg; 157 text = s;
160 if (strcmp(".altinstructions", secstrings + s->sh_name)) 158 if (!strcmp(".altinstructions", secstrings + s->sh_name))
161 continue; 159 alt = s;
162 seg = (void *)s->sh_addr; 160 if (!strcmp(".smp_locks", secstrings + s->sh_name))
163 apply_alternatives(seg, seg + s->sh_size); 161 locks= s;
164 } 162 }
163
164 if (alt) {
165 /* patch .altinstructions */
166 void *aseg = (void *)alt->sh_addr;
167 apply_alternatives(aseg, aseg + alt->sh_size);
168 }
169 if (locks && text) {
170 void *lseg = (void *)locks->sh_addr;
171 void *tseg = (void *)text->sh_addr;
172 alternatives_smp_module_add(me, me->name,
173 lseg, lseg + locks->sh_size,
174 tseg, tseg + text->sh_size);
175 }
165 return 0; 176 return 0;
166} 177}
167 178
168void module_arch_cleanup(struct module *mod) 179void module_arch_cleanup(struct module *mod)
169{ 180{
181 alternatives_smp_module_del(mod);
170} 182}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e6357fe0ec3..399489c93132 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -15,11 +15,7 @@
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/bootmem.h>
19#include <linux/smp_lock.h>
20#include <linux/interrupt.h> 18#include <linux/interrupt.h>
21#include <linux/mc146818rtc.h>
22#include <linux/kernel_stat.h>
23#include <linux/module.h> 19#include <linux/module.h>
24#include <linux/sysdev.h> 20#include <linux/sysdev.h>
25#include <linux/nmi.h> 21#include <linux/nmi.h>
@@ -27,14 +23,11 @@
27#include <linux/kprobes.h> 23#include <linux/kprobes.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/mtrr.h>
31#include <asm/mpspec.h>
32#include <asm/nmi.h> 26#include <asm/nmi.h>
33#include <asm/msr.h>
34#include <asm/proto.h> 27#include <asm/proto.h>
35#include <asm/kdebug.h> 28#include <asm/kdebug.h>
36#include <asm/local.h>
37#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
38 31
39/* 32/*
40 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 33 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
74#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 67#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
75#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 68#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
76 69
70#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
71#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
72
77#define MSR_P4_MISC_ENABLE 0x1A0 73#define MSR_P4_MISC_ENABLE 0x1A0
78#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 74#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
79#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) 75#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
105 case X86_VENDOR_AMD: 101 case X86_VENDOR_AMD:
106 return boot_cpu_data.x86 == 15; 102 return boot_cpu_data.x86 == 15;
107 case X86_VENDOR_INTEL: 103 case X86_VENDOR_INTEL:
108 return boot_cpu_data.x86 == 15; 104 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
105 return 1;
106 else
107 return (boot_cpu_data.x86 == 15);
109 } 108 }
110 return 0; 109 return 0;
111} 110}
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
211 210
212__setup("nmi_watchdog=", setup_nmi_watchdog); 211__setup("nmi_watchdog=", setup_nmi_watchdog);
213 212
213static void disable_intel_arch_watchdog(void);
214
214static void disable_lapic_nmi_watchdog(void) 215static void disable_lapic_nmi_watchdog(void)
215{ 216{
216 if (nmi_active <= 0) 217 if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
223 if (boot_cpu_data.x86 == 15) { 224 if (boot_cpu_data.x86 == 15) {
224 wrmsr(MSR_P4_IQ_CCCR0, 0, 0); 225 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
225 wrmsr(MSR_P4_CRU_ESCR0, 0, 0); 226 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
227 } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
228 disable_intel_arch_watchdog();
226 } 229 }
227 break; 230 break;
228 } 231 }
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
375 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 378 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
376} 379}
377 380
381static void disable_intel_arch_watchdog(void)
382{
383 unsigned ebx;
384
385 /*
386 * Check whether the Architectural PerfMon supports
387 * Unhalted Core Cycles Event or not.
388 * NOTE: Corresponding bit = 0 in ebp indicates event present.
389 */
390 ebx = cpuid_ebx(10);
391 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
392 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
393}
394
395static int setup_intel_arch_watchdog(void)
396{
397 unsigned int evntsel;
398 unsigned ebx;
399
400 /*
401 * Check whether the Architectural PerfMon supports
402 * Unhalted Core Cycles Event or not.
403 * NOTE: Corresponding bit = 0 in ebp indicates event present.
404 */
405 ebx = cpuid_ebx(10);
406 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
407 return 0;
408
409 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
410
411 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
412 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
413
414 evntsel = ARCH_PERFMON_EVENTSEL_INT
415 | ARCH_PERFMON_EVENTSEL_OS
416 | ARCH_PERFMON_EVENTSEL_USR
417 | ARCH_PERFMON_NMI_EVENT_SEL
418 | ARCH_PERFMON_NMI_EVENT_UMASK;
419
420 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
421 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
422 apic_write(APIC_LVTPC, APIC_DM_NMI);
423 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
424 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
425 return 1;
426}
427
378 428
379static int setup_p4_watchdog(void) 429static int setup_p4_watchdog(void)
380{ 430{
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
428 setup_k7_watchdog(); 478 setup_k7_watchdog();
429 break; 479 break;
430 case X86_VENDOR_INTEL: 480 case X86_VENDOR_INTEL:
431 if (boot_cpu_data.x86 != 15) 481 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
432 return; 482 if (!setup_intel_arch_watchdog())
433 if (!setup_p4_watchdog()) 483 return;
484 } else if (boot_cpu_data.x86 == 15) {
485 if (!setup_p4_watchdog())
486 return;
487 } else {
434 return; 488 return;
489 }
490
435 break; 491 break;
436 492
437 default: 493 default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
516 */ 572 */
517 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 573 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
518 apic_write(APIC_LVTPC, APIC_DM_NMI); 574 apic_write(APIC_LVTPC, APIC_DM_NMI);
519 } 575 } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
576 /*
577 * For Intel based architectural perfmon
578 * - LVTPC is masked on interrupt and must be
579 * unmasked by the LVTPC handler.
580 */
581 apic_write(APIC_LVTPC, APIC_DM_NMI);
582 }
520 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); 583 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
521 } 584 }
522} 585}
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644
index 000000000000..d91cb843f54d
--- /dev/null
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -0,0 +1,1018 @@
1/*
2 * Derived from arch/powerpc/kernel/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/spinlock.h>
29#include <linux/string.h>
30#include <linux/dma-mapping.h>
31#include <linux/init.h>
32#include <linux/bitops.h>
33#include <linux/pci_ids.h>
34#include <linux/pci.h>
35#include <linux/delay.h>
36#include <asm/proto.h>
37#include <asm/calgary.h>
38#include <asm/tce.h>
39#include <asm/pci-direct.h>
40#include <asm/system.h>
41#include <asm/dma.h>
42
43#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
44#define PCI_VENDOR_DEVICE_ID_CALGARY \
45 (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
46
47/* we need these for register space address calculation */
48#define START_ADDRESS 0xfe000000
49#define CHASSIS_BASE 0
50#define ONE_BASED_CHASSIS_NUM 1
51
52/* register offsets inside the host bridge space */
53#define PHB_CSR_OFFSET 0x0110
54#define PHB_PLSSR_OFFSET 0x0120
55#define PHB_CONFIG_RW_OFFSET 0x0160
56#define PHB_IOBASE_BAR_LOW 0x0170
57#define PHB_IOBASE_BAR_HIGH 0x0180
58#define PHB_MEM_1_LOW 0x0190
59#define PHB_MEM_1_HIGH 0x01A0
60#define PHB_IO_ADDR_SIZE 0x01B0
61#define PHB_MEM_1_SIZE 0x01C0
62#define PHB_MEM_ST_OFFSET 0x01D0
63#define PHB_AER_OFFSET 0x0200
64#define PHB_CONFIG_0_HIGH 0x0220
65#define PHB_CONFIG_0_LOW 0x0230
66#define PHB_CONFIG_0_END 0x0240
67#define PHB_MEM_2_LOW 0x02B0
68#define PHB_MEM_2_HIGH 0x02C0
69#define PHB_MEM_2_SIZE_HIGH 0x02D0
70#define PHB_MEM_2_SIZE_LOW 0x02E0
71#define PHB_DOSHOLE_OFFSET 0x08E0
72
73/* PHB_CONFIG_RW */
74#define PHB_TCE_ENABLE 0x20000000
75#define PHB_SLOT_DISABLE 0x1C000000
76#define PHB_DAC_DISABLE 0x01000000
77#define PHB_MEM2_ENABLE 0x00400000
78#define PHB_MCSR_ENABLE 0x00100000
79/* TAR (Table Address Register) */
80#define TAR_SW_BITS 0x0000ffffffff800fUL
81#define TAR_VALID 0x0000000000000008UL
82/* CSR (Channel/DMA Status Register) */
83#define CSR_AGENT_MASK 0xffe0ffff
84
85#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
86#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
87#define PHBS_PER_CALGARY 4
88
/*
 * Register offsets in Calgary's internal register space.  Each table holds
 * one entry per PHB of a Calgary chip and is indexed with the value
 * returned by busno_to_phbid().
 */

/* Table Address Registers */
static const unsigned long tar_offsets[] = {
	0x0580,		/* TAR0 */
	0x0588,		/* TAR1 */
	0x0590,		/* TAR2 */
	0x0598,		/* TAR3 */
};

/* split queue registers, polled while waiting for DMA to drain */
static const unsigned long split_queue_offsets[] = {
	0x4870,		/* SPLIT QUEUE 0 */
	0x5870,		/* SPLIT QUEUE 1 */
	0x6870,		/* SPLIT QUEUE 2 */
	0x7870,		/* SPLIT QUEUE 3 */
};

/* base of each PHB's register window */
static const unsigned long phb_offsets[] = {
	0x8000,		/* PHB0 */
	0x9000,		/* PHB1 */
	0xA000,		/* PHB2 */
	0xB000,		/* PHB3 */
};
110
111void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
112unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
113static int translate_empty_slots __read_mostly = 0;
114static int calgary_detected __read_mostly = 0;
115
116/*
117 * the bitmap of PHBs the user requested that we disable
118 * translation on.
119 */
120static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
121
122static void tce_cache_blast(struct iommu_table *tbl);
123
/*
 * Stress-test hook for the chip's TCE cache: with CONFIG_IOMMU_DEBUG the
 * cache is blasted on every call, otherwise this compiles to nothing.
 */
static inline void tce_cache_blast_stress(struct iommu_table *tbl)
{
#ifdef CONFIG_IOMMU_DEBUG
	tce_cache_blast(tbl);
#endif /* CONFIG_IOMMU_DEBUG */
}
135
136static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
137{
138 unsigned int npages;
139
140 npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
141 npages >>= PAGE_SHIFT;
142
143 return npages;
144}
145
146static inline int translate_phb(struct pci_dev* dev)
147{
148 int disabled = test_bit(dev->bus->number, translation_disabled);
149 return !disabled;
150}
151
152static void iommu_range_reserve(struct iommu_table *tbl,
153 unsigned long start_addr, unsigned int npages)
154{
155 unsigned long index;
156 unsigned long end;
157
158 index = start_addr >> PAGE_SHIFT;
159
160 /* bail out if we're asked to reserve a region we don't cover */
161 if (index >= tbl->it_size)
162 return;
163
164 end = index + npages;
165 if (end > tbl->it_size) /* don't go off the table */
166 end = tbl->it_size;
167
168 while (index < end) {
169 if (test_bit(index, tbl->it_map))
170 printk(KERN_ERR "Calgary: entry already allocated at "
171 "0x%lx tbl %p dma 0x%lx npages %u\n",
172 index, tbl, start_addr, npages);
173 ++index;
174 }
175 set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
176}
177
178static unsigned long iommu_range_alloc(struct iommu_table *tbl,
179 unsigned int npages)
180{
181 unsigned long offset;
182
183 BUG_ON(npages == 0);
184
185 offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
186 tbl->it_size, npages);
187 if (offset == ~0UL) {
188 tce_cache_blast(tbl);
189 offset = find_next_zero_string(tbl->it_map, 0,
190 tbl->it_size, npages);
191 if (offset == ~0UL) {
192 printk(KERN_WARNING "Calgary: IOMMU full.\n");
193 if (panic_on_overflow)
194 panic("Calgary: fix the allocator.\n");
195 else
196 return bad_dma_address;
197 }
198 }
199
200 set_bit_string(tbl->it_map, offset, npages);
201 tbl->it_hint = offset + npages;
202 BUG_ON(tbl->it_hint > tbl->it_size);
203
204 return offset;
205}
206
207static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
208 unsigned int npages, int direction)
209{
210 unsigned long entry, flags;
211 dma_addr_t ret = bad_dma_address;
212
213 spin_lock_irqsave(&tbl->it_lock, flags);
214
215 entry = iommu_range_alloc(tbl, npages);
216
217 if (unlikely(entry == bad_dma_address))
218 goto error;
219
220 /* set the return dma address */
221 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
222
223 /* put the TCEs in the HW table */
224 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
225 direction);
226
227 spin_unlock_irqrestore(&tbl->it_lock, flags);
228
229 return ret;
230
231error:
232 spin_unlock_irqrestore(&tbl->it_lock, flags);
233 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
234 "iommu %p\n", npages, tbl);
235 return bad_dma_address;
236}
237
238static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
239 unsigned int npages)
240{
241 unsigned long entry;
242 unsigned long i;
243
244 entry = dma_addr >> PAGE_SHIFT;
245
246 BUG_ON(entry + npages > tbl->it_size);
247
248 tce_free(tbl, entry, npages);
249
250 for (i = 0; i < npages; ++i) {
251 if (!test_bit(entry + i, tbl->it_map))
252 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
253 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
254 entry + i, tbl, dma_addr, entry, npages);
255 }
256
257 __clear_bit_string(tbl->it_map, entry, npages);
258
259 tce_cache_blast_stress(tbl);
260}
261
262static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
263 unsigned int npages)
264{
265 unsigned long flags;
266
267 spin_lock_irqsave(&tbl->it_lock, flags);
268
269 __iommu_free(tbl, dma_addr, npages);
270
271 spin_unlock_irqrestore(&tbl->it_lock, flags);
272}
273
274static void __calgary_unmap_sg(struct iommu_table *tbl,
275 struct scatterlist *sglist, int nelems, int direction)
276{
277 while (nelems--) {
278 unsigned int npages;
279 dma_addr_t dma = sglist->dma_address;
280 unsigned int dmalen = sglist->dma_length;
281
282 if (dmalen == 0)
283 break;
284
285 npages = num_dma_pages(dma, dmalen);
286 __iommu_free(tbl, dma, npages);
287 sglist++;
288 }
289}
290
291void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
292 int nelems, int direction)
293{
294 unsigned long flags;
295 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
296
297 if (!translate_phb(to_pci_dev(dev)))
298 return;
299
300 spin_lock_irqsave(&tbl->it_lock, flags);
301
302 __calgary_unmap_sg(tbl, sglist, nelems, direction);
303
304 spin_unlock_irqrestore(&tbl->it_lock, flags);
305}
306
307static int calgary_nontranslate_map_sg(struct device* dev,
308 struct scatterlist *sg, int nelems, int direction)
309{
310 int i;
311
312 for (i = 0; i < nelems; i++ ) {
313 struct scatterlist *s = &sg[i];
314 BUG_ON(!s->page);
315 s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
316 s->dma_length = s->length;
317 }
318 return nelems;
319}
320
321int calgary_map_sg(struct device *dev, struct scatterlist *sg,
322 int nelems, int direction)
323{
324 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
325 unsigned long flags;
326 unsigned long vaddr;
327 unsigned int npages;
328 unsigned long entry;
329 int i;
330
331 if (!translate_phb(to_pci_dev(dev)))
332 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
333
334 spin_lock_irqsave(&tbl->it_lock, flags);
335
336 for (i = 0; i < nelems; i++ ) {
337 struct scatterlist *s = &sg[i];
338 BUG_ON(!s->page);
339
340 vaddr = (unsigned long)page_address(s->page) + s->offset;
341 npages = num_dma_pages(vaddr, s->length);
342
343 entry = iommu_range_alloc(tbl, npages);
344 if (entry == bad_dma_address) {
345 /* makes sure unmap knows to stop */
346 s->dma_length = 0;
347 goto error;
348 }
349
350 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
351
352 /* insert into HW table */
353 tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
354 direction);
355
356 s->dma_length = s->length;
357 }
358
359 spin_unlock_irqrestore(&tbl->it_lock, flags);
360
361 return nelems;
362error:
363 __calgary_unmap_sg(tbl, sg, nelems, direction);
364 for (i = 0; i < nelems; i++) {
365 sg[i].dma_address = bad_dma_address;
366 sg[i].dma_length = 0;
367 }
368 spin_unlock_irqrestore(&tbl->it_lock, flags);
369 return 0;
370}
371
372dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
373 size_t size, int direction)
374{
375 dma_addr_t dma_handle = bad_dma_address;
376 unsigned long uaddr;
377 unsigned int npages;
378 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
379
380 uaddr = (unsigned long)vaddr;
381 npages = num_dma_pages(uaddr, size);
382
383 if (translate_phb(to_pci_dev(dev)))
384 dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
385 else
386 dma_handle = virt_to_bus(vaddr);
387
388 return dma_handle;
389}
390
391void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
392 size_t size, int direction)
393{
394 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
395 unsigned int npages;
396
397 if (!translate_phb(to_pci_dev(dev)))
398 return;
399
400 npages = num_dma_pages(dma_handle, size);
401 iommu_free(tbl, dma_handle, npages);
402}
403
404void* calgary_alloc_coherent(struct device *dev, size_t size,
405 dma_addr_t *dma_handle, gfp_t flag)
406{
407 void *ret = NULL;
408 dma_addr_t mapping;
409 unsigned int npages, order;
410 struct iommu_table *tbl;
411
412 tbl = to_pci_dev(dev)->bus->self->sysdata;
413
414 size = PAGE_ALIGN(size); /* size rounded up to full pages */
415 npages = size >> PAGE_SHIFT;
416 order = get_order(size);
417
418 /* alloc enough pages (and possibly more) */
419 ret = (void *)__get_free_pages(flag, order);
420 if (!ret)
421 goto error;
422 memset(ret, 0, size);
423
424 if (translate_phb(to_pci_dev(dev))) {
425 /* set up tces to cover the allocated range */
426 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
427 if (mapping == bad_dma_address)
428 goto free;
429
430 *dma_handle = mapping;
431 } else /* non translated slot */
432 *dma_handle = virt_to_bus(ret);
433
434 return ret;
435
436free:
437 free_pages((unsigned long)ret, get_order(size));
438 ret = NULL;
439error:
440 return ret;
441}
442
443static struct dma_mapping_ops calgary_dma_ops = {
444 .alloc_coherent = calgary_alloc_coherent,
445 .map_single = calgary_map_single,
446 .unmap_single = calgary_unmap_single,
447 .map_sg = calgary_map_sg,
448 .unmap_sg = calgary_unmap_sg,
449};
450
451static inline int busno_to_phbid(unsigned char num)
452{
453 return bus_to_phb(num) % PHBS_PER_CALGARY;
454}
455
456static inline unsigned long split_queue_offset(unsigned char num)
457{
458 size_t idx = busno_to_phbid(num);
459
460 return split_queue_offsets[idx];
461}
462
463static inline unsigned long tar_offset(unsigned char num)
464{
465 size_t idx = busno_to_phbid(num);
466
467 return tar_offsets[idx];
468}
469
470static inline unsigned long phb_offset(unsigned char num)
471{
472 size_t idx = busno_to_phbid(num);
473
474 return phb_offsets[idx];
475}
476
477static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
478{
479 unsigned long target = ((unsigned long)bar) | offset;
480 return (void __iomem*)target;
481}
482
483static void tce_cache_blast(struct iommu_table *tbl)
484{
485 u64 val;
486 u32 aer;
487 int i = 0;
488 void __iomem *bbar = tbl->bbar;
489 void __iomem *target;
490
491 /* disable arbitration on the bus */
492 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
493 aer = readl(target);
494 writel(0, target);
495
496 /* read plssr to ensure it got there */
497 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
498 val = readl(target);
499
500 /* poll split queues until all DMA activity is done */
501 target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
502 do {
503 val = readq(target);
504 i++;
505 } while ((val & 0xff) != 0xff && i < 100);
506 if (i == 100)
507 printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
508 "continuing anyway\n");
509
510 /* invalidate TCE cache */
511 target = calgary_reg(bbar, tar_offset(tbl->it_busno));
512 writeq(tbl->tar_val, target);
513
514 /* enable arbitration */
515 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
516 writel(aer, target);
517 (void)readl(target); /* flush */
518}
519
520static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
521 u64 limit)
522{
523 unsigned int numpages;
524
525 limit = limit | 0xfffff;
526 limit++;
527
528 numpages = ((limit - start) >> PAGE_SHIFT);
529 iommu_range_reserve(dev->sysdata, start, numpages);
530}
531
532static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
533{
534 void __iomem *target;
535 u64 low, high, sizelow;
536 u64 start, limit;
537 struct iommu_table *tbl = dev->sysdata;
538 unsigned char busnum = dev->bus->number;
539 void __iomem *bbar = tbl->bbar;
540
541 /* peripheral MEM_1 region */
542 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
543 low = be32_to_cpu(readl(target));
544 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
545 high = be32_to_cpu(readl(target));
546 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
547 sizelow = be32_to_cpu(readl(target));
548
549 start = (high << 32) | low;
550 limit = sizelow;
551
552 calgary_reserve_mem_region(dev, start, limit);
553}
554
555static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
556{
557 void __iomem *target;
558 u32 val32;
559 u64 low, high, sizelow, sizehigh;
560 u64 start, limit;
561 struct iommu_table *tbl = dev->sysdata;
562 unsigned char busnum = dev->bus->number;
563 void __iomem *bbar = tbl->bbar;
564
565 /* is it enabled? */
566 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
567 val32 = be32_to_cpu(readl(target));
568 if (!(val32 & PHB_MEM2_ENABLE))
569 return;
570
571 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
572 low = be32_to_cpu(readl(target));
573 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
574 high = be32_to_cpu(readl(target));
575 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
576 sizelow = be32_to_cpu(readl(target));
577 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
578 sizehigh = be32_to_cpu(readl(target));
579
580 start = (high << 32) | low;
581 limit = (sizehigh << 32) | sizelow;
582
583 calgary_reserve_mem_region(dev, start, limit);
584}
585
586/*
587 * some regions of the IO address space do not get translated, so we
588 * must not give devices IO addresses in those regions. The regions
589 * are the 640KB-1MB region and the two PCI peripheral memory holes.
590 * Reserve all of them in the IOMMU bitmap to avoid giving them out
591 * later.
592 */
593static void __init calgary_reserve_regions(struct pci_dev *dev)
594{
595 unsigned int npages;
596 void __iomem *bbar;
597 unsigned char busnum;
598 u64 start;
599 struct iommu_table *tbl = dev->sysdata;
600
601 bbar = tbl->bbar;
602 busnum = dev->bus->number;
603
604 /* reserve bad_dma_address in case it's a legal address */
605 iommu_range_reserve(tbl, bad_dma_address, 1);
606
607 /* avoid the BIOS/VGA first 640KB-1MB region */
608 start = (640 * 1024);
609 npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
610 iommu_range_reserve(tbl, start, npages);
611
612 /* reserve the two PCI peripheral memory regions in IO space */
613 calgary_reserve_peripheral_mem_1(dev);
614 calgary_reserve_peripheral_mem_2(dev);
615}
616
617static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
618{
619 u64 val64;
620 u64 table_phys;
621 void __iomem *target;
622 int ret;
623 struct iommu_table *tbl;
624
625 /* build TCE tables for each PHB */
626 ret = build_tce_table(dev, bbar);
627 if (ret)
628 return ret;
629
630 calgary_reserve_regions(dev);
631
632 /* set TARs for each PHB */
633 target = calgary_reg(bbar, tar_offset(dev->bus->number));
634 val64 = be64_to_cpu(readq(target));
635
636 /* zero out all TAR bits under sw control */
637 val64 &= ~TAR_SW_BITS;
638
639 tbl = dev->sysdata;
640 table_phys = (u64)__pa(tbl->it_base);
641 val64 |= table_phys;
642
643 BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
644 val64 |= (u64) specified_table_size;
645
646 tbl->tar_val = cpu_to_be64(val64);
647 writeq(tbl->tar_val, target);
648 readq(target); /* flush */
649
650 return 0;
651}
652
653static void __init calgary_free_tar(struct pci_dev *dev)
654{
655 u64 val64;
656 struct iommu_table *tbl = dev->sysdata;
657 void __iomem *target;
658
659 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
660 val64 = be64_to_cpu(readq(target));
661 val64 &= ~TAR_SW_BITS;
662 writeq(cpu_to_be64(val64), target);
663 readq(target); /* flush */
664
665 kfree(tbl);
666 dev->sysdata = NULL;
667}
668
669static void calgary_watchdog(unsigned long data)
670{
671 struct pci_dev *dev = (struct pci_dev *)data;
672 struct iommu_table *tbl = dev->sysdata;
673 void __iomem *bbar = tbl->bbar;
674 u32 val32;
675 void __iomem *target;
676
677 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
678 val32 = be32_to_cpu(readl(target));
679
680 /* If no error, the agent ID in the CSR is not valid */
681 if (val32 & CSR_AGENT_MASK) {
682 printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
683 "CSR = %#x\n", dev->bus->number, val32);
684 writel(0, target);
685
686 /* Disable bus that caused the error */
687 target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
688 PHB_CONFIG_RW_OFFSET);
689 val32 = be32_to_cpu(readl(target));
690 val32 |= PHB_SLOT_DISABLE;
691 writel(cpu_to_be32(val32), target);
692 readl(target); /* flush */
693 } else {
694 /* Reset the timer */
695 mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
696 }
697}
698
699static void __init calgary_enable_translation(struct pci_dev *dev)
700{
701 u32 val32;
702 unsigned char busnum;
703 void __iomem *target;
704 void __iomem *bbar;
705 struct iommu_table *tbl;
706
707 busnum = dev->bus->number;
708 tbl = dev->sysdata;
709 bbar = tbl->bbar;
710
711 /* enable TCE in PHB Config Register */
712 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
713 val32 = be32_to_cpu(readl(target));
714 val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
715
716 printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
717 printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
718 "bus.\n");
719
720 writel(cpu_to_be32(val32), target);
721 readl(target); /* flush */
722
723 init_timer(&tbl->watchdog_timer);
724 tbl->watchdog_timer.function = &calgary_watchdog;
725 tbl->watchdog_timer.data = (unsigned long)dev;
726 mod_timer(&tbl->watchdog_timer, jiffies);
727}
728
729static void __init calgary_disable_translation(struct pci_dev *dev)
730{
731 u32 val32;
732 unsigned char busnum;
733 void __iomem *target;
734 void __iomem *bbar;
735 struct iommu_table *tbl;
736
737 busnum = dev->bus->number;
738 tbl = dev->sysdata;
739 bbar = tbl->bbar;
740
741 /* disable TCE in PHB Config Register */
742 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
743 val32 = be32_to_cpu(readl(target));
744 val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
745
746 printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
747 writel(cpu_to_be32(val32), target);
748 readl(target); /* flush */
749
750 del_timer_sync(&tbl->watchdog_timer);
751}
752
753static inline unsigned int __init locate_register_space(struct pci_dev *dev)
754{
755 int rionodeid;
756 u32 address;
757
758 rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
759 /*
760 * register space address calculation as follows:
761 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
762 * ChassisBase is always zero for x366/x260/x460
763 * RioNodeId is 2 for first Calgary, 3 for second Calgary
764 */
765 address = START_ADDRESS -
766 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
767 (0x100000) * (rionodeid - CHASSIS_BASE);
768 return address;
769}
770
771static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
772{
773 dev->sysdata = NULL;
774 dev->bus->self = dev;
775
776 return 0;
777}
778
779static int __init calgary_init_one(struct pci_dev *dev)
780{
781 u32 address;
782 void __iomem *bbar;
783 int ret;
784
785 address = locate_register_space(dev);
786 /* map entire 1MB of Calgary config space */
787 bbar = ioremap_nocache(address, 1024 * 1024);
788 if (!bbar) {
789 ret = -ENODATA;
790 goto done;
791 }
792
793 ret = calgary_setup_tar(dev, bbar);
794 if (ret)
795 goto iounmap;
796
797 dev->bus->self = dev;
798 calgary_enable_translation(dev);
799
800 return 0;
801
802iounmap:
803 iounmap(bbar);
804done:
805 return ret;
806}
807
808static int __init calgary_init(void)
809{
810 int i, ret = -ENODEV;
811 struct pci_dev *dev = NULL;
812
813 for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
814 dev = pci_get_device(PCI_VENDOR_ID_IBM,
815 PCI_DEVICE_ID_IBM_CALGARY,
816 dev);
817 if (!dev)
818 break;
819 if (!translate_phb(dev)) {
820 calgary_init_one_nontraslated(dev);
821 continue;
822 }
823 if (!tce_table_kva[i] && !translate_empty_slots) {
824 pci_dev_put(dev);
825 continue;
826 }
827 ret = calgary_init_one(dev);
828 if (ret)
829 goto error;
830 }
831
832 return ret;
833
834error:
835 for (i--; i >= 0; i--) {
836 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
837 PCI_DEVICE_ID_IBM_CALGARY,
838 dev);
839 if (!translate_phb(dev)) {
840 pci_dev_put(dev);
841 continue;
842 }
843 if (!tce_table_kva[i] && !translate_empty_slots)
844 continue;
845 calgary_disable_translation(dev);
846 calgary_free_tar(dev);
847 pci_dev_put(dev);
848 }
849
850 return ret;
851}
852
853static inline int __init determine_tce_table_size(u64 ram)
854{
855 int ret;
856
857 if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
858 return specified_table_size;
859
860 /*
861 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
862 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
863 * larger table size has twice as many entries, so shift the
864 * max ram address by 13 to divide by 8K and then look at the
865 * order of the result to choose between 0-7.
866 */
867 ret = get_order(ram >> 13);
868 if (ret > TCE_TABLE_SIZE_8M)
869 ret = TCE_TABLE_SIZE_8M;
870
871 return ret;
872}
873
874void __init detect_calgary(void)
875{
876 u32 val;
877 int bus, table_idx;
878 void *tbl;
879 int detected = 0;
880
881 /*
882 * if the user specified iommu=off or iommu=soft or we found
883 * another HW IOMMU already, bail out.
884 */
885 if (swiotlb || no_iommu || iommu_detected)
886 return;
887
888 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
889
890 for (bus = 0, table_idx = 0;
891 bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
892 bus++) {
893 BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
894 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
895 continue;
896 if (test_bit(bus, translation_disabled)) {
897 printk(KERN_INFO "Calgary: translation is disabled for "
898 "PHB 0x%x\n", bus);
899 /* skip this phb, don't allocate a tbl for it */
900 tce_table_kva[table_idx] = NULL;
901 table_idx++;
902 continue;
903 }
904 /*
905 * scan the first slot of the PCI bus to see if there
906 * are any devices present
907 */
908 val = read_pci_config(bus, 1, 0, 0);
909 if (val != 0xffffffff || translate_empty_slots) {
910 tbl = alloc_tce_table();
911 if (!tbl)
912 goto cleanup;
913 detected = 1;
914 } else
915 tbl = NULL;
916
917 tce_table_kva[table_idx] = tbl;
918 table_idx++;
919 }
920
921 if (detected) {
922 iommu_detected = 1;
923 calgary_detected = 1;
924 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
925 "TCE table spec is %d.\n", specified_table_size);
926 }
927 return;
928
929cleanup:
930 for (--table_idx; table_idx >= 0; --table_idx)
931 if (tce_table_kva[table_idx])
932 free_tce_table(tce_table_kva[table_idx]);
933}
934
935int __init calgary_iommu_init(void)
936{
937 int ret;
938
939 if (no_iommu || swiotlb)
940 return -ENODEV;
941
942 if (!calgary_detected)
943 return -ENODEV;
944
945 /* ok, we're trying to use Calgary - let's roll */
946 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
947
948 ret = calgary_init();
949 if (ret) {
950 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
951 "falling back to no_iommu\n", ret);
952 if (end_pfn > MAX_DMA32_PFN)
953 printk(KERN_ERR "WARNING more than 4GB of memory, "
954 "32bit PCI may malfunction.\n");
955 return ret;
956 }
957
958 force_iommu = 1;
959 dma_ops = &calgary_dma_ops;
960
961 return 0;
962}
963
964static int __init calgary_parse_options(char *p)
965{
966 unsigned int bridge;
967 size_t len;
968 char* endp;
969
970 while (*p) {
971 if (!strncmp(p, "64k", 3))
972 specified_table_size = TCE_TABLE_SIZE_64K;
973 else if (!strncmp(p, "128k", 4))
974 specified_table_size = TCE_TABLE_SIZE_128K;
975 else if (!strncmp(p, "256k", 4))
976 specified_table_size = TCE_TABLE_SIZE_256K;
977 else if (!strncmp(p, "512k", 4))
978 specified_table_size = TCE_TABLE_SIZE_512K;
979 else if (!strncmp(p, "1M", 2))
980 specified_table_size = TCE_TABLE_SIZE_1M;
981 else if (!strncmp(p, "2M", 2))
982 specified_table_size = TCE_TABLE_SIZE_2M;
983 else if (!strncmp(p, "4M", 2))
984 specified_table_size = TCE_TABLE_SIZE_4M;
985 else if (!strncmp(p, "8M", 2))
986 specified_table_size = TCE_TABLE_SIZE_8M;
987
988 len = strlen("translate_empty_slots");
989 if (!strncmp(p, "translate_empty_slots", len))
990 translate_empty_slots = 1;
991
992 len = strlen("disable");
993 if (!strncmp(p, "disable", len)) {
994 p += len;
995 if (*p == '=')
996 ++p;
997 if (*p == '\0')
998 break;
999 bridge = simple_strtol(p, &endp, 0);
1000 if (p == endp)
1001 break;
1002
1003 if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
1004 printk(KERN_INFO "Calgary: disabling "
1005 "translation for PHB 0x%x\n", bridge);
1006 set_bit(bridge, translation_disabled);
1007 }
1008 }
1009
1010 p = strpbrk(p, ",");
1011 if (!p)
1012 break;
1013
1014 p++; /* skip ',' */
1015 }
1016 return 1;
1017}
1018__setup("calgary=", calgary_parse_options);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index a9275c9557cf..9c44f4f2433d 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h> 11#include <asm/proto.h>
12#include <asm/calgary.h>
12 13
13int iommu_merge __read_mostly = 0; 14int iommu_merge __read_mostly = 0;
14EXPORT_SYMBOL(iommu_merge); 15EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0; 34int force_iommu __read_mostly= 0;
34#endif 35#endif
35 36
37/* Set this to 1 if there is a HW IOMMU in the system */
38int iommu_detected __read_mostly = 0;
39
36/* Dummy device used for NULL arguments (normally ISA). Better would 40/* Dummy device used for NULL arguments (normally ISA). Better would
37 be probably a smaller DMA mask, but this is bug-to-bug compatible 41 be probably a smaller DMA mask, but this is bug-to-bug compatible
38 to i386. */ 42 to i386. */
39struct device fallback_dev = { 43struct device fallback_dev = {
40 .bus_id = "fallback device", 44 .bus_id = "fallback device",
41 .coherent_dma_mask = 0xffffffff, 45 .coherent_dma_mask = DMA_32BIT_MASK,
42 .dma_mask = &fallback_dev.coherent_dma_mask, 46 .dma_mask = &fallback_dev.coherent_dma_mask,
43}; 47};
44 48
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
77 dev = &fallback_dev; 81 dev = &fallback_dev;
78 dma_mask = dev->coherent_dma_mask; 82 dma_mask = dev->coherent_dma_mask;
79 if (dma_mask == 0) 83 if (dma_mask == 0)
80 dma_mask = 0xffffffff; 84 dma_mask = DMA_32BIT_MASK;
81 85
82 /* Don't invoke OOM killer */ 86 /* Don't invoke OOM killer */
83 gfp |= __GFP_NORETRY; 87 gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
90 larger than 16MB and in this case we have a chance of 94 larger than 16MB and in this case we have a chance of
91 finding fitting memory in the next higher zone first. If 95 finding fitting memory in the next higher zone first. If
92 not retry with true GFP_DMA. -AK */ 96 not retry with true GFP_DMA. -AK */
93 if (dma_mask <= 0xffffffff) 97 if (dma_mask <= DMA_32BIT_MASK)
94 gfp |= GFP_DMA32; 98 gfp |= GFP_DMA32;
95 99
96 again: 100 again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
111 115
112 /* Don't use the 16MB ZONE_DMA unless absolutely 116 /* Don't use the 16MB ZONE_DMA unless absolutely
113 needed. It's better to use remapping first. */ 117 needed. It's better to use remapping first. */
114 if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { 118 if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
115 gfp = (gfp & ~GFP_DMA32) | GFP_DMA; 119 gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
116 goto again; 120 goto again;
117 } 121 }
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
174 /* Copied from i386. Doesn't make much sense, because it will 178 /* Copied from i386. Doesn't make much sense, because it will
175 only work for pci_alloc_coherent. 179 only work for pci_alloc_coherent.
176 The caller just has to use GFP_DMA in this case. */ 180 The caller just has to use GFP_DMA in this case. */
177 if (mask < 0x00ffffff) 181 if (mask < DMA_24BIT_MASK)
178 return 0; 182 return 0;
179 183
180 /* Tell the device to use SAC when IOMMU force is on. This 184 /* Tell the device to use SAC when IOMMU force is on. This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
189 SAC for these. Assume all masks <= 40 bits are of this 193 SAC for these. Assume all masks <= 40 bits are of this
190 type. Normally this doesn't make any difference, but gives 194 type. Normally this doesn't make any difference, but gives
191 more gentle handling of IOMMU overflow. */ 195 more gentle handling of IOMMU overflow. */
192 if (iommu_sac_force && (mask >= 0xffffffffffULL)) { 196 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
193 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); 197 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
194 return 0; 198 return 0;
195 } 199 }
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
266 swiotlb = 1; 270 swiotlb = 1;
267#endif 271#endif
268 272
269#ifdef CONFIG_GART_IOMMU 273#ifdef CONFIG_IOMMU
270 gart_parse_options(p); 274 gart_parse_options(p);
271#endif 275#endif
272 276
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
276 } 280 }
277 return 1; 281 return 1;
278} 282}
283__setup("iommu=", iommu_setup);
284
285void __init pci_iommu_alloc(void)
286{
287 /*
288 * The order of these functions is important for
289 * fall-back/fail-over reasons
290 */
291#ifdef CONFIG_IOMMU
292 iommu_hole_init();
293#endif
294
295#ifdef CONFIG_CALGARY_IOMMU
296 detect_calgary();
297#endif
298
299#ifdef CONFIG_SWIOTLB
300 pci_swiotlb_init();
301#endif
302}
303
304static int __init pci_iommu_init(void)
305{
306#ifdef CONFIG_CALGARY_IOMMU
307 calgary_iommu_init();
308#endif
309
310#ifdef CONFIG_IOMMU
311 gart_iommu_init();
312#endif
313
314 no_iommu_init();
315 return 0;
316}
317
318/* Must execute after PCI subsystem */
319fs_initcall(pci_iommu_init);
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 82a7c9bfdfa0..4ca674d16b09 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
32#include <asm/kdebug.h> 32#include <asm/kdebug.h>
33#include <asm/swiotlb.h> 33#include <asm/swiotlb.h>
34#include <asm/dma.h> 34#include <asm/dma.h>
35#include <asm/k8.h>
35 36
36unsigned long iommu_bus_base; /* GART remapping area (physical) */ 37unsigned long iommu_bus_base; /* GART remapping area (physical) */
37static unsigned long iommu_size; /* size of remapping area bytes */ 38static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
46 also seen with Qlogic at least). */ 47 also seen with Qlogic at least). */
47int iommu_fullflush = 1; 48int iommu_fullflush = 1;
48 49
49#define MAX_NB 8
50
51/* Allocation bitmap for the remapping area */ 50/* Allocation bitmap for the remapping area */
52static DEFINE_SPINLOCK(iommu_bitmap_lock); 51static DEFINE_SPINLOCK(iommu_bitmap_lock);
53static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ 52static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
63#define to_pages(addr,size) \ 62#define to_pages(addr,size) \
64 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) 63 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
65 64
66#define for_all_nb(dev) \
67 dev = NULL; \
68 while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
69
70static struct pci_dev *northbridges[MAX_NB];
71static u32 northbridge_flush_word[MAX_NB];
72
73#define EMERGENCY_PAGES 32 /* = 128KB */ 65#define EMERGENCY_PAGES 32 /* = 128KB */
74 66
75#ifdef CONFIG_AGP 67#ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
93 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); 85 offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
94 if (offset == -1) { 86 if (offset == -1) {
95 need_flush = 1; 87 need_flush = 1;
96 offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size); 88 offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
97 } 89 }
98 if (offset != -1) { 90 if (offset != -1) {
99 set_bit_string(iommu_gart_bitmap, offset, size); 91 set_bit_string(iommu_gart_bitmap, offset, size);
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
120/* 112/*
121 * Use global flush state to avoid races with multiple flushers. 113 * Use global flush state to avoid races with multiple flushers.
122 */ 114 */
123static void flush_gart(struct device *dev) 115static void flush_gart(void)
124{ 116{
125 unsigned long flags; 117 unsigned long flags;
126 int flushed = 0;
127 int i, max;
128
129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 118 spin_lock_irqsave(&iommu_bitmap_lock, flags);
130 if (need_flush) { 119 if (need_flush) {
131 max = 0; 120 k8_flush_garts();
132 for (i = 0; i < MAX_NB; i++) {
133 if (!northbridges[i])
134 continue;
135 pci_write_config_dword(northbridges[i], 0x9c,
136 northbridge_flush_word[i] | 1);
137 flushed++;
138 max = i;
139 }
140 for (i = 0; i <= max; i++) {
141 u32 w;
142 if (!northbridges[i])
143 continue;
144 /* Make sure the hardware actually executed the flush. */
145 for (;;) {
146 pci_read_config_dword(northbridges[i], 0x9c, &w);
147 if (!(w & 1))
148 break;
149 cpu_relax();
150 }
151 }
152 if (!flushed)
153 printk("nothing to flush?\n");
154 need_flush = 0; 121 need_flush = 0;
155 } 122 }
156 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 123 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
157} 124}
158 125
159
160
161#ifdef CONFIG_IOMMU_LEAK 126#ifdef CONFIG_IOMMU_LEAK
162 127
163#define SET_LEAK(x) if (iommu_leak_tab) \ 128#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
266 size_t size, int dir) 231 size_t size, int dir)
267{ 232{
268 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 233 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
269 flush_gart(dev); 234 flush_gart();
270 return map; 235 return map;
271} 236}
272 237
@@ -289,6 +254,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
289} 254}
290 255
291/* 256/*
257 * Free a DMA mapping.
258 */
259void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
260 size_t size, int direction)
261{
262 unsigned long iommu_page;
263 int npages;
264 int i;
265
266 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
267 dma_addr >= iommu_bus_base + iommu_size)
268 return;
269 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
270 npages = to_pages(dma_addr, size);
271 for (i = 0; i < npages; i++) {
272 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
273 CLEAR_LEAK(iommu_page + i);
274 }
275 free_iommu(iommu_page, npages);
276}
277
278/*
292 * Wrapper for pci_unmap_single working with scatterlists. 279 * Wrapper for pci_unmap_single working with scatterlists.
293 */ 280 */
294void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 281void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
299 struct scatterlist *s = &sg[i]; 286 struct scatterlist *s = &sg[i];
300 if (!s->dma_length || !s->length) 287 if (!s->dma_length || !s->length)
301 break; 288 break;
302 dma_unmap_single(dev, s->dma_address, s->dma_length, dir); 289 gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
303 } 290 }
304} 291}
305 292
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
329 s->dma_address = addr; 316 s->dma_address = addr;
330 s->dma_length = s->length; 317 s->dma_length = s->length;
331 } 318 }
332 flush_gart(dev); 319 flush_gart();
333 return nents; 320 return nents;
334} 321}
335 322
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
436 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) 423 if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
437 goto error; 424 goto error;
438 out++; 425 out++;
439 flush_gart(dev); 426 flush_gart();
440 if (out < nents) 427 if (out < nents)
441 sg[out].dma_length = 0; 428 sg[out].dma_length = 0;
442 return out; 429 return out;
443 430
444error: 431error:
445 flush_gart(NULL); 432 flush_gart();
446 gart_unmap_sg(dev, sg, nents, dir); 433 gart_unmap_sg(dev, sg, nents, dir);
447 /* When it was forced or merged try again in a dumb way */ 434 /* When it was forced or merged try again in a dumb way */
448 if (force_iommu || iommu_merge) { 435 if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
458 return 0; 445 return 0;
459} 446}
460 447
461/*
462 * Free a DMA mapping.
463 */
464void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
465 size_t size, int direction)
466{
467 unsigned long iommu_page;
468 int npages;
469 int i;
470
471 if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
472 dma_addr >= iommu_bus_base + iommu_size)
473 return;
474 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
475 npages = to_pages(dma_addr, size);
476 for (i = 0; i < npages; i++) {
477 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
478 CLEAR_LEAK(iommu_page + i);
479 }
480 free_iommu(iommu_page, npages);
481}
482
483static int no_agp; 448static int no_agp;
484 449
485static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 450static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
532 void *gatt; 497 void *gatt;
533 unsigned aper_base, new_aper_base; 498 unsigned aper_base, new_aper_base;
534 unsigned aper_size, gatt_size, new_aper_size; 499 unsigned aper_size, gatt_size, new_aper_size;
535 500 int i;
501
536 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 502 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
537 aper_size = aper_base = info->aper_size = 0; 503 aper_size = aper_base = info->aper_size = 0;
538 for_all_nb(dev) { 504 dev = NULL;
505 for (i = 0; i < num_k8_northbridges; i++) {
506 dev = k8_northbridges[i];
539 new_aper_base = read_aperture(dev, &new_aper_size); 507 new_aper_base = read_aperture(dev, &new_aper_size);
540 if (!new_aper_base) 508 if (!new_aper_base)
541 goto nommu; 509 goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 panic("Cannot allocate GATT table"); 526 panic("Cannot allocate GATT table");
559 memset(gatt, 0, gatt_size); 527 memset(gatt, 0, gatt_size);
560 agp_gatt_table = gatt; 528 agp_gatt_table = gatt;
561 529
562 for_all_nb(dev) { 530 for (i = 0; i < num_k8_northbridges; i++) {
563 u32 ctl; 531 u32 ctl;
564 u32 gatt_reg; 532 u32 gatt_reg;
565 533
534 dev = k8_northbridges[i];
566 gatt_reg = __pa(gatt) >> 12; 535 gatt_reg = __pa(gatt) >> 12;
567 gatt_reg <<= 4; 536 gatt_reg <<= 4;
568 pci_write_config_dword(dev, 0x98, gatt_reg); 537 pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
573 542
574 pci_write_config_dword(dev, 0x90, ctl); 543 pci_write_config_dword(dev, 0x90, ctl);
575 } 544 }
576 flush_gart(NULL); 545 flush_gart();
577 546
578 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 547 printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
579 return 0; 548 return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
602 .unmap_sg = gart_unmap_sg, 571 .unmap_sg = gart_unmap_sg,
603}; 572};
604 573
605static int __init pci_iommu_init(void) 574void __init gart_iommu_init(void)
606{ 575{
607 struct agp_kern_info info; 576 struct agp_kern_info info;
608 unsigned long aper_size; 577 unsigned long aper_size;
609 unsigned long iommu_start; 578 unsigned long iommu_start;
610 struct pci_dev *dev;
611 unsigned long scratch; 579 unsigned long scratch;
612 long i; 580 long i;
613 581
582 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
583 printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
584 return;
585 }
586
614#ifndef CONFIG_AGP_AMD64 587#ifndef CONFIG_AGP_AMD64
615 no_agp = 1; 588 no_agp = 1;
616#else 589#else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
622#endif 595#endif
623 596
624 if (swiotlb) 597 if (swiotlb)
625 return -1; 598 return;
599
600 /* Did we detect a different HW IOMMU? */
601 if (iommu_detected && !iommu_aperture)
602 return;
626 603
627 if (no_iommu || 604 if (no_iommu ||
628 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 605 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
634 "but IOMMU not available.\n" 611 "but IOMMU not available.\n"
635 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 612 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
636 } 613 }
637 return -1; 614 return;
638 }
639
640 i = 0;
641 for_all_nb(dev)
642 i++;
643 if (i > MAX_NB) {
644 printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
645 return -1;
646 } 615 }
647 616
648 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 617 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
707 for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 676 for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
708 iommu_gatt_base[i] = gart_unmapped_entry; 677 iommu_gatt_base[i] = gart_unmapped_entry;
709 678
710 for_all_nb(dev) { 679 flush_gart();
711 u32 flag;
712 int cpu = PCI_SLOT(dev->devfn) - 24;
713 if (cpu >= MAX_NB)
714 continue;
715 northbridges[cpu] = dev;
716 pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
717 northbridge_flush_word[cpu] = flag;
718 }
719
720 flush_gart(NULL);
721
722 dma_ops = &gart_dma_ops; 680 dma_ops = &gart_dma_ops;
723
724 return 0;
725} 681}
726 682
727/* Must execute after PCI subsystem */
728fs_initcall(pci_iommu_init);
729
730void gart_parse_options(char *p) 683void gart_parse_options(char *p)
731{ 684{
732 int arg; 685 int arg;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 1f6ecc62061d..c4c3cc36ac5b 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -4,6 +4,8 @@
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/dma-mapping.h>
8
7#include <asm/proto.h> 9#include <asm/proto.h>
8#include <asm/processor.h> 10#include <asm/processor.h>
9#include <asm/dma.h> 11#include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
12check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 14check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
13{ 15{
14 if (hwdev && bus + size > *hwdev->dma_mask) { 16 if (hwdev && bus + size > *hwdev->dma_mask) {
15 if (*hwdev->dma_mask >= 0xffffffffULL) 17 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
16 printk(KERN_ERR 18 printk(KERN_ERR
17 "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", 19 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
18 name, (long long)bus, size, (long long)*hwdev->dma_mask); 20 name, (long long)bus, size,
21 (long long)*hwdev->dma_mask);
19 return 0; 22 return 0;
20 } 23 }
21 return 1; 24 return 1;
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 990ed67896f2..ebdb77fe2057 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
31void pci_swiotlb_init(void) 31void pci_swiotlb_init(void)
32{ 32{
33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 33 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
34 if (!iommu_aperture && !no_iommu && 34 if (!iommu_detected && !no_iommu &&
35 (end_pfn > MAX_DMA32_PFN || force_iommu)) 35 (end_pfn > MAX_DMA32_PFN || force_iommu))
36 swiotlb = 1; 36 swiotlb = 1;
37 if (swiotlb) { 37 if (swiotlb) {
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index fb903e65e079..ca56e19b8b6e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -10,7 +10,6 @@
10 * Andi Kleen. 10 * Andi Kleen.
11 * 11 *
12 * CPU hotplug support - ashok.raj@intel.com 12 * CPU hotplug support - ashok.raj@intel.com
13 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
14 */ 13 */
15 14
16/* 15/*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
64 * Powermanagement idle function, if any.. 63 * Powermanagement idle function, if any..
65 */ 64 */
66void (*pm_idle)(void); 65void (*pm_idle)(void);
66EXPORT_SYMBOL(pm_idle);
67static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 67static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68 68
69static ATOMIC_NOTIFIER_HEAD(idle_notifier); 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
111{ 111{
112 local_irq_enable(); 112 local_irq_enable();
113 113
114 clear_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status &= ~TS_POLLING;
115 smp_mb__after_clear_bit(); 115 smp_mb__after_clear_bit();
116 while (!need_resched()) { 116 while (!need_resched()) {
117 local_irq_disable(); 117 local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
120 else 120 else
121 local_irq_enable(); 121 local_irq_enable();
122 } 122 }
123 set_thread_flag(TIF_POLLING_NRFLAG); 123 current_thread_info()->status |= TS_POLLING;
124} 124}
125 125
126/* 126/*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
203 */ 203 */
204void cpu_idle (void) 204void cpu_idle (void)
205{ 205{
206 set_thread_flag(TIF_POLLING_NRFLAG); 206 current_thread_info()->status |= TS_POLLING;
207
208 /* endless idle loop with no priority at all */ 207 /* endless idle loop with no priority at all */
209 while (1) { 208 while (1) {
210 while (!need_resched()) { 209 while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
335{ 334{
336 printk("CPU %d:", smp_processor_id()); 335 printk("CPU %d:", smp_processor_id());
337 __show_regs(regs); 336 __show_regs(regs);
338 show_trace(&regs->rsp); 337 show_trace(NULL, regs, (void *)(regs + 1));
339} 338}
340 339
341/* 340/*
@@ -365,8 +364,11 @@ void flush_thread(void)
365 struct task_struct *tsk = current; 364 struct task_struct *tsk = current;
366 struct thread_info *t = current_thread_info(); 365 struct thread_info *t = current_thread_info();
367 366
368 if (t->flags & _TIF_ABI_PENDING) 367 if (t->flags & _TIF_ABI_PENDING) {
369 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); 368 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
369 if (t->flags & _TIF_IA32)
370 current_thread_info()->status |= TS_COMPAT;
371 }
370 372
371 tsk->thread.debugreg0 = 0; 373 tsk->thread.debugreg0 = 0;
372 tsk->thread.debugreg1 = 0; 374 tsk->thread.debugreg1 = 0;
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 57117b8beb2b..2d6769847456 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -20,6 +20,7 @@
20 * Power off function, if any 20 * Power off function, if any
21 */ 21 */
22void (*pm_power_off)(void); 22void (*pm_power_off)(void);
23EXPORT_SYMBOL(pm_power_off);
23 24
24static long no_idt[3]; 25static long no_idt[3];
25static enum { 26static enum {
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 143c65031539..1129918ede82 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Nov 2001 Dave Jones <davej@suse.de> 6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code. 7 * Forked from i386 setup code.
8 *
9 * $Id$
10 */ 8 */
11 9
12/* 10/*
@@ -65,9 +63,7 @@
65#include <asm/setup.h> 63#include <asm/setup.h>
66#include <asm/mach_apic.h> 64#include <asm/mach_apic.h>
67#include <asm/numa.h> 65#include <asm/numa.h>
68#include <asm/swiotlb.h>
69#include <asm/sections.h> 66#include <asm/sections.h>
70#include <asm/gart-mapping.h>
71#include <asm/dmi.h> 67#include <asm/dmi.h>
72 68
73/* 69/*
@@ -75,6 +71,7 @@
75 */ 71 */
76 72
77struct cpuinfo_x86 boot_cpu_data __read_mostly; 73struct cpuinfo_x86 boot_cpu_data __read_mostly;
74EXPORT_SYMBOL(boot_cpu_data);
78 75
79unsigned long mmu_cr4_features; 76unsigned long mmu_cr4_features;
80 77
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA];
103 * Setup options 100 * Setup options
104 */ 101 */
105struct screen_info screen_info; 102struct screen_info screen_info;
103EXPORT_SYMBOL(screen_info);
106struct sys_desc_table_struct { 104struct sys_desc_table_struct {
107 unsigned short length; 105 unsigned short length;
108 unsigned char table[0]; 106 unsigned char table[0];
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
474} 472}
475#endif 473#endif
476 474
477/* Use inline assembly to define this because the nops are defined
478 as inline assembly strings in the include files and we cannot
479 get them easily into strings. */
480asm("\t.data\nk8nops: "
481 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
482 K8_NOP7 K8_NOP8);
483
484extern unsigned char k8nops[];
485static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
486 NULL,
487 k8nops,
488 k8nops + 1,
489 k8nops + 1 + 2,
490 k8nops + 1 + 2 + 3,
491 k8nops + 1 + 2 + 3 + 4,
492 k8nops + 1 + 2 + 3 + 4 + 5,
493 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
494 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
495};
496
497extern char __vsyscall_0;
498
499/* Replace instructions with better alternatives for this CPU type.
500
501 This runs before SMP is initialized to avoid SMP problems with
502 self modifying code. This implies that assymetric systems where
503 APs have less capabilities than the boot processor are not handled.
504 In this case boot with "noreplacement". */
505void apply_alternatives(void *start, void *end)
506{
507 struct alt_instr *a;
508 int diff, i, k;
509 for (a = start; (void *)a < end; a++) {
510 u8 *instr;
511
512 if (!boot_cpu_has(a->cpuid))
513 continue;
514
515 BUG_ON(a->replacementlen > a->instrlen);
516 instr = a->instr;
517 /* vsyscall code is not mapped yet. resolve it manually. */
518 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
519 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
520 __inline_memcpy(instr, a->replacement, a->replacementlen);
521 diff = a->instrlen - a->replacementlen;
522
523 /* Pad the rest with nops */
524 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
525 k = diff;
526 if (k > ASM_NOP_MAX)
527 k = ASM_NOP_MAX;
528 __inline_memcpy(instr + i, k8_nops[k], k);
529 }
530 }
531}
532
533static int no_replacement __initdata = 0;
534
535void __init alternative_instructions(void)
536{
537 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
538 if (no_replacement)
539 return;
540 apply_alternatives(__alt_instructions, __alt_instructions_end);
541}
542
543static int __init noreplacement_setup(char *s)
544{
545 no_replacement = 1;
546 return 1;
547}
548
549__setup("noreplacement", noreplacement_setup);
550
551#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 475#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
552struct edd edd; 476struct edd edd;
553#ifdef CONFIG_EDD_MODULE 477#ifdef CONFIG_EDD_MODULE
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
780 704
781 e820_setup_gap(); 705 e820_setup_gap();
782 706
783#ifdef CONFIG_GART_IOMMU
784 iommu_hole_init();
785#endif
786
787#ifdef CONFIG_VT 707#ifdef CONFIG_VT
788#if defined(CONFIG_VGA_CONSOLE) 708#if defined(CONFIG_VGA_CONSOLE)
789 conswitchp = &vga_con; 709 conswitchp = &vga_con;
@@ -868,24 +788,32 @@ static int nearby_node(int apicid)
868static void __init amd_detect_cmp(struct cpuinfo_x86 *c) 788static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
869{ 789{
870#ifdef CONFIG_SMP 790#ifdef CONFIG_SMP
871 int cpu = smp_processor_id();
872 unsigned bits; 791 unsigned bits;
873#ifdef CONFIG_NUMA 792#ifdef CONFIG_NUMA
793 int cpu = smp_processor_id();
874 int node = 0; 794 int node = 0;
875 unsigned apicid = hard_smp_processor_id(); 795 unsigned apicid = hard_smp_processor_id();
876#endif 796#endif
797 unsigned ecx = cpuid_ecx(0x80000008);
798
799 c->x86_max_cores = (ecx & 0xff) + 1;
877 800
878 bits = 0; 801 /* CPU telling us the core id bits shift? */
879 while ((1 << bits) < c->x86_max_cores) 802 bits = (ecx >> 12) & 0xF;
880 bits++; 803
804 /* Otherwise recompute */
805 if (bits == 0) {
806 while ((1 << bits) < c->x86_max_cores)
807 bits++;
808 }
881 809
882 /* Low order bits define the core id (index of core in socket) */ 810 /* Low order bits define the core id (index of core in socket) */
883 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); 811 c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
884 /* Convert the APIC ID into the socket ID */ 812 /* Convert the APIC ID into the socket ID */
885 phys_proc_id[cpu] = phys_pkg_id(bits); 813 c->phys_proc_id = phys_pkg_id(bits);
886 814
887#ifdef CONFIG_NUMA 815#ifdef CONFIG_NUMA
888 node = phys_proc_id[cpu]; 816 node = c->phys_proc_id;
889 if (apicid_to_node[apicid] != NUMA_NO_NODE) 817 if (apicid_to_node[apicid] != NUMA_NO_NODE)
890 node = apicid_to_node[apicid]; 818 node = apicid_to_node[apicid];
891 if (!node_online(node)) { 819 if (!node_online(node)) {
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
898 but in the same order as the HT nodeids. 826 but in the same order as the HT nodeids.
899 If that doesn't result in a usable node fall back to the 827 If that doesn't result in a usable node fall back to the
900 path for the previous case. */ 828 path for the previous case. */
901 int ht_nodeid = apicid - (phys_proc_id[0] << bits); 829 int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
902 if (ht_nodeid >= 0 && 830 if (ht_nodeid >= 0 &&
903 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 831 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
904 node = apicid_to_node[ht_nodeid]; 832 node = apicid_to_node[ht_nodeid];
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
908 } 836 }
909 numa_set_node(cpu, node); 837 numa_set_node(cpu, node);
910 838
911 printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", 839 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
912 cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
913#endif 840#endif
914#endif 841#endif
915} 842}
916 843
917static int __init init_amd(struct cpuinfo_x86 *c) 844static void __init init_amd(struct cpuinfo_x86 *c)
918{ 845{
919 int r;
920 unsigned level; 846 unsigned level;
921 847
922#ifdef CONFIG_SMP 848#ifdef CONFIG_SMP
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
949 if (c->x86 >= 6) 875 if (c->x86 >= 6)
950 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); 876 set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
951 877
952 r = get_model_name(c); 878 level = get_model_name(c);
953 if (!r) { 879 if (!level) {
954 switch (c->x86) { 880 switch (c->x86) {
955 case 15: 881 case 15:
956 /* Should distinguish Models here, but this is only 882 /* Should distinguish Models here, but this is only
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
965 if (c->x86_power & (1<<8)) 891 if (c->x86_power & (1<<8))
966 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 892 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
967 893
968 if (c->extended_cpuid_level >= 0x80000008) { 894 /* Multi core CPU? */
969 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 895 if (c->extended_cpuid_level >= 0x80000008)
970
971 amd_detect_cmp(c); 896 amd_detect_cmp(c);
972 }
973 897
974 return r; 898 /* Fix cpuid4 emulation for more */
899 num_cache_leaves = 3;
975} 900}
976 901
977static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 902static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
979#ifdef CONFIG_SMP 904#ifdef CONFIG_SMP
980 u32 eax, ebx, ecx, edx; 905 u32 eax, ebx, ecx, edx;
981 int index_msb, core_bits; 906 int index_msb, core_bits;
982 int cpu = smp_processor_id();
983 907
984 cpuid(1, &eax, &ebx, &ecx, &edx); 908 cpuid(1, &eax, &ebx, &ecx, &edx);
985 909
986 910
987 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 911 if (!cpu_has(c, X86_FEATURE_HT))
988 return; 912 return;
913 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
914 goto out;
989 915
990 smp_num_siblings = (ebx & 0xff0000) >> 16; 916 smp_num_siblings = (ebx & 0xff0000) >> 16;
991 917
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1000 } 926 }
1001 927
1002 index_msb = get_count_order(smp_num_siblings); 928 index_msb = get_count_order(smp_num_siblings);
1003 phys_proc_id[cpu] = phys_pkg_id(index_msb); 929 c->phys_proc_id = phys_pkg_id(index_msb);
1004
1005 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
1006 phys_proc_id[cpu]);
1007 930
1008 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 931 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
1009 932
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1011 934
1012 core_bits = get_count_order(c->x86_max_cores); 935 core_bits = get_count_order(c->x86_max_cores);
1013 936
1014 cpu_core_id[cpu] = phys_pkg_id(index_msb) & 937 c->cpu_core_id = phys_pkg_id(index_msb) &
1015 ((1 << core_bits) - 1); 938 ((1 << core_bits) - 1);
1016
1017 if (c->x86_max_cores > 1)
1018 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
1019 cpu_core_id[cpu]);
1020 } 939 }
940out:
941 if ((c->x86_max_cores * smp_num_siblings) > 1) {
942 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
943 printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
944 }
945
1021#endif 946#endif
1022} 947}
1023 948
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
1026 */ 951 */
1027static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) 952static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
1028{ 953{
1029 unsigned int eax; 954 unsigned int eax, t;
1030 955
1031 if (c->cpuid_level < 4) 956 if (c->cpuid_level < 4)
1032 return 1; 957 return 1;
1033 958
1034 __asm__("cpuid" 959 cpuid_count(4, 0, &eax, &t, &t, &t);
1035 : "=a" (eax)
1036 : "0" (4), "c" (0)
1037 : "bx", "dx");
1038 960
1039 if (eax & 0x1f) 961 if (eax & 0x1f)
1040 return ((eax >> 26) + 1); 962 return ((eax >> 26) + 1);
@@ -1047,16 +969,17 @@ static void srat_detect_node(void)
1047#ifdef CONFIG_NUMA 969#ifdef CONFIG_NUMA
1048 unsigned node; 970 unsigned node;
1049 int cpu = smp_processor_id(); 971 int cpu = smp_processor_id();
972 int apicid = hard_smp_processor_id();
1050 973
1051 /* Don't do the funky fallback heuristics the AMD version employs 974 /* Don't do the funky fallback heuristics the AMD version employs
1052 for now. */ 975 for now. */
1053 node = apicid_to_node[hard_smp_processor_id()]; 976 node = apicid_to_node[apicid];
1054 if (node == NUMA_NO_NODE) 977 if (node == NUMA_NO_NODE)
1055 node = first_node(node_online_map); 978 node = first_node(node_online_map);
1056 numa_set_node(cpu, node); 979 numa_set_node(cpu, node);
1057 980
1058 if (acpi_numa > 0) 981 if (acpi_numa > 0)
1059 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); 982 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
1060#endif 983#endif
1061} 984}
1062 985
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1066 unsigned n; 989 unsigned n;
1067 990
1068 init_intel_cacheinfo(c); 991 init_intel_cacheinfo(c);
992 if (c->cpuid_level > 9 ) {
993 unsigned eax = cpuid_eax(10);
994 /* Check for version and the number of counters */
995 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
996 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
997 }
998
1069 n = c->extended_cpuid_level; 999 n = c->extended_cpuid_level;
1070 if (n >= 0x80000008) { 1000 if (n >= 0x80000008) {
1071 unsigned eax = cpuid_eax(0x80000008); 1001 unsigned eax = cpuid_eax(0x80000008);
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1157 } 1087 }
1158 1088
1159#ifdef CONFIG_SMP 1089#ifdef CONFIG_SMP
1160 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; 1090 c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
1161#endif 1091#endif
1162} 1092}
1163 1093
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1284 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1214 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1285 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 1215 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1286 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, 1216 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1287 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", 1217 NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
1288 1218
1289 /* Transmeta-defined */ 1219 /* Transmeta-defined */
1290 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 1220 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1295 /* Other (Linux-defined) */ 1225 /* Other (Linux-defined) */
1296 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, 1226 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1297 "constant_tsc", NULL, NULL, 1227 "constant_tsc", NULL, NULL,
1298 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1228 "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1299 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1229 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1300 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1230 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1301 1231
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1365#ifdef CONFIG_SMP 1295#ifdef CONFIG_SMP
1366 if (smp_num_siblings * c->x86_max_cores > 1) { 1296 if (smp_num_siblings * c->x86_max_cores > 1) {
1367 int cpu = c - cpu_data; 1297 int cpu = c - cpu_data;
1368 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1298 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
1369 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); 1299 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1370 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1300 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
1371 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 1301 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1372 } 1302 }
1373#endif 1303#endif
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
1441 .show = show_cpuinfo, 1371 .show = show_cpuinfo,
1442}; 1372};
1443 1373
1444#ifdef CONFIG_INPUT_PCSPKR 1374#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
1445#include <linux/platform_device.h> 1375#include <linux/platform_device.h>
1446static __init int add_pcspkr(void) 1376static __init int add_pcspkr(void)
1447{ 1377{
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 8a691fa6d393..f5934cb4a2b6 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 1995 Linus Torvalds 3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. 4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog. 5 * See setup.c for older changelog.
6 * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
7 */ 6 */
8#include <linux/config.h> 7#include <linux/config.h>
9#include <linux/init.h> 8#include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
31cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 30cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
32 31
33struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; 32struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
33EXPORT_SYMBOL(_cpu_pda);
34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; 34struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
35 35
36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 36struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); 38char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
39 39
40unsigned long __supported_pte_mask __read_mostly = ~0UL; 40unsigned long __supported_pte_mask __read_mostly = ~0UL;
41EXPORT_SYMBOL(__supported_pte_mask);
41static int do_not_nx __cpuinitdata = 0; 42static int do_not_nx __cpuinitdata = 0;
42 43
43/* noexec=on|off 44/* noexec=on|off
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index e5f5ce7909a3..28161170fb0a 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -7,8 +7,6 @@
7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 7 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 8 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
9 * 2000-2002 x86-64 support by Andi Kleen 9 * 2000-2002 x86-64 support by Andi Kleen
10 *
11 * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
12 */ 10 */
13 11
14#include <linux/sched.h> 12#include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
239 rsp = regs->rsp - 128; 237 rsp = regs->rsp - 128;
240 238
241 /* This is the X/Open sanctioned signal stack switching. */ 239 /* This is the X/Open sanctioned signal stack switching. */
242 /* RED-PEN: redzone on that stack? */
243 if (ka->sa.sa_flags & SA_ONSTACK) { 240 if (ka->sa.sa_flags & SA_ONSTACK) {
244 if (sas_ss_flags(rsp) == 0) 241 if (sas_ss_flags(rsp) == 0)
245 rsp = current->sas_ss_sp + current->sas_ss_size; 242 rsp = current->sas_ss_sp + current->sas_ss_size;
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4a6628b14d99..8188bae9c6d5 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL); 224 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
225 preempt_enable(); 225 preempt_enable();
226} 226}
227EXPORT_SYMBOL(flush_tlb_current_task);
227 228
228void flush_tlb_mm (struct mm_struct * mm) 229void flush_tlb_mm (struct mm_struct * mm)
229{ 230{
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
244 245
245 preempt_enable(); 246 preempt_enable();
246} 247}
248EXPORT_SYMBOL(flush_tlb_mm);
247 249
248void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) 250void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
249{ 251{
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
266 268
267 preempt_enable(); 269 preempt_enable();
268} 270}
271EXPORT_SYMBOL(flush_tlb_page);
269 272
270static void do_flush_tlb_all(void* info) 273static void do_flush_tlb_all(void* info)
271{ 274{
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
443 spin_unlock(&call_lock); 446 spin_unlock(&call_lock);
444 return 0; 447 return 0;
445} 448}
449EXPORT_SYMBOL(smp_call_function);
446 450
447void smp_stop_cpu(void) 451void smp_stop_cpu(void)
448{ 452{
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
460{ 464{
461 smp_stop_cpu(); 465 smp_stop_cpu();
462 for (;;) 466 for (;;)
463 asm("hlt"); 467 halt();
464} 468}
465 469
466void smp_send_stop(void) 470void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
520 524
521int safe_smp_processor_id(void) 525int safe_smp_processor_id(void)
522{ 526{
523 int apicid, i; 527 unsigned apicid, i;
524 528
525 if (disable_apic) 529 if (disable_apic)
526 return 0; 530 return 0;
527 531
528 apicid = hard_smp_processor_id(); 532 apicid = hard_smp_processor_id();
529 if (x86_cpu_to_apicid[apicid] == apicid) 533 if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
530 return apicid; 534 return apicid;
531 535
532 for (i = 0; i < NR_CPUS; ++i) { 536 for (i = 0; i < NR_CPUS; ++i) {
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 71a7222cf9ce..4e9755179ecf 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -63,13 +63,11 @@
63 63
64/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 65int smp_num_siblings = 1;
66/* Package ID of each logical CPU */ 66EXPORT_SYMBOL(smp_num_siblings);
67u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68/* core ID of each logical CPU */
69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
70 67
71/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
72u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
70EXPORT_SYMBOL(cpu_llc_id);
73 71
74/* Bitmask of currently online CPUs */ 72/* Bitmask of currently online CPUs */
75cpumask_t cpu_online_map __read_mostly; 73cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
82 */ 80 */
83cpumask_t cpu_callin_map; 81cpumask_t cpu_callin_map;
84cpumask_t cpu_callout_map; 82cpumask_t cpu_callout_map;
83EXPORT_SYMBOL(cpu_callout_map);
85 84
86cpumask_t cpu_possible_map; 85cpumask_t cpu_possible_map;
87EXPORT_SYMBOL(cpu_possible_map); 86EXPORT_SYMBOL(cpu_possible_map);
88 87
89/* Per CPU bogomips and other parameters */ 88/* Per CPU bogomips and other parameters */
90struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 89struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
90EXPORT_SYMBOL(cpu_data);
91 91
92/* Set when the idlers are all forked */ 92/* Set when the idlers are all forked */
93int smp_threads_ready; 93int smp_threads_ready;
94 94
95/* representing HT siblings of each logical CPU */ 95/* representing HT siblings of each logical CPU */
96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 96cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
97EXPORT_SYMBOL(cpu_sibling_map);
97 98
98/* representing HT and core siblings of each logical CPU */ 99/* representing HT and core siblings of each logical CPU */
99cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 100cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
472 473
473 if (smp_num_siblings > 1) { 474 if (smp_num_siblings > 1) {
474 for_each_cpu_mask(i, cpu_sibling_setup_map) { 475 for_each_cpu_mask(i, cpu_sibling_setup_map) {
475 if (phys_proc_id[cpu] == phys_proc_id[i] && 476 if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
476 cpu_core_id[cpu] == cpu_core_id[i]) { 477 c[cpu].cpu_core_id == c[i].cpu_core_id) {
477 cpu_set(i, cpu_sibling_map[cpu]); 478 cpu_set(i, cpu_sibling_map[cpu]);
478 cpu_set(cpu, cpu_sibling_map[i]); 479 cpu_set(cpu, cpu_sibling_map[i]);
479 cpu_set(i, cpu_core_map[cpu]); 480 cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
500 cpu_set(i, c[cpu].llc_shared_map); 501 cpu_set(i, c[cpu].llc_shared_map);
501 cpu_set(cpu, c[i].llc_shared_map); 502 cpu_set(cpu, c[i].llc_shared_map);
502 } 503 }
503 if (phys_proc_id[cpu] == phys_proc_id[i]) { 504 if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
504 cpu_set(i, cpu_core_map[cpu]); 505 cpu_set(i, cpu_core_map[cpu]);
505 cpu_set(cpu, cpu_core_map[i]); 506 cpu_set(cpu, cpu_core_map[i]);
506 /* 507 /*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
797 } 798 }
798 799
799 800
801 alternatives_smp_switch(1);
802
800 c_idle.idle = get_idle_for_cpu(cpu); 803 c_idle.idle = get_idle_for_cpu(cpu);
801 804
802 if (c_idle.idle) { 805 if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
1199 cpu_clear(cpu, cpu_sibling_map[sibling]); 1202 cpu_clear(cpu, cpu_sibling_map[sibling]);
1200 cpus_clear(cpu_sibling_map[cpu]); 1203 cpus_clear(cpu_sibling_map[cpu]);
1201 cpus_clear(cpu_core_map[cpu]); 1204 cpus_clear(cpu_core_map[cpu]);
1202 phys_proc_id[cpu] = BAD_APICID; 1205 c[cpu].phys_proc_id = 0;
1203 cpu_core_id[cpu] = BAD_APICID; 1206 c[cpu].cpu_core_id = 0;
1204 cpu_clear(cpu, cpu_sibling_setup_map); 1207 cpu_clear(cpu, cpu_sibling_setup_map);
1205} 1208}
1206 1209
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
1259 /* They ack this in play_dead by setting CPU_DEAD */ 1262 /* They ack this in play_dead by setting CPU_DEAD */
1260 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1263 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1261 printk ("CPU %d is now offline\n", cpu); 1264 printk ("CPU %d is now offline\n", cpu);
1265 if (1 == num_online_cpus())
1266 alternatives_smp_switch(0);
1262 return; 1267 return;
1263 } 1268 }
1264 msleep(100); 1269 msleep(100);
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644
index 000000000000..8d4c67f61b8e
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
1/*
2 * Derived from arch/powerpc/platforms/pseries/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/types.h>
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/spinlock.h>
27#include <linux/string.h>
28#include <linux/pci.h>
29#include <linux/dma-mapping.h>
30#include <linux/bootmem.h>
31#include <asm/tce.h>
32#include <asm/calgary.h>
33#include <asm/proto.h>
34
35/* flush a tce at 'tceaddr' to main memory */
36static inline void flush_tce(void* tceaddr)
37{
38 /* a single tce can't cross a cache line */
39 if (cpu_has_clflush)
40 asm volatile("clflush (%0)" :: "r" (tceaddr));
41 else
42 asm volatile("wbinvd":::"memory");
43}
44
45void tce_build(struct iommu_table *tbl, unsigned long index,
46 unsigned int npages, unsigned long uaddr, int direction)
47{
48 u64* tp;
49 u64 t;
50 u64 rpn;
51
52 t = (1 << TCE_READ_SHIFT);
53 if (direction != DMA_TO_DEVICE)
54 t |= (1 << TCE_WRITE_SHIFT);
55
56 tp = ((u64*)tbl->it_base) + index;
57
58 while (npages--) {
59 rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
60 t &= ~TCE_RPN_MASK;
61 t |= (rpn << TCE_RPN_SHIFT);
62
63 *tp = cpu_to_be64(t);
64 flush_tce(tp);
65
66 uaddr += PAGE_SIZE;
67 tp++;
68 }
69}
70
71void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
72{
73 u64* tp;
74
75 tp = ((u64*)tbl->it_base) + index;
76
77 while (npages--) {
78 *tp = cpu_to_be64(0);
79 flush_tce(tp);
80 tp++;
81 }
82}
83
84static inline unsigned int table_size_to_number_of_entries(unsigned char size)
85{
86 /*
87 * size is the order of the table, 0-7
88 * smallest table is 8K entries, so shift result by 13 to
89 * multiply by 8K
90 */
91 return (1 << size) << 13;
92}
93
94static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
95{
96 unsigned int bitmapsz;
97 unsigned int tce_table_index;
98 unsigned long bmppages;
99 int ret;
100
101 tbl->it_busno = dev->bus->number;
102
103 /* set the tce table size - measured in entries */
104 tbl->it_size = table_size_to_number_of_entries(specified_table_size);
105
106 tce_table_index = bus_to_phb(tbl->it_busno);
107 tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
108 if (!tbl->it_base) {
109 printk(KERN_ERR "Calgary: iommu_table_setparms: "
110 "no table allocated?!\n");
111 ret = -ENOMEM;
112 goto done;
113 }
114
115 /*
116 * number of bytes needed for the bitmap size in number of
117 * entries; we need one bit per entry
118 */
119 bitmapsz = tbl->it_size / BITS_PER_BYTE;
120 bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
121 if (!bmppages) {
122 printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
123 ret = -ENOMEM;
124 goto done;
125 }
126
127 tbl->it_map = (unsigned long*)bmppages;
128
129 memset(tbl->it_map, 0, bitmapsz);
130
131 tbl->it_hint = 0;
132
133 spin_lock_init(&tbl->it_lock);
134
135 return 0;
136
137done:
138 return ret;
139}
140
141int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
142{
143 struct iommu_table *tbl;
144 int ret;
145
146 if (dev->sysdata) {
147 printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
148 dev, dev->sysdata);
149 BUG();
150 }
151
152 tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
153 if (!tbl) {
154 printk(KERN_ERR "Calgary: error allocating iommu_table\n");
155 ret = -ENOMEM;
156 goto done;
157 }
158
159 ret = tce_table_setparms(dev, tbl);
160 if (ret)
161 goto free_tbl;
162
163 tce_free(tbl, 0, tbl->it_size);
164
165 tbl->bbar = bbar;
166
167 /*
168 * NUMA is already using the bus's sysdata pointer, so we use
169 * the bus's pci_dev's sysdata instead.
170 */
171 dev->sysdata = tbl;
172
173 return 0;
174
175free_tbl:
176 kfree(tbl);
177done:
178 return ret;
179}
180
181void* alloc_tce_table(void)
182{
183 unsigned int size;
184
185 size = table_size_to_number_of_entries(specified_table_size);
186 size *= TCE_ENTRY_SIZE;
187
188 return __alloc_bootmem_low(size, size, 0);
189}
190
191void free_tce_table(void *tbl)
192{
193 unsigned int size;
194
195 if (!tbl)
196 return;
197
198 size = table_size_to_number_of_entries(specified_table_size);
199 size *= TCE_ENTRY_SIZE;
200
201 free_bootmem(__pa(tbl), size);
202}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7392570f975d..ebbee6f59ff5 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -8,7 +8,7 @@
8 * Copyright (c) 1995 Markus Kuhn 8 * Copyright (c) 1995 Markus Kuhn
9 * Copyright (c) 1996 Ingo Molnar 9 * Copyright (c) 1996 Ingo Molnar
10 * Copyright (c) 1998 Andrea Arcangeli 10 * Copyright (c) 1998 Andrea Arcangeli
11 * Copyright (c) 2002 Vojtech Pavlik 11 * Copyright (c) 2002,2006 Vojtech Pavlik
12 * Copyright (c) 2003 Andi Kleen 12 * Copyright (c) 2003 Andi Kleen
13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c 13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
14 */ 14 */
@@ -51,14 +51,21 @@ extern int using_apic_timer;
51static char *time_init_gtod(void); 51static char *time_init_gtod(void);
52 52
53DEFINE_SPINLOCK(rtc_lock); 53DEFINE_SPINLOCK(rtc_lock);
54EXPORT_SYMBOL(rtc_lock);
54DEFINE_SPINLOCK(i8253_lock); 55DEFINE_SPINLOCK(i8253_lock);
55 56
56int nohpet __initdata = 0; 57int nohpet __initdata = 0;
57static int notsc __initdata = 0; 58static int notsc __initdata = 0;
58 59
59#undef HPET_HACK_ENABLE_DANGEROUS 60#define USEC_PER_TICK (USEC_PER_SEC / HZ)
61#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
62#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
63
64#define NS_SCALE 10 /* 2^10, carefully chosen */
65#define US_SCALE 32 /* 2^32, arbitralrily chosen */
60 66
61unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 67unsigned int cpu_khz; /* TSC clocks / usec, not used here */
68EXPORT_SYMBOL(cpu_khz);
62static unsigned long hpet_period; /* fsecs / HPET clock */ 69static unsigned long hpet_period; /* fsecs / HPET clock */
63unsigned long hpet_tick; /* HPET clocks / interrupt */ 70unsigned long hpet_tick; /* HPET clocks / interrupt */
64int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ 71int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
90 t = get_cycles_sync(); 97 t = get_cycles_sync();
91 if (t < vxtime.last_tsc) 98 if (t < vxtime.last_tsc)
92 t = vxtime.last_tsc; /* hack */ 99 t = vxtime.last_tsc; /* hack */
93 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; 100 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
94 return x; 101 return x;
95} 102}
96 103
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void)
98{ 105{
99 /* cap counter read to one tick to avoid inconsistencies */ 106 /* cap counter read to one tick to avoid inconsistencies */
100 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; 107 unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
101 return (min(counter,hpet_tick) * vxtime.quot) >> 32; 108 return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
102} 109}
103 110
104unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; 111unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv)
118 seq = read_seqbegin(&xtime_lock); 125 seq = read_seqbegin(&xtime_lock);
119 126
120 sec = xtime.tv_sec; 127 sec = xtime.tv_sec;
121 usec = xtime.tv_nsec / 1000; 128 usec = xtime.tv_nsec / NSEC_PER_USEC;
122 129
123 /* i386 does some correction here to keep the clock 130 /* i386 does some correction here to keep the clock
124 monotonous even when ntpd is fixing drift. 131 monotonous even when ntpd is fixing drift.
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv)
129 in arch/x86_64/kernel/vsyscall.c and export all needed 136 in arch/x86_64/kernel/vsyscall.c and export all needed
130 variables in vmlinux.lds. -AK */ 137 variables in vmlinux.lds. -AK */
131 138
132 t = (jiffies - wall_jiffies) * (1000000L / HZ) + 139 t = (jiffies - wall_jiffies) * USEC_PER_TICK +
133 do_gettimeoffset(); 140 do_gettimeoffset();
134 usec += t; 141 usec += t;
135 142
136 } while (read_seqretry(&xtime_lock, seq)); 143 } while (read_seqretry(&xtime_lock, seq));
137 144
138 tv->tv_sec = sec + usec / 1000000; 145 tv->tv_sec = sec + usec / USEC_PER_SEC;
139 tv->tv_usec = usec % 1000000; 146 tv->tv_usec = usec % USEC_PER_SEC;
140} 147}
141 148
142EXPORT_SYMBOL(do_gettimeofday); 149EXPORT_SYMBOL(do_gettimeofday);
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv)
157 164
158 write_seqlock_irq(&xtime_lock); 165 write_seqlock_irq(&xtime_lock);
159 166
160 nsec -= do_gettimeoffset() * 1000 + 167 nsec -= do_gettimeoffset() * NSEC_PER_USEC +
161 (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); 168 (jiffies - wall_jiffies) * NSEC_PER_TICK;
162 169
163 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); 170 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
164 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); 171 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void)
288 this_offset = hpet_readl(HPET_COUNTER); 295 this_offset = hpet_readl(HPET_COUNTER);
289 } while (read_seqretry(&xtime_lock, seq)); 296 } while (read_seqretry(&xtime_lock, seq));
290 offset = (this_offset - last_offset); 297 offset = (this_offset - last_offset);
291 offset *= (NSEC_PER_SEC/HZ) / hpet_tick; 298 offset *= NSEC_PER_TICK / hpet_tick;
292 } else { 299 } else {
293 do { 300 do {
294 seq = read_seqbegin(&xtime_lock); 301 seq = read_seqbegin(&xtime_lock);
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void)
297 base = monotonic_base; 304 base = monotonic_base;
298 } while (read_seqretry(&xtime_lock, seq)); 305 } while (read_seqretry(&xtime_lock, seq));
299 this_offset = get_cycles_sync(); 306 this_offset = get_cycles_sync();
300 offset = (this_offset - last_offset)*1000 / cpu_khz; 307 /* FIXME: 1000 or 1000000? */
308 offset = (this_offset - last_offset)*1000 / cpu_khz;
301 } 309 }
302 return base + offset; 310 return base + offset;
303} 311}
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs)
382 } 390 }
383 391
384 monotonic_base += 392 monotonic_base +=
385 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 393 (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
386 394
387 vxtime.last = offset; 395 vxtime.last = offset;
388#ifdef CONFIG_X86_PM_TIMER 396#ifdef CONFIG_X86_PM_TIMER
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs)
391#endif 399#endif
392 } else { 400 } else {
393 offset = (((tsc - vxtime.last_tsc) * 401 offset = (((tsc - vxtime.last_tsc) *
394 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 402 vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
395 403
396 if (offset < 0) 404 if (offset < 0)
397 offset = 0; 405 offset = 0;
398 406
399 if (offset > (USEC_PER_SEC / HZ)) { 407 if (offset > USEC_PER_TICK) {
400 lost = offset / (USEC_PER_SEC / HZ); 408 lost = offset / USEC_PER_TICK;
401 offset %= (USEC_PER_SEC / HZ); 409 offset %= USEC_PER_TICK;
402 } 410 }
403 411
404 monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; 412 /* FIXME: 1000 or 1000000? */
413 monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
405 414
406 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; 415 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
407 416
408 if ((((tsc - vxtime.last_tsc) * 417 if ((((tsc - vxtime.last_tsc) *
409 vxtime.tsc_quot) >> 32) < offset) 418 vxtime.tsc_quot) >> US_SCALE) < offset)
410 vxtime.last_tsc = tsc - 419 vxtime.last_tsc = tsc -
411 (((long) offset << 32) / vxtime.tsc_quot) - 1; 420 (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
412 } 421 }
413 422
414 if (lost > 0) { 423 if (lost > 0) {
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
468} 477}
469 478
470static unsigned int cyc2ns_scale __read_mostly; 479static unsigned int cyc2ns_scale __read_mostly;
471#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
472 480
473static inline void set_cyc2ns_scale(unsigned long cpu_khz) 481static inline void set_cyc2ns_scale(unsigned long cpu_khz)
474{ 482{
475 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; 483 cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
476} 484}
477 485
478static inline unsigned long long cycles_2_ns(unsigned long long cyc) 486static inline unsigned long long cycles_2_ns(unsigned long long cyc)
479{ 487{
480 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; 488 return (cyc * cyc2ns_scale) >> NS_SCALE;
481} 489}
482 490
483unsigned long long sched_clock(void) 491unsigned long long sched_clock(void)
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void)
490 Disadvantage is a small drift between CPUs in some configurations, 498 Disadvantage is a small drift between CPUs in some configurations,
491 but that should be tolerable. */ 499 but that should be tolerable. */
492 if (__vxtime.mode == VXTIME_HPET) 500 if (__vxtime.mode == VXTIME_HPET)
493 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32; 501 return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
494#endif 502#endif
495 503
496 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, 504 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
633 641
634 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 642 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
635 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 643 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
636 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 644 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
637 } 645 }
638 646
639 set_cyc2ns_scale(cpu_khz_ref); 647 set_cyc2ns_scale(cpu_khz_ref);
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick)
789 if (hpet_use_timer) { 797 if (hpet_use_timer) {
790 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | 798 hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
791 HPET_TN_32BIT, HPET_T0_CFG); 799 HPET_TN_32BIT, HPET_T0_CFG);
792 hpet_writel(hpet_tick, HPET_T0_CMP); 800 hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
793 hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ 801 hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
794 cfg |= HPET_CFG_LEGACY; 802 cfg |= HPET_CFG_LEGACY;
795 } 803 }
796/* 804/*
@@ -825,8 +833,7 @@ static int hpet_init(void)
825 if (hpet_period < 100000 || hpet_period > 100000000) 833 if (hpet_period < 100000 || hpet_period > 100000000)
826 return -1; 834 return -1;
827 835
828 hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / 836 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
829 hpet_period;
830 837
831 hpet_use_timer = (id & HPET_ID_LEGSUP); 838 hpet_use_timer = (id & HPET_ID_LEGSUP);
832 839
@@ -890,18 +897,6 @@ void __init time_init(void)
890 char *timename; 897 char *timename;
891 char *gtod; 898 char *gtod;
892 899
893#ifdef HPET_HACK_ENABLE_DANGEROUS
894 if (!vxtime.hpet_address) {
895 printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
896 "manually!\n");
897 outl(0x800038a0, 0xcf8);
898 outl(0xff000001, 0xcfc);
899 outl(0x800038a0, 0xcf8);
900 vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
901 printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
902 "at %#lx.\n", vxtime.hpet_address);
903 }
904#endif
905 if (nohpet) 900 if (nohpet)
906 vxtime.hpet_address = 0; 901 vxtime.hpet_address = 0;
907 902
@@ -912,7 +907,7 @@ void __init time_init(void)
912 -xtime.tv_sec, -xtime.tv_nsec); 907 -xtime.tv_sec, -xtime.tv_nsec);
913 908
914 if (!hpet_init()) 909 if (!hpet_init())
915 vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; 910 vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
916 else 911 else
917 vxtime.hpet_address = 0; 912 vxtime.hpet_address = 0;
918 913
@@ -941,8 +936,8 @@ void __init time_init(void)
941 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); 936 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
942 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 937 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
943 cpu_khz / 1000, cpu_khz % 1000); 938 cpu_khz / 1000, cpu_khz % 1000);
944 vxtime.quot = (1000000L << 32) / vxtime_hz; 939 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
945 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 940 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
946 vxtime.last_tsc = get_cycles_sync(); 941 vxtime.last_tsc = get_cycles_sync();
947 setup_irq(0, &irq0); 942 setup_irq(0, &irq0);
948 943
@@ -956,10 +951,10 @@ void __init time_init(void)
956__cpuinit int unsynchronized_tsc(void) 951__cpuinit int unsynchronized_tsc(void)
957{ 952{
958#ifdef CONFIG_SMP 953#ifdef CONFIG_SMP
959 if (oem_force_hpet_timer()) 954 if (apic_is_clustered_box())
960 return 1; 955 return 1;
961 /* Intel systems are normally all synchronized. Exceptions 956 /* Intel systems are normally all synchronized. Exceptions
962 are handled in the OEM check above. */ 957 are handled in the check above. */
963 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 958 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
964 return 0; 959 return 0;
965#endif 960#endif
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index cea335e8746c..3d11a2fe45b7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -6,8 +6,6 @@
6 * 6 *
7 * Pentium III FXSR, SSE support 7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000 8 * Gareth Hughes <gareth@valinux.com>, May 2000
9 *
10 * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
11 */ 9 */
12 10
13/* 11/*
@@ -31,6 +29,7 @@
31#include <linux/nmi.h> 29#include <linux/nmi.h>
32#include <linux/kprobes.h> 30#include <linux/kprobes.h>
33#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h>
34 33
35#include <asm/system.h> 34#include <asm/system.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -41,7 +40,7 @@
41#include <asm/i387.h> 40#include <asm/i387.h>
42#include <asm/kdebug.h> 41#include <asm/kdebug.h>
43#include <asm/processor.h> 42#include <asm/processor.h>
44 43#include <asm/unwind.h>
45#include <asm/smp.h> 44#include <asm/smp.h>
46#include <asm/pgalloc.h> 45#include <asm/pgalloc.h>
47#include <asm/pda.h> 46#include <asm/pda.h>
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void);
71asmlinkage void spurious_interrupt_bug(void); 70asmlinkage void spurious_interrupt_bug(void);
72 71
73ATOMIC_NOTIFIER_HEAD(die_chain); 72ATOMIC_NOTIFIER_HEAD(die_chain);
73EXPORT_SYMBOL(die_chain);
74 74
75int register_die_notifier(struct notifier_block *nb) 75int register_die_notifier(struct notifier_block *nb)
76{ 76{
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
107 preempt_enable_no_resched(); 107 preempt_enable_no_resched();
108} 108}
109 109
110static int kstack_depth_to_print = 10; 110static int kstack_depth_to_print = 12;
111static int call_trace = 1;
111 112
112#ifdef CONFIG_KALLSYMS 113#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 114#include <linux/kallsyms.h>
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
191 return NULL; 192 return NULL;
192} 193}
193 194
195static int show_trace_unwind(struct unwind_frame_info *info, void *context)
196{
197 int i = 11, n = 0;
198
199 while (unwind(info) == 0 && UNW_PC(info)) {
200 ++n;
201 if (i > 50) {
202 printk("\n ");
203 i = 7;
204 } else
205 i += printk(" ");
206 i += printk_address(UNW_PC(info));
207 if (arch_unw_user_mode(info))
208 break;
209 }
210 printk("\n");
211 return n;
212}
213
194/* 214/*
195 * x86-64 can have upto three kernel stacks: 215 * x86-64 can have upto three kernel stacks:
196 * process stack 216 * process stack
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
198 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 218 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
199 */ 219 */
200 220
201void show_trace(unsigned long *stack) 221void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
202{ 222{
203 const unsigned cpu = safe_smp_processor_id(); 223 const unsigned cpu = safe_smp_processor_id();
204 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 224 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
205 int i; 225 int i = 11;
206 unsigned used = 0; 226 unsigned used = 0;
207 227
208 printk("\nCall Trace:"); 228 printk("\nCall Trace:");
209 229
230 if (!tsk)
231 tsk = current;
232
233 if (call_trace >= 0) {
234 int unw_ret = 0;
235 struct unwind_frame_info info;
236
237 if (regs) {
238 if (unwind_init_frame_info(&info, tsk, regs) == 0)
239 unw_ret = show_trace_unwind(&info, NULL);
240 } else if (tsk == current)
241 unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
242 else {
243 if (unwind_init_blocked(&info, tsk) == 0)
244 unw_ret = show_trace_unwind(&info, NULL);
245 }
246 if (unw_ret > 0) {
247 if (call_trace > 0)
248 return;
249 printk("Legacy call trace:");
250 i = 18;
251 }
252 }
253
210#define HANDLE_STACK(cond) \ 254#define HANDLE_STACK(cond) \
211 do while (cond) { \ 255 do while (cond) { \
212 unsigned long addr = *stack++; \ 256 unsigned long addr = *stack++; \
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack)
229 } \ 273 } \
230 } while (0) 274 } while (0)
231 275
232 for(i = 11; ; ) { 276 for(; ; ) {
233 const char *id; 277 const char *id;
234 unsigned long *estack_end; 278 unsigned long *estack_end;
235 estack_end = in_exception_stack(cpu, (unsigned long)stack, 279 estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack)
264 printk("\n"); 308 printk("\n");
265} 309}
266 310
267void show_stack(struct task_struct *tsk, unsigned long * rsp) 311static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
268{ 312{
269 unsigned long *stack; 313 unsigned long *stack;
270 int i; 314 int i;
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
298 printk("%016lx ", *stack++); 342 printk("%016lx ", *stack++);
299 touch_nmi_watchdog(); 343 touch_nmi_watchdog();
300 } 344 }
301 show_trace((unsigned long *)rsp); 345 show_trace(tsk, regs, rsp);
346}
347
348void show_stack(struct task_struct *tsk, unsigned long * rsp)
349{
350 _show_stack(tsk, NULL, rsp);
302} 351}
303 352
304/* 353/*
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
307void dump_stack(void) 356void dump_stack(void)
308{ 357{
309 unsigned long dummy; 358 unsigned long dummy;
310 show_trace(&dummy); 359 show_trace(NULL, NULL, &dummy);
311} 360}
312 361
313EXPORT_SYMBOL(dump_stack); 362EXPORT_SYMBOL(dump_stack);
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs)
334 if (in_kernel) { 383 if (in_kernel) {
335 384
336 printk("Stack: "); 385 printk("Stack: ");
337 show_stack(NULL, (unsigned long*)rsp); 386 _show_stack(NULL, regs, (unsigned long*)rsp);
338 387
339 printk("\nCode: "); 388 printk("\nCode: ");
340 if (regs->rip < PAGE_OFFSET) 389 if (regs->rip < PAGE_OFFSET)
@@ -383,6 +432,7 @@ void out_of_line_bug(void)
383{ 432{
384 BUG(); 433 BUG();
385} 434}
435EXPORT_SYMBOL(out_of_line_bug);
386#endif 436#endif
387 437
388static DEFINE_SPINLOCK(die_lock); 438static DEFINE_SPINLOCK(die_lock);
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s)
1012} 1062}
1013__setup("kstack=", kstack_setup); 1063__setup("kstack=", kstack_setup);
1014 1064
1065static int __init call_trace_setup(char *s)
1066{
1067 if (strcmp(s, "old") == 0)
1068 call_trace = -1;
1069 else if (strcmp(s, "both") == 0)
1070 call_trace = 0;
1071 else if (strcmp(s, "new") == 0)
1072 call_trace = 1;
1073 return 1;
1074}
1075__setup("call_trace=", call_trace_setup);
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b81f473c4a19..1c6a5f322919 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -45,6 +45,15 @@ SECTIONS
45 45
46 RODATA 46 RODATA
47 47
48#ifdef CONFIG_STACK_UNWIND
49 . = ALIGN(8);
50 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
51 __start_unwind = .;
52 *(.eh_frame)
53 __end_unwind = .;
54 }
55#endif
56
48 /* Data */ 57 /* Data */
49 .data : AT(ADDR(.data) - LOAD_OFFSET) { 58 .data : AT(ADDR(.data) - LOAD_OFFSET) {
50 *(.data) 59 *(.data)
@@ -131,6 +140,26 @@ SECTIONS
131 *(.data.page_aligned) 140 *(.data.page_aligned)
132 } 141 }
133 142
143 /* might get freed after init */
144 . = ALIGN(4096);
145 __smp_alt_begin = .;
146 __smp_alt_instructions = .;
147 .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
148 *(.smp_altinstructions)
149 }
150 __smp_alt_instructions_end = .;
151 . = ALIGN(8);
152 __smp_locks = .;
153 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
154 *(.smp_locks)
155 }
156 __smp_locks_end = .;
157 .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
158 *(.smp_altinstr_replacement)
159 }
160 . = ALIGN(4096);
161 __smp_alt_end = .;
162
134 . = ALIGN(4096); /* Init code and data */ 163 . = ALIGN(4096); /* Init code and data */
135 __init_begin = .; 164 __init_begin = .;
136 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 165 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 9468fb20b0bc..f603037df162 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t)
107 107
108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 108int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
109{ 109{
110 if (unlikely(!__sysctl_vsyscall)) 110 if (!__sysctl_vsyscall)
111 return gettimeofday(tv,tz); 111 return gettimeofday(tv,tz);
112 if (tv) 112 if (tv)
113 do_vgettimeofday(tv); 113 do_vgettimeofday(tv);
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
120 * unlikely */ 120 * unlikely */
121time_t __vsyscall(1) vtime(time_t *t) 121time_t __vsyscall(1) vtime(time_t *t)
122{ 122{
123 if (unlikely(!__sysctl_vsyscall)) 123 if (!__sysctl_vsyscall)
124 return time_syscall(t); 124 return time_syscall(t);
125 else if (t) 125 else if (t)
126 *t = __xtime.tv_sec; 126 *t = __xtime.tv_sec;
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 1def21c9f7cd..370952c4ff22 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -1,66 +1,21 @@
1/* Exports for assembly files.
2 All C exports should go in the respective C files. */
3
1#include <linux/config.h> 4#include <linux/config.h>
2#include <linux/module.h> 5#include <linux/module.h>
3#include <linux/smp.h> 6#include <linux/smp.h>
4#include <linux/user.h>
5#include <linux/sched.h>
6#include <linux/in6.h>
7#include <linux/interrupt.h>
8#include <linux/smp_lock.h>
9#include <linux/pm.h>
10#include <linux/pci.h>
11#include <linux/apm_bios.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/syscalls.h>
15#include <linux/tty.h>
16 7
17#include <asm/semaphore.h> 8#include <asm/semaphore.h>
18#include <asm/processor.h> 9#include <asm/processor.h>
19#include <asm/i387.h>
20#include <asm/uaccess.h> 10#include <asm/uaccess.h>
21#include <asm/checksum.h>
22#include <asm/io.h>
23#include <asm/delay.h>
24#include <asm/irq.h>
25#include <asm/mmx.h>
26#include <asm/desc.h>
27#include <asm/pgtable.h> 11#include <asm/pgtable.h>
28#include <asm/pgalloc.h>
29#include <asm/nmi.h>
30#include <asm/kdebug.h>
31#include <asm/unistd.h>
32#include <asm/tlbflush.h>
33#include <asm/kdebug.h>
34
35extern spinlock_t rtc_lock;
36 12
37#ifdef CONFIG_SMP
38extern void __write_lock_failed(rwlock_t *rw);
39extern void __read_lock_failed(rwlock_t *rw);
40#endif
41
42/* platform dependent support */
43EXPORT_SYMBOL(boot_cpu_data);
44//EXPORT_SYMBOL(dump_fpu);
45EXPORT_SYMBOL(__ioremap);
46EXPORT_SYMBOL(ioremap_nocache);
47EXPORT_SYMBOL(iounmap);
48EXPORT_SYMBOL(kernel_thread); 13EXPORT_SYMBOL(kernel_thread);
49EXPORT_SYMBOL(pm_idle);
50EXPORT_SYMBOL(pm_power_off);
51 14
52EXPORT_SYMBOL(__down_failed); 15EXPORT_SYMBOL(__down_failed);
53EXPORT_SYMBOL(__down_failed_interruptible); 16EXPORT_SYMBOL(__down_failed_interruptible);
54EXPORT_SYMBOL(__down_failed_trylock); 17EXPORT_SYMBOL(__down_failed_trylock);
55EXPORT_SYMBOL(__up_wakeup); 18EXPORT_SYMBOL(__up_wakeup);
56/* Networking helper routines. */
57EXPORT_SYMBOL(csum_partial_copy_nocheck);
58EXPORT_SYMBOL(ip_compute_csum);
59/* Delay loops */
60EXPORT_SYMBOL(__udelay);
61EXPORT_SYMBOL(__ndelay);
62EXPORT_SYMBOL(__delay);
63EXPORT_SYMBOL(__const_udelay);
64 19
65EXPORT_SYMBOL(__get_user_1); 20EXPORT_SYMBOL(__get_user_1);
66EXPORT_SYMBOL(__get_user_2); 21EXPORT_SYMBOL(__get_user_2);
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2);
71EXPORT_SYMBOL(__put_user_4); 26EXPORT_SYMBOL(__put_user_4);
72EXPORT_SYMBOL(__put_user_8); 27EXPORT_SYMBOL(__put_user_8);
73 28
74EXPORT_SYMBOL(strncpy_from_user);
75EXPORT_SYMBOL(__strncpy_from_user);
76EXPORT_SYMBOL(clear_user);
77EXPORT_SYMBOL(__clear_user);
78EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic);
79EXPORT_SYMBOL(copy_from_user); 30EXPORT_SYMBOL(copy_from_user);
80EXPORT_SYMBOL(copy_to_user); 31EXPORT_SYMBOL(copy_to_user);
81EXPORT_SYMBOL(copy_in_user);
82EXPORT_SYMBOL(strnlen_user);
83
84#ifdef CONFIG_PCI
85EXPORT_SYMBOL(pci_mem_start);
86#endif
87 32
88EXPORT_SYMBOL(copy_page); 33EXPORT_SYMBOL(copy_page);
89EXPORT_SYMBOL(clear_page); 34EXPORT_SYMBOL(clear_page);
90 35
91EXPORT_SYMBOL(_cpu_pda);
92#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
93EXPORT_SYMBOL(cpu_data); 37extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
38extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
94EXPORT_SYMBOL(__write_lock_failed); 39EXPORT_SYMBOL(__write_lock_failed);
95EXPORT_SYMBOL(__read_lock_failed); 40EXPORT_SYMBOL(__read_lock_failed);
96
97EXPORT_SYMBOL(smp_call_function);
98EXPORT_SYMBOL(cpu_callout_map);
99#endif
100
101#ifdef CONFIG_VT
102EXPORT_SYMBOL(screen_info);
103#endif 41#endif
104 42
105EXPORT_SYMBOL(rtc_lock);
106
107EXPORT_SYMBOL_GPL(set_nmi_callback);
108EXPORT_SYMBOL_GPL(unset_nmi_callback);
109
110/* Export string functions. We normally rely on gcc builtin for most of these, 43/* Export string functions. We normally rely on gcc builtin for most of these,
111 but gcc sometimes decides not to inline them. */ 44 but gcc sometimes decides not to inline them. */
112#undef memcpy 45#undef memcpy
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
114#undef memmove 47#undef memmove
115 48
116extern void * memset(void *,int,__kernel_size_t); 49extern void * memset(void *,int,__kernel_size_t);
117extern size_t strlen(const char *);
118extern void * memmove(void * dest,const void *src,size_t count);
119extern void * memcpy(void *,const void *,__kernel_size_t); 50extern void * memcpy(void *,const void *,__kernel_size_t);
120extern void * __memcpy(void *,const void *,__kernel_size_t); 51extern void * __memcpy(void *,const void *,__kernel_size_t);
121 52
122EXPORT_SYMBOL(memset); 53EXPORT_SYMBOL(memset);
123EXPORT_SYMBOL(memmove);
124EXPORT_SYMBOL(memcpy); 54EXPORT_SYMBOL(memcpy);
125EXPORT_SYMBOL(__memcpy); 55EXPORT_SYMBOL(__memcpy);
126 56
127#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
128/* prototypes are wrong, these are assembly with custom calling functions */
129extern void rwsem_down_read_failed_thunk(void);
130extern void rwsem_wake_thunk(void);
131extern void rwsem_downgrade_thunk(void);
132extern void rwsem_down_write_failed_thunk(void);
133EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
134EXPORT_SYMBOL(rwsem_wake_thunk);
135EXPORT_SYMBOL(rwsem_downgrade_thunk);
136EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
137#endif
138
139EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
140
141EXPORT_SYMBOL(die_chain);
142
143#ifdef CONFIG_SMP
144EXPORT_SYMBOL(cpu_sibling_map);
145EXPORT_SYMBOL(smp_num_siblings);
146#endif
147
148#ifdef CONFIG_BUG
149EXPORT_SYMBOL(out_of_line_bug);
150#endif
151
152EXPORT_SYMBOL(init_level4_pgt); 58EXPORT_SYMBOL(init_level4_pgt);
153
154extern unsigned long __supported_pte_mask;
155EXPORT_SYMBOL(__supported_pte_mask);
156
157#ifdef CONFIG_SMP
158EXPORT_SYMBOL(flush_tlb_page);
159#endif
160
161EXPORT_SYMBOL(cpu_khz);
162
163EXPORT_SYMBOL(load_gs_index); 59EXPORT_SYMBOL(load_gs_index);
164 60
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 5384e227cdf6..c493735218da 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -147,4 +147,5 @@ unsigned short ip_compute_csum(unsigned char * buff, int len)
147{ 147{
148 return csum_fold(csum_partial(buff,len,0)); 148 return csum_fold(csum_partial(buff,len,0));
149} 149}
150EXPORT_SYMBOL(ip_compute_csum);
150 151
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index 94323f20816e..b1320ec58428 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -109,6 +109,7 @@ csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len,
109{ 109{
110 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); 110 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
111} 111}
112EXPORT_SYMBOL(csum_partial_copy_nocheck);
112 113
113unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, 114unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
114 __u32 len, unsigned short proto, unsigned int sum) 115 __u32 len, unsigned short proto, unsigned int sum)
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 03c460cbdd1c..b6cd3cca2f45 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/config.h> 11#include <linux/config.h>
12#include <linux/module.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <linux/delay.h> 14#include <linux/delay.h>
14#include <asm/delay.h> 15#include <asm/delay.h>
@@ -36,18 +37,22 @@ void __delay(unsigned long loops)
36 } 37 }
37 while((now-bclock) < loops); 38 while((now-bclock) < loops);
38} 39}
40EXPORT_SYMBOL(__delay);
39 41
40inline void __const_udelay(unsigned long xloops) 42inline void __const_udelay(unsigned long xloops)
41{ 43{
42 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); 44 __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
43} 45}
46EXPORT_SYMBOL(__const_udelay);
44 47
45void __udelay(unsigned long usecs) 48void __udelay(unsigned long usecs)
46{ 49{
47 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ 50 __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */
48} 51}
52EXPORT_SYMBOL(__udelay);
49 53
50void __ndelay(unsigned long nsecs) 54void __ndelay(unsigned long nsecs)
51{ 55{
52 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 56 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
53} 57}
58EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c
index e93d5255fdc9..751ebae8ec42 100644
--- a/arch/x86_64/lib/memmove.c
+++ b/arch/x86_64/lib/memmove.c
@@ -3,12 +3,13 @@
3 */ 3 */
4#define _STRING_C 4#define _STRING_C
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/module.h>
6 7
7#undef memmove 8#undef memmove
8void *memmove(void * dest,const void *src,size_t count) 9void *memmove(void * dest,const void *src,size_t count)
9{ 10{
10 if (dest < src) { 11 if (dest < src) {
11 __inline_memcpy(dest,src,count); 12 return memcpy(dest,src,count);
12 } else { 13 } else {
13 char *p = (char *) dest + count; 14 char *p = (char *) dest + count;
14 char *s = (char *) src + count; 15 char *s = (char *) src + count;
@@ -17,3 +18,4 @@ void *memmove(void * dest,const void *src,size_t count)
17 } 18 }
18 return dest; 19 return dest;
19} 20}
21EXPORT_SYMBOL(memmove);
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
index 9bc2c295818e..893d43f838cc 100644
--- a/arch/x86_64/lib/usercopy.c
+++ b/arch/x86_64/lib/usercopy.c
@@ -5,6 +5,7 @@
5 * Copyright 1997 Linus Torvalds 5 * Copyright 1997 Linus Torvalds
6 * Copyright 2002 Andi Kleen <ak@suse.de> 6 * Copyright 2002 Andi Kleen <ak@suse.de>
7 */ 7 */
8#include <linux/module.h>
8#include <asm/uaccess.h> 9#include <asm/uaccess.h>
9 10
10/* 11/*
@@ -47,15 +48,17 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
47 __do_strncpy_from_user(dst, src, count, res); 48 __do_strncpy_from_user(dst, src, count, res);
48 return res; 49 return res;
49} 50}
51EXPORT_SYMBOL(__strncpy_from_user);
50 52
51long 53long
52strncpy_from_user(char *dst, const char __user *src, long count) 54strncpy_from_user(char *dst, const char __user *src, long count)
53{ 55{
54 long res = -EFAULT; 56 long res = -EFAULT;
55 if (access_ok(VERIFY_READ, src, 1)) 57 if (access_ok(VERIFY_READ, src, 1))
56 __do_strncpy_from_user(dst, src, count, res); 58 return __strncpy_from_user(dst, src, count);
57 return res; 59 return res;
58} 60}
61EXPORT_SYMBOL(strncpy_from_user);
59 62
60/* 63/*
61 * Zero Userspace 64 * Zero Userspace
@@ -94,7 +97,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
94 [zero] "r" (0UL), [eight] "r" (8UL)); 97 [zero] "r" (0UL), [eight] "r" (8UL));
95 return size; 98 return size;
96} 99}
97 100EXPORT_SYMBOL(__clear_user);
98 101
99unsigned long clear_user(void __user *to, unsigned long n) 102unsigned long clear_user(void __user *to, unsigned long n)
100{ 103{
@@ -102,6 +105,7 @@ unsigned long clear_user(void __user *to, unsigned long n)
102 return __clear_user(to, n); 105 return __clear_user(to, n);
103 return n; 106 return n;
104} 107}
108EXPORT_SYMBOL(clear_user);
105 109
106/* 110/*
107 * Return the size of a string (including the ending 0) 111 * Return the size of a string (including the ending 0)
@@ -125,6 +129,7 @@ long __strnlen_user(const char __user *s, long n)
125 s++; 129 s++;
126 } 130 }
127} 131}
132EXPORT_SYMBOL(__strnlen_user);
128 133
129long strnlen_user(const char __user *s, long n) 134long strnlen_user(const char __user *s, long n)
130{ 135{
@@ -132,6 +137,7 @@ long strnlen_user(const char __user *s, long n)
132 return 0; 137 return 0;
133 return __strnlen_user(s, n); 138 return __strnlen_user(s, n);
134} 139}
140EXPORT_SYMBOL(strnlen_user);
135 141
136long strlen_user(const char __user *s) 142long strlen_user(const char __user *s)
137{ 143{
@@ -147,6 +153,7 @@ long strlen_user(const char __user *s)
147 s++; 153 s++;
148 } 154 }
149} 155}
156EXPORT_SYMBOL(strlen_user);
150 157
151unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) 158unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
152{ 159{
@@ -155,3 +162,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
155 } 162 }
156 return len; 163 return len;
157} 164}
165EXPORT_SYMBOL(copy_in_user);
166
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 0803d3858af1..08dc696f54ee 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -195,7 +195,7 @@ void dump_pagetable(unsigned long address)
195 printk("PGD %lx ", pgd_val(*pgd)); 195 printk("PGD %lx ", pgd_val(*pgd));
196 if (!pgd_present(*pgd)) goto ret; 196 if (!pgd_present(*pgd)) goto ret;
197 197
198 pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address); 198 pud = pud_offset(pgd, address);
199 if (bad_address(pud)) goto bad; 199 if (bad_address(pud)) goto bad;
200 printk("PUD %lx ", pud_val(*pud)); 200 printk("PUD %lx ", pud_val(*pud));
201 if (!pud_present(*pud)) goto ret; 201 if (!pud_present(*pud)) goto ret;
@@ -445,8 +445,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
445 if (!(vma->vm_flags & VM_GROWSDOWN)) 445 if (!(vma->vm_flags & VM_GROWSDOWN))
446 goto bad_area; 446 goto bad_area;
447 if (error_code & 4) { 447 if (error_code & 4) {
448 // XXX: align red zone size with ABI 448 /* Allow userspace just enough access below the stack pointer
449 if (address + 128 < regs->rsp) 449 * to let the 'enter' instruction work.
450 */
451 if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
450 goto bad_area; 452 goto bad_area;
451 } 453 }
452 if (expand_stack(vma, address)) 454 if (expand_stack(vma, address))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4ba34e95d835..02add1d1dfa8 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -41,8 +41,6 @@
41#include <asm/proto.h> 41#include <asm/proto.h>
42#include <asm/smp.h> 42#include <asm/smp.h>
43#include <asm/sections.h> 43#include <asm/sections.h>
44#include <asm/dma-mapping.h>
45#include <asm/swiotlb.h>
46 44
47#ifndef Dprintk 45#ifndef Dprintk
48#define Dprintk(x...) 46#define Dprintk(x...)
@@ -90,8 +88,6 @@ void show_mem(void)
90 printk(KERN_INFO "%lu pages swap cached\n",cached); 88 printk(KERN_INFO "%lu pages swap cached\n",cached);
91} 89}
92 90
93/* References to section boundaries */
94
95int after_bootmem; 91int after_bootmem;
96 92
97static __init void *spp_getpage(void) 93static __init void *spp_getpage(void)
@@ -261,9 +257,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
261 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { 257 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
262 unsigned long entry; 258 unsigned long entry;
263 259
264 if (address > end) { 260 if (address >= end) {
265 for (; i < PTRS_PER_PMD; i++, pmd++) 261 if (!after_bootmem)
266 set_pmd(pmd, __pmd(0)); 262 for (; i < PTRS_PER_PMD; i++, pmd++)
263 set_pmd(pmd, __pmd(0));
267 break; 264 break;
268 } 265 }
269 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; 266 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -341,7 +338,8 @@ static void __init find_early_table_space(unsigned long end)
341 table_end = table_start; 338 table_end = table_start;
342 339
343 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", 340 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
344 end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); 341 end, table_start << PAGE_SHIFT,
342 (table_start << PAGE_SHIFT) + tables);
345} 343}
346 344
347/* Setup the direct mapping of the physical memory at PAGE_OFFSET. 345/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -372,7 +370,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
372 pud_t *pud; 370 pud_t *pud;
373 371
374 if (after_bootmem) 372 if (after_bootmem)
375 pud = pud_offset_k(pgd, start & PGDIR_MASK); 373 pud = pud_offset(pgd, start & PGDIR_MASK);
376 else 374 else
377 pud = alloc_low_page(&map, &pud_phys); 375 pud = alloc_low_page(&map, &pud_phys);
378 376
@@ -587,10 +585,7 @@ void __init mem_init(void)
587{ 585{
588 long codesize, reservedpages, datasize, initsize; 586 long codesize, reservedpages, datasize, initsize;
589 587
590#ifdef CONFIG_SWIOTLB 588 pci_iommu_alloc();
591 pci_swiotlb_init();
592#endif
593 no_iommu_init();
594 589
595 /* How many end-of-memory variables you have, grandma! */ 590 /* How many end-of-memory variables you have, grandma! */
596 max_low_pfn = end_pfn; 591 max_low_pfn = end_pfn;
@@ -644,20 +639,29 @@ void __init mem_init(void)
644#endif 639#endif
645} 640}
646 641
647void free_initmem(void) 642void free_init_pages(char *what, unsigned long begin, unsigned long end)
648{ 643{
649 unsigned long addr; 644 unsigned long addr;
650 645
651 addr = (unsigned long)(&__init_begin); 646 if (begin >= end)
652 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { 647 return;
648
649 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
650 for (addr = begin; addr < end; addr += PAGE_SIZE) {
653 ClearPageReserved(virt_to_page(addr)); 651 ClearPageReserved(virt_to_page(addr));
654 init_page_count(virt_to_page(addr)); 652 init_page_count(virt_to_page(addr));
655 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 653 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
656 free_page(addr); 654 free_page(addr);
657 totalram_pages++; 655 totalram_pages++;
658 } 656 }
657}
658
659void free_initmem(void)
660{
659 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); 661 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
660 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); 662 free_init_pages("unused kernel memory",
663 (unsigned long)(&__init_begin),
664 (unsigned long)(&__init_end));
661} 665}
662 666
663#ifdef CONFIG_DEBUG_RODATA 667#ifdef CONFIG_DEBUG_RODATA
@@ -686,15 +690,7 @@ void mark_rodata_ro(void)
686#ifdef CONFIG_BLK_DEV_INITRD 690#ifdef CONFIG_BLK_DEV_INITRD
687void free_initrd_mem(unsigned long start, unsigned long end) 691void free_initrd_mem(unsigned long start, unsigned long end)
688{ 692{
689 if (start >= end) 693 free_init_pages("initrd memory", start, end);
690 return;
691 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
692 for (; start < end; start += PAGE_SIZE) {
693 ClearPageReserved(virt_to_page(start));
694 init_page_count(virt_to_page(start));
695 free_page(start);
696 totalram_pages++;
697 }
698} 694}
699#endif 695#endif
700 696
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index ae207064201e..45d7d823c3b8 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -11,6 +11,7 @@
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/module.h>
14#include <asm/io.h> 15#include <asm/io.h>
15#include <asm/pgalloc.h> 16#include <asm/pgalloc.h>
16#include <asm/fixmap.h> 17#include <asm/fixmap.h>
@@ -219,6 +220,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
219 } 220 }
220 return (__force void __iomem *) (offset + (char *)addr); 221 return (__force void __iomem *) (offset + (char *)addr);
221} 222}
223EXPORT_SYMBOL(__ioremap);
222 224
223/** 225/**
224 * ioremap_nocache - map bus memory into CPU space 226 * ioremap_nocache - map bus memory into CPU space
@@ -246,6 +248,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
246{ 248{
247 return __ioremap(phys_addr, size, _PAGE_PCD); 249 return __ioremap(phys_addr, size, _PAGE_PCD);
248} 250}
251EXPORT_SYMBOL(ioremap_nocache);
249 252
250/** 253/**
251 * iounmap - Free a IO remapping 254 * iounmap - Free a IO remapping
@@ -291,3 +294,5 @@ void iounmap(volatile void __iomem *addr)
291 BUG_ON(p != o || o == NULL); 294 BUG_ON(p != o || o == NULL);
292 kfree(p); 295 kfree(p);
293} 296}
297EXPORT_SYMBOL(iounmap);
298
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a0..b50a7c7c47f8 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -2,6 +2,7 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <asm/mpspec.h> 3#include <asm/mpspec.h>
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5#include <asm/k8.h>
5 6
6/* 7/*
7 * This discovers the pcibus <-> node mapping on AMD K8. 8 * This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
18#define NR_LDT_BUS_NUMBER_REGISTERS 3 19#define NR_LDT_BUS_NUMBER_REGISTERS 3
19#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) 20#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
20#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) 21#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
21#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
22 22
23/** 23/**
24 * fill_mp_bus_to_cpumask() 24 * fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
28__init static int 28__init static int
29fill_mp_bus_to_cpumask(void) 29fill_mp_bus_to_cpumask(void)
30{ 30{
31 struct pci_dev *nb_dev = NULL; 31 int i, j, k;
32 int i, j;
33 u32 ldtbus, nid; 32 u32 ldtbus, nid;
34 static int lbnr[3] = { 33 static int lbnr[3] = {
35 LDT_BUS_NUMBER_REGISTER_0, 34 LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
37 LDT_BUS_NUMBER_REGISTER_2 36 LDT_BUS_NUMBER_REGISTER_2
38 }; 37 };
39 38
40 while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 39 cache_k8_northbridges();
41 PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { 40 for (k = 0; k < num_k8_northbridges; k++) {
41 struct pci_dev *nb_dev = k8_northbridges[k];
42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); 42 pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
43 43
44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { 44 for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index a5f4f2aa007a..8a74bf3efd8e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -206,11 +206,11 @@ acpi_processor_power_activate(struct acpi_processor *pr,
206 206
207static void acpi_safe_halt(void) 207static void acpi_safe_halt(void)
208{ 208{
209 clear_thread_flag(TIF_POLLING_NRFLAG); 209 current_thread_info()->status &= ~TS_POLLING;
210 smp_mb__after_clear_bit(); 210 smp_mb__after_clear_bit();
211 if (!need_resched()) 211 if (!need_resched())
212 safe_halt(); 212 safe_halt();
213 set_thread_flag(TIF_POLLING_NRFLAG); 213 current_thread_info()->status |= TS_POLLING;
214} 214}
215 215
216static atomic_t c3_cpu_count; 216static atomic_t c3_cpu_count;
@@ -330,10 +330,10 @@ static void acpi_processor_idle(void)
330 * Invoke the current Cx state to put the processor to sleep. 330 * Invoke the current Cx state to put the processor to sleep.
331 */ 331 */
332 if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) { 332 if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
333 clear_thread_flag(TIF_POLLING_NRFLAG); 333 current_thread_info()->status &= ~TS_POLLING;
334 smp_mb__after_clear_bit(); 334 smp_mb__after_clear_bit();
335 if (need_resched()) { 335 if (need_resched()) {
336 set_thread_flag(TIF_POLLING_NRFLAG); 336 current_thread_info()->status |= TS_POLLING;
337 local_irq_enable(); 337 local_irq_enable();
338 return; 338 return;
339 } 339 }
@@ -376,7 +376,7 @@ static void acpi_processor_idle(void)
376#endif 376#endif
377 /* Re-enable interrupts */ 377 /* Re-enable interrupts */
378 local_irq_enable(); 378 local_irq_enable();
379 set_thread_flag(TIF_POLLING_NRFLAG); 379 current_thread_info()->status |= TS_POLLING;
380 /* Compute time (ticks) that we were actually asleep */ 380 /* Compute time (ticks) that we were actually asleep */
381 sleep_ticks = 381 sleep_ticks =
382 ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; 382 ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
@@ -420,7 +420,7 @@ static void acpi_processor_idle(void)
420#endif 420#endif
421 /* Re-enable interrupts */ 421 /* Re-enable interrupts */
422 local_irq_enable(); 422 local_irq_enable();
423 set_thread_flag(TIF_POLLING_NRFLAG); 423 current_thread_info()->status |= TS_POLLING;
424 /* Compute time (ticks) that we were actually asleep */ 424 /* Compute time (ticks) that we were actually asleep */
425 sleep_ticks = 425 sleep_ticks =
426 ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; 426 ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index 46685a540772..9826a399fa02 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -55,9 +55,9 @@ config AGP_AMD
55 X on AMD Irongate, 761, and 762 chipsets. 55 X on AMD Irongate, 761, and 762 chipsets.
56 56
57config AGP_AMD64 57config AGP_AMD64
58 tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU 58 tristate "AMD Opteron/Athlon64 on-CPU GART support" if !IOMMU
59 depends on AGP && X86 59 depends on AGP && X86
60 default y if GART_IOMMU 60 default y if IOMMU
61 help 61 help
62 This option gives you AGP support for the GLX component of 62 This option gives you AGP support for the GLX component of
63 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. 63 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index ac3c33a2e37d..f690ee8cb732 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -15,11 +15,9 @@
15#include <linux/agp_backend.h> 15#include <linux/agp_backend.h>
16#include <linux/mmzone.h> 16#include <linux/mmzone.h>
17#include <asm/page.h> /* PAGE_SIZE */ 17#include <asm/page.h> /* PAGE_SIZE */
18#include <asm/k8.h>
18#include "agp.h" 19#include "agp.h"
19 20
20/* Will need to be increased if AMD64 ever goes >8-way. */
21#define MAX_HAMMER_GARTS 8
22
23/* PTE bits. */ 21/* PTE bits. */
24#define GPTE_VALID 1 22#define GPTE_VALID 1
25#define GPTE_COHERENT 2 23#define GPTE_COHERENT 2
@@ -53,28 +51,12 @@
53#define ULI_X86_64_HTT_FEA_REG 0x50 51#define ULI_X86_64_HTT_FEA_REG 0x50
54#define ULI_X86_64_ENU_SCR_REG 0x54 52#define ULI_X86_64_ENU_SCR_REG 0x54
55 53
56static int nr_garts;
57static struct pci_dev * hammers[MAX_HAMMER_GARTS];
58
59static struct resource *aperture_resource; 54static struct resource *aperture_resource;
60static int __initdata agp_try_unsupported = 1; 55static int __initdata agp_try_unsupported = 1;
61 56
62#define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
63
64static void flush_amd64_tlb(struct pci_dev *dev)
65{
66 u32 tmp;
67
68 pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp);
69 tmp |= INVGART;
70 pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp);
71}
72
73static void amd64_tlbflush(struct agp_memory *temp) 57static void amd64_tlbflush(struct agp_memory *temp)
74{ 58{
75 int gart_iterator; 59 k8_flush_garts();
76 for_each_nb()
77 flush_amd64_tlb(hammers[gart_iterator]);
78} 60}
79 61
80static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) 62static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -153,7 +135,7 @@ static int amd64_fetch_size(void)
153 u32 temp; 135 u32 temp;
154 struct aper_size_info_32 *values; 136 struct aper_size_info_32 *values;
155 137
156 dev = hammers[0]; 138 dev = k8_northbridges[0];
157 if (dev==NULL) 139 if (dev==NULL)
158 return 0; 140 return 0;
159 141
@@ -201,9 +183,6 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
201 tmp &= ~(DISGARTCPU | DISGARTIO); 183 tmp &= ~(DISGARTCPU | DISGARTIO);
202 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); 184 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
203 185
204 /* keep CPU's coherent. */
205 flush_amd64_tlb (hammer);
206
207 return aper_base; 186 return aper_base;
208} 187}
209 188
@@ -222,13 +201,14 @@ static struct aper_size_info_32 amd_8151_sizes[7] =
222static int amd_8151_configure(void) 201static int amd_8151_configure(void)
223{ 202{
224 unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); 203 unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
225 int gart_iterator; 204 int i;
226 205
227 /* Configure AGP regs in each x86-64 host bridge. */ 206 /* Configure AGP regs in each x86-64 host bridge. */
228 for_each_nb() { 207 for (i = 0; i < num_k8_northbridges; i++) {
229 agp_bridge->gart_bus_addr = 208 agp_bridge->gart_bus_addr =
230 amd64_configure(hammers[gart_iterator],gatt_bus); 209 amd64_configure(k8_northbridges[i], gatt_bus);
231 } 210 }
211 k8_flush_garts();
232 return 0; 212 return 0;
233} 213}
234 214
@@ -236,12 +216,13 @@ static int amd_8151_configure(void)
236static void amd64_cleanup(void) 216static void amd64_cleanup(void)
237{ 217{
238 u32 tmp; 218 u32 tmp;
239 int gart_iterator; 219 int i;
240 for_each_nb() { 220 for (i = 0; i < num_k8_northbridges; i++) {
221 struct pci_dev *dev = k8_northbridges[i];
241 /* disable gart translation */ 222 /* disable gart translation */
242 pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); 223 pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp);
243 tmp &= ~AMD64_GARTEN; 224 tmp &= ~AMD64_GARTEN;
244 pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp); 225 pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp);
245 } 226 }
246} 227}
247 228
@@ -311,7 +292,7 @@ static int __devinit aperture_valid(u64 aper, u32 size)
311/* 292/*
312 * W*s centric BIOS sometimes only set up the aperture in the AGP 293 * W*s centric BIOS sometimes only set up the aperture in the AGP
313 * bridge, not the northbridge. On AMD64 this is handled early 294 * bridge, not the northbridge. On AMD64 this is handled early
314 * in aperture.c, but when GART_IOMMU is not enabled or we run 295 * in aperture.c, but when IOMMU is not enabled or we run
315 * on a 32bit kernel this needs to be redone. 296 * on a 32bit kernel this needs to be redone.
316 * Unfortunately it is impossible to fix the aperture here because it's too late 297 * Unfortunately it is impossible to fix the aperture here because it's too late
317 * to allocate that much memory. But at least error out cleanly instead of 298 * to allocate that much memory. But at least error out cleanly instead of
@@ -361,17 +342,15 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
361 342
362static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) 343static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
363{ 344{
364 struct pci_dev *loop_dev = NULL; 345 int i;
365 int i = 0; 346
366 347 if (cache_k8_northbridges() < 0)
367 /* cache pci_devs of northbridges. */ 348 return -ENODEV;
368 while ((loop_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev)) 349
369 != NULL) { 350 i = 0;
370 if (i == MAX_HAMMER_GARTS) { 351 for (i = 0; i < num_k8_northbridges; i++) {
371 printk(KERN_ERR PFX "Too many northbridges for AGP\n"); 352 struct pci_dev *dev = k8_northbridges[i];
372 return -1; 353 if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
373 }
374 if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) {
375 printk(KERN_ERR PFX "No usable aperture found.\n"); 354 printk(KERN_ERR PFX "No usable aperture found.\n");
376#ifdef __x86_64__ 355#ifdef __x86_64__
377 /* should port this to i386 */ 356 /* should port this to i386 */
@@ -379,10 +358,8 @@ static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
379#endif 358#endif
380 return -1; 359 return -1;
381 } 360 }
382 hammers[i++] = loop_dev;
383 } 361 }
384 nr_garts = i; 362 return 0;
385 return i == 0 ? -1 : 0;
386} 363}
387 364
388/* Handle AMD 8151 quirks */ 365/* Handle AMD 8151 quirks */
@@ -450,7 +427,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
450 } 427 }
451 428
452 /* shadow x86-64 registers into ULi registers */ 429 /* shadow x86-64 registers into ULi registers */
453 pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &httfea); 430 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea);
454 431
455 /* if x86-64 aperture base is beyond 4G, exit here */ 432 /* if x86-64 aperture base is beyond 4G, exit here */
456 if ((httfea & 0x7fff) >> (32 - 25)) 433 if ((httfea & 0x7fff) >> (32 - 25))
@@ -513,7 +490,7 @@ static int __devinit nforce3_agp_init(struct pci_dev *pdev)
513 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); 490 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
514 491
515 /* shadow x86-64 registers into NVIDIA registers */ 492 /* shadow x86-64 registers into NVIDIA registers */
516 pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase); 493 pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase);
517 494
518 /* if x86-64 aperture base is beyond 4G, exit here */ 495 /* if x86-64 aperture base is beyond 4G, exit here */
519 if ( (apbase & 0x7fff) >> (32 - 25) ) { 496 if ( (apbase & 0x7fff) >> (32 - 25) ) {
@@ -754,10 +731,6 @@ static struct pci_driver agp_amd64_pci_driver = {
754int __init agp_amd64_init(void) 731int __init agp_amd64_init(void)
755{ 732{
756 int err = 0; 733 int err = 0;
757 static struct pci_device_id amd64nb[] = {
758 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
759 { },
760 };
761 734
762 if (agp_off) 735 if (agp_off)
763 return -EINVAL; 736 return -EINVAL;
@@ -774,7 +747,7 @@ int __init agp_amd64_init(void)
774 } 747 }
775 748
776 /* First check that we have at least one AMD64 NB */ 749 /* First check that we have at least one AMD64 NB */
777 if (!pci_dev_present(amd64nb)) 750 if (!pci_dev_present(k8_nb_ids))
778 return -ENODEV; 751 return -ENODEV;
779 752
780 /* Look for any AGP bridge */ 753 /* Look for any AGP bridge */
@@ -802,7 +775,7 @@ static void __exit agp_amd64_cleanup(void)
802 775
803/* On AMD64 the PCI driver needs to initialize this driver early 776/* On AMD64 the PCI driver needs to initialize this driver early
804 for the IOMMU, so it has to be called via a backdoor. */ 777 for the IOMMU, so it has to be called via a backdoor. */
805#ifndef CONFIG_GART_IOMMU 778#ifndef CONFIG_IOMMU
806module_init(agp_amd64_init); 779module_init(agp_amd64_init);
807module_exit(agp_amd64_cleanup); 780module_exit(agp_amd64_cleanup);
808#endif 781#endif
diff --git a/drivers/pci/msi-apic.c b/drivers/pci/msi-apic.c
index 0eb5fe9003a2..5ed798b319c7 100644
--- a/drivers/pci/msi-apic.c
+++ b/drivers/pci/msi-apic.c
@@ -4,6 +4,7 @@
4 4
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/irq.h> 6#include <linux/irq.h>
7#include <asm/smp.h>
7 8
8#include "msi.h" 9#include "msi.h"
9 10
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 35b0a6ebd3f5..7cea514e810a 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -104,8 +104,11 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co
104 * always true on real computers. It also has some slight problems 104 * always true on real computers. It also has some slight problems
105 * with the GART on x86-64. I've btw never tried DMA from PCI space 105 * with the GART on x86-64. I've btw never tried DMA from PCI space
106 * on this platform but don't be surprised if its problematic. 106 * on this platform but don't be surprised if its problematic.
107 * [AK: something is very very wrong when a driver tests this symbol.
108 * Someone should figure out what the comment writer really meant here and fix
109 * the code. Or just remove that bad code. ]
107 */ 110 */
108#ifndef CONFIG_GART_IOMMU 111#ifndef CONFIG_IOMMU
109 if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) { 112 if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) {
110 init->HostPhysMemPages = 113 init->HostPhysMemPages =
111 cpu_to_le32(num_physpages << (PAGE_SHIFT-12)); 114 cpu_to_le32(num_physpages << (PAGE_SHIFT-12));
diff --git a/fs/compat.c b/fs/compat.c
index 7e7e5bc4f3cf..e31e9cf96647 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -55,6 +55,20 @@
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); 56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57 57
58int compat_log = 1;
59
60int compat_printk(const char *fmt, ...)
61{
62 va_list ap;
63 int ret;
64 if (!compat_log)
65 return 0;
66 va_start(ap, fmt);
67 ret = vprintk(fmt, ap);
68 va_end(ap);
69 return ret;
70}
71
58/* 72/*
59 * Not all architectures have sys_utime, so implement this in terms 73 * Not all architectures have sys_utime, so implement this in terms
60 * of sys_utimes. 74 * of sys_utimes.
@@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
359 sprintf(buf,"'%c'", (cmd>>24) & 0x3f); 373 sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
360 if (!isprint(buf[1])) 374 if (!isprint(buf[1]))
361 sprintf(buf, "%02x", buf[1]); 375 sprintf(buf, "%02x", buf[1]);
362 printk("ioctl32(%s:%d): Unknown cmd fd(%d) " 376 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
363 "cmd(%08x){%s} arg(%08x) on %s\n", 377 "cmd(%08x){%s} arg(%08x) on %s\n",
364 current->comm, current->pid, 378 current->comm, current->pid,
365 (int)fd, (unsigned int)cmd, buf, 379 (int)fd, (unsigned int)cmd, buf,
diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h
index d79e9ee10fd7..c61bd1a17f37 100644
--- a/include/asm-i386/alternative.h
+++ b/include/asm-i386/alternative.h
@@ -5,6 +5,8 @@
5 5
6#include <asm/types.h> 6#include <asm/types.h>
7 7
8#include <linux/types.h>
9
8struct alt_instr { 10struct alt_instr {
9 u8 *instr; /* original instruction */ 11 u8 *instr; /* original instruction */
10 u8 *replacement; 12 u8 *replacement;
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 1d8362cb2c5d..2c1e371cebb6 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -111,24 +111,12 @@ extern void init_apic_mappings (void);
111extern void smp_local_timer_interrupt (struct pt_regs * regs); 111extern void smp_local_timer_interrupt (struct pt_regs * regs);
112extern void setup_boot_APIC_clock (void); 112extern void setup_boot_APIC_clock (void);
113extern void setup_secondary_APIC_clock (void); 113extern void setup_secondary_APIC_clock (void);
114extern void setup_apic_nmi_watchdog (void);
115extern int reserve_lapic_nmi(void);
116extern void release_lapic_nmi(void);
117extern void disable_timer_nmi_watchdog(void);
118extern void enable_timer_nmi_watchdog(void);
119extern void nmi_watchdog_tick (struct pt_regs * regs);
120extern int APIC_init_uniprocessor (void); 114extern int APIC_init_uniprocessor (void);
121extern void disable_APIC_timer(void); 115extern void disable_APIC_timer(void);
122extern void enable_APIC_timer(void); 116extern void enable_APIC_timer(void);
123 117
124extern void enable_NMI_through_LVT0 (void * dummy); 118extern void enable_NMI_through_LVT0 (void * dummy);
125 119
126extern unsigned int nmi_watchdog;
127#define NMI_NONE 0
128#define NMI_IO_APIC 1
129#define NMI_LOCAL_APIC 2
130#define NMI_INVALID 3
131
132extern int disable_timer_pin_1; 120extern int disable_timer_pin_1;
133 121
134void smp_send_timer_broadcast_ipi(struct pt_regs *regs); 122void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index 3ecedbafa8ce..d314ebb3d59e 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -72,6 +72,7 @@
72#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ 72#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
73#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 73#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
74#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ 74#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
75#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
75 76
76/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 77/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
77#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 78#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-i386/dwarf2.h b/include/asm-i386/dwarf2.h
new file mode 100644
index 000000000000..2280f6272f80
--- /dev/null
+++ b/include/asm-i386/dwarf2.h
@@ -0,0 +1,54 @@
1#ifndef _DWARF2_H
2#define _DWARF2_H
3
4#include <linux/config.h>
5
6#ifndef __ASSEMBLY__
7#warning "asm/dwarf2.h should be only included in pure assembly files"
8#endif
9
10/*
11 Macros for dwarf2 CFI unwind table entries.
12 See "as.info" for details on these pseudo ops. Unfortunately
13 they are only supported in very new binutils, so define them
14 away for older version.
15 */
16
17#ifdef CONFIG_UNWIND_INFO
18
19#define CFI_STARTPROC .cfi_startproc
20#define CFI_ENDPROC .cfi_endproc
21#define CFI_DEF_CFA .cfi_def_cfa
22#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
23#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
24#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
25#define CFI_OFFSET .cfi_offset
26#define CFI_REL_OFFSET .cfi_rel_offset
27#define CFI_REGISTER .cfi_register
28#define CFI_RESTORE .cfi_restore
29#define CFI_REMEMBER_STATE .cfi_remember_state
30#define CFI_RESTORE_STATE .cfi_restore_state
31
32#else
33
34/* Due to the structure of pre-existing code, don't use assembler line
35 comment character # to ignore the arguments. Instead, use a dummy macro. */
36.macro ignore a=0, b=0, c=0, d=0
37.endm
38
39#define CFI_STARTPROC ignore
40#define CFI_ENDPROC ignore
41#define CFI_DEF_CFA ignore
42#define CFI_DEF_CFA_REGISTER ignore
43#define CFI_DEF_CFA_OFFSET ignore
44#define CFI_ADJUST_CFA_OFFSET ignore
45#define CFI_OFFSET ignore
46#define CFI_REL_OFFSET ignore
47#define CFI_REGISTER ignore
48#define CFI_RESTORE ignore
49#define CFI_REMEMBER_STATE ignore
50#define CFI_RESTORE_STATE ignore
51
52#endif
53
54#endif
diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h
index 95d3fd090298..a4c0a5a9ffd8 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -19,6 +19,8 @@
19 19
20struct hw_interrupt_type; 20struct hw_interrupt_type;
21 21
22#define NMI_VECTOR 0x02
23
22/* 24/*
23 * Various low-level irq details needed by irq.c, process.c, 25 * Various low-level irq details needed by irq.c, process.c,
24 * time.c, io_apic.c and smp.c 26 * time.c, io_apic.c and smp.c
diff --git a/include/asm-i386/intel_arch_perfmon.h b/include/asm-i386/intel_arch_perfmon.h
new file mode 100644
index 000000000000..134ea9cc5283
--- /dev/null
+++ b/include/asm-i386/intel_arch_perfmon.h
@@ -0,0 +1,19 @@
1#ifndef X86_INTEL_ARCH_PERFMON_H
2#define X86_INTEL_ARCH_PERFMON_H 1
3
4#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
5#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
6
7#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
8#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
9
10#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
11#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
12#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
13#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
18
19#endif /* X86_INTEL_ARCH_PERFMON_H */
diff --git a/include/asm-i386/k8.h b/include/asm-i386/k8.h
new file mode 100644
index 000000000000..dfd88a6e6040
--- /dev/null
+++ b/include/asm-i386/k8.h
@@ -0,0 +1 @@
#include <asm-x86_64/k8.h>
diff --git a/include/asm-i386/local.h b/include/asm-i386/local.h
index e67fa08260fe..3b4998c51d08 100644
--- a/include/asm-i386/local.h
+++ b/include/asm-i386/local.h
@@ -55,12 +55,26 @@ static __inline__ void local_sub(long i, local_t *v)
55 * much more efficient than these naive implementations. Note they take 55 * much more efficient than these naive implementations. Note they take
56 * a variable, not an address. 56 * a variable, not an address.
57 */ 57 */
58#define cpu_local_read(v) local_read(&__get_cpu_var(v)) 58
59#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) 59/* Need to disable preemption for the cpu local counters otherwise we could
60#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) 60 still access a variable of a previous CPU in a non atomic way. */
61#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) 61#define cpu_local_wrap_v(v) \
62#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) 62 ({ local_t res__; \
63#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) 63 preempt_disable(); \
64 res__ = (v); \
65 preempt_enable(); \
66 res__; })
67#define cpu_local_wrap(v) \
68 ({ preempt_disable(); \
69 v; \
70 preempt_enable(); }) \
71
72#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v)))
73#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i)))
74#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v)))
75#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v)))
76#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v)))
77#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v)))
64 78
65#define __cpu_local_inc(v) cpu_local_inc(v) 79#define __cpu_local_inc(v) cpu_local_inc(v)
66#define __cpu_local_dec(v) cpu_local_dec(v) 80#define __cpu_local_dec(v) cpu_local_dec(v)
diff --git a/include/asm-i386/mach-default/mach_ipi.h b/include/asm-i386/mach-default/mach_ipi.h
index a1d0072e36bc..0dba244c86db 100644
--- a/include/asm-i386/mach-default/mach_ipi.h
+++ b/include/asm-i386/mach-default/mach_ipi.h
@@ -1,6 +1,9 @@
1#ifndef __ASM_MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define __ASM_MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4/* Avoid include hell */
5#define NMI_VECTOR 0x02
6
4void send_IPI_mask_bitmask(cpumask_t mask, int vector); 7void send_IPI_mask_bitmask(cpumask_t mask, int vector);
5void __send_IPI_shortcut(unsigned int shortcut, int vector); 8void __send_IPI_shortcut(unsigned int shortcut, int vector);
6 9
@@ -13,7 +16,7 @@ static inline void send_IPI_mask(cpumask_t mask, int vector)
13 16
14static inline void __local_send_IPI_allbutself(int vector) 17static inline void __local_send_IPI_allbutself(int vector)
15{ 18{
16 if (no_broadcast) { 19 if (no_broadcast || vector == NMI_VECTOR) {
17 cpumask_t mask = cpu_online_map; 20 cpumask_t mask = cpu_online_map;
18 21
19 cpu_clear(smp_processor_id(), mask); 22 cpu_clear(smp_processor_id(), mask);
@@ -24,7 +27,7 @@ static inline void __local_send_IPI_allbutself(int vector)
24 27
25static inline void __local_send_IPI_all(int vector) 28static inline void __local_send_IPI_all(int vector)
26{ 29{
27 if (no_broadcast) 30 if (no_broadcast || vector == NMI_VECTOR)
28 send_IPI_mask(cpu_online_map, vector); 31 send_IPI_mask(cpu_online_map, vector);
29 else 32 else
30 __send_IPI_shortcut(APIC_DEST_ALLINC, vector); 33 __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 21f16638fc61..67d994799999 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -5,24 +5,38 @@
5#define ASM_NMI_H 5#define ASM_NMI_H
6 6
7#include <linux/pm.h> 7#include <linux/pm.h>
8 8
9struct pt_regs; 9struct pt_regs;
10 10
11typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu); 11typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
12 12
13/** 13/**
14 * set_nmi_callback 14 * set_nmi_callback
15 * 15 *
16 * Set a handler for an NMI. Only one handler may be 16 * Set a handler for an NMI. Only one handler may be
17 * set. Return 1 if the NMI was handled. 17 * set. Return 1 if the NMI was handled.
18 */ 18 */
19void set_nmi_callback(nmi_callback_t callback); 19void set_nmi_callback(nmi_callback_t callback);
20 20
21/** 21/**
22 * unset_nmi_callback 22 * unset_nmi_callback
23 * 23 *
24 * Remove the handler previously set. 24 * Remove the handler previously set.
25 */ 25 */
26void unset_nmi_callback(void); 26void unset_nmi_callback(void);
27 27
28extern void setup_apic_nmi_watchdog (void);
29extern int reserve_lapic_nmi(void);
30extern void release_lapic_nmi(void);
31extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void);
33extern void nmi_watchdog_tick (struct pt_regs * regs);
34
35extern unsigned int nmi_watchdog;
36#define NMI_DEFAULT -1
37#define NMI_NONE 0
38#define NMI_IO_APIC 1
39#define NMI_LOCAL_APIC 2
40#define NMI_INVALID 3
41
28#endif /* ASM_NMI_H */ 42#endif /* ASM_NMI_H */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 0c83cf12eec9..55ea992da329 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -112,6 +112,7 @@ extern char ignore_fpu_irq;
112extern void identify_cpu(struct cpuinfo_x86 *); 112extern void identify_cpu(struct cpuinfo_x86 *);
113extern void print_cpu_info(struct cpuinfo_x86 *); 113extern void print_cpu_info(struct cpuinfo_x86 *);
114extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 114extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
115extern unsigned short num_cache_leaves;
115 116
116#ifdef CONFIG_X86_HT 117#ifdef CONFIG_X86_HT
117extern void detect_ht(struct cpuinfo_x86 *c); 118extern void detect_ht(struct cpuinfo_x86 *c);
@@ -554,7 +555,7 @@ extern void prepare_to_copy(struct task_struct *tsk);
554extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); 555extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
555 556
556extern unsigned long thread_saved_pc(struct task_struct *tsk); 557extern unsigned long thread_saved_pc(struct task_struct *tsk);
557void show_trace(struct task_struct *task, unsigned long *stack); 558void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack);
558 559
559unsigned long get_wchan(struct task_struct *p); 560unsigned long get_wchan(struct task_struct *p);
560 561
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 8420ed12491e..fdbc7f422ea5 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -140,8 +140,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
140#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ 140#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
141#define TIF_SECCOMP 8 /* secure computing */ 141#define TIF_SECCOMP 8 /* secure computing */
142#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ 142#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
143#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 143#define TIF_MEMDIE 16
144#define TIF_MEMDIE 17
145 144
146#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 145#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
147#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 146#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -153,7 +152,6 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
153#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) 152#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
154#define _TIF_SECCOMP (1<<TIF_SECCOMP) 153#define _TIF_SECCOMP (1<<TIF_SECCOMP)
155#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 154#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
156#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
157 155
158/* work to do on interrupt/exception return */ 156/* work to do on interrupt/exception return */
159#define _TIF_WORK_MASK \ 157#define _TIF_WORK_MASK \
@@ -170,6 +168,9 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
170 * have to worry about atomic accesses. 168 * have to worry about atomic accesses.
171 */ 169 */
172#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ 170#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
171#define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */
172
173#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
173 174
174#endif /* __KERNEL__ */ 175#endif /* __KERNEL__ */
175 176
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
new file mode 100644
index 000000000000..d480f2e38215
--- /dev/null
+++ b/include/asm-i386/unwind.h
@@ -0,0 +1,98 @@
1#ifndef _ASM_I386_UNWIND_H
2#define _ASM_I386_UNWIND_H
3
4/*
5 * Copyright (C) 2002-2006 Novell, Inc.
6 * Jan Beulich <jbeulich@novell.com>
7 * This code is released under version 2 of the GNU GPL.
8 */
9
10#ifdef CONFIG_STACK_UNWIND
11
12#include <linux/sched.h>
13#include <asm/fixmap.h>
14#include <asm/ptrace.h>
15#include <asm/uaccess.h>
16
17struct unwind_frame_info
18{
19 struct pt_regs regs;
20 struct task_struct *task;
21};
22
23#define UNW_PC(frame) (frame)->regs.eip
24#define UNW_SP(frame) (frame)->regs.esp
25#ifdef CONFIG_FRAME_POINTER
26#define UNW_FP(frame) (frame)->regs.ebp
27#define FRAME_RETADDR_OFFSET 4
28#define FRAME_LINK_OFFSET 0
29#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0)
30#define STACK_TOP(tsk) ((tsk)->thread.esp0)
31#endif
32#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
33
34#define UNW_REGISTER_INFO \
35 PTREGS_INFO(eax), \
36 PTREGS_INFO(ecx), \
37 PTREGS_INFO(edx), \
38 PTREGS_INFO(ebx), \
39 PTREGS_INFO(esp), \
40 PTREGS_INFO(ebp), \
41 PTREGS_INFO(esi), \
42 PTREGS_INFO(edi), \
43 PTREGS_INFO(eip)
44
45static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
46 /*const*/ struct pt_regs *regs)
47{
48 if (user_mode_vm(regs))
49 info->regs = *regs;
50 else {
51 memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
52 info->regs.esp = (unsigned long)&regs->esp;
53 info->regs.xss = __KERNEL_DS;
54 }
55}
56
57static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
58{
59 memset(&info->regs, 0, sizeof(info->regs));
60 info->regs.eip = info->task->thread.eip;
61 info->regs.xcs = __KERNEL_CS;
62 __get_user(info->regs.ebp, (long *)info->task->thread.esp);
63 info->regs.esp = info->task->thread.esp;
64 info->regs.xss = __KERNEL_DS;
65 info->regs.xds = __USER_DS;
66 info->regs.xes = __USER_DS;
67}
68
69extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
70 asmlinkage int (*callback)(struct unwind_frame_info *,
71 void *arg),
72 void *arg);
73
74static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
75{
76#if 0 /* This can only work when selector register and EFLAGS saves/restores
77 are properly annotated (and tracked in UNW_REGISTER_INFO). */
78 return user_mode_vm(&info->regs);
79#else
80 return info->regs.eip < PAGE_OFFSET
81 || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL)
82 && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE)
83 || info->regs.esp < PAGE_OFFSET;
84#endif
85}
86
87#else
88
89#define UNW_PC(frame) ((void)(frame), 0)
90
91static inline int arch_unw_user_mode(const void *info)
92{
93 return 0;
94}
95
96#endif
97
98#endif /* _ASM_I386_UNWIND_H */
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index e5392c4d30c6..8bc9869e5765 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -27,6 +27,7 @@ struct thread_info {
27 __u32 flags; /* thread_info flags (see TIF_*) */ 27 __u32 flags; /* thread_info flags (see TIF_*) */
28 __u32 cpu; /* current CPU */ 28 __u32 cpu; /* current CPU */
29 __u32 last_cpu; /* Last CPU thread ran on */ 29 __u32 last_cpu; /* Last CPU thread ran on */
30 __u32 status; /* Thread synchronous flags */
30 mm_segment_t addr_limit; /* user-level address space limit */ 31 mm_segment_t addr_limit; /* user-level address space limit */
31 int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ 32 int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
32 struct restart_block restart_block; 33 struct restart_block restart_block;
@@ -103,4 +104,8 @@ struct thread_info {
103/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ 104/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
104#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) 105#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
105 106
107#define TS_POLLING 1 /* true if in idle loop and not sleeping */
108
109#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
110
106#endif /* _ASM_IA64_THREAD_INFO_H */ 111#endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h
new file mode 100644
index 000000000000..387c8f66af7d
--- /dev/null
+++ b/include/asm-x86_64/alternative.h
@@ -0,0 +1,146 @@
1#ifndef _X86_64_ALTERNATIVE_H
2#define _X86_64_ALTERNATIVE_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7
8struct alt_instr {
9 u8 *instr; /* original instruction */
10 u8 *replacement;
11 u8 cpuid; /* cpuid bit set for replacement */
12 u8 instrlen; /* length of original instruction */
13 u8 replacementlen; /* length of new instruction, <= instrlen */
14 u8 pad[5];
15};
16
17extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
18
19struct module;
20extern void alternatives_smp_module_add(struct module *mod, char *name,
21 void *locks, void *locks_end,
22 void *text, void *text_end);
23extern void alternatives_smp_module_del(struct module *mod);
24extern void alternatives_smp_switch(int smp);
25
26#endif
27
28/*
29 * Alternative instructions for different CPU types or capabilities.
30 *
31 * This allows optimized instructions to be used even on generic binary
32 * kernels.
33 *
34 * The length of oldinstr must be greater than or equal to the length of newinstr.
35 * It can be padded with nops as needed.
36 *
37 * For non barrier like inlines please define new variants
38 * without volatile and memory clobber.
39 */
40#define alternative(oldinstr, newinstr, feature) \
41 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
42 ".section .altinstructions,\"a\"\n" \
43 " .align 8\n" \
44 " .quad 661b\n" /* label */ \
45 " .quad 663f\n" /* new instruction */ \
46 " .byte %c0\n" /* feature bit */ \
47 " .byte 662b-661b\n" /* sourcelen */ \
48 " .byte 664f-663f\n" /* replacementlen */ \
49 ".previous\n" \
50 ".section .altinstr_replacement,\"ax\"\n" \
51 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
52 ".previous" :: "i" (feature) : "memory")
53
54/*
55 * Alternative inline assembly with input.
56 *
57 * Peculiarities:
58 * No memory clobber here.
59 * Argument numbers start with 1.
60 * Best is to use constraints that are fixed size (like (%1) ... "r")
61 * If you use variable sized constraints like "m" or "g" in the
62 * replacement make sure to pad to the worst case length.
63 */
64#define alternative_input(oldinstr, newinstr, feature, input...) \
65 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
66 ".section .altinstructions,\"a\"\n" \
67 " .align 8\n" \
68 " .quad 661b\n" /* label */ \
69 " .quad 663f\n" /* new instruction */ \
70 " .byte %c0\n" /* feature bit */ \
71 " .byte 662b-661b\n" /* sourcelen */ \
72 " .byte 664f-663f\n" /* replacementlen */ \
73 ".previous\n" \
74 ".section .altinstr_replacement,\"ax\"\n" \
75 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
76 ".previous" :: "i" (feature), ##input)
77
78/* Like alternative_input, but with a single output argument */
79#define alternative_io(oldinstr, newinstr, feature, output, input...) \
80 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
81 ".section .altinstructions,\"a\"\n" \
82 " .align 8\n" \
83 " .quad 661b\n" /* label */ \
84 " .quad 663f\n" /* new instruction */ \
85 " .byte %c[feat]\n" /* feature bit */ \
86 " .byte 662b-661b\n" /* sourcelen */ \
87 " .byte 664f-663f\n" /* replacementlen */ \
88 ".previous\n" \
89 ".section .altinstr_replacement,\"ax\"\n" \
90 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
91 ".previous" : output : [feat] "i" (feature), ##input)
92
93/*
94 * Alternative inline assembly for SMP.
95 *
96 * alternative_smp() takes two versions (SMP first, UP second) and is
97 * for more complex stuff such as spinlocks.
98 *
99 * The LOCK_PREFIX macro defined here replaces the LOCK and
100 * LOCK_PREFIX macros used everywhere in the source tree.
101 *
102 * SMP alternatives use the same data structures as the other
103 * alternatives and the X86_FEATURE_UP flag to indicate the case of a
104 * UP system running a SMP kernel. The existing apply_alternatives()
105 * works fine for patching a SMP kernel for UP.
106 *
107 * The SMP alternative tables can be kept after boot and contain both
108 * UP and SMP versions of the instructions to allow switching back to
109 * SMP at runtime, when hotplugging in a new CPU, which is especially
110 * useful in virtualized environments.
111 *
112 * The very common lock prefix is handled as special case in a
113 * separate table which is a pure address list without replacement ptr
114 * and size information. That keeps the table sizes small.
115 */
116
117#ifdef CONFIG_SMP
118#define alternative_smp(smpinstr, upinstr, args...) \
119 asm volatile ("661:\n\t" smpinstr "\n662:\n" \
120 ".section .smp_altinstructions,\"a\"\n" \
121 " .align 8\n" \
122 " .quad 661b\n" /* label */ \
123 " .quad 663f\n" /* new instruction */ \
124 " .byte 0x66\n" /* X86_FEATURE_UP */ \
125 " .byte 662b-661b\n" /* sourcelen */ \
126 " .byte 664f-663f\n" /* replacementlen */ \
127 ".previous\n" \
128 ".section .smp_altinstr_replacement,\"awx\"\n" \
129 "663:\n\t" upinstr "\n" /* replacement */ \
130 "664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \
131 ".previous" : args)
132
133#define LOCK_PREFIX \
134 ".section .smp_locks,\"a\"\n" \
135 " .align 8\n" \
136 " .quad 661f\n" /* address */ \
137 ".previous\n" \
138 "661:\n\tlock; "
139
140#else /* ! CONFIG_SMP */
141#define alternative_smp(smpinstr, upinstr, args...) \
142 asm volatile (upinstr : args)
143#define LOCK_PREFIX ""
144#endif
145
146#endif /* _X86_64_ALTERNATIVE_H */
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index a731be2204d2..9c96a0a8d1bd 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -49,7 +49,8 @@ static __inline unsigned int apic_read(unsigned long reg)
49 49
50static __inline__ void apic_wait_icr_idle(void) 50static __inline__ void apic_wait_icr_idle(void)
51{ 51{
52 while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); 52 while (apic_read( APIC_ICR ) & APIC_ICR_BUSY)
53 cpu_relax();
53} 54}
54 55
55static inline void ack_APIC_irq(void) 56static inline void ack_APIC_irq(void)
@@ -79,30 +80,23 @@ extern void init_apic_mappings (void);
79extern void smp_local_timer_interrupt (struct pt_regs * regs); 80extern void smp_local_timer_interrupt (struct pt_regs * regs);
80extern void setup_boot_APIC_clock (void); 81extern void setup_boot_APIC_clock (void);
81extern void setup_secondary_APIC_clock (void); 82extern void setup_secondary_APIC_clock (void);
82extern void setup_apic_nmi_watchdog (void);
83extern int reserve_lapic_nmi(void);
84extern void release_lapic_nmi(void);
85extern void disable_timer_nmi_watchdog(void);
86extern void enable_timer_nmi_watchdog(void);
87extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
88extern int APIC_init_uniprocessor (void); 83extern int APIC_init_uniprocessor (void);
89extern void disable_APIC_timer(void); 84extern void disable_APIC_timer(void);
90extern void enable_APIC_timer(void); 85extern void enable_APIC_timer(void);
91extern void clustered_apic_check(void); 86extern void clustered_apic_check(void);
92 87
93extern void nmi_watchdog_default(void); 88extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
94extern int setup_nmi_watchdog(char *); 89 unsigned char msg_type, unsigned char mask);
95 90
96extern unsigned int nmi_watchdog; 91#define K8_APIC_EXT_LVT_BASE 0x500
97#define NMI_DEFAULT -1 92#define K8_APIC_EXT_INT_MSG_FIX 0x0
98#define NMI_NONE 0 93#define K8_APIC_EXT_INT_MSG_SMI 0x2
99#define NMI_IO_APIC 1 94#define K8_APIC_EXT_INT_MSG_NMI 0x4
100#define NMI_LOCAL_APIC 2 95#define K8_APIC_EXT_INT_MSG_EXT 0x7
101#define NMI_INVALID 3 96#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0
102 97
103extern int disable_timer_pin_1; 98extern int disable_timer_pin_1;
104 99
105extern void setup_threshold_lvt(unsigned long lvt_off);
106 100
107void smp_send_timer_broadcast_ipi(void); 101void smp_send_timer_broadcast_ipi(void);
108void switch_APIC_timer_to_ipi(void *cpumask); 102void switch_APIC_timer_to_ipi(void *cpumask);
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index bd3fa67ed835..007e88d6d43f 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -1,7 +1,7 @@
1#ifndef __ARCH_X86_64_ATOMIC__ 1#ifndef __ARCH_X86_64_ATOMIC__
2#define __ARCH_X86_64_ATOMIC__ 2#define __ARCH_X86_64_ATOMIC__
3 3
4#include <asm/types.h> 4#include <asm/alternative.h>
5 5
6/* atomic_t should be 32 bit signed type */ 6/* atomic_t should be 32 bit signed type */
7 7
@@ -52,7 +52,7 @@ typedef struct { volatile int counter; } atomic_t;
52static __inline__ void atomic_add(int i, atomic_t *v) 52static __inline__ void atomic_add(int i, atomic_t *v)
53{ 53{
54 __asm__ __volatile__( 54 __asm__ __volatile__(
55 LOCK "addl %1,%0" 55 LOCK_PREFIX "addl %1,%0"
56 :"=m" (v->counter) 56 :"=m" (v->counter)
57 :"ir" (i), "m" (v->counter)); 57 :"ir" (i), "m" (v->counter));
58} 58}
@@ -67,7 +67,7 @@ static __inline__ void atomic_add(int i, atomic_t *v)
67static __inline__ void atomic_sub(int i, atomic_t *v) 67static __inline__ void atomic_sub(int i, atomic_t *v)
68{ 68{
69 __asm__ __volatile__( 69 __asm__ __volatile__(
70 LOCK "subl %1,%0" 70 LOCK_PREFIX "subl %1,%0"
71 :"=m" (v->counter) 71 :"=m" (v->counter)
72 :"ir" (i), "m" (v->counter)); 72 :"ir" (i), "m" (v->counter));
73} 73}
@@ -86,7 +86,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
86 unsigned char c; 86 unsigned char c;
87 87
88 __asm__ __volatile__( 88 __asm__ __volatile__(
89 LOCK "subl %2,%0; sete %1" 89 LOCK_PREFIX "subl %2,%0; sete %1"
90 :"=m" (v->counter), "=qm" (c) 90 :"=m" (v->counter), "=qm" (c)
91 :"ir" (i), "m" (v->counter) : "memory"); 91 :"ir" (i), "m" (v->counter) : "memory");
92 return c; 92 return c;
@@ -101,7 +101,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
101static __inline__ void atomic_inc(atomic_t *v) 101static __inline__ void atomic_inc(atomic_t *v)
102{ 102{
103 __asm__ __volatile__( 103 __asm__ __volatile__(
104 LOCK "incl %0" 104 LOCK_PREFIX "incl %0"
105 :"=m" (v->counter) 105 :"=m" (v->counter)
106 :"m" (v->counter)); 106 :"m" (v->counter));
107} 107}
@@ -115,7 +115,7 @@ static __inline__ void atomic_inc(atomic_t *v)
115static __inline__ void atomic_dec(atomic_t *v) 115static __inline__ void atomic_dec(atomic_t *v)
116{ 116{
117 __asm__ __volatile__( 117 __asm__ __volatile__(
118 LOCK "decl %0" 118 LOCK_PREFIX "decl %0"
119 :"=m" (v->counter) 119 :"=m" (v->counter)
120 :"m" (v->counter)); 120 :"m" (v->counter));
121} 121}
@@ -133,7 +133,7 @@ static __inline__ int atomic_dec_and_test(atomic_t *v)
133 unsigned char c; 133 unsigned char c;
134 134
135 __asm__ __volatile__( 135 __asm__ __volatile__(
136 LOCK "decl %0; sete %1" 136 LOCK_PREFIX "decl %0; sete %1"
137 :"=m" (v->counter), "=qm" (c) 137 :"=m" (v->counter), "=qm" (c)
138 :"m" (v->counter) : "memory"); 138 :"m" (v->counter) : "memory");
139 return c != 0; 139 return c != 0;
@@ -152,7 +152,7 @@ static __inline__ int atomic_inc_and_test(atomic_t *v)
152 unsigned char c; 152 unsigned char c;
153 153
154 __asm__ __volatile__( 154 __asm__ __volatile__(
155 LOCK "incl %0; sete %1" 155 LOCK_PREFIX "incl %0; sete %1"
156 :"=m" (v->counter), "=qm" (c) 156 :"=m" (v->counter), "=qm" (c)
157 :"m" (v->counter) : "memory"); 157 :"m" (v->counter) : "memory");
158 return c != 0; 158 return c != 0;
@@ -172,7 +172,7 @@ static __inline__ int atomic_add_negative(int i, atomic_t *v)
172 unsigned char c; 172 unsigned char c;
173 173
174 __asm__ __volatile__( 174 __asm__ __volatile__(
175 LOCK "addl %2,%0; sets %1" 175 LOCK_PREFIX "addl %2,%0; sets %1"
176 :"=m" (v->counter), "=qm" (c) 176 :"=m" (v->counter), "=qm" (c)
177 :"ir" (i), "m" (v->counter) : "memory"); 177 :"ir" (i), "m" (v->counter) : "memory");
178 return c; 178 return c;
@@ -189,7 +189,7 @@ static __inline__ int atomic_add_return(int i, atomic_t *v)
189{ 189{
190 int __i = i; 190 int __i = i;
191 __asm__ __volatile__( 191 __asm__ __volatile__(
192 LOCK "xaddl %0, %1;" 192 LOCK_PREFIX "xaddl %0, %1;"
193 :"=r"(i) 193 :"=r"(i)
194 :"m"(v->counter), "0"(i)); 194 :"m"(v->counter), "0"(i));
195 return i + __i; 195 return i + __i;
@@ -237,7 +237,7 @@ typedef struct { volatile long counter; } atomic64_t;
237static __inline__ void atomic64_add(long i, atomic64_t *v) 237static __inline__ void atomic64_add(long i, atomic64_t *v)
238{ 238{
239 __asm__ __volatile__( 239 __asm__ __volatile__(
240 LOCK "addq %1,%0" 240 LOCK_PREFIX "addq %1,%0"
241 :"=m" (v->counter) 241 :"=m" (v->counter)
242 :"ir" (i), "m" (v->counter)); 242 :"ir" (i), "m" (v->counter));
243} 243}
@@ -252,7 +252,7 @@ static __inline__ void atomic64_add(long i, atomic64_t *v)
252static __inline__ void atomic64_sub(long i, atomic64_t *v) 252static __inline__ void atomic64_sub(long i, atomic64_t *v)
253{ 253{
254 __asm__ __volatile__( 254 __asm__ __volatile__(
255 LOCK "subq %1,%0" 255 LOCK_PREFIX "subq %1,%0"
256 :"=m" (v->counter) 256 :"=m" (v->counter)
257 :"ir" (i), "m" (v->counter)); 257 :"ir" (i), "m" (v->counter));
258} 258}
@@ -271,7 +271,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
271 unsigned char c; 271 unsigned char c;
272 272
273 __asm__ __volatile__( 273 __asm__ __volatile__(
274 LOCK "subq %2,%0; sete %1" 274 LOCK_PREFIX "subq %2,%0; sete %1"
275 :"=m" (v->counter), "=qm" (c) 275 :"=m" (v->counter), "=qm" (c)
276 :"ir" (i), "m" (v->counter) : "memory"); 276 :"ir" (i), "m" (v->counter) : "memory");
277 return c; 277 return c;
@@ -286,7 +286,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
286static __inline__ void atomic64_inc(atomic64_t *v) 286static __inline__ void atomic64_inc(atomic64_t *v)
287{ 287{
288 __asm__ __volatile__( 288 __asm__ __volatile__(
289 LOCK "incq %0" 289 LOCK_PREFIX "incq %0"
290 :"=m" (v->counter) 290 :"=m" (v->counter)
291 :"m" (v->counter)); 291 :"m" (v->counter));
292} 292}
@@ -300,7 +300,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
300static __inline__ void atomic64_dec(atomic64_t *v) 300static __inline__ void atomic64_dec(atomic64_t *v)
301{ 301{
302 __asm__ __volatile__( 302 __asm__ __volatile__(
303 LOCK "decq %0" 303 LOCK_PREFIX "decq %0"
304 :"=m" (v->counter) 304 :"=m" (v->counter)
305 :"m" (v->counter)); 305 :"m" (v->counter));
306} 306}
@@ -318,7 +318,7 @@ static __inline__ int atomic64_dec_and_test(atomic64_t *v)
318 unsigned char c; 318 unsigned char c;
319 319
320 __asm__ __volatile__( 320 __asm__ __volatile__(
321 LOCK "decq %0; sete %1" 321 LOCK_PREFIX "decq %0; sete %1"
322 :"=m" (v->counter), "=qm" (c) 322 :"=m" (v->counter), "=qm" (c)
323 :"m" (v->counter) : "memory"); 323 :"m" (v->counter) : "memory");
324 return c != 0; 324 return c != 0;
@@ -337,7 +337,7 @@ static __inline__ int atomic64_inc_and_test(atomic64_t *v)
337 unsigned char c; 337 unsigned char c;
338 338
339 __asm__ __volatile__( 339 __asm__ __volatile__(
340 LOCK "incq %0; sete %1" 340 LOCK_PREFIX "incq %0; sete %1"
341 :"=m" (v->counter), "=qm" (c) 341 :"=m" (v->counter), "=qm" (c)
342 :"m" (v->counter) : "memory"); 342 :"m" (v->counter) : "memory");
343 return c != 0; 343 return c != 0;
@@ -357,7 +357,7 @@ static __inline__ int atomic64_add_negative(long i, atomic64_t *v)
357 unsigned char c; 357 unsigned char c;
358 358
359 __asm__ __volatile__( 359 __asm__ __volatile__(
360 LOCK "addq %2,%0; sets %1" 360 LOCK_PREFIX "addq %2,%0; sets %1"
361 :"=m" (v->counter), "=qm" (c) 361 :"=m" (v->counter), "=qm" (c)
362 :"ir" (i), "m" (v->counter) : "memory"); 362 :"ir" (i), "m" (v->counter) : "memory");
363 return c; 363 return c;
@@ -374,7 +374,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t *v)
374{ 374{
375 long __i = i; 375 long __i = i;
376 __asm__ __volatile__( 376 __asm__ __volatile__(
377 LOCK "xaddq %0, %1;" 377 LOCK_PREFIX "xaddq %0, %1;"
378 :"=r"(i) 378 :"=r"(i)
379 :"m"(v->counter), "0"(i)); 379 :"m"(v->counter), "0"(i));
380 return i + __i; 380 return i + __i;
@@ -418,11 +418,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v)
418 418
419/* These are x86-specific, used by some header files */ 419/* These are x86-specific, used by some header files */
420#define atomic_clear_mask(mask, addr) \ 420#define atomic_clear_mask(mask, addr) \
421__asm__ __volatile__(LOCK "andl %0,%1" \ 421__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
422: : "r" (~(mask)),"m" (*addr) : "memory") 422: : "r" (~(mask)),"m" (*addr) : "memory")
423 423
424#define atomic_set_mask(mask, addr) \ 424#define atomic_set_mask(mask, addr) \
425__asm__ __volatile__(LOCK "orl %0,%1" \ 425__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
426: : "r" ((unsigned)mask),"m" (*(addr)) : "memory") 426: : "r" ((unsigned)mask),"m" (*(addr)) : "memory")
427 427
428/* Atomic operations are already serializing on x86 */ 428/* Atomic operations are already serializing on x86 */
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index e9bf933d25d0..f7ba57b1cc08 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -5,12 +5,7 @@
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 */ 6 */
7 7
8 8#include <asm/alternative.h>
9#ifdef CONFIG_SMP
10#define LOCK_PREFIX "lock ; "
11#else
12#define LOCK_PREFIX ""
13#endif
14 9
15#define ADDR (*(volatile long *) addr) 10#define ADDR (*(volatile long *) addr)
16 11
diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h
new file mode 100644
index 000000000000..6e1654f30986
--- /dev/null
+++ b/include/asm-x86_64/calgary.h
@@ -0,0 +1,66 @@
1/*
2 * Derived from include/asm-powerpc/iommu.h
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#ifndef _ASM_X86_64_CALGARY_H
23#define _ASM_X86_64_CALGARY_H
24
25#include <linux/config.h>
26#include <linux/spinlock.h>
27#include <linux/device.h>
28#include <linux/dma-mapping.h>
29#include <asm/types.h>
30
31struct iommu_table {
32 unsigned long it_base; /* mapped address of tce table */
33 unsigned long it_hint; /* Hint for next alloc */
34 unsigned long *it_map; /* A simple allocation bitmap for now */
35 spinlock_t it_lock; /* Protects it_map */
36 unsigned int it_size; /* Size of iommu table in entries */
37 unsigned char it_busno; /* Bus number this table belongs to */
38 void __iomem *bbar;
39 u64 tar_val;
40 struct timer_list watchdog_timer;
41};
42
43#define TCE_TABLE_SIZE_UNSPECIFIED ~0
44#define TCE_TABLE_SIZE_64K 0
45#define TCE_TABLE_SIZE_128K 1
46#define TCE_TABLE_SIZE_256K 2
47#define TCE_TABLE_SIZE_512K 3
48#define TCE_TABLE_SIZE_1M 4
49#define TCE_TABLE_SIZE_2M 5
50#define TCE_TABLE_SIZE_4M 6
51#define TCE_TABLE_SIZE_8M 7
52
53#ifdef CONFIG_CALGARY_IOMMU
54extern int calgary_iommu_init(void);
55extern void detect_calgary(void);
56#else
57static inline int calgary_iommu_init(void) { return 1; }
58static inline void detect_calgary(void) { return; }
59#endif
60
61static inline unsigned int bus_to_phb(unsigned char busno)
62{
63 return ((busno % 15 == 0) ? 0 : busno / 2 + 1);
64}
65
66#endif /* _ASM_X86_64_CALGARY_H */
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index 662964b74e34..ee792faaca01 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -46,6 +46,7 @@
46#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ 46#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
47#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 47#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
48#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSR optimizations */ 48#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSR optimizations */
49#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
49#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ 50#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
50#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ 51#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
51#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ 52#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
@@ -65,6 +66,8 @@
65#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ 66#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
66#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ 67#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
67#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ 68#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */
69#define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */
70#define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
68 71
69/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 72/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
70#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 73#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index 498f66df36b9..b6da83dcc7a6 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -55,6 +55,13 @@ extern dma_addr_t bad_dma_address;
55extern struct dma_mapping_ops* dma_ops; 55extern struct dma_mapping_ops* dma_ops;
56extern int iommu_merge; 56extern int iommu_merge;
57 57
58static inline int valid_dma_direction(int dma_direction)
59{
60 return ((dma_direction == DMA_BIDIRECTIONAL) ||
61 (dma_direction == DMA_TO_DEVICE) ||
62 (dma_direction == DMA_FROM_DEVICE));
63}
64
58static inline int dma_mapping_error(dma_addr_t dma_addr) 65static inline int dma_mapping_error(dma_addr_t dma_addr)
59{ 66{
60 if (dma_ops->mapping_error) 67 if (dma_ops->mapping_error)
@@ -72,6 +79,7 @@ static inline dma_addr_t
72dma_map_single(struct device *hwdev, void *ptr, size_t size, 79dma_map_single(struct device *hwdev, void *ptr, size_t size,
73 int direction) 80 int direction)
74{ 81{
82 BUG_ON(!valid_dma_direction(direction));
75 return dma_ops->map_single(hwdev, ptr, size, direction); 83 return dma_ops->map_single(hwdev, ptr, size, direction);
76} 84}
77 85
@@ -79,6 +87,7 @@ static inline void
79dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, 87dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
80 int direction) 88 int direction)
81{ 89{
90 BUG_ON(!valid_dma_direction(direction));
82 dma_ops->unmap_single(dev, addr, size, direction); 91 dma_ops->unmap_single(dev, addr, size, direction);
83} 92}
84 93
@@ -91,6 +100,7 @@ static inline void
91dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 100dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
92 size_t size, int direction) 101 size_t size, int direction)
93{ 102{
103 BUG_ON(!valid_dma_direction(direction));
94 if (dma_ops->sync_single_for_cpu) 104 if (dma_ops->sync_single_for_cpu)
95 dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, 105 dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
96 direction); 106 direction);
@@ -101,6 +111,7 @@ static inline void
101dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, 111dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
102 size_t size, int direction) 112 size_t size, int direction)
103{ 113{
114 BUG_ON(!valid_dma_direction(direction));
104 if (dma_ops->sync_single_for_device) 115 if (dma_ops->sync_single_for_device)
105 dma_ops->sync_single_for_device(hwdev, dma_handle, size, 116 dma_ops->sync_single_for_device(hwdev, dma_handle, size,
106 direction); 117 direction);
@@ -111,6 +122,7 @@ static inline void
111dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 122dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
112 unsigned long offset, size_t size, int direction) 123 unsigned long offset, size_t size, int direction)
113{ 124{
125 BUG_ON(!valid_dma_direction(direction));
114 if (dma_ops->sync_single_range_for_cpu) { 126 if (dma_ops->sync_single_range_for_cpu) {
115 dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction); 127 dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
116 } 128 }
@@ -122,6 +134,7 @@ static inline void
122dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, 134dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
123 unsigned long offset, size_t size, int direction) 135 unsigned long offset, size_t size, int direction)
124{ 136{
137 BUG_ON(!valid_dma_direction(direction));
125 if (dma_ops->sync_single_range_for_device) 138 if (dma_ops->sync_single_range_for_device)
126 dma_ops->sync_single_range_for_device(hwdev, dma_handle, 139 dma_ops->sync_single_range_for_device(hwdev, dma_handle,
127 offset, size, direction); 140 offset, size, direction);
@@ -133,6 +146,7 @@ static inline void
133dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, 146dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
134 int nelems, int direction) 147 int nelems, int direction)
135{ 148{
149 BUG_ON(!valid_dma_direction(direction));
136 if (dma_ops->sync_sg_for_cpu) 150 if (dma_ops->sync_sg_for_cpu)
137 dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); 151 dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
138 flush_write_buffers(); 152 flush_write_buffers();
@@ -142,6 +156,7 @@ static inline void
142dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, 156dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
143 int nelems, int direction) 157 int nelems, int direction)
144{ 158{
159 BUG_ON(!valid_dma_direction(direction));
145 if (dma_ops->sync_sg_for_device) { 160 if (dma_ops->sync_sg_for_device) {
146 dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); 161 dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
147 } 162 }
@@ -152,6 +167,7 @@ dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
152static inline int 167static inline int
153dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) 168dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
154{ 169{
170 BUG_ON(!valid_dma_direction(direction));
155 return dma_ops->map_sg(hwdev, sg, nents, direction); 171 return dma_ops->map_sg(hwdev, sg, nents, direction);
156} 172}
157 173
@@ -159,6 +175,7 @@ static inline void
159dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, 175dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
160 int direction) 176 int direction)
161{ 177{
178 BUG_ON(!valid_dma_direction(direction));
162 dma_ops->unmap_sg(hwdev, sg, nents, direction); 179 dma_ops->unmap_sg(hwdev, sg, nents, direction);
163} 180}
164 181
diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h
index c556208d3dd7..a37c16f06289 100644
--- a/include/asm-x86_64/dma.h
+++ b/include/asm-x86_64/dma.h
@@ -1,4 +1,4 @@
1/* $Id: dma.h,v 1.1.1.1 2001/04/19 20:00:38 ak Exp $ 1/*
2 * linux/include/asm/dma.h: Defines for using and allocating dma channels. 2 * linux/include/asm/dma.h: Defines for using and allocating dma channels.
3 * Written by Hennus Bergman, 1992. 3 * Written by Hennus Bergman, 1992.
4 * High DMA channel support & info by Hannu Savolainen 4 * High DMA channel support & info by Hannu Savolainen
diff --git a/include/asm-x86_64/gart-mapping.h b/include/asm-x86_64/gart-mapping.h
deleted file mode 100644
index ada497b0b55b..000000000000
--- a/include/asm-x86_64/gart-mapping.h
+++ /dev/null
@@ -1,16 +0,0 @@
1#ifndef _X8664_GART_MAPPING_H
2#define _X8664_GART_MAPPING_H 1
3
4#include <linux/types.h>
5#include <asm/types.h>
6
7struct device;
8
9extern void*
10gart_alloc_coherent(struct device *dev, size_t size,
11 dma_addr_t *dma_handle, gfp_t gfp);
12
13extern int
14gart_dma_supported(struct device *hwdev, u64 mask);
15
16#endif /* _X8664_GART_MAPPING_H */
diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h
index 18ff7ee9e774..b39098408b69 100644
--- a/include/asm-x86_64/hpet.h
+++ b/include/asm-x86_64/hpet.h
@@ -55,7 +55,7 @@
55 55
56extern int is_hpet_enabled(void); 56extern int is_hpet_enabled(void);
57extern int hpet_rtc_timer_init(void); 57extern int hpet_rtc_timer_init(void);
58extern int oem_force_hpet_timer(void); 58extern int apic_is_clustered_box(void);
59 59
60extern int hpet_use_timer; 60extern int hpet_use_timer;
61 61
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index 3de96fd86a70..1b2ac55d3204 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -12,8 +12,6 @@
12 * <tomsoft@informatik.tu-chemnitz.de> 12 * <tomsoft@informatik.tu-chemnitz.de>
13 * 13 *
14 * hacked by Andi Kleen for x86-64. 14 * hacked by Andi Kleen for x86-64.
15 *
16 * $Id: hw_irq.h,v 1.24 2001/09/14 20:55:03 vojtech Exp $
17 */ 15 */
18 16
19#ifndef __ASSEMBLY__ 17#ifndef __ASSEMBLY__
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index b4f4b172b15a..5b52ce507338 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -4,317 +4,15 @@
4/* 4/*
5 * This file contains the system call numbers of the ia32 port, 5 * This file contains the system call numbers of the ia32 port,
6 * this is for the kernel only. 6 * this is for the kernel only.
7 * Only add syscalls here where some part of the kernel needs to know
8 * the number. This should be otherwise in sync with asm-i386/unistd.h. -AK
7 */ 9 */
8 10
9#define __NR_ia32_restart_syscall 0 11#define __NR_ia32_restart_syscall 0
10#define __NR_ia32_exit 1 12#define __NR_ia32_exit 1
11#define __NR_ia32_fork 2
12#define __NR_ia32_read 3 13#define __NR_ia32_read 3
13#define __NR_ia32_write 4 14#define __NR_ia32_write 4
14#define __NR_ia32_open 5 15#define __NR_ia32_sigreturn 119
15#define __NR_ia32_close 6
16#define __NR_ia32_waitpid 7
17#define __NR_ia32_creat 8
18#define __NR_ia32_link 9
19#define __NR_ia32_unlink 10
20#define __NR_ia32_execve 11
21#define __NR_ia32_chdir 12
22#define __NR_ia32_time 13
23#define __NR_ia32_mknod 14
24#define __NR_ia32_chmod 15
25#define __NR_ia32_lchown 16
26#define __NR_ia32_break 17
27#define __NR_ia32_oldstat 18
28#define __NR_ia32_lseek 19
29#define __NR_ia32_getpid 20
30#define __NR_ia32_mount 21
31#define __NR_ia32_umount 22
32#define __NR_ia32_setuid 23
33#define __NR_ia32_getuid 24
34#define __NR_ia32_stime 25
35#define __NR_ia32_ptrace 26
36#define __NR_ia32_alarm 27
37#define __NR_ia32_oldfstat 28
38#define __NR_ia32_pause 29
39#define __NR_ia32_utime 30
40#define __NR_ia32_stty 31
41#define __NR_ia32_gtty 32
42#define __NR_ia32_access 33
43#define __NR_ia32_nice 34
44#define __NR_ia32_ftime 35
45#define __NR_ia32_sync 36
46#define __NR_ia32_kill 37
47#define __NR_ia32_rename 38
48#define __NR_ia32_mkdir 39
49#define __NR_ia32_rmdir 40
50#define __NR_ia32_dup 41
51#define __NR_ia32_pipe 42
52#define __NR_ia32_times 43
53#define __NR_ia32_prof 44
54#define __NR_ia32_brk 45
55#define __NR_ia32_setgid 46
56#define __NR_ia32_getgid 47
57#define __NR_ia32_signal 48
58#define __NR_ia32_geteuid 49
59#define __NR_ia32_getegid 50
60#define __NR_ia32_acct 51
61#define __NR_ia32_umount2 52
62#define __NR_ia32_lock 53
63#define __NR_ia32_ioctl 54
64#define __NR_ia32_fcntl 55
65#define __NR_ia32_mpx 56
66#define __NR_ia32_setpgid 57
67#define __NR_ia32_ulimit 58
68#define __NR_ia32_oldolduname 59
69#define __NR_ia32_umask 60
70#define __NR_ia32_chroot 61
71#define __NR_ia32_ustat 62
72#define __NR_ia32_dup2 63
73#define __NR_ia32_getppid 64
74#define __NR_ia32_getpgrp 65
75#define __NR_ia32_setsid 66
76#define __NR_ia32_sigaction 67
77#define __NR_ia32_sgetmask 68
78#define __NR_ia32_ssetmask 69
79#define __NR_ia32_setreuid 70
80#define __NR_ia32_setregid 71
81#define __NR_ia32_sigsuspend 72
82#define __NR_ia32_sigpending 73
83#define __NR_ia32_sethostname 74
84#define __NR_ia32_setrlimit 75
85#define __NR_ia32_getrlimit 76 /* Back compatible 2Gig limited rlimit */
86#define __NR_ia32_getrusage 77
87#define __NR_ia32_gettimeofday 78
88#define __NR_ia32_settimeofday 79
89#define __NR_ia32_getgroups 80
90#define __NR_ia32_setgroups 81
91#define __NR_ia32_select 82
92#define __NR_ia32_symlink 83
93#define __NR_ia32_oldlstat 84
94#define __NR_ia32_readlink 85
95#define __NR_ia32_uselib 86
96#define __NR_ia32_swapon 87
97#define __NR_ia32_reboot 88
98#define __NR_ia32_readdir 89
99#define __NR_ia32_mmap 90
100#define __NR_ia32_munmap 91
101#define __NR_ia32_truncate 92
102#define __NR_ia32_ftruncate 93
103#define __NR_ia32_fchmod 94
104#define __NR_ia32_fchown 95
105#define __NR_ia32_getpriority 96
106#define __NR_ia32_setpriority 97
107#define __NR_ia32_profil 98
108#define __NR_ia32_statfs 99
109#define __NR_ia32_fstatfs 100
110#define __NR_ia32_ioperm 101
111#define __NR_ia32_socketcall 102
112#define __NR_ia32_syslog 103
113#define __NR_ia32_setitimer 104
114#define __NR_ia32_getitimer 105
115#define __NR_ia32_stat 106
116#define __NR_ia32_lstat 107
117#define __NR_ia32_fstat 108
118#define __NR_ia32_olduname 109
119#define __NR_ia32_iopl 110
120#define __NR_ia32_vhangup 111
121#define __NR_ia32_idle 112
122#define __NR_ia32_vm86old 113
123#define __NR_ia32_wait4 114
124#define __NR_ia32_swapoff 115
125#define __NR_ia32_sysinfo 116
126#define __NR_ia32_ipc 117
127#define __NR_ia32_fsync 118
128#define __NR_ia32_sigreturn 119
129#define __NR_ia32_clone 120
130#define __NR_ia32_setdomainname 121
131#define __NR_ia32_uname 122
132#define __NR_ia32_modify_ldt 123
133#define __NR_ia32_adjtimex 124
134#define __NR_ia32_mprotect 125
135#define __NR_ia32_sigprocmask 126
136#define __NR_ia32_create_module 127
137#define __NR_ia32_init_module 128
138#define __NR_ia32_delete_module 129
139#define __NR_ia32_get_kernel_syms 130
140#define __NR_ia32_quotactl 131
141#define __NR_ia32_getpgid 132
142#define __NR_ia32_fchdir 133
143#define __NR_ia32_bdflush 134
144#define __NR_ia32_sysfs 135
145#define __NR_ia32_personality 136
146#define __NR_ia32_afs_syscall 137 /* Syscall for Andrew File System */
147#define __NR_ia32_setfsuid 138
148#define __NR_ia32_setfsgid 139
149#define __NR_ia32__llseek 140
150#define __NR_ia32_getdents 141
151#define __NR_ia32__newselect 142
152#define __NR_ia32_flock 143
153#define __NR_ia32_msync 144
154#define __NR_ia32_readv 145
155#define __NR_ia32_writev 146
156#define __NR_ia32_getsid 147
157#define __NR_ia32_fdatasync 148
158#define __NR_ia32__sysctl 149
159#define __NR_ia32_mlock 150
160#define __NR_ia32_munlock 151
161#define __NR_ia32_mlockall 152
162#define __NR_ia32_munlockall 153
163#define __NR_ia32_sched_setparam 154
164#define __NR_ia32_sched_getparam 155
165#define __NR_ia32_sched_setscheduler 156
166#define __NR_ia32_sched_getscheduler 157
167#define __NR_ia32_sched_yield 158
168#define __NR_ia32_sched_get_priority_max 159
169#define __NR_ia32_sched_get_priority_min 160
170#define __NR_ia32_sched_rr_get_interval 161
171#define __NR_ia32_nanosleep 162
172#define __NR_ia32_mremap 163
173#define __NR_ia32_setresuid 164
174#define __NR_ia32_getresuid 165
175#define __NR_ia32_vm86 166
176#define __NR_ia32_query_module 167
177#define __NR_ia32_poll 168
178#define __NR_ia32_nfsservctl 169
179#define __NR_ia32_setresgid 170
180#define __NR_ia32_getresgid 171
181#define __NR_ia32_prctl 172
182#define __NR_ia32_rt_sigreturn 173 16#define __NR_ia32_rt_sigreturn 173
183#define __NR_ia32_rt_sigaction 174
184#define __NR_ia32_rt_sigprocmask 175
185#define __NR_ia32_rt_sigpending 176
186#define __NR_ia32_rt_sigtimedwait 177
187#define __NR_ia32_rt_sigqueueinfo 178
188#define __NR_ia32_rt_sigsuspend 179
189#define __NR_ia32_pread 180
190#define __NR_ia32_pwrite 181
191#define __NR_ia32_chown 182
192#define __NR_ia32_getcwd 183
193#define __NR_ia32_capget 184
194#define __NR_ia32_capset 185
195#define __NR_ia32_sigaltstack 186
196#define __NR_ia32_sendfile 187
197#define __NR_ia32_getpmsg 188 /* some people actually want streams */
198#define __NR_ia32_putpmsg 189 /* some people actually want streams */
199#define __NR_ia32_vfork 190
200#define __NR_ia32_ugetrlimit 191 /* SuS compliant getrlimit */
201#define __NR_ia32_mmap2 192
202#define __NR_ia32_truncate64 193
203#define __NR_ia32_ftruncate64 194
204#define __NR_ia32_stat64 195
205#define __NR_ia32_lstat64 196
206#define __NR_ia32_fstat64 197
207#define __NR_ia32_lchown32 198
208#define __NR_ia32_getuid32 199
209#define __NR_ia32_getgid32 200
210#define __NR_ia32_geteuid32 201
211#define __NR_ia32_getegid32 202
212#define __NR_ia32_setreuid32 203
213#define __NR_ia32_setregid32 204
214#define __NR_ia32_getgroups32 205
215#define __NR_ia32_setgroups32 206
216#define __NR_ia32_fchown32 207
217#define __NR_ia32_setresuid32 208
218#define __NR_ia32_getresuid32 209
219#define __NR_ia32_setresgid32 210
220#define __NR_ia32_getresgid32 211
221#define __NR_ia32_chown32 212
222#define __NR_ia32_setuid32 213
223#define __NR_ia32_setgid32 214
224#define __NR_ia32_setfsuid32 215
225#define __NR_ia32_setfsgid32 216
226#define __NR_ia32_pivot_root 217
227#define __NR_ia32_mincore 218
228#define __NR_ia32_madvise 219
229#define __NR_ia32_madvise1 219 /* delete when C lib stub is removed */
230#define __NR_ia32_getdents64 220
231#define __NR_ia32_fcntl64 221
232#define __NR_ia32_tuxcall 222
233#define __NR_ia32_security 223
234#define __NR_ia32_gettid 224
235#define __NR_ia32_readahead 225
236#define __NR_ia32_setxattr 226
237#define __NR_ia32_lsetxattr 227
238#define __NR_ia32_fsetxattr 228
239#define __NR_ia32_getxattr 229
240#define __NR_ia32_lgetxattr 230
241#define __NR_ia32_fgetxattr 231
242#define __NR_ia32_listxattr 232
243#define __NR_ia32_llistxattr 233
244#define __NR_ia32_flistxattr 234
245#define __NR_ia32_removexattr 235
246#define __NR_ia32_lremovexattr 236
247#define __NR_ia32_fremovexattr 237
248#define __NR_ia32_tkill 238
249#define __NR_ia32_sendfile64 239
250#define __NR_ia32_futex 240
251#define __NR_ia32_sched_setaffinity 241
252#define __NR_ia32_sched_getaffinity 242
253#define __NR_ia32_set_thread_area 243
254#define __NR_ia32_get_thread_area 244
255#define __NR_ia32_io_setup 245
256#define __NR_ia32_io_destroy 246
257#define __NR_ia32_io_getevents 247
258#define __NR_ia32_io_submit 248
259#define __NR_ia32_io_cancel 249
260#define __NR_ia32_exit_group 252
261#define __NR_ia32_lookup_dcookie 253
262#define __NR_ia32_sys_epoll_create 254
263#define __NR_ia32_sys_epoll_ctl 255
264#define __NR_ia32_sys_epoll_wait 256
265#define __NR_ia32_remap_file_pages 257
266#define __NR_ia32_set_tid_address 258
267#define __NR_ia32_timer_create 259
268#define __NR_ia32_timer_settime (__NR_ia32_timer_create+1)
269#define __NR_ia32_timer_gettime (__NR_ia32_timer_create+2)
270#define __NR_ia32_timer_getoverrun (__NR_ia32_timer_create+3)
271#define __NR_ia32_timer_delete (__NR_ia32_timer_create+4)
272#define __NR_ia32_clock_settime (__NR_ia32_timer_create+5)
273#define __NR_ia32_clock_gettime (__NR_ia32_timer_create+6)
274#define __NR_ia32_clock_getres (__NR_ia32_timer_create+7)
275#define __NR_ia32_clock_nanosleep (__NR_ia32_timer_create+8)
276#define __NR_ia32_statfs64 268
277#define __NR_ia32_fstatfs64 269
278#define __NR_ia32_tgkill 270
279#define __NR_ia32_utimes 271
280#define __NR_ia32_fadvise64_64 272
281#define __NR_ia32_vserver 273
282#define __NR_ia32_mbind 274
283#define __NR_ia32_get_mempolicy 275
284#define __NR_ia32_set_mempolicy 276
285#define __NR_ia32_mq_open 277
286#define __NR_ia32_mq_unlink (__NR_ia32_mq_open+1)
287#define __NR_ia32_mq_timedsend (__NR_ia32_mq_open+2)
288#define __NR_ia32_mq_timedreceive (__NR_ia32_mq_open+3)
289#define __NR_ia32_mq_notify (__NR_ia32_mq_open+4)
290#define __NR_ia32_mq_getsetattr (__NR_ia32_mq_open+5)
291#define __NR_ia32_kexec 283
292#define __NR_ia32_waitid 284
293/* #define __NR_sys_setaltroot 285 */
294#define __NR_ia32_add_key 286
295#define __NR_ia32_request_key 287
296#define __NR_ia32_keyctl 288
297#define __NR_ia32_ioprio_set 289
298#define __NR_ia32_ioprio_get 290
299#define __NR_ia32_inotify_init 291
300#define __NR_ia32_inotify_add_watch 292
301#define __NR_ia32_inotify_rm_watch 293
302#define __NR_ia32_migrate_pages 294
303#define __NR_ia32_openat 295
304#define __NR_ia32_mkdirat 296
305#define __NR_ia32_mknodat 297
306#define __NR_ia32_fchownat 298
307#define __NR_ia32_futimesat 299
308#define __NR_ia32_fstatat64 300
309#define __NR_ia32_unlinkat 301
310#define __NR_ia32_renameat 302
311#define __NR_ia32_linkat 303
312#define __NR_ia32_symlinkat 304
313#define __NR_ia32_readlinkat 305
314#define __NR_ia32_fchmodat 306
315#define __NR_ia32_faccessat 307
316#define __NR_ia32_pselect6 308
317#define __NR_ia32_ppoll 309
318#define __NR_ia32_unshare 310
319 17
320#endif /* _ASM_X86_64_IA32_UNISTD_H_ */ 18#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
diff --git a/include/asm-x86_64/intel_arch_perfmon.h b/include/asm-x86_64/intel_arch_perfmon.h
new file mode 100644
index 000000000000..59c396431569
--- /dev/null
+++ b/include/asm-x86_64/intel_arch_perfmon.h
@@ -0,0 +1,19 @@
1#ifndef X86_64_INTEL_ARCH_PERFMON_H
2#define X86_64_INTEL_ARCH_PERFMON_H 1
3
4#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
5#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
6
7#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
8#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
9
10#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
11#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
12#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
13#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
18
19#endif /* X86_64_INTEL_ARCH_PERFMON_H */
diff --git a/include/asm-x86_64/k8.h b/include/asm-x86_64/k8.h
new file mode 100644
index 000000000000..699dd6961eda
--- /dev/null
+++ b/include/asm-x86_64/k8.h
@@ -0,0 +1,14 @@
1#ifndef _ASM_K8_H
2#define _ASM_K8_H 1
3
4#include <linux/pci.h>
5
6extern struct pci_device_id k8_nb_ids[];
7
8extern int early_is_k8_nb(u32 value);
9extern struct pci_dev **k8_northbridges;
10extern int num_k8_northbridges;
11extern int cache_k8_northbridges(void);
12extern void k8_flush_garts(void);
13
14#endif
diff --git a/include/asm-x86_64/local.h b/include/asm-x86_64/local.h
index cd17945bf218..e769e6200225 100644
--- a/include/asm-x86_64/local.h
+++ b/include/asm-x86_64/local.h
@@ -59,12 +59,26 @@ static inline void local_sub(long i, local_t *v)
59 * This could be done better if we moved the per cpu data directly 59 * This could be done better if we moved the per cpu data directly
60 * after GS. 60 * after GS.
61 */ 61 */
62#define cpu_local_read(v) local_read(&__get_cpu_var(v)) 62
63#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) 63/* Need to disable preemption for the cpu local counters otherwise we could
64#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) 64 still access a variable of a previous CPU in a non atomic way. */
65#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) 65#define cpu_local_wrap_v(v) \
66#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) 66 ({ local_t res__; \
67#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) 67 preempt_disable(); \
68 res__ = (v); \
69 preempt_enable(); \
70 res__; })
71#define cpu_local_wrap(v) \
72 ({ preempt_disable(); \
73 v; \
74 preempt_enable(); }) \
75
76#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v)))
77#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i)))
78#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v)))
79#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v)))
80#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v)))
81#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v)))
68 82
69#define __cpu_local_inc(v) cpu_local_inc(v) 83#define __cpu_local_inc(v) cpu_local_inc(v)
70#define __cpu_local_dec(v) cpu_local_dec(v) 84#define __cpu_local_dec(v) cpu_local_dec(v)
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 7229785094e3..d13687dfd691 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -67,13 +67,22 @@ struct mce_log {
67/* Software defined banks */ 67/* Software defined banks */
68#define MCE_EXTENDED_BANK 128 68#define MCE_EXTENDED_BANK 128
69#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 69#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
70#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ 70
71#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 71#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */
72#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9)
73#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9)
74#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9)
75#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9)
76#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9)
77#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
78#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
72 79
73#ifdef __KERNEL__ 80#ifdef __KERNEL__
74#include <asm/atomic.h> 81#include <asm/atomic.h>
75 82
76void mce_log(struct mce *m); 83void mce_log(struct mce *m);
84DECLARE_PER_CPU(struct sys_device, device_mce);
85
77#ifdef CONFIG_X86_MCE_INTEL 86#ifdef CONFIG_X86_MCE_INTEL
78void mce_intel_feature_init(struct cpuinfo_x86 *c); 87void mce_intel_feature_init(struct cpuinfo_x86 *c);
79#else 88#else
diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h
index 11fbee2bd6c0..06fab6de2a88 100644
--- a/include/asm-x86_64/mutex.h
+++ b/include/asm-x86_64/mutex.h
@@ -24,7 +24,7 @@ do { \
24 typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ 24 typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
25 \ 25 \
26 __asm__ __volatile__( \ 26 __asm__ __volatile__( \
27 LOCK " decl (%%rdi) \n" \ 27 LOCK_PREFIX " decl (%%rdi) \n" \
28 " js 2f \n" \ 28 " js 2f \n" \
29 "1: \n" \ 29 "1: \n" \
30 \ 30 \
@@ -74,7 +74,7 @@ do { \
74 typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ 74 typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
75 \ 75 \
76 __asm__ __volatile__( \ 76 __asm__ __volatile__( \
77 LOCK " incl (%%rdi) \n" \ 77 LOCK_PREFIX " incl (%%rdi) \n" \
78 " jle 2f \n" \ 78 " jle 2f \n" \
79 "1: \n" \ 79 "1: \n" \
80 \ 80 \
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index d3abfc6a8fd5..efb45c894d76 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -5,26 +5,27 @@
5#define ASM_NMI_H 5#define ASM_NMI_H
6 6
7#include <linux/pm.h> 7#include <linux/pm.h>
8#include <asm/io.h>
8 9
9struct pt_regs; 10struct pt_regs;
10 11
11typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu); 12typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
12 13
13/** 14/**
14 * set_nmi_callback 15 * set_nmi_callback
15 * 16 *
16 * Set a handler for an NMI. Only one handler may be 17 * Set a handler for an NMI. Only one handler may be
17 * set. Return 1 if the NMI was handled. 18 * set. Return 1 if the NMI was handled.
18 */ 19 */
19void set_nmi_callback(nmi_callback_t callback); 20void set_nmi_callback(nmi_callback_t callback);
20 21
21/** 22/**
22 * unset_nmi_callback 23 * unset_nmi_callback
23 * 24 *
24 * Remove the handler previously set. 25 * Remove the handler previously set.
25 */ 26 */
26void unset_nmi_callback(void); 27void unset_nmi_callback(void);
27 28
28#ifdef CONFIG_PM 29#ifdef CONFIG_PM
29 30
30/** Replace the PM callback routine for NMI. */ 31/** Replace the PM callback routine for NMI. */
@@ -56,4 +57,21 @@ extern int unknown_nmi_panic;
56 57
57extern int check_nmi_watchdog(void); 58extern int check_nmi_watchdog(void);
58 59
60extern void setup_apic_nmi_watchdog (void);
61extern int reserve_lapic_nmi(void);
62extern void release_lapic_nmi(void);
63extern void disable_timer_nmi_watchdog(void);
64extern void enable_timer_nmi_watchdog(void);
65extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
66
67extern void nmi_watchdog_default(void);
68extern int setup_nmi_watchdog(char *);
69
70extern unsigned int nmi_watchdog;
71#define NMI_DEFAULT -1
72#define NMI_NONE 0
73#define NMI_IO_APIC 1
74#define NMI_LOCAL_APIC 2
75#define NMI_INVALID 3
76
59#endif /* ASM_NMI_H */ 77#endif /* ASM_NMI_H */
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 2db0620d5449..49c5e9280598 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -39,8 +39,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
39#include <asm/scatterlist.h> 39#include <asm/scatterlist.h>
40#include <linux/string.h> 40#include <linux/string.h>
41#include <asm/page.h> 41#include <asm/page.h>
42#include <linux/dma-mapping.h> /* for have_iommu */
43 42
43extern void pci_iommu_alloc(void);
44extern int iommu_setup(char *opt); 44extern int iommu_setup(char *opt);
45 45
46/* The PCI address space does equal the physical memory 46/* The PCI address space does equal the physical memory
@@ -52,7 +52,7 @@ extern int iommu_setup(char *opt);
52 */ 52 */
53#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) 53#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
54 54
55#ifdef CONFIG_GART_IOMMU 55#if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
56 56
57/* 57/*
58 * x86-64 always supports DAC, but sometimes it is useful to force 58 * x86-64 always supports DAC, but sometimes it is useful to force
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 31e83c3bd022..a31ab4e68a9b 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -337,14 +337,8 @@ static inline int pmd_large(pmd_t pte) {
337/* to find an entry in a page-table-directory. */ 337/* to find an entry in a page-table-directory. */
338#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 338#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
339#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) 339#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
340#define pud_offset_k(pgd, addr) pud_offset(pgd, addr)
341#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) 340#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
342 341
343static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
344{
345 return pud + pud_index(address);
346}
347
348/* PMD - Level 2 access */ 342/* PMD - Level 2 access */
349#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) 343#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
350#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) 344#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 3061a38a3b1d..3b3c1217fe61 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -69,7 +69,11 @@ struct cpuinfo_x86 {
69 cpumask_t llc_shared_map; /* cpus sharing the last level cache */ 69 cpumask_t llc_shared_map; /* cpus sharing the last level cache */
70#endif 70#endif
71 __u8 apicid; 71 __u8 apicid;
72#ifdef CONFIG_SMP
72 __u8 booted_cores; /* number of cores as seen by OS */ 73 __u8 booted_cores; /* number of cores as seen by OS */
74 __u8 phys_proc_id; /* Physical Processor id. */
75 __u8 cpu_core_id; /* Core id. */
76#endif
73} ____cacheline_aligned; 77} ____cacheline_aligned;
74 78
75#define X86_VENDOR_INTEL 0 79#define X86_VENDOR_INTEL 0
@@ -96,6 +100,7 @@ extern char ignore_irq13;
96extern void identify_cpu(struct cpuinfo_x86 *); 100extern void identify_cpu(struct cpuinfo_x86 *);
97extern void print_cpu_info(struct cpuinfo_x86 *); 101extern void print_cpu_info(struct cpuinfo_x86 *);
98extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 102extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
103extern unsigned short num_cache_leaves;
99 104
100/* 105/*
101 * EFLAGS bits 106 * EFLAGS bits
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 8abf2a43c944..038fe1f47e6f 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -37,7 +37,6 @@ extern void ia32_sysenter_target(void);
37 37
38extern void config_acpi_tables(void); 38extern void config_acpi_tables(void);
39extern void ia32_syscall(void); 39extern void ia32_syscall(void);
40extern void iommu_hole_init(void);
41 40
42extern int pmtimer_mark_offset(void); 41extern int pmtimer_mark_offset(void);
43extern void pmtimer_resume(void); 42extern void pmtimer_resume(void);
@@ -75,7 +74,7 @@ extern void main_timer_handler(struct pt_regs *regs);
75 74
76extern unsigned long end_pfn_map; 75extern unsigned long end_pfn_map;
77 76
78extern void show_trace(unsigned long * rsp); 77extern void show_trace(struct task_struct *, struct pt_regs *, unsigned long * rsp);
79extern void show_registers(struct pt_regs *regs); 78extern void show_registers(struct pt_regs *regs);
80 79
81extern void exception_table_check(void); 80extern void exception_table_check(void);
@@ -101,13 +100,9 @@ extern int unsynchronized_tsc(void);
101 100
102extern void select_idle_routine(const struct cpuinfo_x86 *c); 101extern void select_idle_routine(const struct cpuinfo_x86 *c);
103 102
104extern void gart_parse_options(char *);
105extern void __init no_iommu_init(void);
106
107extern unsigned long table_start, table_end; 103extern unsigned long table_start, table_end;
108 104
109extern int exception_trace; 105extern int exception_trace;
110extern int force_iommu, no_iommu;
111extern int using_apic_timer; 106extern int using_apic_timer;
112extern int disable_apic; 107extern int disable_apic;
113extern unsigned cpu_khz; 108extern unsigned cpu_khz;
@@ -116,7 +111,13 @@ extern int skip_ioapic_setup;
116extern int acpi_ht; 111extern int acpi_ht;
117extern int acpi_disabled; 112extern int acpi_disabled;
118 113
119#ifdef CONFIG_GART_IOMMU 114extern void no_iommu_init(void);
115extern int force_iommu, no_iommu;
116extern int iommu_detected;
117#ifdef CONFIG_IOMMU
118extern void gart_iommu_init(void);
119extern void gart_parse_options(char *);
120extern void iommu_hole_init(void);
120extern int fallback_aper_order; 121extern int fallback_aper_order;
121extern int fallback_aper_force; 122extern int fallback_aper_force;
122extern int iommu_aperture; 123extern int iommu_aperture;
diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h
index 9942cc393064..dea0e9459264 100644
--- a/include/asm-x86_64/rwlock.h
+++ b/include/asm-x86_64/rwlock.h
@@ -24,7 +24,7 @@
24#define RW_LOCK_BIAS_STR "0x01000000" 24#define RW_LOCK_BIAS_STR "0x01000000"
25 25
26#define __build_read_lock_ptr(rw, helper) \ 26#define __build_read_lock_ptr(rw, helper) \
27 asm volatile(LOCK "subl $1,(%0)\n\t" \ 27 asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
28 "js 2f\n" \ 28 "js 2f\n" \
29 "1:\n" \ 29 "1:\n" \
30 LOCK_SECTION_START("") \ 30 LOCK_SECTION_START("") \
@@ -34,7 +34,7 @@
34 ::"a" (rw) : "memory") 34 ::"a" (rw) : "memory")
35 35
36#define __build_read_lock_const(rw, helper) \ 36#define __build_read_lock_const(rw, helper) \
37 asm volatile(LOCK "subl $1,%0\n\t" \ 37 asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
38 "js 2f\n" \ 38 "js 2f\n" \
39 "1:\n" \ 39 "1:\n" \
40 LOCK_SECTION_START("") \ 40 LOCK_SECTION_START("") \
@@ -54,7 +54,7 @@
54 } while (0) 54 } while (0)
55 55
56#define __build_write_lock_ptr(rw, helper) \ 56#define __build_write_lock_ptr(rw, helper) \
57 asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ 57 asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
58 "jnz 2f\n" \ 58 "jnz 2f\n" \
59 "1:\n" \ 59 "1:\n" \
60 LOCK_SECTION_START("") \ 60 LOCK_SECTION_START("") \
@@ -64,7 +64,7 @@
64 ::"a" (rw) : "memory") 64 ::"a" (rw) : "memory")
65 65
66#define __build_write_lock_const(rw, helper) \ 66#define __build_write_lock_const(rw, helper) \
67 asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ 67 asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
68 "jnz 2f\n" \ 68 "jnz 2f\n" \
69 "1:\n" \ 69 "1:\n" \
70 LOCK_SECTION_START("") \ 70 LOCK_SECTION_START("") \
diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h
index a389aa6fe80f..064df08b9a0f 100644
--- a/include/asm-x86_64/semaphore.h
+++ b/include/asm-x86_64/semaphore.h
@@ -106,7 +106,7 @@ static inline void down(struct semaphore * sem)
106 106
107 __asm__ __volatile__( 107 __asm__ __volatile__(
108 "# atomic down operation\n\t" 108 "# atomic down operation\n\t"
109 LOCK "decl %0\n\t" /* --sem->count */ 109 LOCK_PREFIX "decl %0\n\t" /* --sem->count */
110 "js 2f\n" 110 "js 2f\n"
111 "1:\n" 111 "1:\n"
112 LOCK_SECTION_START("") 112 LOCK_SECTION_START("")
@@ -130,7 +130,7 @@ static inline int down_interruptible(struct semaphore * sem)
130 130
131 __asm__ __volatile__( 131 __asm__ __volatile__(
132 "# atomic interruptible down operation\n\t" 132 "# atomic interruptible down operation\n\t"
133 LOCK "decl %1\n\t" /* --sem->count */ 133 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
134 "js 2f\n\t" 134 "js 2f\n\t"
135 "xorl %0,%0\n" 135 "xorl %0,%0\n"
136 "1:\n" 136 "1:\n"
@@ -154,7 +154,7 @@ static inline int down_trylock(struct semaphore * sem)
154 154
155 __asm__ __volatile__( 155 __asm__ __volatile__(
156 "# atomic interruptible down operation\n\t" 156 "# atomic interruptible down operation\n\t"
157 LOCK "decl %1\n\t" /* --sem->count */ 157 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
158 "js 2f\n\t" 158 "js 2f\n\t"
159 "xorl %0,%0\n" 159 "xorl %0,%0\n"
160 "1:\n" 160 "1:\n"
@@ -178,7 +178,7 @@ static inline void up(struct semaphore * sem)
178{ 178{
179 __asm__ __volatile__( 179 __asm__ __volatile__(
180 "# atomic up operation\n\t" 180 "# atomic up operation\n\t"
181 LOCK "incl %0\n\t" /* ++sem->count */ 181 LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
182 "jle 2f\n" 182 "jle 2f\n"
183 "1:\n" 183 "1:\n"
184 LOCK_SECTION_START("") 184 LOCK_SECTION_START("")
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 7686b9b25aef..6805e1feb300 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -53,8 +53,6 @@ extern int smp_call_function_single(int cpuid, void (*func) (void *info),
53 53
54extern cpumask_t cpu_sibling_map[NR_CPUS]; 54extern cpumask_t cpu_sibling_map[NR_CPUS];
55extern cpumask_t cpu_core_map[NR_CPUS]; 55extern cpumask_t cpu_core_map[NR_CPUS];
56extern u8 phys_proc_id[NR_CPUS];
57extern u8 cpu_core_id[NR_CPUS];
58extern u8 cpu_llc_id[NR_CPUS]; 56extern u8 cpu_llc_id[NR_CPUS];
59 57
60#define SMP_TRAMPOLINE_BASE 0x6000 58#define SMP_TRAMPOLINE_BASE 0x6000
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 5d8a5e3589ff..8d3421996f94 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -31,15 +31,19 @@
31 "jmp 1b\n" \ 31 "jmp 1b\n" \
32 LOCK_SECTION_END 32 LOCK_SECTION_END
33 33
34#define __raw_spin_lock_string_up \
35 "\n\tdecl %0"
36
34#define __raw_spin_unlock_string \ 37#define __raw_spin_unlock_string \
35 "movl $1,%0" \ 38 "movl $1,%0" \
36 :"=m" (lock->slock) : : "memory" 39 :"=m" (lock->slock) : : "memory"
37 40
38static inline void __raw_spin_lock(raw_spinlock_t *lock) 41static inline void __raw_spin_lock(raw_spinlock_t *lock)
39{ 42{
40 __asm__ __volatile__( 43 alternative_smp(
41 __raw_spin_lock_string 44 __raw_spin_lock_string,
42 :"=m" (lock->slock) : : "memory"); 45 __raw_spin_lock_string_up,
46 "=m" (lock->slock) : : "memory");
43} 47}
44 48
45#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) 49#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
diff --git a/include/asm-x86_64/string.h b/include/asm-x86_64/string.h
index ee6bf275349e..9505d9f4bead 100644
--- a/include/asm-x86_64/string.h
+++ b/include/asm-x86_64/string.h
@@ -6,7 +6,8 @@
6/* Written 2002 by Andi Kleen */ 6/* Written 2002 by Andi Kleen */
7 7
8/* Only used for special circumstances. Stolen from i386/string.h */ 8/* Only used for special circumstances. Stolen from i386/string.h */
9static inline void * __inline_memcpy(void * to, const void * from, size_t n) 9static __always_inline void *
10__inline_memcpy(void * to, const void * from, size_t n)
10{ 11{
11unsigned long d0, d1, d2; 12unsigned long d0, d1, d2;
12__asm__ __volatile__( 13__asm__ __volatile__(
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index f48e0dad8b3d..68e559f3631c 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -3,15 +3,10 @@
3 3
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <asm/segment.h> 5#include <asm/segment.h>
6#include <asm/alternative.h>
6 7
7#ifdef __KERNEL__ 8#ifdef __KERNEL__
8 9
9#ifdef CONFIG_SMP
10#define LOCK_PREFIX "lock ; "
11#else
12#define LOCK_PREFIX ""
13#endif
14
15#define __STR(x) #x 10#define __STR(x) #x
16#define STR(x) __STR(x) 11#define STR(x) __STR(x)
17 12
@@ -34,7 +29,7 @@
34 "thread_return:\n\t" \ 29 "thread_return:\n\t" \
35 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 30 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
36 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 31 "movq %P[thread_info](%%rsi),%%r8\n\t" \
37 LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 32 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
38 "movq %%rax,%%rdi\n\t" \ 33 "movq %%rax,%%rdi\n\t" \
39 "jc ret_from_fork\n\t" \ 34 "jc ret_from_fork\n\t" \
40 RESTORE_CONTEXT \ 35 RESTORE_CONTEXT \
@@ -69,82 +64,6 @@ extern void load_gs_index(unsigned);
69 ".previous" \ 64 ".previous" \
70 : :"r" (value), "r" (0)) 65 : :"r" (value), "r" (0))
71 66
72#ifdef __KERNEL__
73struct alt_instr {
74 __u8 *instr; /* original instruction */
75 __u8 *replacement;
76 __u8 cpuid; /* cpuid bit set for replacement */
77 __u8 instrlen; /* length of original instruction */
78 __u8 replacementlen; /* length of new instruction, <= instrlen */
79 __u8 pad[5];
80};
81#endif
82
83/*
84 * Alternative instructions for different CPU types or capabilities.
85 *
86 * This allows to use optimized instructions even on generic binary
87 * kernels.
88 *
89 * length of oldinstr must be longer or equal the length of newinstr
90 * It can be padded with nops as needed.
91 *
92 * For non barrier like inlines please define new variants
93 * without volatile and memory clobber.
94 */
95#define alternative(oldinstr, newinstr, feature) \
96 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
97 ".section .altinstructions,\"a\"\n" \
98 " .align 8\n" \
99 " .quad 661b\n" /* label */ \
100 " .quad 663f\n" /* new instruction */ \
101 " .byte %c0\n" /* feature bit */ \
102 " .byte 662b-661b\n" /* sourcelen */ \
103 " .byte 664f-663f\n" /* replacementlen */ \
104 ".previous\n" \
105 ".section .altinstr_replacement,\"ax\"\n" \
106 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
107 ".previous" :: "i" (feature) : "memory")
108
109/*
110 * Alternative inline assembly with input.
111 *
112 * Peculiarities:
113 * No memory clobber here.
114 * Argument numbers start with 1.
115 * Best is to use constraints that are fixed size (like (%1) ... "r")
116 * If you use variable sized constraints like "m" or "g" in the
117 * replacement make sure to pad to the worst case length.
118 */
119#define alternative_input(oldinstr, newinstr, feature, input...) \
120 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
121 ".section .altinstructions,\"a\"\n" \
122 " .align 8\n" \
123 " .quad 661b\n" /* label */ \
124 " .quad 663f\n" /* new instruction */ \
125 " .byte %c0\n" /* feature bit */ \
126 " .byte 662b-661b\n" /* sourcelen */ \
127 " .byte 664f-663f\n" /* replacementlen */ \
128 ".previous\n" \
129 ".section .altinstr_replacement,\"ax\"\n" \
130 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
131 ".previous" :: "i" (feature), ##input)
132
133/* Like alternative_input, but with a single output argument */
134#define alternative_io(oldinstr, newinstr, feature, output, input...) \
135 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
136 ".section .altinstructions,\"a\"\n" \
137 " .align 8\n" \
138 " .quad 661b\n" /* label */ \
139 " .quad 663f\n" /* new instruction */ \
140 " .byte %c[feat]\n" /* feature bit */ \
141 " .byte 662b-661b\n" /* sourcelen */ \
142 " .byte 664f-663f\n" /* replacementlen */ \
143 ".previous\n" \
144 ".section .altinstr_replacement,\"ax\"\n" \
145 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
146 ".previous" : output : [feat] "i" (feature), ##input)
147
148/* 67/*
149 * Clear and set 'TS' bit respectively 68 * Clear and set 'TS' bit respectively
150 */ 69 */
@@ -366,5 +285,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
366void cpu_idle_wait(void); 285void cpu_idle_wait(void);
367 286
368extern unsigned long arch_align_stack(unsigned long sp); 287extern unsigned long arch_align_stack(unsigned long sp);
288extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
369 289
370#endif 290#endif
diff --git a/include/asm-x86_64/tce.h b/include/asm-x86_64/tce.h
new file mode 100644
index 000000000000..ee51d31528d6
--- /dev/null
+++ b/include/asm-x86_64/tce.h
@@ -0,0 +1,47 @@
1/*
2 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
3 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
4 *
5 * This file is derived from asm-powerpc/tce.h.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#ifndef _ASM_X86_64_TCE_H
23#define _ASM_X86_64_TCE_H
24
25extern void* tce_table_kva[];
26extern unsigned int specified_table_size;
27struct iommu_table;
28
29#define TCE_ENTRY_SIZE 8 /* in bytes */
30
31#define TCE_READ_SHIFT 0
32#define TCE_WRITE_SHIFT 1
33#define TCE_HUBID_SHIFT 2 /* unused */
34#define TCE_RSVD_SHIFT 8 /* unused */
35#define TCE_RPN_SHIFT 12
36#define TCE_UNUSED_SHIFT 48 /* unused */
37
38#define TCE_RPN_MASK 0x0000fffffffff000ULL
39
40extern void tce_build(struct iommu_table *tbl, unsigned long index,
41 unsigned int npages, unsigned long uaddr, int direction);
42extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
43extern void* alloc_tce_table(void);
44extern void free_tce_table(void *tbl);
45extern int build_tce_table(struct pci_dev *dev, void __iomem *bbar);
46
47#endif /* _ASM_X86_64_TCE_H */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 4ac0e0a36934..2029b00351f3 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -73,8 +73,21 @@ static inline struct thread_info *stack_thread_info(void)
73} 73}
74 74
75/* thread information allocation */ 75/* thread information allocation */
76#ifdef CONFIG_DEBUG_STACK_USAGE
77#define alloc_thread_info(tsk) \
78 ({ \
79 struct thread_info *ret; \
80 \
81 ret = ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)); \
82 if (ret) \
83 memset(ret, 0, THREAD_SIZE); \
84 ret; \
85 })
86#else
76#define alloc_thread_info(tsk) \ 87#define alloc_thread_info(tsk) \
77 ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)) 88 ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
89#endif
90
78#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) 91#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
79 92
80#else /* !__ASSEMBLY__ */ 93#else /* !__ASSEMBLY__ */
@@ -101,7 +114,7 @@ static inline struct thread_info *stack_thread_info(void)
101#define TIF_IRET 5 /* force IRET */ 114#define TIF_IRET 5 /* force IRET */
102#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ 115#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
103#define TIF_SECCOMP 8 /* secure computing */ 116#define TIF_SECCOMP 8 /* secure computing */
104#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 117/* 16 free */
105#define TIF_IA32 17 /* 32bit process */ 118#define TIF_IA32 17 /* 32bit process */
106#define TIF_FORK 18 /* ret_from_fork */ 119#define TIF_FORK 18 /* ret_from_fork */
107#define TIF_ABI_PENDING 19 120#define TIF_ABI_PENDING 19
@@ -115,7 +128,6 @@ static inline struct thread_info *stack_thread_info(void)
115#define _TIF_IRET (1<<TIF_IRET) 128#define _TIF_IRET (1<<TIF_IRET)
116#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) 129#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
117#define _TIF_SECCOMP (1<<TIF_SECCOMP) 130#define _TIF_SECCOMP (1<<TIF_SECCOMP)
118#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
119#define _TIF_IA32 (1<<TIF_IA32) 131#define _TIF_IA32 (1<<TIF_IA32)
120#define _TIF_FORK (1<<TIF_FORK) 132#define _TIF_FORK (1<<TIF_FORK)
121#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) 133#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
@@ -137,6 +149,9 @@ static inline struct thread_info *stack_thread_info(void)
137 */ 149 */
138#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ 150#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
139#define TS_COMPAT 0x0002 /* 32bit syscall active */ 151#define TS_COMPAT 0x0002 /* 32bit syscall active */
152#define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */
153
154#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
140 155
141#endif /* __KERNEL__ */ 156#endif /* __KERNEL__ */
142 157
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 80c4e44d011c..c4e46e7fa7ba 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -7,8 +7,6 @@
7#include <asm/mpspec.h> 7#include <asm/mpspec.h>
8#include <asm/bitops.h> 8#include <asm/bitops.h>
9 9
10/* Map the K8 CPU local memory controllers to a simple 1:1 CPU:NODE topology */
11
12extern cpumask_t cpu_online_map; 10extern cpumask_t cpu_online_map;
13 11
14extern unsigned char cpu_to_node[]; 12extern unsigned char cpu_to_node[];
@@ -57,10 +55,8 @@ extern int __node_distance(int, int);
57#endif 55#endif
58 56
59#ifdef CONFIG_SMP 57#ifdef CONFIG_SMP
60#define topology_physical_package_id(cpu) \ 58#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id)
61 (phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu]) 59#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id)
62#define topology_core_id(cpu) \
63 (cpu_core_id[cpu] == BAD_APICID ? 0 : cpu_core_id[cpu])
64#define topology_core_siblings(cpu) (cpu_core_map[cpu]) 60#define topology_core_siblings(cpu) (cpu_core_map[cpu])
65#define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) 61#define topology_thread_siblings(cpu) (cpu_sibling_map[cpu])
66#endif 62#endif
diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h
new file mode 100644
index 000000000000..f3e7124effe3
--- /dev/null
+++ b/include/asm-x86_64/unwind.h
@@ -0,0 +1,106 @@
1#ifndef _ASM_X86_64_UNWIND_H
2#define _ASM_X86_64_UNWIND_H
3
4/*
5 * Copyright (C) 2002-2006 Novell, Inc.
6 * Jan Beulich <jbeulich@novell.com>
7 * This code is released under version 2 of the GNU GPL.
8 */
9
10#ifdef CONFIG_STACK_UNWIND
11
12#include <linux/sched.h>
13#include <asm/ptrace.h>
14#include <asm/uaccess.h>
15#include <asm/vsyscall.h>
16
17struct unwind_frame_info
18{
19 struct pt_regs regs;
20 struct task_struct *task;
21};
22
23#define UNW_PC(frame) (frame)->regs.rip
24#define UNW_SP(frame) (frame)->regs.rsp
25#ifdef CONFIG_FRAME_POINTER
26#define UNW_FP(frame) (frame)->regs.rbp
27#define FRAME_RETADDR_OFFSET 8
28#define FRAME_LINK_OFFSET 0
29#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
30#define STACK_TOP(tsk) ((tsk)->thread.rsp0)
31#endif
32/* Might need to account for the special exception and interrupt handling
33 stacks here, since normally
34 EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
35 but the construct is needed only for getting across the stack switch to
36 the interrupt stack - thus considering the IRQ stack itself is unnecessary,
37 and the overhead of comparing against all exception handling stacks seems
38 not desirable. */
39#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
40
41#define UNW_REGISTER_INFO \
42 PTREGS_INFO(rax), \
43 PTREGS_INFO(rdx), \
44 PTREGS_INFO(rcx), \
45 PTREGS_INFO(rbx), \
46 PTREGS_INFO(rsi), \
47 PTREGS_INFO(rdi), \
48 PTREGS_INFO(rbp), \
49 PTREGS_INFO(rsp), \
50 PTREGS_INFO(r8), \
51 PTREGS_INFO(r9), \
52 PTREGS_INFO(r10), \
53 PTREGS_INFO(r11), \
54 PTREGS_INFO(r12), \
55 PTREGS_INFO(r13), \
56 PTREGS_INFO(r14), \
57 PTREGS_INFO(r15), \
58 PTREGS_INFO(rip)
59
60static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
61 /*const*/ struct pt_regs *regs)
62{
63 info->regs = *regs;
64}
65
66static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
67{
68 extern const char thread_return[];
69
70 memset(&info->regs, 0, sizeof(info->regs));
71 info->regs.rip = (unsigned long)thread_return;
72 info->regs.cs = __KERNEL_CS;
73 __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
74 info->regs.rsp = info->task->thread.rsp;
75 info->regs.ss = __KERNEL_DS;
76}
77
78extern int arch_unwind_init_running(struct unwind_frame_info *,
79 int (*callback)(struct unwind_frame_info *,
80 void *arg),
81 void *arg);
82
83static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
84{
85#if 0 /* This can only work when selector register saves/restores
86 are properly annotated (and tracked in UNW_REGISTER_INFO). */
87 return user_mode(&info->regs);
88#else
89 return (long)info->regs.rip >= 0
90 || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
91 || (long)info->regs.rsp >= 0;
92#endif
93}
94
95#else
96
97#define UNW_PC(frame) ((void)(frame), 0)
98
99static inline int arch_unw_user_mode(const void *info)
100{
101 return 0;
102}
103
104#endif
105
106#endif /* _ASM_X86_64_UNWIND_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index d9ed27969855..dcc5de7cc487 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -24,6 +24,9 @@
24 * The available bitmap operations and their rough meaning in the 24 * The available bitmap operations and their rough meaning in the
25 * case that the bitmap is a single unsigned long are thus: 25 * case that the bitmap is a single unsigned long are thus:
26 * 26 *
27 * Note that nbits should always be a compile-time evaluable constant.
28 * Otherwise many inlines will generate horrible code.
29 *
27 * bitmap_zero(dst, nbits) *dst = 0UL 30 * bitmap_zero(dst, nbits) *dst = 0UL
28 * bitmap_fill(dst, nbits) *dst = ~0UL 31 * bitmap_fill(dst, nbits) *dst = ~0UL
29 * bitmap_copy(dst, src, nbits) *dst = *src 32 * bitmap_copy(dst, src, nbits) *dst = *src
@@ -244,6 +247,8 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
244 247
245static inline int bitmap_weight(const unsigned long *src, int nbits) 248static inline int bitmap_weight(const unsigned long *src, int nbits)
246{ 249{
250 if (nbits <= BITS_PER_LONG)
251 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
247 return __bitmap_weight(src, nbits); 252 return __bitmap_weight(src, nbits);
248} 253}
249 254
diff --git a/include/linux/compat.h b/include/linux/compat.h
index dda1697ec753..9760753e662b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -226,5 +226,7 @@ static inline int compat_timespec_compare(struct compat_timespec *lhs,
226 226
227asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); 227asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
228 228
229extern int compat_printk(const char *fmt, ...);
230
229#endif /* CONFIG_COMPAT */ 231#endif /* CONFIG_COMPAT */
230#endif /* _LINUX_COMPAT_H */ 232#endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3c5e4c2e517d..5c1ec1f84eab 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -32,6 +32,7 @@ extern const char linux_banner[];
32 32
33#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 33#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
34#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) 34#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
35#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
35 36
36#define KERN_EMERG "<0>" /* system is unusable */ 37#define KERN_EMERG "<0>" /* system is unusable */
37#define KERN_ALERT "<1>" /* action must be taken immediately */ 38#define KERN_ALERT "<1>" /* action must be taken immediately */
@@ -336,6 +337,12 @@ struct sysinfo {
336/* Force a compilation error if condition is true */ 337/* Force a compilation error if condition is true */
337#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) 338#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
338 339
340/* Force a compilation error if condition is true, but also produce a
341 result (of value 0 and type size_t), so the expression can be used
342 e.g. in a structure initializer (or wherever else comma expressions
343 aren't permitted). */
344#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
345
339/* Trap pasters of __FUNCTION__ at compile-time */ 346/* Trap pasters of __FUNCTION__ at compile-time */
340#define __FUNCTION__ (__func__) 347#define __FUNCTION__ (__func__)
341 348
diff --git a/include/linux/module.h b/include/linux/module.h
index 2d366098eab5..9ebbb74b7b72 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -285,6 +285,9 @@ struct module
285 /* The size of the executable code in each section. */ 285 /* The size of the executable code in each section. */
286 unsigned long init_text_size, core_text_size; 286 unsigned long init_text_size, core_text_size;
287 287
288 /* The handle returned from unwind_add_table. */
289 void *unwind_info;
290
288 /* Arch-specific module values */ 291 /* Arch-specific module values */
289 struct mod_arch_specific arch; 292 struct mod_arch_specific arch;
290 293
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6a60770984e9..349ef908a222 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -148,9 +148,11 @@ enum
148 KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ 148 KERN_SPIN_RETRY=70, /* int: number of spinlock retries */
149 KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ 149 KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
150 KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ 150 KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
151 KERN_COMPAT_LOG=73, /* int: print compat layer messages */
151}; 152};
152 153
153 154
155
154/* CTL_VM names: */ 156/* CTL_VM names: */
155enum 157enum
156{ 158{
diff --git a/include/linux/time.h b/include/linux/time.h
index 65dd85b2105e..c05f8bb9a323 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -28,10 +28,13 @@ struct timezone {
28#ifdef __KERNEL__ 28#ifdef __KERNEL__
29 29
30/* Parameters used to convert the timespec values: */ 30/* Parameters used to convert the timespec values: */
31#define MSEC_PER_SEC 1000L 31#define MSEC_PER_SEC 1000L
32#define USEC_PER_SEC 1000000L 32#define USEC_PER_MSEC 1000L
33#define NSEC_PER_SEC 1000000000L 33#define NSEC_PER_USEC 1000L
34#define NSEC_PER_USEC 1000L 34#define NSEC_PER_MSEC 1000000L
35#define USEC_PER_SEC 1000000L
36#define NSEC_PER_SEC 1000000000L
37#define FSEC_PER_SEC 1000000000000000L
35 38
36static inline int timespec_equal(struct timespec *a, struct timespec *b) 39static inline int timespec_equal(struct timespec *a, struct timespec *b)
37{ 40{
diff --git a/include/linux/unwind.h b/include/linux/unwind.h
new file mode 100644
index 000000000000..ce48e2cd37a2
--- /dev/null
+++ b/include/linux/unwind.h
@@ -0,0 +1,127 @@
1#ifndef _LINUX_UNWIND_H
2#define _LINUX_UNWIND_H
3
4/*
5 * Copyright (C) 2002-2006 Novell, Inc.
6 * Jan Beulich <jbeulich@novell.com>
7 * This code is released under version 2 of the GNU GPL.
8 *
9 * A simple API for unwinding kernel stacks. This is used for
10 * debugging and error reporting purposes. The kernel doesn't need
11 * full-blown stack unwinding with all the bells and whistles, so there
12 * is not much point in implementing the full Dwarf2 unwind API.
13 */
14
15#include <linux/config.h>
16
17struct module;
18
19#ifdef CONFIG_STACK_UNWIND
20
21#include <asm/unwind.h>
22
23#ifndef ARCH_UNWIND_SECTION_NAME
24#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
25#endif
26
27/*
28 * Initialize unwind support.
29 */
30extern void unwind_init(void);
31
32#ifdef CONFIG_MODULES
33
34extern void *unwind_add_table(struct module *,
35 const void *table_start,
36 unsigned long table_size);
37
38extern void unwind_remove_table(void *handle, int init_only);
39
40#endif
41
42extern int unwind_init_frame_info(struct unwind_frame_info *,
43 struct task_struct *,
44 /*const*/ struct pt_regs *);
45
46/*
47 * Prepare to unwind a blocked task.
48 */
49extern int unwind_init_blocked(struct unwind_frame_info *,
50 struct task_struct *);
51
52/*
53 * Prepare to unwind the currently running thread.
54 */
55extern int unwind_init_running(struct unwind_frame_info *,
56 asmlinkage int (*callback)(struct unwind_frame_info *,
57 void *arg),
58 void *arg);
59
60/*
61 * Unwind to the previous frame. Returns 0 if successful, negative
62 * number in case of an error.
63 */
64extern int unwind(struct unwind_frame_info *);
65
66/*
67 * Unwind until the return pointer is in user-land (or until an error
68 * occurs). Returns 0 if successful, negative number in case of
69 * error.
70 */
71extern int unwind_to_user(struct unwind_frame_info *);
72
73#else
74
75struct unwind_frame_info {};
76
77static inline void unwind_init(void) {}
78
79#ifdef CONFIG_MODULES
80
81static inline void *unwind_add_table(struct module *mod,
82 const void *table_start,
83 unsigned long table_size)
84{
85 return NULL;
86}
87
88#endif
89
90static inline void unwind_remove_table(void *handle, int init_only)
91{
92}
93
94static inline int unwind_init_frame_info(struct unwind_frame_info *info,
95 struct task_struct *tsk,
96 const struct pt_regs *regs)
97{
98 return -ENOSYS;
99}
100
101static inline int unwind_init_blocked(struct unwind_frame_info *info,
102 struct task_struct *tsk)
103{
104 return -ENOSYS;
105}
106
107static inline int unwind_init_running(struct unwind_frame_info *info,
108 asmlinkage int (*cb)(struct unwind_frame_info *,
109 void *arg),
110 void *arg)
111{
112 return -ENOSYS;
113}
114
115static inline int unwind(struct unwind_frame_info *info)
116{
117 return -ENOSYS;
118}
119
120static inline int unwind_to_user(struct unwind_frame_info *info)
121{
122 return -ENOSYS;
123}
124
125#endif
126
127#endif /* _LINUX_UNWIND_H */
diff --git a/init/Kconfig b/init/Kconfig
index e0358f3946a1..36b02d5924e9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -236,16 +236,6 @@ config UID16
236 help 236 help
237 This enables the legacy 16-bit UID syscall wrappers. 237 This enables the legacy 16-bit UID syscall wrappers.
238 238
239config VM86
240 depends X86
241 default y
242 bool "Enable VM86 support" if EMBEDDED
243 help
244 This option is required by programs like DOSEMU to run 16-bit legacy
245 code on X86 processors. It also may be needed by software like
246 XFree86 to initialize some video cards via BIOS. Disabling this
247 option saves about 6k.
248
249config CC_OPTIMIZE_FOR_SIZE 239config CC_OPTIMIZE_FOR_SIZE
250 bool "Optimize for size (Look out for broken compilers!)" 240 bool "Optimize for size (Look out for broken compilers!)"
251 default y 241 default y
diff --git a/init/main.c b/init/main.c
index 9a970d317ea5..80af1a52485f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,7 @@
47#include <linux/rmap.h> 47#include <linux/rmap.h>
48#include <linux/mempolicy.h> 48#include <linux/mempolicy.h>
49#include <linux/key.h> 49#include <linux/key.h>
50#include <linux/unwind.h>
50 51
51#include <asm/io.h> 52#include <asm/io.h>
52#include <asm/bugs.h> 53#include <asm/bugs.h>
@@ -482,6 +483,7 @@ asmlinkage void __init start_kernel(void)
482 __stop___param - __start___param, 483 __stop___param - __start___param,
483 &unknown_bootoption); 484 &unknown_bootoption);
484 sort_main_extable(); 485 sort_main_extable();
486 unwind_init();
485 trap_init(); 487 trap_init();
486 rcu_init(); 488 rcu_init();
487 init_IRQ(); 489 init_IRQ();
diff --git a/kernel/Makefile b/kernel/Makefile
index bc4b8a7161ff..752bd7d383af 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
22obj-$(CONFIG_UID16) += uid16.o 22obj-$(CONFIG_UID16) += uid16.o
23obj-$(CONFIG_MODULES) += module.o 23obj-$(CONFIG_MODULES) += module.o
24obj-$(CONFIG_KALLSYMS) += kallsyms.o 24obj-$(CONFIG_KALLSYMS) += kallsyms.o
25obj-$(CONFIG_STACK_UNWIND) += unwind.o
25obj-$(CONFIG_PM) += power/ 26obj-$(CONFIG_PM) += power/
26obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 27obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
27obj-$(CONFIG_KEXEC) += kexec.o 28obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/module.c b/kernel/module.c
index d75275de1c28..08811e26ac9d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -40,6 +40,7 @@
40#include <linux/string.h> 40#include <linux/string.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/unwind.h>
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
44#include <asm/semaphore.h> 45#include <asm/semaphore.h>
45#include <asm/cacheflush.h> 46#include <asm/cacheflush.h>
@@ -1051,6 +1052,8 @@ static void free_module(struct module *mod)
1051 remove_sect_attrs(mod); 1052 remove_sect_attrs(mod);
1052 mod_kobject_remove(mod); 1053 mod_kobject_remove(mod);
1053 1054
1055 unwind_remove_table(mod->unwind_info, 0);
1056
1054 /* Arch-specific cleanup. */ 1057 /* Arch-specific cleanup. */
1055 module_arch_cleanup(mod); 1058 module_arch_cleanup(mod);
1056 1059
@@ -1412,7 +1415,7 @@ static struct module *load_module(void __user *umod,
1412 unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, 1415 unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
1413 exportindex, modindex, obsparmindex, infoindex, gplindex, 1416 exportindex, modindex, obsparmindex, infoindex, gplindex,
1414 crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, 1417 crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
1415 gplfuturecrcindex; 1418 gplfuturecrcindex, unwindex = 0;
1416 struct module *mod; 1419 struct module *mod;
1417 long err = 0; 1420 long err = 0;
1418 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1421 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1502,6 +1505,9 @@ static struct module *load_module(void __user *umod,
1502 versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); 1505 versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
1503 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); 1506 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
1504 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); 1507 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
1508#ifdef ARCH_UNWIND_SECTION_NAME
1509 unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
1510#endif
1505 1511
1506 /* Don't keep modinfo section */ 1512 /* Don't keep modinfo section */
1507 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 1513 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1510,6 +1516,8 @@ static struct module *load_module(void __user *umod,
1510 sechdrs[symindex].sh_flags |= SHF_ALLOC; 1516 sechdrs[symindex].sh_flags |= SHF_ALLOC;
1511 sechdrs[strindex].sh_flags |= SHF_ALLOC; 1517 sechdrs[strindex].sh_flags |= SHF_ALLOC;
1512#endif 1518#endif
1519 if (unwindex)
1520 sechdrs[unwindex].sh_flags |= SHF_ALLOC;
1513 1521
1514 /* Check module struct version now, before we try to use module. */ 1522 /* Check module struct version now, before we try to use module. */
1515 if (!check_modstruct_version(sechdrs, versindex, mod)) { 1523 if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1738,6 +1746,11 @@ static struct module *load_module(void __user *umod,
1738 goto arch_cleanup; 1746 goto arch_cleanup;
1739 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 1747 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
1740 1748
1749 /* Size of section 0 is 0, so this works well if no unwind info. */
1750 mod->unwind_info = unwind_add_table(mod,
1751 (void *)sechdrs[unwindex].sh_addr,
1752 sechdrs[unwindex].sh_size);
1753
1741 /* Get rid of temporary copy */ 1754 /* Get rid of temporary copy */
1742 vfree(hdr); 1755 vfree(hdr);
1743 1756
@@ -1836,6 +1849,7 @@ sys_init_module(void __user *umod,
1836 mod->state = MODULE_STATE_LIVE; 1849 mod->state = MODULE_STATE_LIVE;
1837 /* Drop initial reference. */ 1850 /* Drop initial reference. */
1838 module_put(mod); 1851 module_put(mod);
1852 unwind_remove_table(mod->unwind_info, 1);
1839 module_free(mod, mod->module_init); 1853 module_free(mod, mod->module_init);
1840 mod->module_init = NULL; 1854 mod->module_init = NULL;
1841 mod->init_size = 0; 1855 mod->init_size = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index cfaf3fabeecd..a856040c200a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
818 * the target CPU. 818 * the target CPU.
819 */ 819 */
820#ifdef CONFIG_SMP 820#ifdef CONFIG_SMP
821
822#ifndef tsk_is_polling
823#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
824#endif
825
821static void resched_task(task_t *p) 826static void resched_task(task_t *p)
822{ 827{
823 int cpu; 828 int cpu;
@@ -833,9 +838,9 @@ static void resched_task(task_t *p)
833 if (cpu == smp_processor_id()) 838 if (cpu == smp_processor_id())
834 return; 839 return;
835 840
836 /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ 841 /* NEED_RESCHED must be visible before we test polling */
837 smp_mb(); 842 smp_mb();
838 if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) 843 if (!tsk_is_polling(p))
839 smp_send_reschedule(cpu); 844 smp_send_reschedule(cpu);
840} 845}
841#else 846#else
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2c0e65819448..f1a4eb1a655e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -73,6 +73,7 @@ extern int printk_ratelimit_burst;
73extern int pid_max_min, pid_max_max; 73extern int pid_max_min, pid_max_max;
74extern int sysctl_drop_caches; 74extern int sysctl_drop_caches;
75extern int percpu_pagelist_fraction; 75extern int percpu_pagelist_fraction;
76extern int compat_log;
76 77
77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 78#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78int unknown_nmi_panic; 79int unknown_nmi_panic;
@@ -677,6 +678,16 @@ static ctl_table kern_table[] = {
677 .proc_handler = &proc_dointvec, 678 .proc_handler = &proc_dointvec,
678 }, 679 },
679#endif 680#endif
681#ifdef CONFIG_COMPAT
682 {
683 .ctl_name = KERN_COMPAT_LOG,
684 .procname = "compat-log",
685 .data = &compat_log,
686 .maxlen = sizeof (int),
687 .mode = 0644,
688 .proc_handler = &proc_dointvec,
689 },
690#endif
680 { .ctl_name = 0 } 691 { .ctl_name = 0 }
681}; 692};
682 693
diff --git a/kernel/unwind.c b/kernel/unwind.c
new file mode 100644
index 000000000000..f69c804c8e62
--- /dev/null
+++ b/kernel/unwind.c
@@ -0,0 +1,918 @@
1/*
2 * Copyright (C) 2002-2006 Novell, Inc.
3 * Jan Beulich <jbeulich@novell.com>
4 * This code is released under version 2 of the GNU GPL.
5 *
6 * A simple API for unwinding kernel stacks. This is used for
7 * debugging and error reporting purposes. The kernel doesn't need
8 * full-blown stack unwinding with all the bells and whistles, so there
9 * is not much point in implementing the full Dwarf2 unwind API.
10 */
11
12#include <linux/unwind.h>
13#include <linux/module.h>
14#include <linux/delay.h>
15#include <linux/stop_machine.h>
16#include <asm/sections.h>
17#include <asm/uaccess.h>
18#include <asm/unaligned.h>
19
20extern char __start_unwind[], __end_unwind[];
21
22#define MAX_STACK_DEPTH 8
23
24#define EXTRA_INFO(f) { \
25 BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
26 % FIELD_SIZEOF(struct unwind_frame_info, f)) \
27 + offsetof(struct unwind_frame_info, f) \
28 / FIELD_SIZEOF(struct unwind_frame_info, f), \
29 FIELD_SIZEOF(struct unwind_frame_info, f) \
30 }
31#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
32
/*
 * Per-register description table, indexed by DWARF register number.
 * The entries come from UNW_REGISTER_INFO, which is not defined in this
 * file (presumably supplied through <linux/unwind.h> -- confirm), and are
 * built with the EXTRA_INFO()/PTREGS_INFO() helpers defined just above.
 */
33static const struct {
34 unsigned offs:BITS_PER_LONG / 2; /* offset inside struct unwind_frame_info, in units of the field's own size */
35 unsigned width:BITS_PER_LONG / 2; /* size of the field in bytes; 0 marks an invalid register for the default REG_INVALID() */
36} reg_info[] = {
37 UNW_REGISTER_INFO
38};
39
40#undef PTREGS_INFO
41#undef EXTRA_INFO
42
43#ifndef REG_INVALID
44#define REG_INVALID(r) (reg_info[r].width == 0)
45#endif
46
47#define DW_CFA_nop 0x00
48#define DW_CFA_set_loc 0x01
49#define DW_CFA_advance_loc1 0x02
50#define DW_CFA_advance_loc2 0x03
51#define DW_CFA_advance_loc4 0x04
52#define DW_CFA_offset_extended 0x05
53#define DW_CFA_restore_extended 0x06
54#define DW_CFA_undefined 0x07
55#define DW_CFA_same_value 0x08
56#define DW_CFA_register 0x09
57#define DW_CFA_remember_state 0x0a
58#define DW_CFA_restore_state 0x0b
59#define DW_CFA_def_cfa 0x0c
60#define DW_CFA_def_cfa_register 0x0d
61#define DW_CFA_def_cfa_offset 0x0e
62#define DW_CFA_def_cfa_expression 0x0f
63#define DW_CFA_expression 0x10
64#define DW_CFA_offset_extended_sf 0x11
65#define DW_CFA_def_cfa_sf 0x12
66#define DW_CFA_def_cfa_offset_sf 0x13
67#define DW_CFA_val_offset 0x14
68#define DW_CFA_val_offset_sf 0x15
69#define DW_CFA_val_expression 0x16
70#define DW_CFA_lo_user 0x1c
71#define DW_CFA_GNU_window_save 0x2d
72#define DW_CFA_GNU_args_size 0x2e
73#define DW_CFA_GNU_negative_offset_extended 0x2f
74#define DW_CFA_hi_user 0x3f
75
76#define DW_EH_PE_FORM 0x07
77#define DW_EH_PE_native 0x00
78#define DW_EH_PE_leb128 0x01
79#define DW_EH_PE_data2 0x02
80#define DW_EH_PE_data4 0x03
81#define DW_EH_PE_data8 0x04
82#define DW_EH_PE_signed 0x08
83#define DW_EH_PE_ADJUST 0x70
84#define DW_EH_PE_abs 0x00
85#define DW_EH_PE_pcrel 0x10
86#define DW_EH_PE_textrel 0x20
87#define DW_EH_PE_datarel 0x30
88#define DW_EH_PE_funcrel 0x40
89#define DW_EH_PE_aligned 0x50
90#define DW_EH_PE_indirect 0x80
91#define DW_EH_PE_omit 0xff
92
93typedef unsigned long uleb128_t;
94typedef signed long sleb128_t;
95
/*
 * One set of unwind data together with the code ranges it describes.
 * root_table is the statically allocated list head; further tables are
 * chained through ->link, and last_table points at the most recently
 * appended one (it stays NULL until the first table is added).
 */
96static struct unwind_table {
97 struct {
98 unsigned long pc; /* first address of the covered code range */
99 unsigned long range; /* length of the covered code range in bytes */
100 } core, init;
101 const void *address; /* start of the unwind data */
102 unsigned long size; /* size of the unwind data in bytes */
103 struct unwind_table *link; /* next table in the list, NULL at the end */
104 const char *name; /* "kernel" for root_table, the module name otherwise */
105} root_table, *last_table;
106
/*
 * Recovery rule for a single register, as collected by processCFI() and
 * applied by unwind().
 */
107struct unwind_item {
108 enum item_location {
109 Nowhere, /* no rule recorded (the zero-initialised default) */
110 Memory, /* load from address CFA + value * dataAlign */
111 Register, /* copy from the register whose number is 'value' */
112 Value /* the register becomes CFA + value * dataAlign */
113 } where;
114 uleb128_t value; /* operand of the rule, interpreted according to 'where' */
115};
116
/*
 * Working state of the call frame instruction interpreter (processCFI())
 * for one unwind step.
 */
117struct unwind_state {
118 uleb128_t loc, org; /* current location while interpreting; start location of the FDE */
119 const u8 *cieStart, *cieEnd; /* bounds of the CIE's initial instructions */
120 uleb128_t codeAlign; /* code alignment factor, multiplied into every location advance */
121 sleb128_t dataAlign; /* data alignment factor, multiplied into register offsets */
122 struct cfa {
123 uleb128_t reg, offs; /* CFA = contents of register 'reg' plus 'offs' */
124 } cfa;
125 struct unwind_item regs[ARRAY_SIZE(reg_info)]; /* one recovery rule per known register */
126 unsigned stackDepth:8; /* number of entries in use in stack[] */
127 unsigned version:8; /* CIE version byte */
128 const u8 *label; /* position after a DW_CFA_remember_state at which a replay stops; NULL if none pending */
129 const u8 *stack[MAX_STACK_DEPTH]; /* positions recorded by DW_CFA_remember_state */
130};
131
/*
 * Deliberately invalid CFA rule used to initialise/reset state->cfa: the
 * register number is one past the end of reg_info[] and the offset (1) is
 * not a multiple of sizeof(unsigned long), so unwind() rejects it unless
 * the CFI program defines a real CFA.
 */
132static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
133
134static struct unwind_table *find_table(unsigned long pc)
135{
136 struct unwind_table *table;
137
138 for (table = &root_table; table; table = table->link)
139 if ((pc >= table->core.pc
140 && pc < table->core.pc + table->core.range)
141 || (pc >= table->init.pc
142 && pc < table->init.pc + table->init.range))
143 break;
144
145 return table;
146}
147
148static void init_unwind_table(struct unwind_table *table,
149 const char *name,
150 const void *core_start,
151 unsigned long core_size,
152 const void *init_start,
153 unsigned long init_size,
154 const void *table_start,
155 unsigned long table_size)
156{
157 table->core.pc = (unsigned long)core_start;
158 table->core.range = core_size;
159 table->init.pc = (unsigned long)init_start;
160 table->init.range = init_size;
161 table->address = table_start;
162 table->size = table_size;
163 table->link = NULL;
164 table->name = name;
165}
166
167void __init unwind_init(void)
168{
169 init_unwind_table(&root_table, "kernel",
170 _text, _end - _text,
171 NULL, 0,
172 __start_unwind, __end_unwind - __start_unwind);
173}
174
175#ifdef CONFIG_MODULES
176
177/* Must be called with module_mutex held. */
178void *unwind_add_table(struct module *module,
179 const void *table_start,
180 unsigned long table_size)
181{
182 struct unwind_table *table;
183
184 if (table_size <= 0)
185 return NULL;
186
187 table = kmalloc(sizeof(*table), GFP_KERNEL);
188 if (!table)
189 return NULL;
190
191 init_unwind_table(table, module->name,
192 module->module_core, module->core_size,
193 module->module_init, module->init_size,
194 table_start, table_size);
195
196 if (last_table)
197 last_table->link = table;
198 else
199 root_table.link = table;
200 last_table = table;
201
202 return table;
203}
204
205struct unlink_table_info
206{
207 struct unwind_table *table;
208 int init_only;
209};
210
211static int unlink_table(void *arg)
212{
213 struct unlink_table_info *info = arg;
214 struct unwind_table *table = info->table, *prev;
215
216 for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
217 ;
218
219 if (prev->link) {
220 if (info->init_only) {
221 table->init.pc = 0;
222 table->init.range = 0;
223 info->table = NULL;
224 } else {
225 prev->link = table->link;
226 if (!prev->link)
227 last_table = prev;
228 }
229 } else
230 info->table = NULL;
231
232 return 0;
233}
234
235/* Must be called with module_mutex held. */
236void unwind_remove_table(void *handle, int init_only)
237{
238 struct unwind_table *table = handle;
239 struct unlink_table_info info;
240
241 if (!table || table == &root_table)
242 return;
243
244 if (init_only && table == last_table) {
245 table->init.pc = 0;
246 table->init.range = 0;
247 return;
248 }
249
250 info.table = table;
251 info.init_only = init_only;
252 stop_machine_run(unlink_table, &info, NR_CPUS);
253
254 if (info.table)
255 kfree(table);
256}
257
258#endif /* CONFIG_MODULES */
259
260static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
261{
262 const u8 *cur = *pcur;
263 uleb128_t value;
264 unsigned shift;
265
266 for (shift = 0, value = 0; cur < end; shift += 7) {
267 if (shift + 7 > 8 * sizeof(value)
268 && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
269 cur = end + 1;
270 break;
271 }
272 value |= (uleb128_t)(*cur & 0x7f) << shift;
273 if (!(*cur++ & 0x80))
274 break;
275 }
276 *pcur = cur;
277
278 return value;
279}
280
281static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
282{
283 const u8 *cur = *pcur;
284 sleb128_t value;
285 unsigned shift;
286
287 for (shift = 0, value = 0; cur < end; shift += 7) {
288 if (shift + 7 > 8 * sizeof(value)
289 && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
290 cur = end + 1;
291 break;
292 }
293 value |= (sleb128_t)(*cur & 0x7f) << shift;
294 if (!(*cur & 0x80)) {
295 value |= -(*cur++ & 0x40) << shift;
296 break;
297 }
298 }
299 *pcur = cur;
300
301 return value;
302}
303
304static unsigned long read_pointer(const u8 **pLoc,
305 const void *end,
306 signed ptrType)
307{
308 unsigned long value = 0;
309 union {
310 const u8 *p8;
311 const u16 *p16u;
312 const s16 *p16s;
313 const u32 *p32u;
314 const s32 *p32s;
315 const unsigned long *pul;
316 } ptr;
317
318 if (ptrType < 0 || ptrType == DW_EH_PE_omit)
319 return 0;
320 ptr.p8 = *pLoc;
321 switch(ptrType & DW_EH_PE_FORM) {
322 case DW_EH_PE_data2:
323 if (end < (const void *)(ptr.p16u + 1))
324 return 0;
325 if(ptrType & DW_EH_PE_signed)
326 value = get_unaligned(ptr.p16s++);
327 else
328 value = get_unaligned(ptr.p16u++);
329 break;
330 case DW_EH_PE_data4:
331#ifdef CONFIG_64BIT
332 if (end < (const void *)(ptr.p32u + 1))
333 return 0;
334 if(ptrType & DW_EH_PE_signed)
335 value = get_unaligned(ptr.p32s++);
336 else
337 value = get_unaligned(ptr.p32u++);
338 break;
339 case DW_EH_PE_data8:
340 BUILD_BUG_ON(sizeof(u64) != sizeof(value));
341#else
342 BUILD_BUG_ON(sizeof(u32) != sizeof(value));
343#endif
344 case DW_EH_PE_native:
345 if (end < (const void *)(ptr.pul + 1))
346 return 0;
347 value = get_unaligned(ptr.pul++);
348 break;
349 case DW_EH_PE_leb128:
350 BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
351 value = ptrType & DW_EH_PE_signed
352 ? get_sleb128(&ptr.p8, end)
353 : get_uleb128(&ptr.p8, end);
354 if ((const void *)ptr.p8 > end)
355 return 0;
356 break;
357 default:
358 return 0;
359 }
360 switch(ptrType & DW_EH_PE_ADJUST) {
361 case DW_EH_PE_abs:
362 break;
363 case DW_EH_PE_pcrel:
364 value += (unsigned long)*pLoc;
365 break;
366 default:
367 return 0;
368 }
369 if ((ptrType & DW_EH_PE_indirect)
370 && __get_user(value, (unsigned long *)value))
371 return 0;
372 *pLoc = ptr.p8;
373
374 return value;
375}
376
377static signed fde_pointer_type(const u32 *cie)
378{
379 const u8 *ptr = (const u8 *)(cie + 2);
380 unsigned version = *ptr;
381
382 if (version != 1)
383 return -1; /* unsupported */
384 if (*++ptr) {
385 const char *aug;
386 const u8 *end = (const u8 *)(cie + 1) + *cie;
387 uleb128_t len;
388
389 /* check if augmentation size is first (and thus present) */
390 if (*ptr != 'z')
391 return -1;
392 /* check if augmentation string is nul-terminated */
393 if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
394 return -1;
395 ++ptr; /* skip terminator */
396 get_uleb128(&ptr, end); /* skip code alignment */
397 get_sleb128(&ptr, end); /* skip data alignment */
398 /* skip return address column */
399 version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
400 len = get_uleb128(&ptr, end); /* augmentation length */
401 if (ptr + len < ptr || ptr + len > end)
402 return -1;
403 end = ptr + len;
404 while (*++aug) {
405 if (ptr >= end)
406 return -1;
407 switch(*aug) {
408 case 'L':
409 ++ptr;
410 break;
411 case 'P': {
412 signed ptrType = *ptr++;
413
414 if (!read_pointer(&ptr, end, ptrType) || ptr > end)
415 return -1;
416 }
417 break;
418 case 'R':
419 return *ptr;
420 default:
421 return -1;
422 }
423 }
424 }
425 return DW_EH_PE_native|DW_EH_PE_abs;
426}
427
428static int advance_loc(unsigned long delta, struct unwind_state *state)
429{
430 state->loc += delta * state->codeAlign;
431
432 return delta > 0;
433}
434
435static void set_rule(uleb128_t reg,
436 enum item_location where,
437 uleb128_t value,
438 struct unwind_state *state)
439{
440 if (reg < ARRAY_SIZE(state->regs)) {
441 state->regs[reg].where = where;
442 state->regs[reg].value = value;
443 }
444}
445
446static int processCFI(const u8 *start,
447 const u8 *end,
448 unsigned long targetLoc,
449 signed ptrType,
450 struct unwind_state *state)
451{
452 union {
453 const u8 *p8;
454 const u16 *p16;
455 const u32 *p32;
456 } ptr;
457 int result = 1;
458
459 if (start != state->cieStart) {
460 state->loc = state->org;
461 result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
462 if (targetLoc == 0 && state->label == NULL)
463 return result;
464 }
465 for (ptr.p8 = start; result && ptr.p8 < end; ) {
466 switch(*ptr.p8 >> 6) {
467 uleb128_t value;
468
469 case 0:
470 switch(*ptr.p8++) {
471 case DW_CFA_nop:
472 break;
473 case DW_CFA_set_loc:
474 if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
475 result = 0;
476 break;
477 case DW_CFA_advance_loc1:
478 result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
479 break;
480 case DW_CFA_advance_loc2:
481 result = ptr.p8 <= end + 2
482 && advance_loc(*ptr.p16++, state);
483 break;
484 case DW_CFA_advance_loc4:
485 result = ptr.p8 <= end + 4
486 && advance_loc(*ptr.p32++, state);
487 break;
488 case DW_CFA_offset_extended:
489 value = get_uleb128(&ptr.p8, end);
490 set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
491 break;
492 case DW_CFA_val_offset:
493 value = get_uleb128(&ptr.p8, end);
494 set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
495 break;
496 case DW_CFA_offset_extended_sf:
497 value = get_uleb128(&ptr.p8, end);
498 set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
499 break;
500 case DW_CFA_val_offset_sf:
501 value = get_uleb128(&ptr.p8, end);
502 set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
503 break;
504 case DW_CFA_restore_extended:
505 case DW_CFA_undefined:
506 case DW_CFA_same_value:
507 set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
508 break;
509 case DW_CFA_register:
510 value = get_uleb128(&ptr.p8, end);
511 set_rule(value,
512 Register,
513 get_uleb128(&ptr.p8, end), state);
514 break;
515 case DW_CFA_remember_state:
516 if (ptr.p8 == state->label) {
517 state->label = NULL;
518 return 1;
519 }
520 if (state->stackDepth >= MAX_STACK_DEPTH)
521 return 0;
522 state->stack[state->stackDepth++] = ptr.p8;
523 break;
524 case DW_CFA_restore_state:
525 if (state->stackDepth) {
526 const uleb128_t loc = state->loc;
527 const u8 *label = state->label;
528
529 state->label = state->stack[state->stackDepth - 1];
530 memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
531 memset(state->regs, 0, sizeof(state->regs));
532 state->stackDepth = 0;
533 result = processCFI(start, end, 0, ptrType, state);
534 state->loc = loc;
535 state->label = label;
536 } else
537 return 0;
538 break;
539 case DW_CFA_def_cfa:
540 state->cfa.reg = get_uleb128(&ptr.p8, end);
541 /*nobreak*/
542 case DW_CFA_def_cfa_offset:
543 state->cfa.offs = get_uleb128(&ptr.p8, end);
544 break;
545 case DW_CFA_def_cfa_sf:
546 state->cfa.reg = get_uleb128(&ptr.p8, end);
547 /*nobreak*/
548 case DW_CFA_def_cfa_offset_sf:
549 state->cfa.offs = get_sleb128(&ptr.p8, end)
550 * state->dataAlign;
551 break;
552 case DW_CFA_def_cfa_register:
553 state->cfa.reg = get_uleb128(&ptr.p8, end);
554 break;
555 /*todo case DW_CFA_def_cfa_expression: */
556 /*todo case DW_CFA_expression: */
557 /*todo case DW_CFA_val_expression: */
558 case DW_CFA_GNU_args_size:
559 get_uleb128(&ptr.p8, end);
560 break;
561 case DW_CFA_GNU_negative_offset_extended:
562 value = get_uleb128(&ptr.p8, end);
563 set_rule(value,
564 Memory,
565 (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
566 break;
567 case DW_CFA_GNU_window_save:
568 default:
569 result = 0;
570 break;
571 }
572 break;
573 case 1:
574 result = advance_loc(*ptr.p8++ & 0x3f, state);
575 break;
576 case 2:
577 value = *ptr.p8++ & 0x3f;
578 set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
579 break;
580 case 3:
581 set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
582 break;
583 }
584 if (ptr.p8 > end)
585 result = 0;
586 if (result && targetLoc != 0 && targetLoc < state->loc)
587 return 1;
588 }
589
590 return result
591 && ptr.p8 == end
592 && (targetLoc == 0
593 || (/*todo While in theory this should apply, gcc in practice omits
594 everything past the function prolog, and hence the location
595 never reaches the end of the function.
596 targetLoc < state->loc &&*/ state->label == NULL));
597}
598
599/* Unwind to previous to frame. Returns 0 if successful, negative
600 * number in case of an error. */
601int unwind(struct unwind_frame_info *frame)
602{
603#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
604 const u32 *fde = NULL, *cie = NULL;
605 const u8 *ptr = NULL, *end = NULL;
606 unsigned long startLoc = 0, endLoc = 0, cfa;
607 unsigned i;
608 signed ptrType = -1;
609 uleb128_t retAddrReg = 0;
610 struct unwind_table *table;
611 struct unwind_state state;
612
613 if (UNW_PC(frame) == 0)
614 return -EINVAL;
615 if ((table = find_table(UNW_PC(frame))) != NULL
616 && !(table->size & (sizeof(*fde) - 1))) {
617 unsigned long tableSize = table->size;
618
619 for (fde = table->address;
620 tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
621 tableSize -= sizeof(*fde) + *fde,
622 fde += 1 + *fde / sizeof(*fde)) {
623 if (!*fde || (*fde & (sizeof(*fde) - 1)))
624 break;
625 if (!fde[1])
626 continue; /* this is a CIE */
627 if ((fde[1] & (sizeof(*fde) - 1))
628 || fde[1] > (unsigned long)(fde + 1)
629 - (unsigned long)table->address)
630 continue; /* this is not a valid FDE */
631 cie = fde + 1 - fde[1] / sizeof(*fde);
632 if (*cie <= sizeof(*cie) + 4
633 || *cie >= fde[1] - sizeof(*fde)
634 || (*cie & (sizeof(*cie) - 1))
635 || cie[1]
636 || (ptrType = fde_pointer_type(cie)) < 0) {
637 cie = NULL; /* this is not a (valid) CIE */
638 continue;
639 }
640 ptr = (const u8 *)(fde + 2);
641 startLoc = read_pointer(&ptr,
642 (const u8 *)(fde + 1) + *fde,
643 ptrType);
644 endLoc = startLoc
645 + read_pointer(&ptr,
646 (const u8 *)(fde + 1) + *fde,
647 ptrType & DW_EH_PE_indirect
648 ? ptrType
649 : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
650 if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc)
651 break;
652 cie = NULL;
653 }
654 }
655 if (cie != NULL) {
656 memset(&state, 0, sizeof(state));
657 state.cieEnd = ptr; /* keep here temporarily */
658 ptr = (const u8 *)(cie + 2);
659 end = (const u8 *)(cie + 1) + *cie;
660 if ((state.version = *ptr) != 1)
661 cie = NULL; /* unsupported version */
662 else if (*++ptr) {
663 /* check if augmentation size is first (and thus present) */
664 if (*ptr == 'z') {
665 /* check for ignorable (or already handled)
666 * nul-terminated augmentation string */
667 while (++ptr < end && *ptr)
668 if (strchr("LPR", *ptr) == NULL)
669 break;
670 }
671 if (ptr >= end || *ptr)
672 cie = NULL;
673 }
674 ++ptr;
675 }
676 if (cie != NULL) {
677 /* get code aligment factor */
678 state.codeAlign = get_uleb128(&ptr, end);
679 /* get data aligment factor */
680 state.dataAlign = get_sleb128(&ptr, end);
681 if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
682 cie = NULL;
683 else {
684 retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
685 /* skip augmentation */
686 if (((const char *)(cie + 2))[1] == 'z')
687 ptr += get_uleb128(&ptr, end);
688 if (ptr > end
689 || retAddrReg >= ARRAY_SIZE(reg_info)
690 || REG_INVALID(retAddrReg)
691 || reg_info[retAddrReg].width != sizeof(unsigned long))
692 cie = NULL;
693 }
694 }
695 if (cie != NULL) {
696 state.cieStart = ptr;
697 ptr = state.cieEnd;
698 state.cieEnd = end;
699 end = (const u8 *)(fde + 1) + *fde;
700 /* skip augmentation */
701 if (((const char *)(cie + 2))[1] == 'z') {
702 uleb128_t augSize = get_uleb128(&ptr, end);
703
704 if ((ptr += augSize) > end)
705 fde = NULL;
706 }
707 }
708 if (cie == NULL || fde == NULL) {
709#ifdef CONFIG_FRAME_POINTER
710 unsigned long top, bottom;
711#endif
712
713#ifdef CONFIG_FRAME_POINTER
714 top = STACK_TOP(frame->task);
715 bottom = STACK_BOTTOM(frame->task);
716# if FRAME_RETADDR_OFFSET < 0
717 if (UNW_SP(frame) < top
718 && UNW_FP(frame) <= UNW_SP(frame)
719 && bottom < UNW_FP(frame)
720# else
721 if (UNW_SP(frame) > top
722 && UNW_FP(frame) >= UNW_SP(frame)
723 && bottom > UNW_FP(frame)
724# endif
725 && !((UNW_SP(frame) | UNW_FP(frame))
726 & (sizeof(unsigned long) - 1))) {
727 unsigned long link;
728
729 if (!__get_user(link,
730 (unsigned long *)(UNW_FP(frame)
731 + FRAME_LINK_OFFSET))
732# if FRAME_RETADDR_OFFSET < 0
733 && link > bottom && link < UNW_FP(frame)
734# else
735 && link > UNW_FP(frame) && link < bottom
736# endif
737 && !(link & (sizeof(link) - 1))
738 && !__get_user(UNW_PC(frame),
739 (unsigned long *)(UNW_FP(frame)
740 + FRAME_RETADDR_OFFSET))) {
741 UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
742# if FRAME_RETADDR_OFFSET < 0
743 -
744# else
745 +
746# endif
747 sizeof(UNW_PC(frame));
748 UNW_FP(frame) = link;
749 return 0;
750 }
751 }
752#endif
753 return -ENXIO;
754 }
755 state.org = startLoc;
756 memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
757 /* process instructions */
758 if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state)
759 || state.loc > endLoc
760 || state.regs[retAddrReg].where == Nowhere
761 || state.cfa.reg >= ARRAY_SIZE(reg_info)
762 || reg_info[state.cfa.reg].width != sizeof(unsigned long)
763 || state.cfa.offs % sizeof(unsigned long))
764 return -EIO;
765 /* update frame */
766 cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
767 startLoc = min((unsigned long)UNW_SP(frame), cfa);
768 endLoc = max((unsigned long)UNW_SP(frame), cfa);
769 if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
770 startLoc = min(STACK_LIMIT(cfa), cfa);
771 endLoc = max(STACK_LIMIT(cfa), cfa);
772 }
773#ifndef CONFIG_64BIT
774# define CASES CASE(8); CASE(16); CASE(32)
775#else
776# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
777#endif
778 for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
779 if (REG_INVALID(i)) {
780 if (state.regs[i].where == Nowhere)
781 continue;
782 return -EIO;
783 }
784 switch(state.regs[i].where) {
785 default:
786 break;
787 case Register:
788 if (state.regs[i].value >= ARRAY_SIZE(reg_info)
789 || REG_INVALID(state.regs[i].value)
790 || reg_info[i].width > reg_info[state.regs[i].value].width)
791 return -EIO;
792 switch(reg_info[state.regs[i].value].width) {
793#define CASE(n) \
794 case sizeof(u##n): \
795 state.regs[i].value = FRAME_REG(state.regs[i].value, \
796 const u##n); \
797 break
798 CASES;
799#undef CASE
800 default:
801 return -EIO;
802 }
803 break;
804 }
805 }
806 for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
807 if (REG_INVALID(i))
808 continue;
809 switch(state.regs[i].where) {
810 case Nowhere:
811 if (reg_info[i].width != sizeof(UNW_SP(frame))
812 || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
813 != &UNW_SP(frame))
814 continue;
815 UNW_SP(frame) = cfa;
816 break;
817 case Register:
818 switch(reg_info[i].width) {
819#define CASE(n) case sizeof(u##n): \
820 FRAME_REG(i, u##n) = state.regs[i].value; \
821 break
822 CASES;
823#undef CASE
824 default:
825 return -EIO;
826 }
827 break;
828 case Value:
829 if (reg_info[i].width != sizeof(unsigned long))
830 return -EIO;
831 FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
832 * state.dataAlign;
833 break;
834 case Memory: {
835 unsigned long addr = cfa + state.regs[i].value
836 * state.dataAlign;
837
838 if ((state.regs[i].value * state.dataAlign)
839 % sizeof(unsigned long)
840 || addr < startLoc
841 || addr + sizeof(unsigned long) < addr
842 || addr + sizeof(unsigned long) > endLoc)
843 return -EIO;
844 switch(reg_info[i].width) {
845#define CASE(n) case sizeof(u##n): \
846 __get_user(FRAME_REG(i, u##n), (u##n *)addr); \
847 break
848 CASES;
849#undef CASE
850 default:
851 return -EIO;
852 }
853 }
854 break;
855 }
856 }
857
858 return 0;
859#undef CASES
860#undef FRAME_REG
861}
862EXPORT_SYMBOL(unwind);
863
864int unwind_init_frame_info(struct unwind_frame_info *info,
865 struct task_struct *tsk,
866 /*const*/ struct pt_regs *regs)
867{
868 info->task = tsk;
869 arch_unw_init_frame_info(info, regs);
870
871 return 0;
872}
873EXPORT_SYMBOL(unwind_init_frame_info);
874
875/*
876 * Prepare to unwind a blocked task.
877 */
878int unwind_init_blocked(struct unwind_frame_info *info,
879 struct task_struct *tsk)
880{
881 info->task = tsk;
882 arch_unw_init_blocked(info);
883
884 return 0;
885}
886EXPORT_SYMBOL(unwind_init_blocked);
887
888/*
889 * Prepare to unwind the currently running thread.
890 */
891int unwind_init_running(struct unwind_frame_info *info,
892 asmlinkage int (*callback)(struct unwind_frame_info *,
893 void *arg),
894 void *arg)
895{
896 info->task = current;
897
898 return arch_unwind_init_running(info, callback, arg);
899}
900EXPORT_SYMBOL(unwind_init_running);
901
/*
 * Unwind until the return pointer is in user-land (or until an error
 * occurs).  Returns 0 if successful, or the negative error reported by
 * unwind() otherwise.
 */
int unwind_to_user(struct unwind_frame_info *info)
{
	int err = 0;

	/* stop at the first failed step or once user mode is reached */
	while (err >= 0 && !arch_unw_user_mode(info))
		err = unwind(info);

	return err < 0 ? err : 0;
}
EXPORT_SYMBOL(unwind_to_user);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ccb0c1fdf1b5..8bab0102ac73 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -188,14 +188,22 @@ config FRAME_POINTER
188 188
189config UNWIND_INFO 189config UNWIND_INFO
190 bool "Compile the kernel with frame unwind information" 190 bool "Compile the kernel with frame unwind information"
191 depends on !IA64 191 depends on !IA64 && !PARISC
192 depends on !MODULES || !(MIPS || PARISC || PPC || SUPERH || V850) 192 depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
193 help 193 help
194 If you say Y here the resulting kernel image will be slightly larger 194 If you say Y here the resulting kernel image will be slightly larger
195 but not slower, and it will give very useful debugging information. 195 but not slower, and it will give very useful debugging information.
196 If you don't debug the kernel, you can say N, but we may not be able 196 If you don't debug the kernel, you can say N, but we may not be able
197 to solve problems without frame unwind information or frame pointers. 197 to solve problems without frame unwind information or frame pointers.
198 198
199config STACK_UNWIND
200 bool "Stack unwind support"
201 depends on UNWIND_INFO
202 depends on X86
203 help
204 This enables more precise stack traces, omitting all unrelated
205 occurrences of pointers into kernel code from the dump.
206
199config FORCED_INLINING 207config FORCED_INLINING
200 bool "Force gcc to inline functions marked 'inline'" 208 bool "Force gcc to inline functions marked 'inline'"
201 depends on DEBUG_KERNEL 209 depends on DEBUG_KERNEL