Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                                  |  12
-rw-r--r--  arch/x86/boot/a20.c                               |   2
-rw-r--r--  arch/x86/boot/apm.c                               |   2
-rw-r--r--  arch/x86/boot/bitops.h                            |   2
-rw-r--r--  arch/x86/boot/boot.h                              |   2
-rw-r--r--  arch/x86/boot/cmdline.c                           |   2
-rw-r--r--  arch/x86/boot/compressed/head_32.S                |  15
-rw-r--r--  arch/x86/boot/compressed/head_64.S                |  30
-rw-r--r--  arch/x86/boot/compressed/misc.c                   |   8
-rw-r--r--  arch/x86/boot/compressed/vmlinux_64.lds           |   4
-rw-r--r--  arch/x86/boot/copy.S                              |   2
-rw-r--r--  arch/x86/boot/cpucheck.c                          |   2
-rw-r--r--  arch/x86/boot/edd.c                               |   2
-rw-r--r--  arch/x86/boot/install.sh                          |   2
-rw-r--r--  arch/x86/boot/main.c                              |   2
-rw-r--r--  arch/x86/boot/mca.c                               |   2
-rw-r--r--  arch/x86/boot/memory.c                            |   2
-rw-r--r--  arch/x86/boot/pm.c                                |   2
-rw-r--r--  arch/x86/boot/pmjump.S                            |   2
-rw-r--r--  arch/x86/boot/printf.c                            |   2
-rw-r--r--  arch/x86/boot/string.c                            |   2
-rw-r--r--  arch/x86/boot/tty.c                               |   2
-rw-r--r--  arch/x86/boot/version.c                           |   2
-rw-r--r--  arch/x86/boot/video-bios.c                        |   2
-rw-r--r--  arch/x86/boot/video-vesa.c                        |   2
-rw-r--r--  arch/x86/boot/video-vga.c                         |   2
-rw-r--r--  arch/x86/boot/video.c                             |   2
-rw-r--r--  arch/x86/boot/video.h                             |   2
-rw-r--r--  arch/x86/boot/voyager.c                           |   2
-rw-r--r--  arch/x86/ia32/sys_ia32.c                          |   1
-rw-r--r--  arch/x86/kernel/Makefile                          |   9
-rw-r--r--  arch/x86/kernel/acpi/cstate.c                     |   6
-rw-r--r--  arch/x86/kernel/acpi/processor.c                  |   2
-rw-r--r--  arch/x86/kernel/cpu/common.c                      |   1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c        |  28
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c         |   4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c         |  32
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c  |  13
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c       |  20
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c             |  92
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c           |  46
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c          |   1
-rw-r--r--  arch/x86/kernel/cpu/proc.c                        |   1
-rw-r--r--  arch/x86/kernel/cpuid.c                           |   4
-rw-r--r--  arch/x86/kernel/e820_32.c                         |   4
-rw-r--r--  arch/x86/kernel/e820_64.c                         |   4
-rw-r--r--  arch/x86/kernel/efi.c                             |  18
-rw-r--r--  arch/x86/kernel/efi_64.c                          |  12
-rw-r--r--  arch/x86/kernel/entry_32.S                        |   1
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c                  |  17
-rw-r--r--  arch/x86/kernel/head64.c                          |   2
-rw-r--r--  arch/x86/kernel/head_32.S                         |   1
-rw-r--r--  arch/x86/kernel/i387.c                            | 114
-rw-r--r--  arch/x86/kernel/io_apic_64.c                      |   2
-rw-r--r--  arch/x86/kernel/kgdb.c                            |   6
-rw-r--r--  arch/x86/kernel/microcode.c                       |  16
-rw-r--r--  arch/x86/kernel/msr.c                             |   4
-rw-r--r--  arch/x86/kernel/nmi_32.c                          |   3
-rw-r--r--  arch/x86/kernel/nmi_64.c                          |   6
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c                  |   3
-rw-r--r--  arch/x86/kernel/pci-dma.c (renamed from arch/x86/kernel/pci-dma_64.c) | 546
-rw-r--r--  arch/x86/kernel/pci-dma_32.c                      | 177
-rw-r--r--  arch/x86/kernel/pci-gart_64.c                     |  15
-rw-r--r--  arch/x86/kernel/pci-nommu.c (renamed from arch/x86/kernel/pci-nommu_64.c) |  34
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c                  |   9
-rw-r--r--  arch/x86/kernel/process.c                         |  44
-rw-r--r--  arch/x86/kernel/process_32.c                      |  50
-rw-r--r--  arch/x86/kernel/process_64.c                      |  74
-rw-r--r--  arch/x86/kernel/reboot.c                          |   2
-rw-r--r--  arch/x86/kernel/setup.c                           |  28
-rw-r--r--  arch/x86/kernel/setup64.c                         |   4
-rw-r--r--  arch/x86/kernel/setup_32.c                        |   7
-rw-r--r--  arch/x86/kernel/setup_64.c                        |  13
-rw-r--r--  arch/x86/kernel/smpboot.c                         |  29
-rw-r--r--  arch/x86/kernel/traps_32.c                        |  35
-rw-r--r--  arch/x86/kernel/traps_64.c                        |  36
-rw-r--r--  arch/x86/kernel/tsc_32.c                          |  23
-rw-r--r--  arch/x86/kernel/tsc_64.c                          |  23
-rw-r--r--  arch/x86/mach-visws/visws_apic.c                  |   2
-rw-r--r--  arch/x86/mach-voyager/voyager_basic.c             |   2
-rw-r--r--  arch/x86/mach-voyager/voyager_cat.c               |   2
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c               |   2
-rw-r--r--  arch/x86/mach-voyager/voyager_thread.c            |   2
-rw-r--r--  arch/x86/math-emu/fpu_entry.c                     |   4
-rw-r--r--  arch/x86/math-emu/fpu_system.h                    |  26
-rw-r--r--  arch/x86/math-emu/reg_ld_str.c                    |   4
-rw-r--r--  arch/x86/mm/discontig_32.c                        |   6
-rw-r--r--  arch/x86/mm/init_32.c                             |   1
-rw-r--r--  arch/x86/mm/init_64.c                             |   3
-rw-r--r--  arch/x86/mm/ioremap.c                             |   5
-rw-r--r--  arch/x86/mm/k8topology_64.c                       |   2
-rw-r--r--  arch/x86/mm/numa_64.c                             |  19
-rw-r--r--  arch/x86/mm/pgtable_32.c                          |   4
-rw-r--r--  arch/x86/mm/srat_64.c                             |  32
-rw-r--r--  arch/x86/oprofile/nmi_int.c                       |  49
-rw-r--r--  arch/x86/vdso/Makefile                            |   3
-rw-r--r--  arch/x86/video/fbdev.c                            |   1
97 files changed, 1107 insertions(+), 801 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2a59dbb28248..87a693cf2bb7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -117,6 +117,9 @@ config ARCH_HAS_CPU_RELAX
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
 
+config HAVE_CPUMASK_OF_CPU_MAP
+	def_bool X86_64_SMP
+
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 	depends on !SMP || !X86_VOYAGER
@@ -903,6 +906,15 @@ config X86_64_ACPI_NUMA
 	help
 	  Enable ACPI SRAT based node topology detection.
 
+# Some NUMA nodes have memory ranges that span
+# other nodes.  Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.  See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+	def_bool y
+	depends on X86_64_ACPI_NUMA
+
 config NUMA_EMU
 	bool "NUMA emulation"
 	depends on X86_64 && NUMA
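
The NODES_SPAN_OTHER_NODES help text above carries the whole argument: with SRAT-described NUMA, a pfn can lie between a node's start and end pfns yet belong to a different node, so zone init must verify node ownership per pfn. A minimal sketch of that check, assuming the SRAT-backed early_pfn_to_nid() lookup (the helper name here is illustrative, not from the patch):

    /* Sketch: only initialise this pfn for 'nid' if the SRAT-derived
     * pfn-to-node mapping agrees; another node's memmap_init_zone()
     * pass will cover it otherwise. */
    static int pfn_belongs_to_node(unsigned long pfn, int nid)
    {
            return early_pfn_to_nid(pfn) == nid;
    }
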
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 31348d054fca..90943f83e84d 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/a20.c
- *
  * Enable A20 gate (return -1 on failure)
  */
 
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index c117c7fb859c..7aa6033001f9 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -12,8 +12,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/apm.c
- *
  * Get APM BIOS information
  */
 
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 8dcc8dc7db88..878e4b9940d9 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/bitops.h
- *
  * Very simple bitops for the boot code.
  */
 
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 09578070bfba..a34b9982c7cb 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/boot.h
- *
  * Header file for the real-mode kernel code
  */
 
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 680408a0f463..a1d35634bce0 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/cmdline.c
- *
  * Simple command-line parser for early boot.
  */
 
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 036e635f18a3..ba7736cf2ec7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -130,7 +130,7 @@ relocated:
 /*
  * Setup the stack for the decompressor
  */
-	leal stack_end(%ebx), %esp
+	leal boot_stack_end(%ebx), %esp
 
 /*
  * Do the decompression, and jump to the new kernel..
@@ -142,8 +142,8 @@ relocated:
 	pushl %eax	# input_len
 	leal input_data(%ebx), %eax
 	pushl %eax	# input_data
-	leal _end(%ebx), %eax
-	pushl %eax	# end of the image as third argument
+	leal boot_heap(%ebx), %eax
+	pushl %eax	# heap area as third argument
 	pushl %esi	# real mode pointer as second arg
 	call decompress_kernel
 	addl $20, %esp
@@ -181,7 +181,10 @@ relocated:
 	jmp *%ebp
 
 .bss
+/* Stack and heap for uncompression */
 .balign 4
-stack:
-	.fill 4096, 1, 0
-stack_end:
+boot_heap:
+	.fill BOOT_HEAP_SIZE, 1, 0
+boot_stack:
+	.fill BOOT_STACK_SIZE, 1, 0
+boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index e8657b98c902..d8819efac81d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -28,6 +28,7 @@
 #include <asm/segment.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/boot.h>
 #include <asm/msr.h>
 #include <asm/asm-offsets.h>
 
@@ -62,7 +63,7 @@ startup_32:
 	subl	$1b, %ebp
 
 /* setup a stack and make sure cpu supports long mode. */
-	movl	$user_stack_end, %eax
+	movl	$boot_stack_end, %eax
 	addl	%ebp, %eax
 	movl	%eax, %esp
 
@@ -243,9 +244,9 @@ ENTRY(startup_64)
 /* Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	leaq	_end(%rip), %r8
-	leaq	_end(%rbx), %r9
-	movq	$_end /* - $startup_32 */, %rcx
+	leaq	_end_before_pgt(%rip), %r8
+	leaq	_end_before_pgt(%rbx), %r9
+	movq	$_end_before_pgt /* - $startup_32 */, %rcx
 1:	subq	$8, %r8
 	subq	$8, %r9
 	movq	0(%r8), %rax
@@ -267,14 +268,14 @@ relocated:
  */
 	xorq	%rax, %rax
 	leaq	_edata(%rbx), %rdi
-	leaq	_end(%rbx), %rcx
+	leaq	_end_before_pgt(%rbx), %rcx
 	subq	%rdi, %rcx
 	cld
 	rep
 	stosb
 
 	/* Setup the stack */
-	leaq	user_stack_end(%rip), %rsp
+	leaq	boot_stack_end(%rip), %rsp
 
 	/* zero EFLAGS after setting rsp */
 	pushq	$0
@@ -285,7 +286,7 @@ relocated:
  */
 	pushq	%rsi			# Save the real mode argument
 	movq	%rsi, %rdi		# real mode address
-	leaq	_heap(%rip), %rsi	# _heap
+	leaq	boot_heap(%rip), %rsi	# malloc area for uncompression
 	leaq	input_data(%rip), %rdx	# input_data
 	movl	input_len(%rip), %eax
 	movq	%rax, %rcx		# input_len
@@ -310,9 +311,12 @@ gdt:
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
 gdt_end:
-	.bss
-/* Stack for uncompression */
-	.balign 4
-user_stack:
-	.fill 4096,4,0
-user_stack_end:
+
+.bss
+/* Stack and heap for uncompression */
+.balign 4
+boot_heap:
+	.fill BOOT_HEAP_SIZE, 1, 0
+boot_stack:
+	.fill BOOT_STACK_SIZE, 1, 0
+boot_stack_end:
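
The copy loop at startup_64 runs backwards, pre-decrementing both pointers, so the compressed image can be moved toward the end of the buffer even though source and destination overlap. The same idea as a C sketch (names are illustrative, not from the patch):

    /* Overlap-safe copy for dst > src: walk from the end so every
     * source qword is read before a destination write can clobber it,
     * 8 bytes at a time as in the startup_64 loop. */
    static void copy_backward(unsigned long *dst_end,
                              const unsigned long *src_end,
                              unsigned long qwords)
    {
            while (qwords--)
                    *--dst_end = *--src_end;
    }
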
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dad4e699f5a3..90456cee47c3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -217,12 +217,6 @@ static void putstr(const char *);
 static memptr free_mem_ptr;
 static memptr free_mem_end_ptr;
 
-#ifdef CONFIG_X86_64
-#define HEAP_SIZE	0x7000
-#else
-#define HEAP_SIZE	0x4000
-#endif
-
 static char *vidmem;
 static int vidport;
 static int lines, cols;
@@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 
 	window = output;		/* Output buffer (Normally at 1M) */
 	free_mem_ptr = heap;		/* Heap */
-	free_mem_end_ptr = heap + HEAP_SIZE;
+	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 	inbuf = input_data;		/* Input buffer */
 	insize = input_len;
 	inptr = 0;
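
free_mem_ptr and free_mem_end_ptr delimit the heap that head_32.S/head_64.S now reserve as BOOT_HEAP_SIZE bytes at boot_heap. The decompressor's allocator is a simple bump allocator over that window; a sketch of the idea, simplified from the misc.c malloc() of this era (error handling reduced to the essentials):

    static void *malloc(int size)
    {
            void *p;

            free_mem_ptr = (free_mem_ptr + 3) & ~3;   /* 4-byte align */
            p = (void *)free_mem_ptr;
            free_mem_ptr += size;                     /* bump; no free() */
            if (free_mem_end_ptr && free_mem_ptr >= free_mem_end_ptr)
                    error("Out of memory");           /* heap exhausted */
            return p;
    }
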
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
index 7e5c7209f6cc..bef1ac891bce 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux_64.lds
@@ -39,10 +39,10 @@ SECTIONS
 		*(.bss.*)
 		*(COMMON)
 		. = ALIGN(8);
-		_end = . ;
+		_end_before_pgt = . ;
 		. = ALIGN(4096);
 		pgtable = . ;
 		. = . + 4096 * 6;
-		_heap = .;
+		_ebss = .;
 	}
 }
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index ef127e56a3cf..ef50c84e8b4b 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/copy.S
- *
  * Memory copy routines
  */
 
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 2462c88689ed..7804389ee005 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/cpucheck.c
- *
  * Check for obligatory CPU features and abort if the features are not
  * present.  This code should be compilable as 16-, 32- or 64-bit
  * code, so be very careful with types and inline assembly.
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 8721dc46a0b6..d84a48ece785 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/edd.c
- *
  * Get EDD BIOS disk information
  */
 
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 88d77761d01b..8d60ee15dfd9 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -1,7 +1,5 @@
 #!/bin/sh
 #
-# arch/i386/boot/install.sh
-#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 7828da5cfd07..77569a4a3be1 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/main.c
- *
  * Main module for the real-mode kernel code
  */
 
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 68222f2d4b67..911eaae5d696 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/mca.c
- *
  * Get the MCA system description table
  */
 
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index e77d89f9e8aa..acad32eb4290 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/memory.c
- *
  * Memory detection code
  */
 
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index a93cb8bded4d..328956fdb59e 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/pm.c
- *
  * Prepare the machine for transition to protected mode.
  */
 
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index f5402d51f7c3..ab049d40a884 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/pmjump.S
- *
  * The actual transition into protected mode
  */
 
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 7e7e890699be..c1d00c0274c4 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/printf.c
- *
  * Oh, it's a waste of space, but oh-so-yummy for debugging.  This
  * version of printf() does not include 64-bit support.  "Live with
  * it."
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 481a22097781..f94b7a0c2abf 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/string.c
- *
  * Very basic string functions
  */
 
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index f3f14bd26371..0be77b39328a 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/tty.c
- *
  * Very simple screen I/O
  * XXX: Probably should add very simple serial I/O?
  */
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index c61462f7d9a7..2723d9b5ce43 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/version.c
- *
  * Kernel version string
  */
 
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 39e247e96172..49f26aaaebc8 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/video-bios.c
- *
  * Standard video BIOS modes
  *
  * We have two options for this; silent and scanned.
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 5d5a3f6e8b5c..401ad998ad08 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/video-vesa.c
- *
  * VESA text modes
  */
 
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 330d6589a2ad..40ecb8d7688c 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/video-vga.c
- *
  * Common all-VGA modes
  */
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index c1c47ba069ef..83598b23093a 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/video.c
- *
  * Select video mode
  */
 
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index d69347f79e8e..ee63f5d14461 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/video.h
- *
  * Header file for the real-mode video probing code
  */
 
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
index 6499e3239b41..433909d61e5c 100644
--- a/arch/x86/boot/voyager.c
+++ b/arch/x86/boot/voyager.c
@@ -9,8 +9,6 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/voyager.c
- *
  * Get the Voyager config information
  */
 
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 7cede7a9e0dc..f00afdf61e67 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -43,7 +43,6 @@
 #include <asm/mman.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
 #include <asm/atomic.h>
 #include <asm/ia32.h>
 #include <asm/vgtod.h>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c3920ea8ac56..90e092d0af0c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,13 +22,14 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
-obj-y			+= pci-dma_$(BITS).o bootflag.o e820_$(BITS).o
-obj-y			+= quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o
-obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
+obj-y			+= bootflag.o e820_$(BITS).o
+obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
+obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-$(CONFIG_X86_64)	+= bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
+obj-y				+= process.o
 obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8ca3557a6d59..c2502eb9aa83 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,6 +1,4 @@
 /*
- * arch/i386/kernel/acpi/cstate.c
- *
  * Copyright (C) 2005 Intel Corporation
  * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  * 	- Added _PDC for SMP C-states on Intel CPUs
@@ -93,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 
 	/* Make sure we are running on right CPU */
 	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		return -1;
 
@@ -130,7 +128,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 			cx->address);
 
 out:
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
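
This cstate.c hunk shows the conversion pattern repeated throughout the rest of the patch: set_cpus_allowed() took a cpumask_t by value, copying NR_CPUS bits onto the stack, while set_cpus_allowed_ptr() takes a pointer. The pin-run-restore idiom in one place, as a sketch (the function name is illustrative):

    static int run_on_cpu(unsigned int cpu)
    {
            cpumask_t saved_mask = current->cpus_allowed;

            if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
                    return -1;
            /* ... work that must execute on 'cpu', e.g. MSR access ... */
            set_cpus_allowed_ptr(current, &saved_mask); /* run anywhere again */
            return 0;
    }
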
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 324eb0cab19c..de2d2e4ebad9 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -1,6 +1,4 @@
 /*
- * arch/i386/kernel/acpi/processor.c
- *
  * Copyright (C) 2005 Intel Corporation
  * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  * 	- Added _PDC for platforms with Intel CPUs
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d999d7833bc2..35b4f6a9c8ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -5,7 +5,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/bootmem.h>
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index a962dcb9c408..e2d870de837c 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -192,9 +192,9 @@ static void drv_read(struct drv_cmd *cmd)
 	cpumask_t saved_mask = current->cpus_allowed;
 	cmd->val = 0;
 
-	set_cpus_allowed(current, cmd->mask);
+	set_cpus_allowed_ptr(current, &cmd->mask);
 	do_drv_read(cmd);
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 }
 
 static void drv_write(struct drv_cmd *cmd)
@@ -203,30 +203,30 @@ static void drv_write(struct drv_cmd *cmd)
 	unsigned int i;
 
 	for_each_cpu_mask(i, cmd->mask) {
-		set_cpus_allowed(current, cpumask_of_cpu(i));
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		do_drv_write(cmd);
 	}
 
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 	return;
 }
 
-static u32 get_cur_val(cpumask_t mask)
+static u32 get_cur_val(const cpumask_t *mask)
 {
 	struct acpi_processor_performance *perf;
 	struct drv_cmd cmd;
 
-	if (unlikely(cpus_empty(mask)))
+	if (unlikely(cpus_empty(*mask)))
 		return 0;
 
-	switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) {
+	switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(drv_data, first_cpu(mask))->acpi_data;
+		perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -234,7 +234,7 @@ static u32 get_cur_val(cpumask_t mask)
 		return 0;
 	}
 
-	cmd.mask = mask;
+	cmd.mask = *mask;
 
 	drv_read(&cmd);
 
@@ -271,7 +271,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 	unsigned int retval;
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (get_cpu() != cpu) {
 		/* We were not able to run on requested processor */
 		put_cpu();
@@ -329,7 +329,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 	retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
 
 	put_cpu();
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 
 	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
 	return retval;
@@ -347,13 +347,13 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 		return 0;
 	}
 
-	freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
+	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
 	dprintk("cur freq = %u\n", freq);
 
 	return freq;
 }
 
-static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
+static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
 				struct acpi_cpufreq_data *data)
 {
 	unsigned int cur_freq;
@@ -449,7 +449,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	drv_write(&cmd);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(cmd.mask, freqs.new, data)) {
+		if (!check_freqs(&cmd.mask, freqs.new, data)) {
 			dprintk("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
 			return -EAGAIN;
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 14791ec55cfd..199e4e05e5dc 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void)
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return -ENODEV;
 
-	if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
-		!test_bit(X86_FEATURE_ACC, c->x86_capability))
+	if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
+		!test_cpu_cap(c, X86_FEATURE_ACC))
 		return -ENODEV;
 
 	ret = cpufreq_register_driver(&p4clockmod_driver);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c99d59d8ef2e..46d4034d9f37 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -478,12 +478,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 
 static int check_supported_cpu(unsigned int cpu)
 {
-	cpumask_t oldmask = CPU_MASK_ALL;
+	cpumask_t oldmask;
 	u32 eax, ebx, ecx, edx;
 	unsigned int rc = 0;
 
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -528,7 +528,7 @@ static int check_supported_cpu(unsigned int cpu)
 	rc = 1;
 
 out:
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 	return rc;
 }
 
@@ -1015,7 +1015,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 /* Driver entry point to switch to the target frequency */
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
-	cpumask_t oldmask = CPU_MASK_ALL;
+	cpumask_t oldmask;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1030,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1085,7 +1085,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 	ret = 0;
 
 err_out:
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 	return ret;
 }
 
@@ -1104,7 +1104,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask = CPU_MASK_ALL;
+	cpumask_t oldmask;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1145,7 +1145,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1164,7 +1164,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	fidvid_msr_init();
 
 	/* run on any CPU again */
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 
 	if (cpu_family == CPU_HW_PSTATE)
 		pol->cpus = cpumask_of_cpu(pol->cpu);
@@ -1205,7 +1205,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	return 0;
 
 err_out:
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 	powernow_k8_cpu_exit_acpi(data);
 
 	kfree(data);
@@ -1242,10 +1242,11 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
-		set_cpus_allowed(current, oldmask);
+		printk(KERN_ERR PFX
+			"limiting to CPU %d failed in powernowk8_get\n", cpu);
+		set_cpus_allowed_ptr(current, &oldmask);
 		return 0;
 	}
 
@@ -1253,13 +1254,14 @@ static unsigned int powernowk8_get (unsigned int cpu)
 		goto out;
 
 	if (cpu_family == CPU_HW_PSTATE)
-		khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+		khz = find_khz_freq_from_pstate(data->powernow_table,
+						data->currpstate);
 	else
 		khz = find_khz_freq_from_fid(data->currfid);
 
 
 out:
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 	return khz;
 }
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3031f1196192..908dd347c67e 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -315,7 +315,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
 	cpumask_t saved_mask;
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu)
 		return 0;
 
@@ -333,7 +333,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
 		clock_freq = extract_clock(l, cpu, 1);
 	}
 
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 	return clock_freq;
 }
 
@@ -487,7 +487,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		else
 			cpu_set(j, set_mask);
 
-		set_cpus_allowed(current, set_mask);
+		set_cpus_allowed_ptr(current, &set_mask);
 		preempt_disable();
 		if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
 			dprintk("couldn't limit to CPUs in this domain\n");
@@ -555,7 +555,8 @@ static int centrino_target (struct cpufreq_policy *policy,
 
 	if (!cpus_empty(covered_cpus)) {
 		for_each_cpu_mask(j, covered_cpus) {
-			set_cpus_allowed(current, cpumask_of_cpu(j));
+			set_cpus_allowed_ptr(current,
+					     &cpumask_of_cpu(j));
 			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
 		}
 	}
@@ -569,12 +570,12 @@ static int centrino_target (struct cpufreq_policy *policy,
 			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 		}
 	}
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 	return 0;
 
 migrate_end:
 	preempt_enable();
-	set_cpus_allowed(current, saved_mask);
+	set_cpus_allowed_ptr(current, &saved_mask);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 14d68aa301ee..1b50244b1fdf 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -229,22 +229,22 @@ static unsigned int speedstep_detect_chipset (void)
 	return 0;
 }
 
-static unsigned int _speedstep_get(cpumask_t cpus)
+static unsigned int _speedstep_get(const cpumask_t *cpus)
 {
 	unsigned int speed;
 	cpumask_t cpus_allowed;
 
 	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed(current, cpus);
+	set_cpus_allowed_ptr(current, cpus);
 	speed = speedstep_get_processor_frequency(speedstep_processor);
-	set_cpus_allowed(current, cpus_allowed);
+	set_cpus_allowed_ptr(current, &cpus_allowed);
 	dprintk("detected %u kHz as current frequency\n", speed);
 	return speed;
 }
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	return _speedstep_get(cpumask_of_cpu(cpu));
+	return _speedstep_get(&cpumask_of_cpu(cpu));
 }
 
 /**
@@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
 	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
 		return -EINVAL;
 
-	freqs.old = _speedstep_get(policy->cpus);
+	freqs.old = _speedstep_get(&policy->cpus);
 	freqs.new = speedstep_freqs[newstate].frequency;
 	freqs.cpu = policy->cpu;
 
@@ -285,12 +285,12 @@ static int speedstep_target (struct cpufreq_policy *policy,
 	}
 
 	/* switch to physical CPU where state is to be changed */
-	set_cpus_allowed(current, policy->cpus);
+	set_cpus_allowed_ptr(current, &policy->cpus);
 
 	speedstep_set_state(newstate);
 
 	/* allow to be run on all CPUs */
-	set_cpus_allowed(current, cpus_allowed);
+	set_cpus_allowed_ptr(current, &cpus_allowed);
 
 	for_each_cpu_mask(i, policy->cpus) {
 		freqs.cpu = i;
@@ -326,7 +326,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 #endif
 
 	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed(current, policy->cpus);
+	set_cpus_allowed_ptr(current, &policy->cpus);
 
 	/* detect low and high frequency and transition latency */
 	result = speedstep_get_freqs(speedstep_processor,
@@ -334,12 +334,12 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 				     &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
 				     &policy->cpuinfo.transition_latency,
 				     &speedstep_set_state);
-	set_cpus_allowed(current, cpus_allowed);
+	set_cpus_allowed_ptr(current, &cpus_allowed);
 	if (result)
 		return result;
 
 	/* get current speed setting */
-	speed = _speedstep_get(policy->cpus);
+	speed = _speedstep_get(&policy->cpus);
 	if (!speed)
 		return -EIO;
 
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1b889860eb73..26d615dcb149 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -129,7 +129,7 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	cpumask_t shared_cpu_map;
+	cpumask_t shared_cpu_map;	/* future?: only cpus/node is needed */
 };
 
 unsigned short			num_cache_leaves;
@@ -451,8 +451,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 }
 
 /* pointer to _cpuid4_info array (for each cache leaf) */
-static struct _cpuid4_info *cpuid4_info[NR_CPUS];
-#define CPUID4_INFO_IDX(x,y)    (&((cpuid4_info[x])[y]))
+static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
+#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -474,7 +474,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 			if (cpu_data(i).apicid >> index_msb ==
 			    c->apicid >> index_msb) {
 				cpu_set(i, this_leaf->shared_cpu_map);
-				if (i != cpu && cpuid4_info[i])  {
+				if (i != cpu && per_cpu(cpuid4_info, i))  {
 					sibling_leaf = CPUID4_INFO_IDX(i, index);
 					cpu_set(cpu, sibling_leaf->shared_cpu_map);
 				}
@@ -505,8 +505,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	for (i = 0; i < num_cache_leaves; i++)
 		cache_remove_shared_cpu_map(cpu, i);
 
-	kfree(cpuid4_info[cpu]);
-	cpuid4_info[cpu] = NULL;
+	kfree(per_cpu(cpuid4_info, cpu));
+	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
 static int __cpuinit detect_cache_attributes(unsigned int cpu)
@@ -519,13 +519,13 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	if (num_cache_leaves == 0)
 		return -ENOENT;
 
-	cpuid4_info[cpu] = kzalloc(
+	per_cpu(cpuid4_info, cpu) = kzalloc(
 	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (cpuid4_info[cpu] == NULL)
+	if (per_cpu(cpuid4_info, cpu) == NULL)
 		return -ENOMEM;
 
 	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		goto out;
 
@@ -542,12 +542,12 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		}
 		cache_shared_cpu_map_setup(cpu, j);
 	}
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 
 out:
 	if (retval) {
-		kfree(cpuid4_info[cpu]);
-		cpuid4_info[cpu] = NULL;
+		kfree(per_cpu(cpuid4_info, cpu));
+		per_cpu(cpuid4_info, cpu) = NULL;
 	}
 
 	return retval;
@@ -561,7 +561,7 @@ out:
 extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
 
 /* pointer to kobject for cpuX/cache */
-static struct kobject * cache_kobject[NR_CPUS];
+static DEFINE_PER_CPU(struct kobject *, cache_kobject);
 
 struct _index_kobject {
 	struct kobject kobj;
@@ -570,8 +570,8 @@ struct _index_kobject {
 };
 
 /* pointer to array of kobjects for cpuX/cache/indexY */
-static struct _index_kobject *index_kobject[NR_CPUS];
-#define INDEX_KOBJECT_PTR(x,y)    (&((index_kobject[x])[y]))
+static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
+#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(index_kobject, x))[y]))
 
 #define show_one_plus(file_name, object, val)				\
 static ssize_t show_##file_name						\
577static ssize_t show_##file_name \ 577static ssize_t show_##file_name \
@@ -591,11 +591,32 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
591 return sprintf (buf, "%luK\n", this_leaf->size / 1024); 591 return sprintf (buf, "%luK\n", this_leaf->size / 1024);
592} 592}
593 593
594static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) 594static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
595 int type, char *buf)
595{ 596{
596 char mask_str[NR_CPUS]; 597 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
597 cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); 598 int n = 0;
598 return sprintf(buf, "%s\n", mask_str); 599
600 if (len > 1) {
601 cpumask_t *mask = &this_leaf->shared_cpu_map;
602
603 n = type?
604 cpulist_scnprintf(buf, len-2, *mask):
605 cpumask_scnprintf(buf, len-2, *mask);
606 buf[n++] = '\n';
607 buf[n] = '\0';
608 }
609 return n;
610}
611
612static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
613{
614 return show_shared_cpu_map_func(leaf, 0, buf);
615}
616
617static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
618{
619 return show_shared_cpu_map_func(leaf, 1, buf);
599} 620}
600 621
601static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { 622static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
@@ -633,6 +654,7 @@ define_one_ro(ways_of_associativity);
 define_one_ro(number_of_sets);
 define_one_ro(size);
 define_one_ro(shared_cpu_map);
+define_one_ro(shared_cpu_list);
 
 static struct attribute * default_attrs[] = {
 	&type.attr,
@@ -643,6 +665,7 @@ static struct attribute * default_attrs[] = {
 	&number_of_sets.attr,
 	&size.attr,
 	&shared_cpu_map.attr,
+	&shared_cpu_list.attr,
 	NULL
 };
 
@@ -684,10 +707,10 @@ static struct kobj_type ktype_percpu_entry = {
 
 static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
 {
-	kfree(cache_kobject[cpu]);
-	kfree(index_kobject[cpu]);
-	cache_kobject[cpu] = NULL;
-	index_kobject[cpu] = NULL;
+	kfree(per_cpu(cache_kobject, cpu));
+	kfree(per_cpu(index_kobject, cpu));
+	per_cpu(cache_kobject, cpu) = NULL;
+	per_cpu(index_kobject, cpu) = NULL;
 	free_cache_attributes(cpu);
 }
 
@@ -703,13 +726,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
 		return err;
 
 	/* Allocate all required memory */
-	cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
-	if (unlikely(cache_kobject[cpu] == NULL))
+	per_cpu(cache_kobject, cpu) =
+		kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
 		goto err_out;
 
-	index_kobject[cpu] = kzalloc(
+	per_cpu(index_kobject, cpu) = kzalloc(
 	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
-	if (unlikely(index_kobject[cpu] == NULL))
+	if (unlikely(per_cpu(index_kobject, cpu) == NULL))
 		goto err_out;
 
 	return 0;
@@ -733,7 +757,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	if (unlikely(retval < 0))
 		return retval;
 
-	retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry,
+	retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+				      &ktype_percpu_entry,
 				      &sys_dev->kobj, "%s", "cache");
 	if (retval < 0) {
 		cpuid4_cache_sysfs_exit(cpu);
@@ -745,13 +770,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		this_object->cpu = cpu;
 		this_object->index = i;
 		retval = kobject_init_and_add(&(this_object->kobj),
-					      &ktype_cache, cache_kobject[cpu],
+					      &ktype_cache,
+					      per_cpu(cache_kobject, cpu),
 					      "index%1lu", i);
 		if (unlikely(retval)) {
 			for (j = 0; j < i; j++) {
 				kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
 			}
-			kobject_put(cache_kobject[cpu]);
+			kobject_put(per_cpu(cache_kobject, cpu));
 			cpuid4_cache_sysfs_exit(cpu);
 			break;
 		}
@@ -760,7 +786,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	if (!retval)
 		cpu_set(cpu, cache_dev_map);
 
-	kobject_uevent(cache_kobject[cpu], KOBJ_ADD);
+	kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
 	return retval;
 }
 
@@ -769,7 +795,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
 
-	if (cpuid4_info[cpu] == NULL)
+	if (per_cpu(cpuid4_info, cpu) == NULL)
 		return;
 	if (!cpu_isset(cpu, cache_dev_map))
 		return;
@@ -777,7 +803,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
 
         for (i = 0; i < num_cache_leaves; i++)
                 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
-        kobject_put(cache_kobject[cpu]);
+        kobject_put(per_cpu(cache_kobject, cpu));
         cpuid4_cache_sysfs_exit(cpu);
 }
 
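The hunks above convert NR_CPUS-sized arrays indexed as array[cpu] into true per-CPU variables accessed through per_cpu(var, cpu). A minimal sketch of the pattern, assuming a kernel build environment (the example_kobject/example_alloc names are invented for illustration, not part of this patch):

    #include <linux/percpu.h>
    #include <linux/kobject.h>
    #include <linux/slab.h>

    static DEFINE_PER_CPU(struct kobject *, example_kobject);

    static int example_alloc(unsigned int cpu)
    {
            /* per_cpu() yields an lvalue: it can be assigned, tested, kfree'd */
            per_cpu(example_kobject, cpu) =
                    kzalloc(sizeof(struct kobject), GFP_KERNEL);
            if (!per_cpu(example_kobject, cpu))
                    return -ENOMEM;
            return 0;
    }

Per-CPU storage drops the static NR_CPUS-sized table and keeps each CPU's pointer in that CPU's own per-CPU area.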
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 32671da8184e..7c9a813e1193 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -251,18 +251,18 @@ struct threshold_attr {
         ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static cpumask_t affinity_set(unsigned int cpu)
+static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
+                         cpumask_t *newmask)
 {
-        cpumask_t oldmask = current->cpus_allowed;
-        cpumask_t newmask = CPU_MASK_NONE;
-        cpu_set(cpu, newmask);
-        set_cpus_allowed(current, newmask);
-        return oldmask;
+        *oldmask = current->cpus_allowed;
+        cpus_clear(*newmask);
+        cpu_set(cpu, *newmask);
+        set_cpus_allowed_ptr(current, newmask);
 }
 
-static void affinity_restore(cpumask_t oldmask)
+static void affinity_restore(const cpumask_t *oldmask)
 {
-        set_cpus_allowed(current, oldmask);
+        set_cpus_allowed_ptr(current, oldmask);
 }
 
 #define SHOW_FIELDS(name) \
@@ -277,15 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
                                       const char *buf, size_t count)
 {
         char *end;
-        cpumask_t oldmask;
+        cpumask_t oldmask, newmask;
         unsigned long new = simple_strtoul(buf, &end, 0);
         if (end == buf)
                 return -EINVAL;
         b->interrupt_enable = !!new;
 
-        oldmask = affinity_set(b->cpu);
+        affinity_set(b->cpu, &oldmask, &newmask);
         threshold_restart_bank(b, 0, 0);
-        affinity_restore(oldmask);
+        affinity_restore(&oldmask);
 
         return end - buf;
 }
@@ -294,7 +294,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
                                      const char *buf, size_t count)
 {
         char *end;
-        cpumask_t oldmask;
+        cpumask_t oldmask, newmask;
         u16 old;
         unsigned long new = simple_strtoul(buf, &end, 0);
         if (end == buf)
@@ -306,9 +306,9 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
         old = b->threshold_limit;
         b->threshold_limit = new;
 
-        oldmask = affinity_set(b->cpu);
+        affinity_set(b->cpu, &oldmask, &newmask);
         threshold_restart_bank(b, 0, old);
-        affinity_restore(oldmask);
+        affinity_restore(&oldmask);
 
         return end - buf;
 }
@@ -316,10 +316,10 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
         u32 high, low;
-        cpumask_t oldmask;
-        oldmask = affinity_set(b->cpu);
+        cpumask_t oldmask, newmask;
+        affinity_set(b->cpu, &oldmask, &newmask);
         rdmsr(b->address, low, high);
-        affinity_restore(oldmask);
+        affinity_restore(&oldmask);
         return sprintf(buf, "%x\n",
                        (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
 }
@@ -327,10 +327,10 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf)
 static ssize_t store_error_count(struct threshold_block *b,
                                  const char *buf, size_t count)
 {
-        cpumask_t oldmask;
-        oldmask = affinity_set(b->cpu);
+        cpumask_t oldmask, newmask;
+        affinity_set(b->cpu, &oldmask, &newmask);
         threshold_restart_bank(b, 1, 0);
-        affinity_restore(oldmask);
+        affinity_restore(&oldmask);
         return 1;
 }
 
@@ -468,7 +468,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
         int i, err = 0;
         struct threshold_bank *b = NULL;
-        cpumask_t oldmask = CPU_MASK_NONE;
+        cpumask_t oldmask, newmask;
         char name[32];
 
         sprintf(name, "threshold_bank%i", bank);
@@ -519,10 +519,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
         per_cpu(threshold_banks, cpu)[bank] = b;
 
-        oldmask = affinity_set(cpu);
+        affinity_set(cpu, &oldmask, &newmask);
         err = allocate_threshold_blocks(cpu, bank, 0,
                                         MSR_IA32_MC0_MISC + bank * 4);
-        affinity_restore(oldmask);
+        affinity_restore(&oldmask);
 
         if (err)
                 goto out_free;
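affinity_set() used to return a whole cpumask_t by value, which with large NR_CPUS copies the entire bitmap through the stack on every call; the rework above takes caller-provided masks and switches to the by-pointer set_cpus_allowed_ptr(). A sketch of the resulting pin/restore idiom (caller code here is illustrative):

    cpumask_t oldmask, newmask;

    affinity_set(cpu, &oldmask, &newmask);  /* pin current task to 'cpu' */
    /* ... touch that CPU's threshold banks / MSRs ... */
    affinity_restore(&oldmask);             /* reinstate the saved mask */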
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 9b7e01daa1ca..1f4cc48c14c6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
  *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0978a4a39418..0d0d9057e7c0 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -1,7 +1,6 @@
 #include <linux/smp.h>
 #include <linux/timex.h>
 #include <linux/string.h>
-#include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
 
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 288e7a6598ac..daff52a62248 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -154,12 +154,10 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
                 err = cpuid_device_create(cpu);
                 break;
         case CPU_UP_CANCELED:
+        case CPU_UP_CANCELED_FROZEN:
         case CPU_DEAD:
                 cpuid_device_destroy(cpu);
                 break;
-        case CPU_UP_CANCELED_FROZEN:
-                destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
-                break;
         }
         return err ? NOTIFY_BAD : NOTIFY_OK;
 }
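With destroy_suspended_device() gone, a bring-up canceled during suspend (CPU_UP_CANCELED_FROZEN) simply shares the normal teardown path. The general shape of such a hotplug callback, as a sketch (the example_* helpers are hypothetical):

    static int __cpuinit example_cpu_callback(struct notifier_block *nfb,
                                              unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;
            int err = 0;

            switch (action) {
            case CPU_UP_PREPARE:
            case CPU_UP_PREPARE_FROZEN:
                    err = example_device_create(cpu);       /* hypothetical */
                    break;
            case CPU_UP_CANCELED:
            case CPU_UP_CANCELED_FROZEN:
            case CPU_DEAD:
                    example_device_destroy(cpu);            /* hypothetical */
                    break;
            }
            return err ? NOTIFY_BAD : NOTIFY_OK;
    }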
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 0240cd778365..ed733e7cf4e6 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 /*
  * Find the highest page frame number we have available
  */
-void __init find_max_pfn(void)
+void __init propagate_e820_map(void)
 {
         int i;
 
@@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg)
                          * size before original memory map is
                          * reset.
                          */
-                        find_max_pfn();
+                        propagate_e820_map();
                         saved_max_pfn = max_pfn;
 #endif
                         e820.nr_map = 0;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 7f6c0c85c8f6..cbd42e51cb08 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void)
 }
 
 /* Check for already reserved areas */
-static inline int
+static inline int __init
 bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
 {
         int i;
@@ -116,7 +116,7 @@ again:
 }
 
 /* Check for already reserved areas */
-static inline int
+static inline int __init
 bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
 {
         int i;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 759e02bec070..77d424cf68b3 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void)
 {
         efi_memory_desc_t *md;
         void *p;
+        u64 addr, npages;
 
         /* Make EFI runtime service code area executable */
         for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void)
                 if (md->type != EFI_RUNTIME_SERVICES_CODE)
                         continue;
 
-                set_memory_x(md->virt_addr, md->num_pages);
+                addr = md->virt_addr;
+                npages = md->num_pages;
+                memrange_efi_to_native(&addr, &npages);
+                set_memory_x(addr, npages);
         }
 }
 
@@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void)
         efi_memory_desc_t *md;
         efi_status_t status;
         unsigned long size;
-        u64 end, systab;
+        u64 end, systab, addr, npages;
         void *p, *va;
 
         efi.systab = NULL;
@@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void)
                 size = md->num_pages << EFI_PAGE_SHIFT;
                 end = md->phys_addr + size;
 
-                if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+                if (PFN_UP(end) <= max_pfn_mapped)
                         va = __va(md->phys_addr);
                 else
                         va = efi_ioremap(md->phys_addr, size);
@@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void)
                         continue;
                 }
 
-                if (!(md->attribute & EFI_MEMORY_WB))
-                        set_memory_uc(md->virt_addr, md->num_pages);
+                if (!(md->attribute & EFI_MEMORY_WB)) {
+                        addr = md->virt_addr;
+                        npages = md->num_pages;
+                        memrange_efi_to_native(&addr, &npages);
+                        set_memory_uc(addr, npages);
+                }
 
                 systab = (u64) (unsigned long) efi_phys.systab;
                 if (md->phys_addr <= systab && systab < end) {
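EFI describes ranges in fixed 4 KiB EFI pages (EFI_PAGE_SHIFT is 12), while set_memory_x()/set_memory_uc() operate on native kernel pages, which need not be the same size. Converting first keeps the attribute change covering exactly the descriptor's range. A sketch of what a helper like memrange_efi_to_native() plausibly computes (an assumption; the patch does not show its body):

    static void sketch_memrange_efi_to_native(u64 *addr, u64 *npages)
    {
            /* widen to the native pages covering the EFI-sized range */
            *npages = PFN_UP(*addr + (*npages << EFI_PAGE_SHIFT)) -
                      PFN_DOWN(*addr);
            *addr &= PAGE_MASK;
    }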
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d143a1e76b30..d0060fdcccac 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void)
 
 void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
-        static unsigned pages_mapped;
+        static unsigned pages_mapped __initdata;
         unsigned i, pages;
+        unsigned long offset;
 
-        /* phys_addr and size must be page aligned */
-        if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
-                return NULL;
+        pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
+        offset = phys_addr & ~PAGE_MASK;
+        phys_addr &= PAGE_MASK;
 
-        pages = size >> PAGE_SHIFT;
         if (pages_mapped + pages > MAX_EFI_IO_PAGES)
                 return NULL;
 
@@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
         }
 
         return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
-                                             (pages_mapped - pages));
+                                             (pages_mapped - pages)) + offset;
 }
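efi_ioremap() previously rejected unaligned inputs outright; it now rounds the physical range out to whole pages with PFN_DOWN()/PFN_UP() and adds the sub-page offset back into the returned pointer. The page-span arithmetic can be checked in plain userspace C (assuming 4 KiB pages):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT  12
    #define PAGE_SIZE   (1UL << PAGE_SHIFT)
    #define PAGE_MASK   (~(PAGE_SIZE - 1))
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
    #define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

    int main(void)
    {
            uint64_t phys = 0x1234, size = 0x2000;  /* spans three pages */
            uint64_t pages = PFN_UP(phys + size) - PFN_DOWN(phys);
            uint64_t offset = phys & ~PAGE_MASK;

            assert(pages == 3 && offset == 0x234);
            return 0;
    }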
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ba49a26dff8..f0f8934fc303 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/entry.S
  *
  * Copyright (C) 1991, 1992 Linus Torvalds
  */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 5d77c9cd8e15..ebf13908a743 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
         val = (1UL << UVH_IPI_INT_SEND_SHFT) |
               (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
               (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
-              (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+              APIC_DM_INIT;
+        uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+        mdelay(10);
+
+        val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+              (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+              (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+              APIC_DM_STARTUP;
         uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
         return 0;
 }
 
 static void uv_send_IPI_one(int cpu, int vector)
 {
-        unsigned long val, apicid;
+        unsigned long val, apicid, lapicid;
         int nasid;
 
         apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+        lapicid = apicid & 0x3f;                  /* ZZZ macro needed */
         nasid = uv_apicid_to_nasid(apicid);
         val =
-            (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid <<
+            (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
                                               UVH_IPI_INT_APIC_ID_SHFT) |
             (vector << UVH_IPI_INT_VECTOR_SHFT);
         uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
-        printk(KERN_DEBUG
-               "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
-               cpu, apicid, vector, nasid, val);
 }
 
 static void uv_send_IPI_mask(cpumask_t mask, int vector)
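The wakeup change makes UV follow the conventional MP startup sequence: send an INIT IPI, wait, then send the STARTUP IPI, using the APIC delivery-mode constants rather than a raw field value (the old 6 << UVH_IPI_INT_DELIVERY_MODE_SHFT, assuming the delivery-mode field sits at bit 8, encoded only STARTUP). For reference, the values these macros carry in <asm/apicdef.h> of this era are, to the best of my recollection:

    #define APIC_DM_INIT    0x00500         /* INIT delivery mode */
    #define APIC_DM_STARTUP 0x00600         /* STARTUP (SIPI) delivery mode */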
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d6d54faa84df..993c76773256 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
         reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
 
+#ifdef CONFIG_BLK_DEV_INITRD
         /* Reserve INITRD */
         if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
                 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
                 unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
                 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
         }
+#endif
 
         reserve_ebda_region();
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 826988a6e964..90f038af3adc 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -1,5 +1,4 @@
 /*
- * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
  *
  * Copyright (C) 1991, 1992 Linus Torvalds
  *
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 8f8102d967b3..db6839b53195 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -35,17 +35,18 @@
 #endif
 
 static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+unsigned int xstate_size;
+static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
-void mxcsr_feature_mask_init(void)
+void __cpuinit mxcsr_feature_mask_init(void)
 {
         unsigned long mask = 0;
 
         clts();
         if (cpu_has_fxsr) {
-                memset(&current->thread.i387.fxsave, 0,
-                       sizeof(struct i387_fxsave_struct));
-                asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
-                mask = current->thread.i387.fxsave.mxcsr_mask;
+                memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
+                asm volatile("fxsave %0" : : "m" (fx_scratch));
+                mask = fx_scratch.mxcsr_mask;
                 if (mask == 0)
                         mask = 0x0000ffbf;
         }
@@ -53,6 +54,16 @@ void mxcsr_feature_mask_init(void)
         stts();
 }
 
+void __init init_thread_xstate(void)
+{
+        if (cpu_has_fxsr)
+                xstate_size = sizeof(struct i387_fxsave_struct);
+#ifdef CONFIG_X86_32
+        else
+                xstate_size = sizeof(struct i387_fsave_struct);
+#endif
+}
+
 #ifdef CONFIG_X86_64
 /*
  * Called at bootup to set up the initial FPU state that is later cloned
@@ -61,10 +72,6 @@ void mxcsr_feature_mask_init(void)
 void __cpuinit fpu_init(void)
 {
         unsigned long oldcr0 = read_cr0();
-        extern void __bad_fxsave_alignment(void);
-
-        if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
-                __bad_fxsave_alignment();
 
         set_in_cr4(X86_CR4_OSFXSR);
         set_in_cr4(X86_CR4_OSXMMEXCPT);
@@ -84,32 +91,44 @@ void __cpuinit fpu_init(void)
  * value at reset if we support XMM instructions and then
  * remeber the current task has used the FPU.
  */
-void init_fpu(struct task_struct *tsk)
+int init_fpu(struct task_struct *tsk)
 {
         if (tsk_used_math(tsk)) {
                 if (tsk == current)
                         unlazy_fpu(tsk);
-                return;
+                return 0;
+        }
+
+        /*
+         * Memory allocation at the first usage of the FPU and other state.
+         */
+        if (!tsk->thread.xstate) {
+                tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+                                                      GFP_KERNEL);
+                if (!tsk->thread.xstate)
+                        return -ENOMEM;
         }
 
         if (cpu_has_fxsr) {
-                memset(&tsk->thread.i387.fxsave, 0,
-                       sizeof(struct i387_fxsave_struct));
-                tsk->thread.i387.fxsave.cwd = 0x37f;
+                struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+                memset(fx, 0, xstate_size);
+                fx->cwd = 0x37f;
                 if (cpu_has_xmm)
-                        tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
+                        fx->mxcsr = MXCSR_DEFAULT;
         } else {
-                memset(&tsk->thread.i387.fsave, 0,
-                       sizeof(struct i387_fsave_struct));
-                tsk->thread.i387.fsave.cwd = 0xffff037fu;
-                tsk->thread.i387.fsave.swd = 0xffff0000u;
-                tsk->thread.i387.fsave.twd = 0xffffffffu;
-                tsk->thread.i387.fsave.fos = 0xffff0000u;
+                struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
+                memset(fp, 0, xstate_size);
+                fp->cwd = 0xffff037fu;
+                fp->swd = 0xffff0000u;
+                fp->twd = 0xffffffffu;
+                fp->fos = 0xffff0000u;
         }
         /*
          * Only the device not available exception or ptrace can call init_fpu.
          */
         set_stopped_child_used_math(tsk);
+        return 0;
 }
 
 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
@@ -126,13 +145,17 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
                 unsigned int pos, unsigned int count,
                 void *kbuf, void __user *ubuf)
 {
+        int ret;
+
         if (!cpu_has_fxsr)
                 return -ENODEV;
 
-        init_fpu(target);
+        ret = init_fpu(target);
+        if (ret)
+                return ret;
 
         return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                   &target->thread.i387.fxsave, 0, -1);
+                                   &target->thread.xstate->fxsave, 0, -1);
 }
 
 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -144,16 +167,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
         if (!cpu_has_fxsr)
                 return -ENODEV;
 
-        init_fpu(target);
+        ret = init_fpu(target);
+        if (ret)
+                return ret;
+
         set_stopped_child_used_math(target);
 
         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.i387.fxsave, 0, -1);
+                                 &target->thread.xstate->fxsave, 0, -1);
 
         /*
          * mxcsr reserved bits must be masked to zero for security reasons.
          */
-        target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+        target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
         return ret;
 }
@@ -233,7 +259,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 static void
 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
-        struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+        struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
         struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
         struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
         int i;
@@ -273,7 +299,7 @@ static void convert_to_fxsr(struct task_struct *tsk,
                             const struct user_i387_ia32_struct *env)
 
 {
-        struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
+        struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
         struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
         struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
         int i;
@@ -302,15 +328,19 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
                void *kbuf, void __user *ubuf)
 {
         struct user_i387_ia32_struct env;
+        int ret;
 
         if (!HAVE_HWFP)
                 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-        init_fpu(target);
+        ret = init_fpu(target);
+        if (ret)
+                return ret;
 
         if (!cpu_has_fxsr) {
                 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                           &target->thread.i387.fsave, 0, -1);
+                                           &target->thread.xstate->fsave, 0,
+                                           -1);
         }
 
         if (kbuf && pos == 0 && count == sizeof(env)) {
@@ -333,12 +363,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
         if (!HAVE_HWFP)
                 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-        init_fpu(target);
+        ret = init_fpu(target);
+        if (ret)
+                return ret;
+
         set_stopped_child_used_math(target);
 
         if (!cpu_has_fxsr) {
                 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                          &target->thread.i387.fsave, 0, -1);
+                                          &target->thread.xstate->fsave, 0, -1);
         }
 
         if (pos > 0 || count < sizeof(env))
@@ -358,11 +391,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
         struct task_struct *tsk = current;
+        struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
         unlazy_fpu(tsk);
-        tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
-        if (__copy_to_user(buf, &tsk->thread.i387.fsave,
-                           sizeof(struct i387_fsave_struct)))
+        fp->status = fp->swd;
+        if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
                 return -1;
         return 1;
 }
@@ -370,6 +403,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
         struct task_struct *tsk = current;
+        struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
         struct user_i387_ia32_struct env;
         int err = 0;
 
@@ -379,12 +413,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
         if (__copy_to_user(buf, &env, sizeof(env)))
                 return -1;
 
-        err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+        err |= __put_user(fx->swd, &buf->status);
         err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
         if (err)
                 return -1;
 
-        if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+        if (__copy_to_user(&buf->_fxsr_env[0], fx,
                            sizeof(struct i387_fxsave_struct)))
                 return -1;
         return 1;
@@ -417,7 +451,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
         struct task_struct *tsk = current;
 
         clear_fpu(tsk);
-        return __copy_from_user(&tsk->thread.i387.fsave, buf,
+        return __copy_from_user(&tsk->thread.xstate->fsave, buf,
                                 sizeof(struct i387_fsave_struct));
 }
423 457
@@ -428,10 +462,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
428 int err; 462 int err;
429 463
430 clear_fpu(tsk); 464 clear_fpu(tsk);
431 err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], 465 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
432 sizeof(struct i387_fxsave_struct)); 466 sizeof(struct i387_fxsave_struct));
433 /* mxcsr reserved bits must be masked to zero for security reasons */ 467 /* mxcsr reserved bits must be masked to zero for security reasons */
434 tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; 468 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
435 if (err || __copy_from_user(&env, buf, sizeof(env))) 469 if (err || __copy_from_user(&env, buf, sizeof(env)))
436 return 1; 470 return 1;
437 convert_to_fxsr(tsk, &env); 471 convert_to_fxsr(tsk, &env);
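The running theme in this file: the FPU/SSE register image moves out of the task_struct-embedded thread.i387 union into a separately allocated thread.xstate, sized once at boot by init_thread_xstate(), so init_fpu() can now fail with -ENOMEM and every caller checks its return value. The allocation draws from a slab cache named task_xstate_cachep; a sketch of how such a cache would be created (the real setup lives elsewhere in this series, so the cache flags and union name here are assumptions):

    #include <linux/slab.h>

    struct kmem_cache *task_xstate_cachep;

    void __init task_xstate_cache_init_sketch(void)
    {
            /* one object per task that ever touches the FPU */
            task_xstate_cachep =
                    kmem_cache_create("task_xstate", xstate_size,
                                      __alignof__(union thread_xstate),
                                      SLAB_PANIC, NULL);
    }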
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b54464b26658..9ba11d07920f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -785,7 +785,7 @@ static void __clear_irq_vector(int irq)
                 per_cpu(vector_irq, cpu)[vector] = -1;
 
         cfg->vector = 0;
-        cfg->domain = CPU_MASK_NONE;
+        cpus_clear(cfg->domain);
 }
 
 void __setup_vector_irq(int cpu)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 24362ecf5f9a..f47f0eb886b8 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,11 +46,7 @@
 #include <asm/apicdef.h>
 #include <asm/system.h>
 
-#ifdef CONFIG_X86_32
-# include <mach_ipi.h>
-#else
-# include <asm/mach_apic.h>
-#endif
+#include <mach_ipi.h>
 
 /*
  * Put the error code here just in case the user cares:
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 25cf6dee4e56..69729e38b78a 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -402,7 +402,7 @@ static int do_microcode_update (void)
 
                 if (!uci->valid)
                         continue;
-                set_cpus_allowed(current, cpumask_of_cpu(cpu));
+                set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
                 error = get_maching_microcode(new_mc, cpu);
                 if (error < 0)
                         goto out;
@@ -416,7 +416,7 @@ out:
         vfree(new_mc);
         if (cursor < 0)
                 error = cursor;
-        set_cpus_allowed(current, old);
+        set_cpus_allowed_ptr(current, &old);
         return error;
 }
 
@@ -579,7 +579,7 @@ static int apply_microcode_check_cpu(int cpu)
                 return 0;
 
         old = current->cpus_allowed;
-        set_cpus_allowed(current, cpumask_of_cpu(cpu));
+        set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
         /* Check if the microcode we have in memory matches the CPU */
         if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -610,7 +610,7 @@ static int apply_microcode_check_cpu(int cpu)
610 " sig=0x%x, pf=0x%x, rev=0x%x\n", 610 " sig=0x%x, pf=0x%x, rev=0x%x\n",
611 cpu, uci->sig, uci->pf, uci->rev); 611 cpu, uci->sig, uci->pf, uci->rev);
612 612
613 set_cpus_allowed(current, old); 613 set_cpus_allowed_ptr(current, &old);
614 return err; 614 return err;
615} 615}
616 616
@@ -621,13 +621,13 @@ static void microcode_init_cpu(int cpu, int resume)
 
         old = current->cpus_allowed;
 
-        set_cpus_allowed(current, cpumask_of_cpu(cpu));
+        set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
         mutex_lock(&microcode_mutex);
         collect_cpu_info(cpu);
         if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
                 cpu_request_microcode(cpu);
         mutex_unlock(&microcode_mutex);
-        set_cpus_allowed(current, old);
+        set_cpus_allowed_ptr(current, &old);
 }
 
 static void microcode_fini_cpu(int cpu)
@@ -657,14 +657,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
                 old = current->cpus_allowed;
 
                 get_online_cpus();
-                set_cpus_allowed(current, cpumask_of_cpu(cpu));
+                set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
                 mutex_lock(&microcode_mutex);
                 if (uci->valid)
                         err = cpu_request_microcode(cpu);
                 mutex_unlock(&microcode_mutex);
                 put_online_cpus();
-                set_cpus_allowed(current, old);
+                set_cpus_allowed_ptr(current, &old);
         }
         if (err)
                 return err;
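Same API migration as in mce_amd_64.c earlier: set_cpus_allowed_ptr() takes the mask by reference, so callers stop pushing a potentially large cpumask_t by value. The pin/run/restore idiom this file repeats, reduced to a sketch:

    cpumask_t old = current->cpus_allowed;

    set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* run on 'cpu' */
    /* ... per-CPU microcode work ... */
    set_cpus_allowed_ptr(current, &old);                 /* migrate back */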
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4dfb40530057..1f3abe048e93 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -162,12 +162,10 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
                 err = msr_device_create(cpu);
                 break;
         case CPU_UP_CANCELED:
+        case CPU_UP_CANCELED_FROZEN:
         case CPU_DEAD:
                 msr_device_destroy(cpu);
                 break;
-        case CPU_UP_CANCELED_FROZEN:
-                destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
-                break;
         }
         return err ? NOTIFY_BAD : NOTIFY_OK;
 }
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 8421d0ac6f22..11b14bbaa61e 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int
+nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
 
         /*
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 11f9130ac513..5a29ded994fa 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -313,7 +313,8 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int
+nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
         int sum;
         int touched = 0;
@@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 
 static unsigned ignore_nmis;
 
-asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+asmlinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
 {
         nmi_enter();
         add_pda(__nmi_count,1);
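notrace keeps compiler-inserted tracing hooks out of the NMI path, where a tracer re-entering (or faulting) inside the handler would be fatal. If memory serves, the annotation expands in <linux/linkage.h> of this era roughly to:

    #define notrace __attribute__((no_instrument_function))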
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1b5464c2434f..adb91e4b62da 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -470,10 +470,11 @@ error:
         return 0;
 }
 
-static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
                                      size_t size, int direction)
 {
         dma_addr_t dma_handle = bad_dma_address;
+        void *vaddr = phys_to_virt(paddr);
         unsigned long uaddr;
         unsigned int npages;
         struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma.c
index ada5a0604992..388b113a7d88 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,61 +1,370 @@
-/*
- * Dynamic DMA mapping support.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmar.h>
-#include <asm/io.h>
+#include <linux/bootmem.h>
+#include <linux/pci.h>
+
+#include <asm/proto.h>
+#include <asm/dma.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
 
-int iommu_merge __read_mostly = 0;
-
-dma_addr_t bad_dma_address __read_mostly;
-EXPORT_SYMBOL(bad_dma_address);
+int forbid_dac __read_mostly;
+EXPORT_SYMBOL(forbid_dac);
 
-/* This tells the BIO block layer to assume merging. Default to off
-   because we cannot guarantee merging later. */
-int iommu_bio_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_bio_merge);
+const struct dma_mapping_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
 
-static int iommu_sac_force __read_mostly = 0;
+int iommu_sac_force __read_mostly = 0;
 
-int no_iommu __read_mostly;
 #ifdef CONFIG_IOMMU_DEBUG
 int panic_on_overflow __read_mostly = 1;
 int force_iommu __read_mostly = 1;
 #else
 int panic_on_overflow __read_mostly = 0;
-int force_iommu __read_mostly= 0;
+int force_iommu __read_mostly = 0;
 #endif
 
+int iommu_merge __read_mostly = 0;
+
+int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
+/* This tells the BIO block layer to assume merging. Default to off
+   because we cannot guarantee merging later. */
+int iommu_bio_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+dma_addr_t bad_dma_address __read_mostly = 0;
+EXPORT_SYMBOL(bad_dma_address);
+
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
-   to i386. */
+   to older i386. */
 struct device fallback_dev = {
         .bus_id = "fallback device",
         .coherent_dma_mask = DMA_32BIT_MASK,
         .dma_mask = &fallback_dev.coherent_dma_mask,
 };
 
+int dma_set_mask(struct device *dev, u64 mask)
+{
+        if (!dev->dma_mask || !dma_supported(dev, mask))
+                return -EIO;
+
+        *dev->dma_mask = mask;
+
+        return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+#ifdef CONFIG_X86_64
+static __initdata void *dma32_bootmem_ptr;
+static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+
+static int __init parse_dma32_size_opt(char *p)
+{
+        if (!p)
+                return -EINVAL;
+        dma32_bootmem_size = memparse(p, &p);
+        return 0;
+}
+early_param("dma32_size", parse_dma32_size_opt);
+
+void __init dma32_reserve_bootmem(void)
+{
+        unsigned long size, align;
+        if (end_pfn <= MAX_DMA32_PFN)
+                return;
+
+        align = 64ULL<<20;
+        size = round_up(dma32_bootmem_size, align);
+        dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
+                                 __pa(MAX_DMA_ADDRESS));
+        if (dma32_bootmem_ptr)
+                dma32_bootmem_size = size;
+        else
+                dma32_bootmem_size = 0;
+}
+static void __init dma32_free_bootmem(void)
+{
+        int node;
+
+        if (end_pfn <= MAX_DMA32_PFN)
+                return;
+
+        if (!dma32_bootmem_ptr)
+                return;
+
+        for_each_online_node(node)
+                free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
+                                  dma32_bootmem_size);
+
+        dma32_bootmem_ptr = NULL;
+        dma32_bootmem_size = 0;
+}
+
+void __init pci_iommu_alloc(void)
+{
+        /* free the range so iommu could get some range less than 4G */
+        dma32_free_bootmem();
+        /*
+         * The order of these functions is important for
+         * fall-back/fail-over reasons
+         */
+#ifdef CONFIG_GART_IOMMU
+        gart_iommu_hole_init();
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+        detect_calgary();
+#endif
+
+        detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+        pci_swiotlb_init();
+#endif
+}
+#endif
+
+/*
+ * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
+ * documentation.
+ */
+static __init int iommu_setup(char *p)
+{
+        iommu_merge = 1;
+
+        if (!p)
+                return -EINVAL;
+
+        while (*p) {
+                if (!strncmp(p, "off", 3))
+                        no_iommu = 1;
+                /* gart_parse_options has more force support */
+                if (!strncmp(p, "force", 5))
+                        force_iommu = 1;
+                if (!strncmp(p, "noforce", 7)) {
+                        iommu_merge = 0;
+                        force_iommu = 0;
+                }
+
+                if (!strncmp(p, "biomerge", 8)) {
+                        iommu_bio_merge = 4096;
+                        iommu_merge = 1;
+                        force_iommu = 1;
+                }
+                if (!strncmp(p, "panic", 5))
+                        panic_on_overflow = 1;
+                if (!strncmp(p, "nopanic", 7))
+                        panic_on_overflow = 0;
+                if (!strncmp(p, "merge", 5)) {
+                        iommu_merge = 1;
+                        force_iommu = 1;
+                }
+                if (!strncmp(p, "nomerge", 7))
+                        iommu_merge = 0;
+                if (!strncmp(p, "forcesac", 8))
+                        iommu_sac_force = 1;
+                if (!strncmp(p, "allowdac", 8))
+                        forbid_dac = 0;
+                if (!strncmp(p, "nodac", 5))
+                        forbid_dac = -1;
+                if (!strncmp(p, "usedac", 6)) {
+                        forbid_dac = -1;
+                        return 1;
+                }
+#ifdef CONFIG_SWIOTLB
+                if (!strncmp(p, "soft", 4))
+                        swiotlb = 1;
+#endif
+
+#ifdef CONFIG_GART_IOMMU
+                gart_parse_options(p);
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+                if (!strncmp(p, "calgary", 7))
+                        use_calgary = 1;
+#endif /* CONFIG_CALGARY_IOMMU */
+
+                p += strcspn(p, ",");
+                if (*p == ',')
+                        ++p;
+        }
+        return 0;
+}
+early_param("iommu", iommu_setup);
+
+#ifdef CONFIG_X86_32
+int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+                                dma_addr_t device_addr, size_t size, int flags)
+{
+        void __iomem *mem_base = NULL;
+        int pages = size >> PAGE_SHIFT;
+        int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+
+        if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
+                goto out;
+        if (!size)
+                goto out;
+        if (dev->dma_mem)
+                goto out;
+
+        /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
+
+        mem_base = ioremap(bus_addr, size);
+        if (!mem_base)
+                goto out;
+
+        dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+        if (!dev->dma_mem)
+                goto out;
+        dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+        if (!dev->dma_mem->bitmap)
+                goto free1_out;
+
+        dev->dma_mem->virt_base = mem_base;
+        dev->dma_mem->device_base = device_addr;
+        dev->dma_mem->size = pages;
+        dev->dma_mem->flags = flags;
+
+        if (flags & DMA_MEMORY_MAP)
+                return DMA_MEMORY_MAP;
+
+        return DMA_MEMORY_IO;
+
+ free1_out:
+        kfree(dev->dma_mem);
+ out:
+        if (mem_base)
+                iounmap(mem_base);
+        return 0;
+}
+EXPORT_SYMBOL(dma_declare_coherent_memory);
+
+void dma_release_declared_memory(struct device *dev)
+{
+        struct dma_coherent_mem *mem = dev->dma_mem;
+
+        if (!mem)
+                return;
+        dev->dma_mem = NULL;
+        iounmap(mem->virt_base);
+        kfree(mem->bitmap);
+        kfree(mem);
+}
+EXPORT_SYMBOL(dma_release_declared_memory);
+
+void *dma_mark_declared_memory_occupied(struct device *dev,
+                                        dma_addr_t device_addr, size_t size)
+{
+        struct dma_coherent_mem *mem = dev->dma_mem;
+        int pos, err;
+        int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
+
+        pages >>= PAGE_SHIFT;
+
+        if (!mem)
+                return ERR_PTR(-EINVAL);
+
+        pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
+        err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
+        if (err != 0)
+                return ERR_PTR(err);
+        return mem->virt_base + (pos << PAGE_SHIFT);
+}
+EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+
+static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
+                                       dma_addr_t *dma_handle, void **ret)
+{
+        struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+        int order = get_order(size);
+
+        if (mem) {
+                int page = bitmap_find_free_region(mem->bitmap, mem->size,
+                                                     order);
+                if (page >= 0) {
+                        *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+                        *ret = mem->virt_base + (page << PAGE_SHIFT);
+                        memset(*ret, 0, size);
+                }
+                if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+                        *ret = NULL;
+        }
+        return (mem != NULL);
+}
+
+static int dma_release_coherent(struct device *dev, int order, void *vaddr)
+{
+        struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+
+        if (mem && vaddr >= mem->virt_base && vaddr <
+                   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+                int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+
+                bitmap_release_region(mem->bitmap, page, order);
+                return 1;
+        }
+        return 0;
+}
+#else
+#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
+#define dma_release_coherent(dev, order, vaddr) (0)
+#endif /* CONFIG_X86_32 */
+
+int dma_supported(struct device *dev, u64 mask)
+{
+#ifdef CONFIG_PCI
+        if (mask > 0xffffffff && forbid_dac > 0) {
+                printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
+                                 dev->bus_id);
+                return 0;
+        }
+#endif
+
+        if (dma_ops->dma_supported)
+                return dma_ops->dma_supported(dev, mask);
+
+        /* Copied from i386. Doesn't make much sense, because it will
+           only work for pci_alloc_coherent.
+           The caller just has to use GFP_DMA in this case. */
+        if (mask < DMA_24BIT_MASK)
+                return 0;
+
+        /* Tell the device to use SAC when IOMMU force is on.  This
+           allows the driver to use cheaper accesses in some cases.
+
+           Problem with this is that if we overflow the IOMMU area and
+           return DAC as fallback address the device may not handle it
+           correctly.
+
+           As a special case some controllers have a 39bit address
+           mode that is as efficient as 32bit (aic79xx). Don't force
+           SAC for these.  Assume all masks <= 40 bits are of this
+           type. Normally this doesn't make any difference, but gives
+           more gentle handling of IOMMU overflow. */
+        if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+                printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
+                                 dev->bus_id, mask);
+                return 0;
+        }
+
+        return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
 /* Allocate DMA memory on node near device */
-noinline static void *
+noinline struct page *
 dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 {
-        struct page *page;
         int node;
 
         node = dev_to_node(dev);
 
-        page = alloc_pages_node(node, gfp, order);
-        return page ? page_address(page) : NULL;
+        return alloc_pages_node(node, gfp, order);
 }
 
 /*
@@ -65,9 +374,16 @@ void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
                    gfp_t gfp)
 {
-        void *memory;
+        void *memory = NULL;
+        struct page *page;
         unsigned long dma_mask = 0;
-        u64 bus;
+        dma_addr_t bus;
+
+        /* ignore region specifiers */
+        gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
+        if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+                return memory;
 
         if (!dev)
                 dev = &fallback_dev;
@@ -82,26 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
         /* Don't invoke OOM killer */
         gfp |= __GFP_NORETRY;
 
-        /* Kludge to make it bug-to-bug compatible with i386. i386
-           uses the normal dma_mask for alloc_coherent. */
-        dma_mask &= *dev->dma_mask;
-
+#ifdef CONFIG_X86_64
         /* Why <=? Even when the mask is smaller than 4GB it is often
            larger than 16MB and in this case we have a chance of
            finding fitting memory in the next higher zone first. If
            not retry with true GFP_DMA. -AK */
         if (dma_mask <= DMA_32BIT_MASK)
                 gfp |= GFP_DMA32;
+#endif
 
  again:
-        memory = dma_alloc_pages(dev, gfp, get_order(size));
-        if (memory == NULL)
+        page = dma_alloc_pages(dev, gfp, get_order(size));
+        if (page == NULL)
                 return NULL;
 
         {
                 int high, mmu;
-                bus = virt_to_bus(memory);
-                high = (bus + size) >= dma_mask;
+                bus = page_to_phys(page);
+                memory = page_address(page);
+                high = (bus + size) >= dma_mask;
                 mmu = high;
                 if (force_iommu && !(gfp & GFP_DMA))
                         mmu = 1;
@@ -127,7 +442,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
                 memset(memory, 0, size);
                 if (!mmu) {
-                        *dma_handle = virt_to_bus(memory);
+                        *dma_handle = bus;
                         return memory;
                 }
         }
@@ -139,7 +454,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
         }
 
         if (dma_ops->map_simple) {
-                *dma_handle = dma_ops->map_simple(dev, memory,
+                *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
                                                   size,
                                                   PCI_DMA_BIDIRECTIONAL);
                 if (*dma_handle != bad_dma_address)
@@ -147,7 +462,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
         }
 
         if (panic_on_overflow)
-                panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size);
+                panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
+                      (unsigned long)size);
         free_pages((unsigned long)memory, get_order(size));
         return NULL;
 }
@@ -160,153 +476,16 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size,
                        void *vaddr, dma_addr_t bus)
 {
+        int order = get_order(size);
         WARN_ON(irqs_disabled());       /* for portability */
+        if (dma_release_coherent(dev, order, vaddr))
+                return;
         if (dma_ops->unmap_single)
                 dma_ops->unmap_single(dev, bus, size, 0);
-        free_pages((unsigned long)vaddr, get_order(size));
+        free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
-static int forbid_dac __read_mostly;
-
-int dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
-        if (mask > 0xffffffff && forbid_dac > 0) {
-
-
-
-                printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id);
-                return 0;
-        }
-#endif
-
-        if (dma_ops->dma_supported)
-                return dma_ops->dma_supported(dev, mask);
-
-        /* Copied from i386. Doesn't make much sense, because it will
-           only work for pci_alloc_coherent.
-           The caller just has to use GFP_DMA in this case. */
-        if (mask < DMA_24BIT_MASK)
-                return 0;
-
-        /* Tell the device to use SAC when IOMMU force is on.  This
-           allows the driver to use cheaper accesses in some cases.
-
-           Problem with this is that if we overflow the IOMMU area and
-           return DAC as fallback address the device may not handle it
-           correctly.
-
-           As a special case some controllers have a 39bit address
-           mode that is as efficient as 32bit (aic79xx). Don't force
-           SAC for these.  Assume all masks <= 40 bits are of this
-           type. Normally this doesn't make any difference, but gives
-           more gentle handling of IOMMU overflow. */
-        if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
-                printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
-                return 0;
-        }
-
-        return 1;
-}
-EXPORT_SYMBOL(dma_supported);
-
-int dma_set_mask(struct device *dev, u64 mask)
-{
-        if (!dev->dma_mask || !dma_supported(dev, mask))
-                return -EIO;
-        *dev->dma_mask = mask;
-        return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-/*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
- */
-static __init int iommu_setup(char *p)
-{
-        iommu_merge = 1;
-
-        if (!p)
-                return -EINVAL;
-
-        while (*p) {
-                if (!strncmp(p, "off", 3))
-                        no_iommu = 1;
-                /* gart_parse_options has more force support */
-                if (!strncmp(p, "force", 5))
-                        force_iommu = 1;
-                if (!strncmp(p, "noforce", 7)) {
-                        iommu_merge = 0;
-                        force_iommu = 0;
-                }
-
-                if (!strncmp(p, "biomerge", 8)) {
-                        iommu_bio_merge = 4096;
-                        iommu_merge = 1;
-                        force_iommu = 1;
-                }
-                if (!strncmp(p, "panic", 5))
-                        panic_on_overflow = 1;
-                if (!strncmp(p, "nopanic", 7))
-                        panic_on_overflow = 0;
-                if (!strncmp(p, "merge", 5)) {
-                        iommu_merge = 1;
-                        force_iommu = 1;
-                }
-                if (!strncmp(p, "nomerge", 7))
-                        iommu_merge = 0;
-                if (!strncmp(p, "forcesac", 8))
-                        iommu_sac_force = 1;
-                if (!strncmp(p, "allowdac", 8))
-                        forbid_dac = 0;
-                if (!strncmp(p, "nodac", 5))
-                        forbid_dac = -1;
-
-#ifdef CONFIG_SWIOTLB
-                if (!strncmp(p, "soft", 4))
-                        swiotlb = 1;
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-                gart_parse_options(p);
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
-                if (!strncmp(p, "calgary", 7))
-                        use_calgary = 1;
-#endif /* CONFIG_CALGARY_IOMMU */
-
-                p += strcspn(p, ",");
-                if (*p == ',')
-                        ++p;
-        }
-        return 0;
-}
-early_param("iommu", iommu_setup);
-
-void __init pci_iommu_alloc(void)
-{
-        /*
-         * The order of these functions is important for
-         * fall-back/fail-over reasons
-         */
-#ifdef CONFIG_GART_IOMMU
-        gart_iommu_hole_init();
-#endif
-
-#ifdef CONFIG_CALGARY_IOMMU
-        detect_calgary();
-#endif
-
-        detect_intel_iommu();
-
-#ifdef CONFIG_SWIOTLB
-        pci_swiotlb_init();
-#endif
-}
-
 static int __init pci_iommu_init(void)
 {
 #ifdef CONFIG_CALGARY_IOMMU
@@ -327,6 +506,8 @@ void pci_iommu_shutdown(void)
 {
         gart_iommu_shutdown();
 }
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
 
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
@@ -334,11 +515,10 @@ void pci_iommu_shutdown(void)
 static __devinit void via_no_dac(struct pci_dev *dev)
 {
         if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-                printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+                printk(KERN_INFO "PCI: VIA PCI bridge detected."
+                                 "Disabling DAC.\n");
                 forbid_dac = 1;
         }
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
 #endif
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c
deleted file mode 100644
index 51330321a5d3..000000000000
--- a/arch/x86/kernel/pci-dma_32.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * On i386 there is no hardware dynamic DMA address translation,
5 * so consistent alloc/free are merely page allocation/freeing.
6 * The rest of the dynamic DMA mapping interface is implemented
7 * in asm/pci.h.
8 */
9
10#include <linux/types.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/pci.h>
14#include <linux/module.h>
15#include <asm/io.h>
16
17struct dma_coherent_mem {
18 void *virt_base;
19 u32 device_base;
20 int size;
21 int flags;
22 unsigned long *bitmap;
23};
24
25void *dma_alloc_coherent(struct device *dev, size_t size,
26 dma_addr_t *dma_handle, gfp_t gfp)
27{
28 void *ret;
29 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
30 int order = get_order(size);
31 /* ignore region specifiers */
32 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
33
34 if (mem) {
35 int page = bitmap_find_free_region(mem->bitmap, mem->size,
36 order);
37 if (page >= 0) {
38 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
39 ret = mem->virt_base + (page << PAGE_SHIFT);
40 memset(ret, 0, size);
41 return ret;
42 }
43 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
44 return NULL;
45 }
46
47 if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
48 gfp |= GFP_DMA;
49
50 ret = (void *)__get_free_pages(gfp, order);
51
52 if (ret != NULL) {
53 memset(ret, 0, size);
54 *dma_handle = virt_to_phys(ret);
55 }
56 return ret;
57}
58EXPORT_SYMBOL(dma_alloc_coherent);
59
60void dma_free_coherent(struct device *dev, size_t size,
61 void *vaddr, dma_addr_t dma_handle)
62{
63 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
64 int order = get_order(size);
65
66 WARN_ON(irqs_disabled()); /* for portability */
67 if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
68 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
69
70 bitmap_release_region(mem->bitmap, page, order);
71 } else
72 free_pages((unsigned long)vaddr, order);
73}
74EXPORT_SYMBOL(dma_free_coherent);
75
76int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
77 dma_addr_t device_addr, size_t size, int flags)
78{
79 void __iomem *mem_base = NULL;
80 int pages = size >> PAGE_SHIFT;
81 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
82
83 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
84 goto out;
85 if (!size)
86 goto out;
87 if (dev->dma_mem)
88 goto out;
89
90 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
91
92 mem_base = ioremap(bus_addr, size);
93 if (!mem_base)
94 goto out;
95
96 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
97 if (!dev->dma_mem)
98 goto out;
99 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
100 if (!dev->dma_mem->bitmap)
101 goto free1_out;
102
103 dev->dma_mem->virt_base = mem_base;
104 dev->dma_mem->device_base = device_addr;
105 dev->dma_mem->size = pages;
106 dev->dma_mem->flags = flags;
107
108 if (flags & DMA_MEMORY_MAP)
109 return DMA_MEMORY_MAP;
110
111 return DMA_MEMORY_IO;
112
113 free1_out:
114 kfree(dev->dma_mem);
115 out:
116 if (mem_base)
117 iounmap(mem_base);
118 return 0;
119}
120EXPORT_SYMBOL(dma_declare_coherent_memory);
121
122void dma_release_declared_memory(struct device *dev)
123{
124 struct dma_coherent_mem *mem = dev->dma_mem;
125
126 if(!mem)
127 return;
128 dev->dma_mem = NULL;
129 iounmap(mem->virt_base);
130 kfree(mem->bitmap);
131 kfree(mem);
132}
133EXPORT_SYMBOL(dma_release_declared_memory);
134
135void *dma_mark_declared_memory_occupied(struct device *dev,
136 dma_addr_t device_addr, size_t size)
137{
138 struct dma_coherent_mem *mem = dev->dma_mem;
139 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
140 int pos, err;
141
142 if (!mem)
143 return ERR_PTR(-EINVAL);
144
145 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
146 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
147 if (err != 0)
148 return ERR_PTR(err);
149 return mem->virt_base + (pos << PAGE_SHIFT);
150}
151EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
152
153#ifdef CONFIG_PCI
154/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
155
156int forbid_dac;
157EXPORT_SYMBOL(forbid_dac);
158
159static __devinit void via_no_dac(struct pci_dev *dev)
160{
161 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
162 printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
163 forbid_dac = 1;
164 }
165}
166DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
167
168static int check_iommu(char *s)
169{
170 if (!strcmp(s, "usedac")) {
171 forbid_dac = -1;
172 return 1;
173 }
174 return 0;
175}
176__setup("iommu=", check_iommu);
177#endif
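The coherent-pool API deleted here survives in the unified pci-dma.c: dma_declare_coherent_memory() backs dma_alloc_coherent() with a device-private window. A sketch under assumed names (sram_bus_addr, sram_dev_addr and SRAM_SIZE are hypothetical):

	/* Hypothetical: dedicate a device-local SRAM window to coherent DMA. */
	if (dma_declare_coherent_memory(dev, sram_bus_addr, sram_dev_addr,
					SRAM_SIZE, DMA_MEMORY_MAP)
	    != DMA_MEMORY_MAP)
		dev_warn(dev, "could not declare coherent memory\n");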
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 700e4647dd30..c07455d1695f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
264} 264}
265 265
266static dma_addr_t 266static dma_addr_t
267gart_map_simple(struct device *dev, char *buf, size_t size, int dir) 267gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
268{ 268{
269 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 269 dma_addr_t map = dma_map_area(dev, paddr, size, dir);
270 270
271 flush_gart(); 271 flush_gart();
272 272
@@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
275 275
276/* Map a single area into the IOMMU */ 276/* Map a single area into the IOMMU */
277static dma_addr_t 277static dma_addr_t
278gart_map_single(struct device *dev, void *addr, size_t size, int dir) 278gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
279{ 279{
280 unsigned long phys_mem, bus; 280 unsigned long bus;
281 281
282 if (!dev) 282 if (!dev)
283 dev = &fallback_dev; 283 dev = &fallback_dev;
284 284
285 phys_mem = virt_to_phys(addr); 285 if (!need_iommu(dev, paddr, size))
286 if (!need_iommu(dev, phys_mem, size)) 286 return paddr;
287 return phys_mem;
288 287
289 bus = gart_map_simple(dev, addr, size, dir); 288 bus = gart_map_simple(dev, paddr, size, dir);
290 289
291 return bus; 290 return bus;
292} 291}
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu.c
index ab08e1832228..aec43d56f49c 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
14static int 14static int
15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
16{ 16{
17 if (hwdev && bus + size > *hwdev->dma_mask) { 17 if (hwdev && bus + size > *hwdev->dma_mask) {
18 if (*hwdev->dma_mask >= DMA_32BIT_MASK) 18 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
19 printk(KERN_ERR 19 printk(KERN_ERR
20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", 20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -26,19 +26,17 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
26} 26}
27 27
28static dma_addr_t 28static dma_addr_t
29nommu_map_single(struct device *hwdev, void *ptr, size_t size, 29nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
30 int direction) 30 int direction)
31{ 31{
32 dma_addr_t bus = virt_to_bus(ptr); 32 dma_addr_t bus = paddr;
33 WARN_ON(size == 0);
33 if (!check_addr("map_single", hwdev, bus, size)) 34 if (!check_addr("map_single", hwdev, bus, size))
34 return bad_dma_address; 35 return bad_dma_address;
36 flush_write_buffers();
35 return bus; 37 return bus;
36} 38}
37 39
38static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
39 int direction)
40{
41}
42 40
43/* Map a set of buffers described by scatterlist in streaming 41/* Map a set of buffers described by scatterlist in streaming
44 * mode for DMA. This is the scatter-gather version of the 42 * mode for DMA. This is the scatter-gather version of the
@@ -61,30 +59,34 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
61 struct scatterlist *s; 59 struct scatterlist *s;
62 int i; 60 int i;
63 61
62 WARN_ON(nents == 0 || sg[0].length == 0);
63
64 for_each_sg(sg, s, nents, i) { 64 for_each_sg(sg, s, nents, i) {
65 BUG_ON(!sg_page(s)); 65 BUG_ON(!sg_page(s));
66 s->dma_address = virt_to_bus(sg_virt(s)); 66 s->dma_address = sg_phys(s);
67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) 67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
68 return 0; 68 return 0;
69 s->dma_length = s->length; 69 s->dma_length = s->length;
70 } 70 }
71 flush_write_buffers();
71 return nents; 72 return nents;
72} 73}
73 74
74/* Unmap a set of streaming mode DMA translations. 75/* Make sure we keep the same behaviour */
75 * Again, cpu read rules concerning calls here are the same as for 76static int nommu_mapping_error(dma_addr_t dma_addr)
76 * pci_unmap_single() above.
77 */
78static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79 int nents, int dir)
80{ 77{
78#ifdef CONFIG_X86_32
79 return 0;
80#else
81 return (dma_addr == bad_dma_address);
82#endif
81} 83}
82 84
85
83const struct dma_mapping_ops nommu_dma_ops = { 86const struct dma_mapping_ops nommu_dma_ops = {
84 .map_single = nommu_map_single, 87 .map_single = nommu_map_single,
85 .unmap_single = nommu_unmap_single,
86 .map_sg = nommu_map_sg, 88 .map_sg = nommu_map_sg,
87 .unmap_sg = nommu_unmap_sg, 89 .mapping_error = nommu_mapping_error,
88 .is_phys = 1, 90 .is_phys = 1,
89}; 91};
90 92
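With nommu_mapping_error wired into nommu_dma_ops, a caller can portably detect that check_addr() rejected a buffer. A sketch, assuming the single-argument dma_mapping_error() of this kernel generation:

	dma_addr_t bus = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(bus))
		return -EIO;	/* buffer fell outside the device's dma_mask */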
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82a0a674a003..490da7f4b8d0 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -11,11 +11,18 @@
11 11
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13 13
14static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction)
17{
18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
19}
20
14const struct dma_mapping_ops swiotlb_dma_ops = { 21const struct dma_mapping_ops swiotlb_dma_ops = {
15 .mapping_error = swiotlb_dma_mapping_error, 22 .mapping_error = swiotlb_dma_mapping_error,
16 .alloc_coherent = swiotlb_alloc_coherent, 23 .alloc_coherent = swiotlb_alloc_coherent,
17 .free_coherent = swiotlb_free_coherent, 24 .free_coherent = swiotlb_free_coherent,
18 .map_single = swiotlb_map_single, 25 .map_single = swiotlb_map_single_phys,
19 .unmap_single = swiotlb_unmap_single, 26 .unmap_single = swiotlb_unmap_single,
20 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 27 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
21 .sync_single_for_device = swiotlb_sync_single_for_device, 28 .sync_single_for_device = swiotlb_sync_single_for_device,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
new file mode 100644
index 000000000000..3004d716539d
--- /dev/null
+++ b/arch/x86/kernel/process.c
@@ -0,0 +1,44 @@
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/smp.h>
5#include <linux/slab.h>
6#include <linux/sched.h>
7
8struct kmem_cache *task_xstate_cachep;
9
10int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
11{
12 *dst = *src;
13 if (src->thread.xstate) {
14 dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
15 GFP_KERNEL);
16 if (!dst->thread.xstate)
17 return -ENOMEM;
18 WARN_ON((unsigned long)dst->thread.xstate & 15);
19 memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
20 }
21 return 0;
22}
23
24void free_thread_xstate(struct task_struct *tsk)
25{
26 if (tsk->thread.xstate) {
27 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
28 tsk->thread.xstate = NULL;
29 }
30}
31
32void free_thread_info(struct thread_info *ti)
33{
34 free_thread_xstate(ti->task);
35 free_pages((unsigned long)ti, get_order(THREAD_SIZE));
36}
37
38void arch_task_cache_init(void)
39{
40 task_xstate_cachep =
41 kmem_cache_create("task_xstate", xstate_size,
42 __alignof__(union thread_xstate),
43 SLAB_PANIC, NULL);
44}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3903a8f2eb97..7adad088e373 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
36#include <linux/personality.h> 36#include <linux/personality.h>
37#include <linux/tick.h> 37#include <linux/tick.h>
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/i387.h> 47#include <asm/i387.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/vm86.h>
49#ifdef CONFIG_MATH_EMULATION 49#ifdef CONFIG_MATH_EMULATION
50#include <asm/math_emu.h> 50#include <asm/math_emu.h>
51#endif 51#endif
@@ -521,14 +521,18 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
521 regs->cs = __USER_CS; 521 regs->cs = __USER_CS;
522 regs->ip = new_ip; 522 regs->ip = new_ip;
523 regs->sp = new_sp; 523 regs->sp = new_sp;
524 /*
525 * Free the old FP and other extended state
526 */
527 free_thread_xstate(current);
524} 528}
525EXPORT_SYMBOL_GPL(start_thread); 529EXPORT_SYMBOL_GPL(start_thread);
526 530
527#ifdef CONFIG_SECCOMP
528static void hard_disable_TSC(void) 531static void hard_disable_TSC(void)
529{ 532{
530 write_cr4(read_cr4() | X86_CR4_TSD); 533 write_cr4(read_cr4() | X86_CR4_TSD);
531} 534}
535
532void disable_TSC(void) 536void disable_TSC(void)
533{ 537{
534 preempt_disable(); 538 preempt_disable();
@@ -540,11 +544,47 @@ void disable_TSC(void)
540 hard_disable_TSC(); 544 hard_disable_TSC();
541 preempt_enable(); 545 preempt_enable();
542} 546}
547
543static void hard_enable_TSC(void) 548static void hard_enable_TSC(void)
544{ 549{
545 write_cr4(read_cr4() & ~X86_CR4_TSD); 550 write_cr4(read_cr4() & ~X86_CR4_TSD);
546} 551}
547#endif /* CONFIG_SECCOMP */ 552
553void enable_TSC(void)
554{
555 preempt_disable();
556 if (test_and_clear_thread_flag(TIF_NOTSC))
557 /*
558 * Must flip the CPU state synchronously with
559 * TIF_NOTSC in the current running context.
560 */
561 hard_enable_TSC();
562 preempt_enable();
563}
564
565int get_tsc_mode(unsigned long adr)
566{
567 unsigned int val;
568
569 if (test_thread_flag(TIF_NOTSC))
570 val = PR_TSC_SIGSEGV;
571 else
572 val = PR_TSC_ENABLE;
573
574 return put_user(val, (unsigned int __user *)adr);
575}
576
577int set_tsc_mode(unsigned int val)
578{
579 if (val == PR_TSC_SIGSEGV)
580 disable_TSC();
581 else if (val == PR_TSC_ENABLE)
582 enable_TSC();
583 else
584 return -EINVAL;
585
586 return 0;
587}
548 588
549static noinline void 589static noinline void
550__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 590__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
@@ -578,7 +618,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
578 set_debugreg(next->debugreg7, 7); 618 set_debugreg(next->debugreg7, 7);
579 } 619 }
580 620
581#ifdef CONFIG_SECCOMP
582 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 621 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
583 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 622 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
584 /* prev and next are different */ 623 /* prev and next are different */
@@ -587,7 +626,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
587 else 626 else
588 hard_enable_TSC(); 627 hard_enable_TSC();
589 } 628 }
590#endif
591 629
592#ifdef X86_BTS 630#ifdef X86_BTS
593 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 631 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
@@ -669,7 +707,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
669 707
670 /* we're going to use this soon, after a few expensive things */ 708 /* we're going to use this soon, after a few expensive things */
671 if (next_p->fpu_counter > 5) 709 if (next_p->fpu_counter > 5)
672 prefetch(&next->i387.fxsave); 710 prefetch(next->xstate);
673 711
674 /* 712 /*
675 * Reload esp0. 713 * Reload esp0.
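get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl pair this series introduces. A hypothetical userspace sketch (constants assumed to come from the series' <linux/prctl.h>):

	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int mode;

	/* Make RDTSC fault in this task: subsequent reads raise SIGSEGV. */
	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);

	/* Read the setting back; mode becomes PR_TSC_SIGSEGV. */
	prctl(PR_GET_TSC, &mode, 0, 0, 0);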
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e75ccc8a2b87..891af1a1b48a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -36,6 +36,7 @@
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/kdebug.h> 37#include <linux/kdebug.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -532,9 +533,71 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
532 regs->ss = __USER_DS; 533 regs->ss = __USER_DS;
533 regs->flags = 0x200; 534 regs->flags = 0x200;
534 set_fs(USER_DS); 535 set_fs(USER_DS);
536 /*
537 * Free the old FP and other extended state
538 */
539 free_thread_xstate(current);
535} 540}
536EXPORT_SYMBOL_GPL(start_thread); 541EXPORT_SYMBOL_GPL(start_thread);
537 542
543static void hard_disable_TSC(void)
544{
545 write_cr4(read_cr4() | X86_CR4_TSD);
546}
547
548void disable_TSC(void)
549{
550 preempt_disable();
551 if (!test_and_set_thread_flag(TIF_NOTSC))
552 /*
553 * Must flip the CPU state synchronously with
554 * TIF_NOTSC in the current running context.
555 */
556 hard_disable_TSC();
557 preempt_enable();
558}
559
560static void hard_enable_TSC(void)
561{
562 write_cr4(read_cr4() & ~X86_CR4_TSD);
563}
564
565void enable_TSC(void)
566{
567 preempt_disable();
568 if (test_and_clear_thread_flag(TIF_NOTSC))
569 /*
570 * Must flip the CPU state synchronously with
571 * TIF_NOTSC in the current running context.
572 */
573 hard_enable_TSC();
574 preempt_enable();
575}
576
577int get_tsc_mode(unsigned long adr)
578{
579 unsigned int val;
580
581 if (test_thread_flag(TIF_NOTSC))
582 val = PR_TSC_SIGSEGV;
583 else
584 val = PR_TSC_ENABLE;
585
586 return put_user(val, (unsigned int __user *)adr);
587}
588
589int set_tsc_mode(unsigned int val)
590{
591 if (val == PR_TSC_SIGSEGV)
592 disable_TSC();
593 else if (val == PR_TSC_ENABLE)
594 enable_TSC();
595 else
596 return -EINVAL;
597
598 return 0;
599}
600
538/* 601/*
539 * This special macro can be used to load a debugging register 602 * This special macro can be used to load a debugging register
540 */ 603 */
@@ -572,6 +635,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
572 loaddebug(next, 7); 635 loaddebug(next, 7);
573 } 636 }
574 637
638 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
639 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
640 /* prev and next are different */
641 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
642 hard_disable_TSC();
643 else
644 hard_enable_TSC();
645 }
646
575 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 647 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
576 /* 648 /*
577 * Copy the relevant range of the IO bitmap. 649 * Copy the relevant range of the IO bitmap.
@@ -614,7 +686,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
614 686
615 /* we're going to use this soon, after a few expensive things */ 687 /* we're going to use this soon, after a few expensive things */
616 if (next_p->fpu_counter>5) 688 if (next_p->fpu_counter>5)
617 prefetch(&next->i387.fxsave); 689 prefetch(next->xstate);
618 690
619 /* 691 /*
620 * Reload esp0, LDT and the page table pointer: 692 * Reload esp0, LDT and the page table pointer:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9692202d3bfb..19c9386ac118 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -420,7 +420,7 @@ static void native_machine_shutdown(void)
420 reboot_cpu_id = smp_processor_id(); 420 reboot_cpu_id = smp_processor_id();
421 421
422 /* Make certain I only run on the appropriate processor */ 422 /* Make certain I only run on the appropriate processor */
423 set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); 423 set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
424 424
425 /* O.K Now that I'm on the appropriate processor, 425 /* O.K Now that I'm on the appropriate processor,
426 * stop all of the others. 426 * stop all of the others.
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ed157c90412e..0d1f44ae6eea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -54,6 +54,24 @@ static void __init setup_per_cpu_maps(void)
54#endif 54#endif
55} 55}
56 56
57#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
58cpumask_t *cpumask_of_cpu_map __read_mostly;
59EXPORT_SYMBOL(cpumask_of_cpu_map);
60
61/* requires nr_cpu_ids to be initialized */
62static void __init setup_cpumask_of_cpu(void)
63{
64 int i;
65
66 /* alloc_bootmem zeroes memory */
67 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
68 for (i = 0; i < nr_cpu_ids; i++)
69 cpu_set(i, cpumask_of_cpu_map[i]);
70}
71#else
72static inline void setup_cpumask_of_cpu(void) { }
73#endif
74
57#ifdef CONFIG_X86_32 75#ifdef CONFIG_X86_32
58/* 76/*
59 * Great future not-so-futuristic plan: make i386 and x86_64 do it 77 * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -70,7 +88,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
70 */ 88 */
71void __init setup_per_cpu_areas(void) 89void __init setup_per_cpu_areas(void)
72{ 90{
73 int i; 91 int i, highest_cpu = 0;
74 unsigned long size; 92 unsigned long size;
75 93
76#ifdef CONFIG_HOTPLUG_CPU 94#ifdef CONFIG_HOTPLUG_CPU
@@ -104,10 +122,18 @@ void __init setup_per_cpu_areas(void)
104 __per_cpu_offset[i] = ptr - __per_cpu_start; 122 __per_cpu_offset[i] = ptr - __per_cpu_start;
105#endif 123#endif
106 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 124 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
125
126 highest_cpu = i;
107 } 127 }
108 128
129 nr_cpu_ids = highest_cpu + 1;
130 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
131
109 /* Setup percpu data maps */ 132 /* Setup percpu data maps */
110 setup_per_cpu_maps(); 133 setup_per_cpu_maps();
134
135 /* Setup cpumask_of_cpu map */
136 setup_cpumask_of_cpu();
111} 137}
112 138
113#endif 139#endif
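With CONFIG_HAVE_CPUMASK_OF_CPU_MAP, cpumask_of_cpu(cpu) can resolve to an entry of this boot-time table instead of materialising a cpumask_t on the stack; the reboot.c hunk above relies on that to pass a pointer. A sketch of the assumed macro shape (illustrative, not verbatim from the headers):

	#define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])

	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));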
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 9042fb0e36f5..aee0e8200777 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -74,8 +74,8 @@ int force_personality32 = 0;
74Control non executable heap for 32bit processes. 74Control non executable heap for 32bit processes.
75To control the stack too use noexec=off 75To control the stack too use noexec=off
76 76
77on PROT_READ does not imply PROT_EXEC for 32bit processes 77on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
78off PROT_READ implies PROT_EXEC (default) 78off PROT_READ implies PROT_EXEC
79*/ 79*/
80static int __init nonx32_setup(char *str) 80static int __init nonx32_setup(char *str)
81{ 81{
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5b0bffb7fcc9..78828b0f604f 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -39,6 +39,7 @@
39#include <linux/efi.h> 39#include <linux/efi.h>
40#include <linux/init.h> 40#include <linux/init.h>
41#include <linux/edd.h> 41#include <linux/edd.h>
42#include <linux/iscsi_ibft.h>
42#include <linux/nodemask.h> 43#include <linux/nodemask.h>
43#include <linux/kexec.h> 44#include <linux/kexec.h>
44#include <linux/crash_dump.h> 45#include <linux/crash_dump.h>
@@ -689,6 +690,8 @@ void __init setup_bootmem_allocator(void)
689#endif 690#endif
690 numa_kva_reserve(); 691 numa_kva_reserve();
691 reserve_crashkernel(); 692 reserve_crashkernel();
693
694 reserve_ibft_region();
692} 695}
693 696
694/* 697/*
@@ -812,10 +815,10 @@ void __init setup_arch(char **cmdline_p)
812 efi_init(); 815 efi_init();
813 816
814 /* update e820 for memory not covered by WB MTRRs */ 817 /* update e820 for memory not covered by WB MTRRs */
815 find_max_pfn(); 818 propagate_e820_map();
816 mtrr_bp_init(); 819 mtrr_bp_init();
817 if (mtrr_trim_uncached_memory(max_pfn)) 820 if (mtrr_trim_uncached_memory(max_pfn))
818 find_max_pfn(); 821 propagate_e820_map();
819 822
820 max_low_pfn = setup_memory(); 823 max_low_pfn = setup_memory();
821 824
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 674ef3510cdf..c2ec3dcb6b99 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -33,6 +33,7 @@
33#include <linux/acpi.h> 33#include <linux/acpi.h>
34#include <linux/kallsyms.h> 34#include <linux/kallsyms.h>
35#include <linux/edd.h> 35#include <linux/edd.h>
36#include <linux/iscsi_ibft.h>
36#include <linux/mmzone.h> 37#include <linux/mmzone.h>
37#include <linux/kexec.h> 38#include <linux/kexec.h>
38#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
@@ -398,6 +399,8 @@ void __init setup_arch(char **cmdline_p)
398 399
399 early_res_to_bootmem(); 400 early_res_to_bootmem();
400 401
402 dma32_reserve_bootmem();
403
401#ifdef CONFIG_ACPI_SLEEP 404#ifdef CONFIG_ACPI_SLEEP
402 /* 405 /*
403 * Reserve low memory region for sleep support. 406 * Reserve low memory region for sleep support.
@@ -420,11 +423,14 @@ void __init setup_arch(char **cmdline_p)
420 unsigned long end_of_mem = end_pfn << PAGE_SHIFT; 423 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
421 424
422 if (ramdisk_end <= end_of_mem) { 425 if (ramdisk_end <= end_of_mem) {
423 reserve_bootmem_generic(ramdisk_image, ramdisk_size); 426 /*
427 * don't need to reserve again, already reserved early
428 * in x86_64_start_kernel, and early_res_to_bootmem
429 * convert that to reserved in bootmem
430 */
424 initrd_start = ramdisk_image + PAGE_OFFSET; 431 initrd_start = ramdisk_image + PAGE_OFFSET;
425 initrd_end = initrd_start+ramdisk_size; 432 initrd_end = initrd_start+ramdisk_size;
426 } else { 433 } else {
427 /* Assumes everything on node 0 */
428 free_bootmem(ramdisk_image, ramdisk_size); 434 free_bootmem(ramdisk_image, ramdisk_size);
429 printk(KERN_ERR "initrd extends beyond end of memory " 435 printk(KERN_ERR "initrd extends beyond end of memory "
430 "(0x%08lx > 0x%08lx)\ndisabling initrd\n", 436 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
@@ -434,6 +440,9 @@ void __init setup_arch(char **cmdline_p)
434 } 440 }
435#endif 441#endif
436 reserve_crashkernel(); 442 reserve_crashkernel();
443
444 reserve_ibft_region();
445
437 paging_init(); 446 paging_init();
438 map_vsyscall(); 447 map_vsyscall();
439 448
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e6abe8a49b1f..6a925394bc7e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -61,6 +61,7 @@
61#include <asm/mtrr.h> 61#include <asm/mtrr.h>
62#include <asm/nmi.h> 62#include <asm/nmi.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h>
64#include <linux/mc146818rtc.h> 65#include <linux/mc146818rtc.h>
65 66
66#include <mach_apic.h> 67#include <mach_apic.h>
@@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
677 unsigned long send_status, accept_status = 0; 678 unsigned long send_status, accept_status = 0;
678 int maxlvt, num_starts, j; 679 int maxlvt, num_starts, j;
679 680
681 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
682 send_status = uv_wakeup_secondary(phys_apicid, start_eip);
683 atomic_set(&init_deasserted, 1);
684 return send_status;
685 }
686
680 /* 687 /*
681 * Be paranoid about clearing APIC errors. 688 * Be paranoid about clearing APIC errors.
682 */ 689 */
@@ -918,16 +925,19 @@ do_rest:
918 925
919 atomic_set(&init_deasserted, 0); 926 atomic_set(&init_deasserted, 0);
920 927
921 Dprintk("Setting warm reset code and vector.\n"); 928 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
922 929
923 store_NMI_vector(&nmi_high, &nmi_low); 930 Dprintk("Setting warm reset code and vector.\n");
924 931
925 smpboot_setup_warm_reset_vector(start_ip); 932 store_NMI_vector(&nmi_high, &nmi_low);
926 /* 933
927 * Be paranoid about clearing APIC errors. 934 smpboot_setup_warm_reset_vector(start_ip);
928 */ 935 /*
929 apic_write(APIC_ESR, 0); 936 * Be paranoid about clearing APIC errors.
930 apic_read(APIC_ESR); 937 */
938 apic_write(APIC_ESR, 0);
939 apic_read(APIC_ESR);
940 }
931 941
932 /* 942 /*
933 * Starting actual IPI sequence... 943 * Starting actual IPI sequence...
@@ -966,7 +976,8 @@ do_rest:
966 else 976 else
967 /* trampoline code not run */ 977 /* trampoline code not run */
968 printk(KERN_ERR "Not responding.\n"); 978 printk(KERN_ERR "Not responding.\n");
969 inquire_remote_apic(apicid); 979 if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
980 inquire_remote_apic(apicid);
970 } 981 }
971 } 982 }
972 983
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 65791ca2824a..471e694d6713 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -681,7 +681,7 @@ gp_in_kernel:
681 } 681 }
682} 682}
683 683
684static __kprobes void 684static notrace __kprobes void
685mem_parity_error(unsigned char reason, struct pt_regs *regs) 685mem_parity_error(unsigned char reason, struct pt_regs *regs)
686{ 686{
687 printk(KERN_EMERG 687 printk(KERN_EMERG
@@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
707 clear_mem_error(reason); 707 clear_mem_error(reason);
708} 708}
709 709
710static __kprobes void 710static notrace __kprobes void
711io_check_error(unsigned char reason, struct pt_regs *regs) 711io_check_error(unsigned char reason, struct pt_regs *regs)
712{ 712{
713 unsigned long i; 713 unsigned long i;
@@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
727 outb(reason, 0x61); 727 outb(reason, 0x61);
728} 728}
729 729
730static __kprobes void 730static notrace __kprobes void
731unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 731unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
732{ 732{
733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
755 755
756static DEFINE_SPINLOCK(nmi_print_lock); 756static DEFINE_SPINLOCK(nmi_print_lock);
757 757
758void __kprobes die_nmi(struct pt_regs *regs, const char *msg) 758void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
759{ 759{
760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) 760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
761 return; 761 return;
@@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
786 do_exit(SIGSEGV); 786 do_exit(SIGSEGV);
787} 787}
788 788
789static __kprobes void default_do_nmi(struct pt_regs *regs) 789static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
790{ 790{
791 unsigned char reason = 0; 791 unsigned char reason = 0;
792 792
@@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
828 828
829static int ignore_nmis; 829static int ignore_nmis;
830 830
831__kprobes void do_nmi(struct pt_regs *regs, long error_code) 831notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
832{ 832{
833 int cpu; 833 int cpu;
834 834
@@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void)
1148 struct thread_info *thread = current_thread_info(); 1148 struct thread_info *thread = current_thread_info();
1149 struct task_struct *tsk = thread->task; 1149 struct task_struct *tsk = thread->task;
1150 1150
1151 if (!tsk_used_math(tsk)) {
1152 local_irq_enable();
1153 /*
1154 * does a slab alloc which can sleep
1155 */
1156 if (init_fpu(tsk)) {
1157 /*
1158 * ran out of memory!
1159 */
1160 do_group_exit(SIGKILL);
1161 return;
1162 }
1163 local_irq_disable();
1164 }
1165
1151 clts(); /* Allow maths ops (or we recurse) */ 1166 clts(); /* Allow maths ops (or we recurse) */
1152 if (!tsk_used_math(tsk))
1153 init_fpu(tsk);
1154 restore_fpu(tsk); 1167 restore_fpu(tsk);
1155 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 1168 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1156 tsk->fpu_counter++; 1169 tsk->fpu_counter++;
@@ -1208,11 +1221,6 @@ void __init trap_init(void)
1208#endif 1221#endif
1209 set_trap_gate(19, &simd_coprocessor_error); 1222 set_trap_gate(19, &simd_coprocessor_error);
1210 1223
1211 /*
1212 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
1213 * Generate a build-time error if the alignment is wrong.
1214 */
1215 BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
1216 if (cpu_has_fxsr) { 1224 if (cpu_has_fxsr) {
1217 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1225 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1218 set_in_cr4(X86_CR4_OSFXSR); 1226 set_in_cr4(X86_CR4_OSFXSR);
@@ -1233,6 +1241,7 @@ void __init trap_init(void)
1233 1241
1234 set_bit(SYSCALL_VECTOR, used_vectors); 1242 set_bit(SYSCALL_VECTOR, used_vectors);
1235 1243
1244 init_thread_xstate();
1236 /* 1245 /*
1237 * Should be a barrier for any external CPU state: 1246 * Should be a barrier for any external CPU state:
1238 */ 1247 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 79aa6fc0815c..adff76ea97c4 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err)
600 oops_end(flags, regs, SIGSEGV); 600 oops_end(flags, regs, SIGSEGV);
601} 601}
602 602
603void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) 603notrace __kprobes void
604die_nmi(char *str, struct pt_regs *regs, int do_panic)
604{ 605{
605 unsigned long flags; 606 unsigned long flags;
606 607
@@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
772 die("general protection fault", regs, error_code); 773 die("general protection fault", regs, error_code);
773} 774}
774 775
775static __kprobes void 776static notrace __kprobes void
776mem_parity_error(unsigned char reason, struct pt_regs * regs) 777mem_parity_error(unsigned char reason, struct pt_regs * regs)
777{ 778{
778 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 779 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
@@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
796 outb(reason, 0x61); 797 outb(reason, 0x61);
797} 798}
798 799
799static __kprobes void 800static notrace __kprobes void
800io_check_error(unsigned char reason, struct pt_regs * regs) 801io_check_error(unsigned char reason, struct pt_regs * regs)
801{ 802{
802 printk("NMI: IOCK error (debug interrupt?)\n"); 803 printk("NMI: IOCK error (debug interrupt?)\n");
@@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
810 outb(reason, 0x61); 811 outb(reason, 0x61);
811} 812}
812 813
813static __kprobes void 814static notrace __kprobes void
814unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 815unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
815{ 816{
816 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 817 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
827 828
828/* Runs on IST stack. This code must keep interrupts off all the time. 829/* Runs on IST stack. This code must keep interrupts off all the time.
829 Nested NMIs are prevented by the CPU. */ 830 Nested NMIs are prevented by the CPU. */
830asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) 831asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
831{ 832{
832 unsigned char reason = 0; 833 unsigned char reason = 0;
833 int cpu; 834 int cpu;
@@ -1123,11 +1124,24 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1123asmlinkage void math_state_restore(void) 1124asmlinkage void math_state_restore(void)
1124{ 1125{
1125 struct task_struct *me = current; 1126 struct task_struct *me = current;
1126 clts(); /* Allow maths ops (or we recurse) */
1127 1127
1128 if (!used_math()) 1128 if (!used_math()) {
1129 init_fpu(me); 1129 local_irq_enable();
1130 restore_fpu_checking(&me->thread.i387.fxsave); 1130 /*
1131 * does a slab alloc which can sleep
1132 */
1133 if (init_fpu(me)) {
1134 /*
1135 * ran out of memory!
1136 */
1137 do_group_exit(SIGKILL);
1138 return;
1139 }
1140 local_irq_disable();
1141 }
1142
1143 clts(); /* Allow maths ops (or we recurse) */
1144 restore_fpu_checking(&me->thread.xstate->fxsave);
1131 task_thread_info(me)->status |= TS_USEDFPU; 1145 task_thread_info(me)->status |= TS_USEDFPU;
1132 me->fpu_counter++; 1146 me->fpu_counter++;
1133} 1147}
@@ -1163,6 +1177,10 @@ void __init trap_init(void)
1163#endif 1177#endif
1164 1178
1165 /* 1179 /*
1180 * initialize the per thread extended state:
1181 */
1182 init_thread_xstate();
1183 /*
1166 * Should be a barrier for any external CPU state. 1184 * Should be a barrier for any external CPU state.
1167 */ 1185 */
1168 cpu_init(); 1186 cpu_init();
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 3d7e6e9fa6c2..e4790728b224 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
221 * if the CPU frequency is scaled, TSC-based delays will need a different 221 * if the CPU frequency is scaled, TSC-based delays will need a different
222 * loops_per_jiffy value to function properly. 222 * loops_per_jiffy value to function properly.
223 */ 223 */
224static unsigned int ref_freq = 0; 224static unsigned int ref_freq;
225static unsigned long loops_per_jiffy_ref = 0; 225static unsigned long loops_per_jiffy_ref;
226static unsigned long cpu_khz_ref = 0; 226static unsigned long cpu_khz_ref;
227 227
228static int 228static int
229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) 229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
@@ -283,15 +283,28 @@ core_initcall(cpufreq_tsc);
283 283
284/* clock source code */ 284/* clock source code */
285 285
286static unsigned long current_tsc_khz = 0; 286static unsigned long current_tsc_khz;
287static struct clocksource clocksource_tsc;
287 288
289/*
290 * We compare the TSC to the cycle_last value in the clocksource
291 * structure to avoid a nasty time-warp issue. This can be observed in
292 * a very small window right after one CPU updated cycle_last under
293 * xtime lock and the other CPU reads a TSC value which is smaller
294 * than the cycle_last reference value due to a TSC which is slightly
295 * behind. This delta is nowhere else observable, but in that case it
296 * results in a forward time jump in the range of hours due to the
297 * unsigned delta calculation of the timekeeping core code, which is
298 * necessary to support wrapping clocksources like pm timer.
299 */
288static cycle_t read_tsc(void) 300static cycle_t read_tsc(void)
289{ 301{
290 cycle_t ret; 302 cycle_t ret;
291 303
292 rdtscll(ret); 304 rdtscll(ret);
293 305
294 return ret; 306 return ret >= clocksource_tsc.cycle_last ?
307 ret : clocksource_tsc.cycle_last;
295} 308}
296 309
297static struct clocksource clocksource_tsc = { 310static struct clocksource clocksource_tsc = {
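Concretely, the clamp in read_tsc() avoids the unsigned-delta blow-up the comment describes: with cycle_last == 1000, a slightly-behind CPU reading 998 would make the timekeeping core compute (u64)(998 - 1000) = 2^64 - 2 cycles, an enormous forward jump, instead of a 2-cycle step backwards; returning cycle_last pins the delta at 0 for that window.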
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index ceeba01e7f47..fcc16e58609e 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -11,6 +11,7 @@
11#include <asm/hpet.h> 11#include <asm/hpet.h>
12#include <asm/timex.h> 12#include <asm/timex.h>
13#include <asm/timer.h> 13#include <asm/timer.h>
14#include <asm/vgtod.h>
14 15
15static int notsc __initdata = 0; 16static int notsc __initdata = 0;
16 17
@@ -287,18 +288,34 @@ int __init notsc_setup(char *s)
287 288
288__setup("notsc", notsc_setup); 289__setup("notsc", notsc_setup);
289 290
291static struct clocksource clocksource_tsc;
290 292
291/* clock source code: */ 293/*
294 * We compare the TSC to the cycle_last value in the clocksource
295 * structure to avoid a nasty time-warp. This can be observed in a
296 * very small window right after one CPU updated cycle_last under
297 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
298 * is smaller than the cycle_last reference value due to a TSC which
299 * is slightly behind. This delta is nowhere else observable, but in
300 * that case it results in a forward time jump in the range of hours
301 * due to the unsigned delta calculation of the timekeeping core
302 * code, which is necessary to support wrapping clocksources like pm
303 * timer.
304 */
292static cycle_t read_tsc(void) 305static cycle_t read_tsc(void)
293{ 306{
294 cycle_t ret = (cycle_t)get_cycles(); 307 cycle_t ret = (cycle_t)get_cycles();
295 return ret; 308
309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
296} 311}
297 312
298static cycle_t __vsyscall_fn vread_tsc(void) 313static cycle_t __vsyscall_fn vread_tsc(void)
299{ 314{
300 cycle_t ret = (cycle_t)vget_cycles(); 315 cycle_t ret = (cycle_t)vget_cycles();
301 return ret; 316
317 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
318 ret : __vsyscall_gtod_data.clock.cycle_last;
302} 319}
303 320
304static struct clocksource clocksource_tsc = { 321static struct clocksource clocksource_tsc = {
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
index 710faf71a650..cef9cb1d15ac 100644
--- a/arch/x86/mach-visws/visws_apic.c
+++ b/arch/x86/mach-visws/visws_apic.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/arch/i386/mach-visws/visws_apic.c
3 *
4 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar 2 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
5 * 3 *
6 * SGI Visual Workstation interrupt controller 4 * SGI Visual Workstation interrupt controller
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
index 6a949e4edde8..46d6f8067690 100644
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ b/arch/x86/mach-voyager/voyager_basic.c
@@ -2,8 +2,6 @@
2 * 2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com 3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 * 4 *
5 * linux/arch/i386/kernel/voyager.c
6 *
7 * This file contains all the voyager specific routines for getting 5 * This file contains all the voyager specific routines for getting
8 * initialisation of the architecture to function. For additional 6 * initialisation of the architecture to function. For additional
9 * features see: 7 * features see:
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
index 17a7904f75b1..ecab9fff0fd1 100644
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ b/arch/x86/mach-voyager/voyager_cat.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_cat.c
8 *
9 * This file contains all the logic for manipulating the CAT bus 7 * This file contains all the logic for manipulating the CAT bus
10 * in a level 5 machine. 8 * in a level 5 machine.
11 * 9 *
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index be7235bf105d..96f60c7cd124 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_smp.c
8 *
9 * This file provides all the same external entries as smp.c but uses 7 * This file provides all the same external entries as smp.c but uses
10 * the voyager hal to provide the functionality 8 * the voyager hal to provide the functionality
11 */ 9 */
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
index c69c931818ed..15464a20fb38 100644
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ b/arch/x86/mach-voyager/voyager_thread.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_thread.c
8 *
9 * This module provides the machine status monitor thread for the 7 * This module provides the machine status monitor thread for the
10 * voyager architecture. This allows us to monitor the machine 8 * voyager architecture. This allows us to monitor the machine
11 * environment (temp, voltage, fan function) and the front panel and 9 * environment (temp, voltage, fan function) and the front panel and
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 4bab3b145392..6e38d877ea77 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target,
678 unsigned int pos, unsigned int count, 678 unsigned int pos, unsigned int count,
679 const void *kbuf, const void __user *ubuf) 679 const void *kbuf, const void __user *ubuf)
680{ 680{
681 struct i387_soft_struct *s387 = &target->thread.i387.soft; 681 struct i387_soft_struct *s387 = &target->thread.xstate->soft;
682 void *space = s387->st_space; 682 void *space = s387->st_space;
683 int ret; 683 int ret;
684 int offset, other, i, tags, regnr, tag, newtop; 684 int offset, other, i, tags, regnr, tag, newtop;
@@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
730 unsigned int pos, unsigned int count, 730 unsigned int pos, unsigned int count,
731 void *kbuf, void __user *ubuf) 731 void *kbuf, void __user *ubuf)
732{ 732{
733 struct i387_soft_struct *s387 = &target->thread.i387.soft; 733 struct i387_soft_struct *s387 = &target->thread.xstate->soft;
734 const void *space = s387->st_space; 734 const void *space = s387->st_space;
735 int ret; 735 int ret;
736 int offset = (S387->ftop & 7) * 10, other = 80 - offset; 736 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index a3ae28c49ddd..13488fa153e0 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -35,8 +35,8 @@
35#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ 35#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
36 == (1 << 10)) 36 == (1 << 10))
37 37
38#define I387 (current->thread.i387) 38#define I387 (current->thread.xstate)
39#define FPU_info (I387.soft.info) 39#define FPU_info (I387->soft.info)
40 40
41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) 41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) 42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
@@ -46,25 +46,25 @@
46#define FPU_EIP (FPU_info->___eip) 46#define FPU_EIP (FPU_info->___eip)
47#define FPU_ORIG_EIP (FPU_info->___orig_eip) 47#define FPU_ORIG_EIP (FPU_info->___orig_eip)
48 48
49#define FPU_lookahead (I387.soft.lookahead) 49#define FPU_lookahead (I387->soft.lookahead)
50 50
51/* nz if ip_offset and cs_selector are not to be set for the current 51/* nz if ip_offset and cs_selector are not to be set for the current
52 instruction. */ 52 instruction. */
53#define no_ip_update (*(u_char *)&(I387.soft.no_update)) 53#define no_ip_update (*(u_char *)&(I387->soft.no_update))
54#define FPU_rm (*(u_char *)&(I387.soft.rm)) 54#define FPU_rm (*(u_char *)&(I387->soft.rm))
55 55
56/* Number of bytes of data which can be legally accessed by the current 56/* Number of bytes of data which can be legally accessed by the current
57 instruction. This only needs to hold a number <= 108, so a byte will do. */ 57 instruction. This only needs to hold a number <= 108, so a byte will do. */
58#define access_limit (*(u_char *)&(I387.soft.alimit)) 58#define access_limit (*(u_char *)&(I387->soft.alimit))
59 59
60#define partial_status (I387.soft.swd) 60#define partial_status (I387->soft.swd)
61#define control_word (I387.soft.cwd) 61#define control_word (I387->soft.cwd)
62#define fpu_tag_word (I387.soft.twd) 62#define fpu_tag_word (I387->soft.twd)
63#define registers (I387.soft.st_space) 63#define registers (I387->soft.st_space)
64#define top (I387.soft.ftop) 64#define top (I387->soft.ftop)
65 65
66#define instruction_address (*(struct address *)&I387.soft.fip) 66#define instruction_address (*(struct address *)&I387->soft.fip)
67#define operand_address (*(struct address *)&I387.soft.foo) 67#define operand_address (*(struct address *)&I387->soft.foo)
68 68
69#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ 69#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
70 math_abort(FPU_info,SIGSEGV) 70 math_abort(FPU_info,SIGSEGV)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index 02af772a24db..d597fe7423c9 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
1180 control_word |= 0xffff0040; 1180 control_word |= 0xffff0040;
1181 partial_status = status_word() | 0xffff0000; 1181 partial_status = status_word() | 0xffff0000;
1182 fpu_tag_word |= 0xffff0000; 1182 fpu_tag_word |= 0xffff0000;
1183 I387.soft.fcs &= ~0xf8000000; 1183 I387->soft.fcs &= ~0xf8000000;
1184 I387.soft.fos |= 0xffff0000; 1184 I387->soft.fos |= 0xffff0000;
1185#endif /* PECULIAR_486 */ 1185#endif /* PECULIAR_486 */
1186 if (__copy_to_user(d, &control_word, 7 * 4)) 1186 if (__copy_to_user(d, &control_word, 7 * 4))
1187 FPU_abort; 1187 FPU_abort;
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index eba0bbede7a6..18378850e25a 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
120 printk("NUMA - single node, flat memory mode\n"); 120 printk("NUMA - single node, flat memory mode\n");
121 121
122 /* Run the memory configuration and find the top of memory. */ 122 /* Run the memory configuration and find the top of memory. */
123 find_max_pfn(); 123 propagate_e820_map();
124 node_start_pfn[0] = 0; 124 node_start_pfn[0] = 0;
125 node_end_pfn[0] = max_pfn; 125 node_end_pfn[0] = max_pfn;
126 memory_present(0, 0, max_pfn); 126 memory_present(0, 0, max_pfn);
@@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void)
134/* 134/*
135 * Find the highest page frame number we have available for the node 135 * Find the highest page frame number we have available for the node
136 */ 136 */
137static void __init find_max_pfn_node(int nid) 137static void __init propagate_e820_map_node(int nid)
138{ 138{
139 if (node_end_pfn[nid] > max_pfn) 139 if (node_end_pfn[nid] > max_pfn)
140 node_end_pfn[nid] = max_pfn; 140 node_end_pfn[nid] = max_pfn;
@@ -379,7 +379,7 @@ unsigned long __init setup_memory(void)
379 printk("High memory starts at vaddr %08lx\n", 379 printk("High memory starts at vaddr %08lx\n",
380 (ulong) pfn_to_kaddr(highstart_pfn)); 380 (ulong) pfn_to_kaddr(highstart_pfn));
381 for_each_online_node(nid) 381 for_each_online_node(nid)
382 find_max_pfn_node(nid); 382 propagate_e820_map_node(nid);
383 383
384 memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); 384 memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
385 NODE_DATA(0)->bdata = &node0_bdata; 385 NODE_DATA(0)->bdata = &node0_bdata;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 1500dc8d63e4..9ec62da85fd7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1,5 +1,4 @@
1/* 1/*
2 * linux/arch/i386/mm/init.c
3 * 2 *
4 * Copyright (C) 1995 Linus Torvalds 3 * Copyright (C) 1995 Linus Torvalds
5 * 4 *
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1076097dcab2..1ff7906a9a4d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -47,9 +47,6 @@
47#include <asm/numa.h> 47#include <asm/numa.h>
48#include <asm/cacheflush.h> 48#include <asm/cacheflush.h>
49 49
50const struct dma_mapping_ops *dma_ops;
51EXPORT_SYMBOL(dma_ops);
52
53static unsigned long dma_reserve __initdata; 50static unsigned long dma_reserve __initdata;
54 51
55DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 52DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c590fd200e29..3a4baf95e24d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
134 134
135 if (!phys_addr_valid(phys_addr)) { 135 if (!phys_addr_valid(phys_addr)) {
136 printk(KERN_WARNING "ioremap: invalid physical address %llx\n", 136 printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
137 phys_addr); 137 (unsigned long long)phys_addr);
138 WARN_ON_ONCE(1); 138 WARN_ON_ONCE(1);
139 return NULL; 139 return NULL;
140 } 140 }
@@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
187 new_prot_val == _PAGE_CACHE_WB)) { 187 new_prot_val == _PAGE_CACHE_WB)) {
188 pr_debug( 188 pr_debug(
189 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", 189 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
190 phys_addr, phys_addr + size, 190 (unsigned long long)phys_addr,
191 (unsigned long long)(phys_addr + size),
191 prot_val, new_prot_val); 192 prot_val, new_prot_val);
192 free_memtype(phys_addr, phys_addr + size); 193 free_memtype(phys_addr, phys_addr + size);
193 return NULL; 194 return NULL;
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 7a2ebce87df5..86808e666f9c 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
164 if (!found) 164 if (!found)
165 return -1; 165 return -1;
166 166
167 memnode_shift = compute_hash_shift(nodes, 8); 167 memnode_shift = compute_hash_shift(nodes, 8, NULL);
168 if (memnode_shift < 0) { 168 if (memnode_shift < 0) {
169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
170 return -1; 170 return -1;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2ea56f48f29b..9a6892200b27 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size;
60 * -1 if node overlap or lost ram (shift too big) 60 * -1 if node overlap or lost ram (shift too big)
61 */ 61 */
62static int __init populate_memnodemap(const struct bootnode *nodes, 62static int __init populate_memnodemap(const struct bootnode *nodes,
63 int numnodes, int shift) 63 int numnodes, int shift, int *nodeids)
64{ 64{
65 unsigned long addr, end; 65 unsigned long addr, end;
66 int i, res = -1; 66 int i, res = -1;
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
76 do { 76 do {
77 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 77 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
78 return -1; 78 return -1;
79 memnodemap[addr >> shift] = i; 79
80 if (!nodeids)
81 memnodemap[addr >> shift] = i;
82 else
83 memnodemap[addr >> shift] = nodeids[i];
84
80 addr += (1UL << shift); 85 addr += (1UL << shift);
81 } while (addr < end); 86 } while (addr < end);
82 res = 1; 87 res = 1;
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
139 return i; 144 return i;
140} 145}
141 146
142int __init compute_hash_shift(struct bootnode *nodes, int numnodes) 147int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
148 int *nodeids)
143{ 149{
144 int shift; 150 int shift;
145 151
@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
149 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 155 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
150 shift); 156 shift);
151 157
152 if (populate_memnodemap(nodes, numnodes, shift) != 1) { 158 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
153 printk(KERN_INFO "Your memory is not aligned you need to " 159 printk(KERN_INFO "Your memory is not aligned you need to "
154 "rebuild your kernel with a bigger NODEMAPSIZE " 160 "rebuild your kernel with a bigger NODEMAPSIZE "
155 "shift=%d\n", shift); 161 "shift=%d\n", shift);
@@ -380,9 +386,10 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
  * Sets up the system RAM area from start_pfn to end_pfn according to the
  * numa=fake command-line option.
  */
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
+
 static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 {
-	struct bootnode nodes[MAX_NUMNODES];
 	u64 size, addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = end_pfn << PAGE_SHIFT;
 	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
@@ -462,7 +469,7 @@ done:
 		}
 	}
 out:
-	memnode_shift = compute_hash_shift(nodes, num_nodes);
+	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
 	if (memnode_shift < 0) {
 		memnode_shift = 0;
 		printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
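
The new nodeids argument decouples a memory block's array index from the node that owns it: k8_scan_nodes() and numa_emulation() pass NULL and keep the old identity mapping, while the SRAT caller below passes an explicit id table so one node can contribute several blocks. A minimal user-space model of the populated hash map — populate_map, the map size, and NUMA_NO_NODE's value here are illustrative, not the kernel's:

#include <stdio.h>
#include <string.h>

#define NUMA_NO_NODE 0xff

struct bootnode {
	unsigned long start, end;	/* physical range, end exclusive */
};

/* Each (1 << shift)-sized chunk of the address space gets the id of
 * the node owning it; with nodeids == NULL the block index doubles
 * as the node id, otherwise several blocks may share one node id. */
static int populate_map(unsigned char *map, unsigned long mapsize,
			const struct bootnode *nodes, int numnodes,
			int shift, const int *nodeids)
{
	int i;

	for (i = 0; i < numnodes; i++) {
		unsigned long addr = nodes[i].start;
		unsigned long end = nodes[i].end;

		if (addr >= end)
			continue;
		if ((end >> shift) > mapsize)
			return 0;		/* map too small for this shift */
		do {
			if (map[addr >> shift] != NUMA_NO_NODE)
				return -1;	/* two blocks claim one chunk */
			map[addr >> shift] = nodeids ? nodeids[i] : i;
			addr += 1UL << shift;
		} while (addr < end);
	}
	return 1;
}

int main(void)
{
	unsigned char map[16];
	struct bootnode blks[] = { { 0x0, 0x4000 }, { 0x4000, 0x8000 } };
	int ids[] = { 0, 0 };		/* both blocks belong to node 0 */

	memset(map, NUMA_NO_NODE, sizeof(map));
	printf("res=%d first=%d\n",
	       populate_map(map, sizeof(map), blks, 2, 12, ids), map[0]);
	return 0;
}

With shift = 12 both 16 KiB blocks land in node 0's slots; in this model a 0 return means the shift is too coarse for the map, and -1 that two blocks overlapped.
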
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 3165ec0672bd..6fb9e7c6893f 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -1,7 +1,3 @@
-/*
- * linux/arch/i386/mm/pgtable.c
- */
-
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 1bae9c855ceb..fb43d89f46f3 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
 static int found_add_area __initdata;
 int hotadd_percent __initdata = 0;
 
+static int num_node_memblks __initdata;
+static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
+static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
+
 /* Too small nodes confuse the VM badly. Usually they result
    from BIOS bugs. */
 #define NODE_MIN_SIZE (4*1024*1024)
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm)
 	return acpi_map_pxm_to_node(pxm);
 }
 
-static __init int conflicting_nodes(unsigned long start, unsigned long end)
+static __init int conflicting_memblks(unsigned long start, unsigned long end)
 {
 	int i;
-	for_each_node_mask(i, nodes_parsed) {
-		struct bootnode *nd = &nodes[i];
+	for (i = 0; i < num_node_memblks; i++) {
+		struct bootnode *nd = &node_memblk_range[i];
 		if (nd->start == nd->end)
 			continue;
 		if (nd->end > start && nd->start < end)
-			return i;
+			return memblk_nodeid[i];
 		if (nd->end == end && nd->start == start)
-			return i;
+			return memblk_nodeid[i];
 	}
 	return -1;
 }
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		bad_srat();
 		return;
 	}
-	i = conflicting_nodes(start, end);
+	i = conflicting_memblks(start, end);
 	if (i == node) {
 		printk(KERN_WARNING
 		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		nd->end = end;
 	}
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
-	       nd->start, nd->end);
-	e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
-				     nd->end >> PAGE_SHIFT);
+	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	       start, end);
+	e820_register_active_regions(node, start >> PAGE_SHIFT,
+				     end >> PAGE_SHIFT);
 	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
 			     nd->end >> PAGE_SHIFT);
 
@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
 	}
+
+	node_memblk_range[num_node_memblks].start = start;
+	node_memblk_range[num_node_memblks].end = end;
+	memblk_nodeid[num_node_memblks] = node;
+	num_node_memblks++;
 }
 
 /* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 	}
 
-	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
+					   memblk_nodeid);
 	if (memnode_shift < 0) {
 		printk(KERN_ERR
 		       "SRAT: No NUMA node hash function found. Contact maintainer\n");
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 1f11cf0a307f..cc48d3fde545 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -23,8 +23,8 @@
 #include "op_x86_model.h"
 
 static struct op_x86_model_spec const *model;
-static struct op_msrs cpu_msrs[NR_CPUS];
-static unsigned long saved_lvtpc[NR_CPUS];
+static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
+static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
 
 static int nmi_start(void);
 static void nmi_stop(void);
@@ -89,7 +89,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
 
 	switch (val) {
 	case DIE_NMI:
-		if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
+		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
 			ret = NOTIFY_STOP;
 		break;
 	default:
@@ -126,7 +126,7 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs)
 static void nmi_save_registers(void *dummy)
 {
 	int cpu = smp_processor_id();
-	struct op_msrs *msrs = &cpu_msrs[cpu];
+	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 	nmi_cpu_save_registers(msrs);
 }
 
@@ -134,10 +134,10 @@ static void free_msrs(void)
 {
 	int i;
 	for_each_possible_cpu(i) {
-		kfree(cpu_msrs[i].counters);
-		cpu_msrs[i].counters = NULL;
-		kfree(cpu_msrs[i].controls);
-		cpu_msrs[i].controls = NULL;
+		kfree(per_cpu(cpu_msrs, i).counters);
+		per_cpu(cpu_msrs, i).counters = NULL;
+		kfree(per_cpu(cpu_msrs, i).controls);
+		per_cpu(cpu_msrs, i).controls = NULL;
 	}
 }
 
@@ -149,13 +149,15 @@ static int allocate_msrs(void)
 
 	int i;
 	for_each_possible_cpu(i) {
-		cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
-		if (!cpu_msrs[i].counters) {
+		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
+							GFP_KERNEL);
+		if (!per_cpu(cpu_msrs, i).counters) {
 			success = 0;
 			break;
 		}
-		cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
-		if (!cpu_msrs[i].controls) {
+		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
+							GFP_KERNEL);
+		if (!per_cpu(cpu_msrs, i).controls) {
 			success = 0;
 			break;
 		}
@@ -170,11 +172,11 @@ static int allocate_msrs(void)
 static void nmi_cpu_setup(void *dummy)
 {
 	int cpu = smp_processor_id();
-	struct op_msrs *msrs = &cpu_msrs[cpu];
+	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 	spin_lock(&oprofilefs_lock);
 	model->setup_ctrs(msrs);
 	spin_unlock(&oprofilefs_lock);
-	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
180 182
@@ -203,13 +205,15 @@ static int nmi_setup(void)
 	 */
 
 	/* Assume saved/restored counters are the same on all CPUs */
-	model->fill_in_addresses(&cpu_msrs[0]);
+	model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
 	for_each_possible_cpu(cpu) {
 		if (cpu != 0) {
-			memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
+			memcpy(per_cpu(cpu_msrs, cpu).counters,
+			       per_cpu(cpu_msrs, 0).counters,
 			       sizeof(struct op_msr) * model->num_counters);
 
-			memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
+			memcpy(per_cpu(cpu_msrs, cpu).controls,
+			       per_cpu(cpu_msrs, 0).controls,
 			       sizeof(struct op_msr) * model->num_controls);
 		}
 
@@ -249,7 +253,7 @@ static void nmi_cpu_shutdown(void *dummy)
 {
 	unsigned int v;
 	int cpu = smp_processor_id();
-	struct op_msrs *msrs = &cpu_msrs[cpu];
+	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
 
 	/* restoring APIC_LVTPC can trigger an apic error because the delivery
 	 * mode and vector nr combination can be illegal. That's by design: on
@@ -258,23 +262,24 @@ static void nmi_cpu_shutdown(void *dummy)
 	 */
 	v = apic_read(APIC_LVTERR);
 	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 	apic_write(APIC_LVTERR, v);
 	nmi_restore_registers(msrs);
 }
 
 static void nmi_shutdown(void)
 {
+	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
 	nmi_enabled = 0;
 	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
 	unregister_die_notifier(&profile_exceptions_nb);
-	model->shutdown(cpu_msrs);
+	model->shutdown(msrs);
 	free_msrs();
 }
 
 static void nmi_cpu_start(void *dummy)
 {
-	struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
+	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
 	model->start(msrs);
 }
 
@@ -286,7 +291,7 @@ static int nmi_start(void)
 
 static void nmi_cpu_stop(void *dummy)
 {
-	struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
+	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
 	model->stop(msrs);
 }
 
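
This conversion replaces NR_CPUS-sized arrays with true per-CPU variables: storage is instantiated only for CPUs that can exist, and each CPU's data lives in its own per-CPU area instead of sharing cache lines in one global array. A minimal sketch of the accessor pattern as used by kernels of this vintage — struct demo_msrs is a hypothetical stand-in for struct op_msrs:

#include <linux/percpu.h>

/* Stand-in for struct op_msrs from op_x86_model.h. */
struct demo_msrs {
	void *counters;
	void *controls;
};

/* Before: static struct demo_msrs demo[NR_CPUS];
 * After: one instance per possible CPU in the per-CPU area. */
static DEFINE_PER_CPU(struct demo_msrs, demo);

static void demo_access(int cpu)
{
	struct demo_msrs *remote = &per_cpu(demo, cpu);	/* any CPU's copy */
	struct demo_msrs *local = &__get_cpu_var(demo);	/* this CPU's copy */

	remote->counters = NULL;
	local->controls = NULL;
}

per_cpu() addresses a named CPU's copy (as in the kfree loop above), while __get_cpu_var() resolves to the executing CPU's copy, replacing the cpu_msrs[smp_processor_id()] idiom in the start/stop paths.
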
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 17a6b057856b..b7ad9f89d21f 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -37,7 +37,8 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
-CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
+CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
+       $(filter -g%,$(KBUILD_CFLAGS))
 
 $(vobjs): KBUILD_CFLAGS += $(CFL)
 
43 44
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
index 48fb38d7d2c0..4db42bff8c60 100644
--- a/arch/x86/video/fbdev.c
+++ b/arch/x86/video/fbdev.c
@@ -1,5 +1,4 @@
 /*
- * arch/i386/video/fbdev.c - i386 Framebuffer
  *
  * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com>
  *