aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-04-21 18:38:43 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-04-21 18:38:43 -0400
commit 5f033bb9bc5cb3bb37a79e3ef131f50ecdcb72b0 (patch)
tree 16c6fdc3fab80d88ea4d8fd7eb302097f97c062c
parent fd9be4ce2e1eb407a8152f823698cc0d652bbec8 (diff)
parent 34d0559178393547505ec9492321255405f4e441 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (77 commits)
  x86: UV startup of slave cpus
  x86: integrate pci-dma.c
  x86: don't do dma if mask is NULL.
  x86: return conditional to mmu
  x86: remove kludge from x86_64
  x86: unify gfp masks
  x86: retry allocation if failed
  x86: don't try to allocate from DMA zone at first
  x86: use a fallback dev for i386
  x86: use numa allocation function in i386
  x86: remove virt_to_bus in pci-dma_64.c
  x86: adjust dma_free_coherent for i386
  x86: move bad_dma_address
  x86: isolate coherent mapping functions
  x86: move dma_coherent functions to pci-dma.c
  x86: merge iommu initialization parameters
  x86: merge dma_supported
  x86: move pci fixup to pci-dma.c
  x86: move x86_64-specific to common code.
  x86: move initialization functions to pci-dma.c
  ...

-rw-r--r--Documentation/feature-removal-schedule.txt7
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c96
-rw-r--r--Documentation/prctl/disable-tsc-on-off-stress-test.c95
-rw-r--r--Documentation/prctl/disable-tsc-test.c94
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/boot/a20.c2
-rw-r--r--arch/x86/boot/apm.c2
-rw-r--r--arch/x86/boot/bitops.h2
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/cmdline.c2
-rw-r--r--arch/x86/boot/compressed/head_32.S15
-rw-r--r--arch/x86/boot/compressed/head_64.S30
-rw-r--r--arch/x86/boot/compressed/misc.c8
-rw-r--r--arch/x86/boot/compressed/vmlinux_64.lds4
-rw-r--r--arch/x86/boot/copy.S2
-rw-r--r--arch/x86/boot/cpucheck.c2
-rw-r--r--arch/x86/boot/edd.c2
-rw-r--r--arch/x86/boot/install.sh2
-rw-r--r--arch/x86/boot/main.c2
-rw-r--r--arch/x86/boot/mca.c2
-rw-r--r--arch/x86/boot/memory.c2
-rw-r--r--arch/x86/boot/pm.c2
-rw-r--r--arch/x86/boot/pmjump.S2
-rw-r--r--arch/x86/boot/printf.c2
-rw-r--r--arch/x86/boot/string.c2
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/boot/version.c2
-rw-r--r--arch/x86/boot/video-bios.c2
-rw-r--r--arch/x86/boot/video-vesa.c2
-rw-r--r--arch/x86/boot/video-vga.c2
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/boot/video.h2
-rw-r--r--arch/x86/boot/voyager.c2
-rw-r--r--arch/x86/kernel/Makefile9
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/processor.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/e820_32.c4
-rw-r--r--arch/x86/kernel/e820_64.c4
-rw-r--r--arch/x86/kernel/efi.c18
-rw-r--r--arch/x86/kernel/efi_64.c12
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c17
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/i387.c114
-rw-r--r--arch/x86/kernel/kgdb.c6
-rw-r--r--arch/x86/kernel/nmi_32.c3
-rw-r--r--arch/x86/kernel/nmi_64.c6
-rw-r--r--arch/x86/kernel/pci-calgary_64.c3
-rw-r--r--arch/x86/kernel/pci-dma.c (renamed from arch/x86/kernel/pci-dma_64.c)546
-rw-r--r--arch/x86/kernel/pci-dma_32.c177
-rw-r--r--arch/x86/kernel/pci-gart_64.c15
-rw-r--r--arch/x86/kernel/pci-nommu.c (renamed from arch/x86/kernel/pci-nommu_64.c)34
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c9
-rw-r--r--arch/x86/kernel/process.c44
-rw-r--r--arch/x86/kernel/process_32.c50
-rw-r--r--arch/x86/kernel/process_64.c74
-rw-r--r--arch/x86/kernel/setup64.c4
-rw-r--r--arch/x86/kernel/setup_32.c4
-rw-r--r--arch/x86/kernel/setup_64.c9
-rw-r--r--arch/x86/kernel/smpboot.c29
-rw-r--r--arch/x86/kernel/traps_32.c35
-rw-r--r--arch/x86/kernel/traps_64.c36
-rw-r--r--arch/x86/kernel/tsc_32.c23
-rw-r--r--arch/x86/kernel/tsc_64.c23
-rw-r--r--arch/x86/mach-visws/visws_apic.c2
-rw-r--r--arch/x86/mach-voyager/voyager_basic.c2
-rw-r--r--arch/x86/mach-voyager/voyager_cat.c2
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c2
-rw-r--r--arch/x86/mach-voyager/voyager_thread.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c4
-rw-r--r--arch/x86/math-emu/fpu_system.h26
-rw-r--r--arch/x86/math-emu/reg_ld_str.c4
-rw-r--r--arch/x86/mm/discontig_32.c6
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/mm/ioremap.c5
-rw-r--r--arch/x86/mm/k8topology_64.c2
-rw-r--r--arch/x86/mm/numa_64.c16
-rw-r--r--arch/x86/mm/pgtable_32.c4
-rw-r--r--arch/x86/mm/srat_64.c32
-rw-r--r--arch/x86/vdso/Makefile3
-rw-r--r--arch/x86/video/fbdev.c1
-rw-r--r--include/asm-x86/boot.h8
-rw-r--r--include/asm-x86/dma-mapping.h238
-rw-r--r--include/asm-x86/dma-mapping_32.h187
-rw-r--r--include/asm-x86/dma-mapping_64.h202
-rw-r--r--include/asm-x86/e820_32.h2
-rw-r--r--include/asm-x86/genapic_32.h1
-rw-r--r--include/asm-x86/i387.h37
-rw-r--r--include/asm-x86/numa_64.h3
-rw-r--r--include/asm-x86/pci_64.h1
-rw-r--r--include/asm-x86/processor.h16
-rw-r--r--include/asm-x86/scatterlist.h2
-rw-r--r--include/asm-x86/thread_info.h9
-rw-r--r--include/asm-x86/thread_info_32.h2
-rw-r--r--include/asm-x86/thread_info_64.h6
-rw-r--r--include/asm-x86/tsc.h1
-rw-r--r--include/linux/efi.h7
-rw-r--r--include/linux/irqflags.h6
-rw-r--r--include/linux/prctl.h6
-rw-r--r--kernel/fork.c35
-rw-r--r--kernel/sys.c13
-rw-r--r--kernel/time/timekeeping.c2
107 files changed, 1549 insertions, 1089 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index af0e9393bf68..309c47b91598 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -282,6 +282,13 @@ Why: Not used in-tree. The current out-of-tree users used it to
282 out-of-tree driver. 282 out-of-tree driver.
283Who: Thomas Gleixner <tglx@linutronix.de> 283Who: Thomas Gleixner <tglx@linutronix.de>
284 284
285----------------------------
286
287What: usedac i386 kernel parameter
288When: 2.6.27
289Why: replaced by allowdac and no dac combination
290Who: Glauber Costa <gcosta@redhat.com>
291
285--------------------------- 292---------------------------
286 293
287What: /sys/o2cb symlink 294What: /sys/o2cb symlink
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4b0f1ae31a4c..f4839606988b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1280,8 +1280,16 @@ and is between 256 and 4096 characters. It is defined in the file
1280 noexec [IA-64] 1280 noexec [IA-64]
1281 1281
1282 noexec [X86-32,X86-64] 1282 noexec [X86-32,X86-64]
1283 On X86-32 available only on PAE configured kernels.
1283 noexec=on: enable non-executable mappings (default) 1284 noexec=on: enable non-executable mappings (default)
1284 noexec=off: disable nn-executable mappings 1285 noexec=off: disable non-executable mappings
1286
1287 noexec32 [X86-64]
1288 This affects only 32-bit executables.
1289 noexec32=on: enable non-executable mappings (default)
1290 read doesn't imply executable mappings
1291 noexec32=off: disable non-executable mappings
1292 read implies executable mappings
1285 1293
1286 nofxsr [BUGS=X86-32] Disables x86 floating point extended 1294 nofxsr [BUGS=X86-32] Disables x86 floating point extended
1287 register save and restore. The kernel will only save 1295 register save and restore. The kernel will only save
diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000000..f8e8e95e81fd
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,96 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Tests if the control register is updated correctly
5 * at context switches
6 *
7 * Warning: this test will cause a very high load for a few seconds
8 *
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <unistd.h>
14#include <signal.h>
15#include <inttypes.h>
16#include <wait.h>
17
18
19#include <sys/prctl.h>
20#include <linux/prctl.h>
21
22/* Get/set the process' ability to use the timestamp counter instruction */
23#ifndef PR_GET_TSC
24#define PR_GET_TSC 25
25#define PR_SET_TSC 26
26# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
27# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
28#endif
29
30uint64_t rdtsc() {
31uint32_t lo, hi;
32/* We cannot use "=A", since this would use %rax on x86_64 */
33__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
34return (uint64_t)hi << 32 | lo;
35}
36
37void sigsegv_expect(int sig)
38{
39 /* */
40}
41
42void segvtask(void)
43{
44 if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
45 {
46 perror("prctl");
47 exit(0);
48 }
49 signal(SIGSEGV, sigsegv_expect);
50 alarm(10);
51 rdtsc();
52 fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
53 exit(0);
54}
55
56
57void sigsegv_fail(int sig)
58{
59 fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
60 exit(0);
61}
62
63void rdtsctask(void)
64{
65 if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
66 {
67 perror("prctl");
68 exit(0);
69 }
70 signal(SIGSEGV, sigsegv_fail);
71 alarm(10);
72 for(;;) rdtsc();
73}
74
75
76int main(int argc, char **argv)
77{
78 int n_tasks = 100, i;
79
80 fprintf(stderr, "[No further output means we're allright]\n");
81
82 for (i=0; i<n_tasks; i++)
83 if (fork() == 0)
84 {
85 if (i & 1)
86 segvtask();
87 else
88 rdtsctask();
89 }
90
91 for (i=0; i<n_tasks; i++)
92 wait(NULL);
93
94 exit(0);
95}
96
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000000..1fcd91445375
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,95 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Tests if the control register is updated correctly
5 * when set with prctl()
6 *
7 * Warning: this test will cause a very high load for a few seconds
8 *
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <unistd.h>
14#include <signal.h>
15#include <inttypes.h>
16#include <wait.h>
17
18
19#include <sys/prctl.h>
20#include <linux/prctl.h>
21
22/* Get/set the process' ability to use the timestamp counter instruction */
23#ifndef PR_GET_TSC
24#define PR_GET_TSC 25
25#define PR_SET_TSC 26
26# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
27# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
28#endif
29
30/* snippet from wikipedia :-) */
31
32uint64_t rdtsc() {
33uint32_t lo, hi;
34/* We cannot use "=A", since this would use %rax on x86_64 */
35__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
36return (uint64_t)hi << 32 | lo;
37}
38
39int should_segv = 0;
40
41void sigsegv_cb(int sig)
42{
43 if (!should_segv)
44 {
45 fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
46 exit(0);
47 }
48 if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
49 {
50 perror("prctl");
51 exit(0);
52 }
53 should_segv = 0;
54
55 rdtsc();
56}
57
58void task(void)
59{
60 signal(SIGSEGV, sigsegv_cb);
61 alarm(10);
62 for(;;)
63 {
64 rdtsc();
65 if (should_segv)
66 {
67 fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
68 exit(0);
69 }
70 if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
71 {
72 perror("prctl");
73 exit(0);
74 }
75 should_segv = 1;
76 }
77}
78
79
80int main(int argc, char **argv)
81{
82 int n_tasks = 100, i;
83
84 fprintf(stderr, "[No further output means we're allright]\n");
85
86 for (i=0; i<n_tasks; i++)
87 if (fork() == 0)
88 task();
89
90 for (i=0; i<n_tasks; i++)
91 wait(NULL);
92
93 exit(0);
94}
95
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c
new file mode 100644
index 000000000000..843c81eac235
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-test.c
@@ -0,0 +1,94 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
5 */
6
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <signal.h>
11#include <inttypes.h>
12
13
14#include <sys/prctl.h>
15#include <linux/prctl.h>
16
17/* Get/set the process' ability to use the timestamp counter instruction */
18#ifndef PR_GET_TSC
19#define PR_GET_TSC 25
20#define PR_SET_TSC 26
21# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
22# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
23#endif
24
25const char *tsc_names[] =
26{
27 [0] = "[not set]",
28 [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
29 [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
30};
31
32uint64_t rdtsc() {
33uint32_t lo, hi;
34/* We cannot use "=A", since this would use %rax on x86_64 */
35__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
36return (uint64_t)hi << 32 | lo;
37}
38
39void sigsegv_cb(int sig)
40{
41 int tsc_val = 0;
42
43 printf("[ SIG_SEGV ]\n");
44 printf("prctl(PR_GET_TSC, &tsc_val); ");
45 fflush(stdout);
46
47 if ( prctl(PR_GET_TSC, &tsc_val) == -1)
48 perror("prctl");
49
50 printf("tsc_val == %s\n", tsc_names[tsc_val]);
51 printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
52 fflush(stdout);
53 if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
54 perror("prctl");
55
56 printf("rdtsc() == ");
57}
58
59int main(int argc, char **argv)
60{
61 int tsc_val = 0;
62
63 signal(SIGSEGV, sigsegv_cb);
64
65 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
66 printf("prctl(PR_GET_TSC, &tsc_val); ");
67 fflush(stdout);
68
69 if ( prctl(PR_GET_TSC, &tsc_val) == -1)
70 perror("prctl");
71
72 printf("tsc_val == %s\n", tsc_names[tsc_val]);
73 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
74 printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
75 fflush(stdout);
76
77 if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
78 perror("prctl");
79
80 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
81 printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
82 fflush(stdout);
83
84 if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
85 perror("prctl");
86
87 printf("rdtsc() == ");
88 fflush(stdout);
89 printf("%llu\n", (unsigned long long)rdtsc());
90 fflush(stdout);
91
92 exit(EXIT_SUCCESS);
93}
94
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2a59dbb28248..07cf77113565 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA
903 help 903 help
904 Enable ACPI SRAT based node topology detection. 904 Enable ACPI SRAT based node topology detection.
905 905
906# Some NUMA nodes have memory ranges that span
907# other nodes. Even though a pfn is valid and
908# between a node's start and end pfns, it may not
909# reside on that node. See memmap_init_zone()
910# for details.
911config NODES_SPAN_OTHER_NODES
912 def_bool y
913 depends on X86_64_ACPI_NUMA
914
906config NUMA_EMU 915config NUMA_EMU
907 bool "NUMA emulation" 916 bool "NUMA emulation"
908 depends on X86_64 && NUMA 917 depends on X86_64 && NUMA
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 31348d054fca..90943f83e84d 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/a20.c
13 *
14 * Enable A20 gate (return -1 on failure) 12 * Enable A20 gate (return -1 on failure)
15 */ 13 */
16 14
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index c117c7fb859c..7aa6033001f9 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -12,8 +12,6 @@
12 * ----------------------------------------------------------------------- */ 12 * ----------------------------------------------------------------------- */
13 13
14/* 14/*
15 * arch/i386/boot/apm.c
16 *
17 * Get APM BIOS information 15 * Get APM BIOS information
18 */ 16 */
19 17
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 8dcc8dc7db88..878e4b9940d9 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/bitops.h
13 *
14 * Very simple bitops for the boot code. 12 * Very simple bitops for the boot code.
15 */ 13 */
16 14
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 09578070bfba..a34b9982c7cb 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/boot.h
13 *
14 * Header file for the real-mode kernel code 12 * Header file for the real-mode kernel code
15 */ 13 */
16 14
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 680408a0f463..a1d35634bce0 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/cmdline.c
13 *
14 * Simple command-line parser for early boot. 12 * Simple command-line parser for early boot.
15 */ 13 */
16 14
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 036e635f18a3..ba7736cf2ec7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -130,7 +130,7 @@ relocated:
130/* 130/*
131 * Setup the stack for the decompressor 131 * Setup the stack for the decompressor
132 */ 132 */
133 leal stack_end(%ebx), %esp 133 leal boot_stack_end(%ebx), %esp
134 134
135/* 135/*
136 * Do the decompression, and jump to the new kernel.. 136 * Do the decompression, and jump to the new kernel..
@@ -142,8 +142,8 @@ relocated:
142 pushl %eax # input_len 142 pushl %eax # input_len
143 leal input_data(%ebx), %eax 143 leal input_data(%ebx), %eax
144 pushl %eax # input_data 144 pushl %eax # input_data
145 leal _end(%ebx), %eax 145 leal boot_heap(%ebx), %eax
146 pushl %eax # end of the image as third argument 146 pushl %eax # heap area as third argument
147 pushl %esi # real mode pointer as second arg 147 pushl %esi # real mode pointer as second arg
148 call decompress_kernel 148 call decompress_kernel
149 addl $20, %esp 149 addl $20, %esp
@@ -181,7 +181,10 @@ relocated:
181 jmp *%ebp 181 jmp *%ebp
182 182
183.bss 183.bss
184/* Stack and heap for uncompression */
184.balign 4 185.balign 4
185stack: 186boot_heap:
186 .fill 4096, 1, 0 187 .fill BOOT_HEAP_SIZE, 1, 0
187stack_end: 188boot_stack:
189 .fill BOOT_STACK_SIZE, 1, 0
190boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index e8657b98c902..d8819efac81d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -28,6 +28,7 @@
28#include <asm/segment.h> 28#include <asm/segment.h>
29#include <asm/pgtable.h> 29#include <asm/pgtable.h>
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/boot.h>
31#include <asm/msr.h> 32#include <asm/msr.h>
32#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
33 34
@@ -62,7 +63,7 @@ startup_32:
62 subl $1b, %ebp 63 subl $1b, %ebp
63 64
64/* setup a stack and make sure cpu supports long mode. */ 65/* setup a stack and make sure cpu supports long mode. */
65 movl $user_stack_end, %eax 66 movl $boot_stack_end, %eax
66 addl %ebp, %eax 67 addl %ebp, %eax
67 movl %eax, %esp 68 movl %eax, %esp
68 69
@@ -243,9 +244,9 @@ ENTRY(startup_64)
243/* Copy the compressed kernel to the end of our buffer 244/* Copy the compressed kernel to the end of our buffer
244 * where decompression in place becomes safe. 245 * where decompression in place becomes safe.
245 */ 246 */
246 leaq _end(%rip), %r8 247 leaq _end_before_pgt(%rip), %r8
247 leaq _end(%rbx), %r9 248 leaq _end_before_pgt(%rbx), %r9
248 movq $_end /* - $startup_32 */, %rcx 249 movq $_end_before_pgt /* - $startup_32 */, %rcx
2491: subq $8, %r8 2501: subq $8, %r8
250 subq $8, %r9 251 subq $8, %r9
251 movq 0(%r8), %rax 252 movq 0(%r8), %rax
@@ -267,14 +268,14 @@ relocated:
267 */ 268 */
268 xorq %rax, %rax 269 xorq %rax, %rax
269 leaq _edata(%rbx), %rdi 270 leaq _edata(%rbx), %rdi
270 leaq _end(%rbx), %rcx 271 leaq _end_before_pgt(%rbx), %rcx
271 subq %rdi, %rcx 272 subq %rdi, %rcx
272 cld 273 cld
273 rep 274 rep
274 stosb 275 stosb
275 276
276 /* Setup the stack */ 277 /* Setup the stack */
277 leaq user_stack_end(%rip), %rsp 278 leaq boot_stack_end(%rip), %rsp
278 279
279 /* zero EFLAGS after setting rsp */ 280 /* zero EFLAGS after setting rsp */
280 pushq $0 281 pushq $0
@@ -285,7 +286,7 @@ relocated:
285 */ 286 */
286 pushq %rsi # Save the real mode argument 287 pushq %rsi # Save the real mode argument
287 movq %rsi, %rdi # real mode address 288 movq %rsi, %rdi # real mode address
288 leaq _heap(%rip), %rsi # _heap 289 leaq boot_heap(%rip), %rsi # malloc area for uncompression
289 leaq input_data(%rip), %rdx # input_data 290 leaq input_data(%rip), %rdx # input_data
290 movl input_len(%rip), %eax 291 movl input_len(%rip), %eax
291 movq %rax, %rcx # input_len 292 movq %rax, %rcx # input_len
@@ -310,9 +311,12 @@ gdt:
310 .quad 0x0080890000000000 /* TS descriptor */ 311 .quad 0x0080890000000000 /* TS descriptor */
311 .quad 0x0000000000000000 /* TS continued */ 312 .quad 0x0000000000000000 /* TS continued */
312gdt_end: 313gdt_end:
313 .bss 314
314/* Stack for uncompression */ 315.bss
315 .balign 4 316/* Stack and heap for uncompression */
316user_stack: 317.balign 4
317 .fill 4096,4,0 318boot_heap:
318user_stack_end: 319 .fill BOOT_HEAP_SIZE, 1, 0
320boot_stack:
321 .fill BOOT_STACK_SIZE, 1, 0
322boot_stack_end:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dad4e699f5a3..90456cee47c3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -217,12 +217,6 @@ static void putstr(const char *);
217static memptr free_mem_ptr; 217static memptr free_mem_ptr;
218static memptr free_mem_end_ptr; 218static memptr free_mem_end_ptr;
219 219
220#ifdef CONFIG_X86_64
221#define HEAP_SIZE 0x7000
222#else
223#define HEAP_SIZE 0x4000
224#endif
225
226static char *vidmem; 220static char *vidmem;
227static int vidport; 221static int vidport;
228static int lines, cols; 222static int lines, cols;
@@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
449 443
450 window = output; /* Output buffer (Normally at 1M) */ 444 window = output; /* Output buffer (Normally at 1M) */
451 free_mem_ptr = heap; /* Heap */ 445 free_mem_ptr = heap; /* Heap */
452 free_mem_end_ptr = heap + HEAP_SIZE; 446 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
453 inbuf = input_data; /* Input buffer */ 447 inbuf = input_data; /* Input buffer */
454 insize = input_len; 448 insize = input_len;
455 inptr = 0; 449 inptr = 0;
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
index 7e5c7209f6cc..bef1ac891bce 100644
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ b/arch/x86/boot/compressed/vmlinux_64.lds
@@ -39,10 +39,10 @@ SECTIONS
39 *(.bss.*) 39 *(.bss.*)
40 *(COMMON) 40 *(COMMON)
41 . = ALIGN(8); 41 . = ALIGN(8);
42 _end = . ; 42 _end_before_pgt = . ;
43 . = ALIGN(4096); 43 . = ALIGN(4096);
44 pgtable = . ; 44 pgtable = . ;
45 . = . + 4096 * 6; 45 . = . + 4096 * 6;
46 _heap = .; 46 _ebss = .;
47 } 47 }
48} 48}
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index ef127e56a3cf..ef50c84e8b4b 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/copy.S
13 *
14 * Memory copy routines 12 * Memory copy routines
15 */ 13 */
16 14
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 2462c88689ed..7804389ee005 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/cpucheck.c
13 *
14 * Check for obligatory CPU features and abort if the features are not 12 * Check for obligatory CPU features and abort if the features are not
15 * present. This code should be compilable as 16-, 32- or 64-bit 13 * present. This code should be compilable as 16-, 32- or 64-bit
16 * code, so be very careful with types and inline assembly. 14 * code, so be very careful with types and inline assembly.
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 8721dc46a0b6..d84a48ece785 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/edd.c
13 *
14 * Get EDD BIOS disk information 12 * Get EDD BIOS disk information
15 */ 13 */
16 14
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 88d77761d01b..8d60ee15dfd9 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -1,7 +1,5 @@
1#!/bin/sh 1#!/bin/sh
2# 2#
3# arch/i386/boot/install.sh
4#
5# This file is subject to the terms and conditions of the GNU General Public 3# This file is subject to the terms and conditions of the GNU General Public
6# License. See the file "COPYING" in the main directory of this archive 4# License. See the file "COPYING" in the main directory of this archive
7# for more details. 5# for more details.
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 7828da5cfd07..77569a4a3be1 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/main.c
13 *
14 * Main module for the real-mode kernel code 12 * Main module for the real-mode kernel code
15 */ 13 */
16 14
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 68222f2d4b67..911eaae5d696 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/mca.c
13 *
14 * Get the MCA system description table 12 * Get the MCA system description table
15 */ 13 */
16 14
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index e77d89f9e8aa..acad32eb4290 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/memory.c
13 *
14 * Memory detection code 12 * Memory detection code
15 */ 13 */
16 14
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index a93cb8bded4d..328956fdb59e 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/pm.c
13 *
14 * Prepare the machine for transition to protected mode. 12 * Prepare the machine for transition to protected mode.
15 */ 13 */
16 14
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index f5402d51f7c3..ab049d40a884 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/pmjump.S
13 *
14 * The actual transition into protected mode 12 * The actual transition into protected mode
15 */ 13 */
16 14
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 7e7e890699be..c1d00c0274c4 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/printf.c
13 *
14 * Oh, it's a waste of space, but oh-so-yummy for debugging. This 12 * Oh, it's a waste of space, but oh-so-yummy for debugging. This
15 * version of printf() does not include 64-bit support. "Live with 13 * version of printf() does not include 64-bit support. "Live with
16 * it." 14 * it."
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 481a22097781..f94b7a0c2abf 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/string.c
13 *
14 * Very basic string functions 12 * Very basic string functions
15 */ 13 */
16 14
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index f3f14bd26371..0be77b39328a 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/tty.c
13 *
14 * Very simple screen I/O 12 * Very simple screen I/O
15 * XXX: Probably should add very simple serial I/O? 13 * XXX: Probably should add very simple serial I/O?
16 */ 14 */
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index c61462f7d9a7..2723d9b5ce43 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/version.c
13 *
14 * Kernel version string 12 * Kernel version string
15 */ 13 */
16 14
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 39e247e96172..49f26aaaebc8 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/video-bios.c
13 *
14 * Standard video BIOS modes 12 * Standard video BIOS modes
15 * 13 *
16 * We have two options for this; silent and scanned. 14 * We have two options for this; silent and scanned.
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 5d5a3f6e8b5c..401ad998ad08 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/video-vesa.c
13 *
14 * VESA text modes 12 * VESA text modes
15 */ 13 */
16 14
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 330d6589a2ad..40ecb8d7688c 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/video-vga.c
13 *
14 * Common all-VGA modes 12 * Common all-VGA modes
15 */ 13 */
16 14
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index c1c47ba069ef..83598b23093a 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/video.c
13 *
14 * Select video mode 12 * Select video mode
15 */ 13 */
16 14
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index d69347f79e8e..ee63f5d14461 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/video.h
13 *
14 * Header file for the real-mode video probing code 12 * Header file for the real-mode video probing code
15 */ 13 */
16 14
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
index 6499e3239b41..433909d61e5c 100644
--- a/arch/x86/boot/voyager.c
+++ b/arch/x86/boot/voyager.c
@@ -9,8 +9,6 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/voyager.c
13 *
14 * Get the Voyager config information 12 * Get the Voyager config information
15 */ 13 */
16 14
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c3920ea8ac56..90e092d0af0c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,13 +22,14 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o 24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
25obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o 25obj-y += bootflag.o e820_$(BITS).o
26obj-y += quirks.o i8237.o topology.o kdebugfs.o 26obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
27obj-y += alternative.o i8253.o 27obj-y += alternative.o i8253.o pci-nommu.o
28obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o 28obj-$(CONFIG_X86_64) += bugs_64.o
29obj-y += tsc_$(BITS).o io_delay.o rtc.o 29obj-y += tsc_$(BITS).o io_delay.o rtc.o
30 30
31obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 31obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
32obj-y += process.o
32obj-y += i387.o 33obj-y += i387.o
33obj-y += ptrace.o 34obj-y += ptrace.o
34obj-y += ds.o 35obj-y += ds.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8ca3557a6d59..9366fb68d8d8 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * arch/i386/kernel/acpi/cstate.c
3 *
4 * Copyright (C) 2005 Intel Corporation 2 * Copyright (C) 2005 Intel Corporation
5 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 3 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
6 * - Added _PDC for SMP C-states on Intel CPUs 4 * - Added _PDC for SMP C-states on Intel CPUs
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 324eb0cab19c..de2d2e4ebad9 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * arch/i386/kernel/acpi/processor.c
3 *
4 * Copyright (C) 2005 Intel Corporation 2 * Copyright (C) 2005 Intel Corporation
5 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 3 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
6 * - Added _PDC for platforms with Intel CPUs 4 * - Added _PDC for platforms with Intel CPUs
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 14791ec55cfd..199e4e05e5dc 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void)
289 if (c->x86_vendor != X86_VENDOR_INTEL) 289 if (c->x86_vendor != X86_VENDOR_INTEL)
290 return -ENODEV; 290 return -ENODEV;
291 291
292 if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || 292 if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
293 !test_bit(X86_FEATURE_ACC, c->x86_capability)) 293 !test_cpu_cap(c, X86_FEATURE_ACC))
294 return -ENODEV; 294 return -ENODEV;
295 295
296 ret = cpufreq_register_driver(&p4clockmod_driver); 296 ret = cpufreq_register_driver(&p4clockmod_driver);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 9b7e01daa1ca..1f4cc48c14c6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,5 +1,4 @@
1/* 1/*
2 * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
3 * 2 *
4 * Thermal throttle event support code (such as syslog messaging and rate 3 * Thermal throttle event support code (such as syslog messaging and rate
5 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). 4 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 0240cd778365..ed733e7cf4e6 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
475/* 475/*
476 * Find the highest page frame number we have available 476 * Find the highest page frame number we have available
477 */ 477 */
478void __init find_max_pfn(void) 478void __init propagate_e820_map(void)
479{ 479{
480 int i; 480 int i;
481 481
@@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg)
704 * size before original memory map is 704 * size before original memory map is
705 * reset. 705 * reset.
706 */ 706 */
707 find_max_pfn(); 707 propagate_e820_map();
708 saved_max_pfn = max_pfn; 708 saved_max_pfn = max_pfn;
709#endif 709#endif
710 e820.nr_map = 0; 710 e820.nr_map = 0;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 7f6c0c85c8f6..cbd42e51cb08 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void)
96} 96}
97 97
98/* Check for already reserved areas */ 98/* Check for already reserved areas */
99static inline int 99static inline int __init
100bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) 100bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
101{ 101{
102 int i; 102 int i;
@@ -116,7 +116,7 @@ again:
116} 116}
117 117
118/* Check for already reserved areas */ 118/* Check for already reserved areas */
119static inline int 119static inline int __init
120bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) 120bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
121{ 121{
122 int i; 122 int i;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 759e02bec070..77d424cf68b3 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void)
383{ 383{
384 efi_memory_desc_t *md; 384 efi_memory_desc_t *md;
385 void *p; 385 void *p;
386 u64 addr, npages;
386 387
387 /* Make EFI runtime service code area executable */ 388 /* Make EFI runtime service code area executable */
388 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 389 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void)
391 if (md->type != EFI_RUNTIME_SERVICES_CODE) 392 if (md->type != EFI_RUNTIME_SERVICES_CODE)
392 continue; 393 continue;
393 394
394 set_memory_x(md->virt_addr, md->num_pages); 395 addr = md->virt_addr;
396 npages = md->num_pages;
397 memrange_efi_to_native(&addr, &npages);
398 set_memory_x(addr, npages);
395 } 399 }
396} 400}
397 401
@@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void)
408 efi_memory_desc_t *md; 412 efi_memory_desc_t *md;
409 efi_status_t status; 413 efi_status_t status;
410 unsigned long size; 414 unsigned long size;
411 u64 end, systab; 415 u64 end, systab, addr, npages;
412 void *p, *va; 416 void *p, *va;
413 417
414 efi.systab = NULL; 418 efi.systab = NULL;
@@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void)
420 size = md->num_pages << EFI_PAGE_SHIFT; 424 size = md->num_pages << EFI_PAGE_SHIFT;
421 end = md->phys_addr + size; 425 end = md->phys_addr + size;
422 426
423 if ((end >> PAGE_SHIFT) <= max_pfn_mapped) 427 if (PFN_UP(end) <= max_pfn_mapped)
424 va = __va(md->phys_addr); 428 va = __va(md->phys_addr);
425 else 429 else
426 va = efi_ioremap(md->phys_addr, size); 430 va = efi_ioremap(md->phys_addr, size);
@@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void)
433 continue; 437 continue;
434 } 438 }
435 439
436 if (!(md->attribute & EFI_MEMORY_WB)) 440 if (!(md->attribute & EFI_MEMORY_WB)) {
437 set_memory_uc(md->virt_addr, md->num_pages); 441 addr = md->virt_addr;
442 npages = md->num_pages;
443 memrange_efi_to_native(&addr, &npages);
444 set_memory_uc(addr, npages);
445 }
438 446
439 systab = (u64) (unsigned long) efi_phys.systab; 447 systab = (u64) (unsigned long) efi_phys.systab;
440 if (md->phys_addr <= systab && systab < end) { 448 if (md->phys_addr <= systab && systab < end) {
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d143a1e76b30..d0060fdcccac 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void)
105 105
106void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) 106void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
107{ 107{
108 static unsigned pages_mapped; 108 static unsigned pages_mapped __initdata;
109 unsigned i, pages; 109 unsigned i, pages;
110 unsigned long offset;
110 111
111 /* phys_addr and size must be page aligned */ 112 pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
112 if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) 113 offset = phys_addr & ~PAGE_MASK;
113 return NULL; 114 phys_addr &= PAGE_MASK;
114 115
115 pages = size >> PAGE_SHIFT;
116 if (pages_mapped + pages > MAX_EFI_IO_PAGES) 116 if (pages_mapped + pages > MAX_EFI_IO_PAGES)
117 return NULL; 117 return NULL;
118 118
@@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
124 } 124 }
125 125
126 return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ 126 return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
127 (pages_mapped - pages)); 127 (pages_mapped - pages)) + offset;
128} 128}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ba49a26dff8..f0f8934fc303 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1,5 +1,4 @@
1/* 1/*
2 * linux/arch/i386/entry.S
3 * 2 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 4 */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 5d77c9cd8e15..ebf13908a743 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
61 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 61 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
62 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 62 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
63 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 63 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
64 (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); 64 APIC_DM_INIT;
65 uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
66 mdelay(10);
67
68 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
69 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
70 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
71 APIC_DM_STARTUP;
65 uv_write_global_mmr64(nasid, UVH_IPI_INT, val); 72 uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
66 return 0; 73 return 0;
67} 74}
68 75
69static void uv_send_IPI_one(int cpu, int vector) 76static void uv_send_IPI_one(int cpu, int vector)
70{ 77{
71 unsigned long val, apicid; 78 unsigned long val, apicid, lapicid;
72 int nasid; 79 int nasid;
73 80
74 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ 81 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
82 lapicid = apicid & 0x3f; /* ZZZ macro needed */
75 nasid = uv_apicid_to_nasid(apicid); 83 nasid = uv_apicid_to_nasid(apicid);
76 val = 84 val =
77 (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << 85 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
78 UVH_IPI_INT_APIC_ID_SHFT) | 86 UVH_IPI_INT_APIC_ID_SHFT) |
79 (vector << UVH_IPI_INT_VECTOR_SHFT); 87 (vector << UVH_IPI_INT_VECTOR_SHFT);
80 uv_write_global_mmr64(nasid, UVH_IPI_INT, val); 88 uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
81 printk(KERN_DEBUG
82 "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
83 cpu, apicid, vector, nasid, val);
84} 89}
85 90
86static void uv_send_IPI_mask(cpumask_t mask, int vector) 91static void uv_send_IPI_mask(cpumask_t mask, int vector)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d6d54faa84df..993c76773256 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
146 146
147 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 147 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
148 148
149#ifdef CONFIG_BLK_DEV_INITRD
149 /* Reserve INITRD */ 150 /* Reserve INITRD */
150 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { 151 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
151 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 152 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
153 unsigned long ramdisk_end = ramdisk_image + ramdisk_size; 154 unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
154 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 155 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
155 } 156 }
157#endif
156 158
157 reserve_ebda_region(); 159 reserve_ebda_region();
158 160
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 826988a6e964..90f038af3adc 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -1,5 +1,4 @@
1/* 1/*
2 * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
3 * 2 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 4 *
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 8f8102d967b3..db6839b53195 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -35,17 +35,18 @@
35#endif 35#endif
36 36
37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
38unsigned int xstate_size;
39static struct i387_fxsave_struct fx_scratch __cpuinitdata;
38 40
39void mxcsr_feature_mask_init(void) 41void __cpuinit mxcsr_feature_mask_init(void)
40{ 42{
41 unsigned long mask = 0; 43 unsigned long mask = 0;
42 44
43 clts(); 45 clts();
44 if (cpu_has_fxsr) { 46 if (cpu_has_fxsr) {
45 memset(&current->thread.i387.fxsave, 0, 47 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
46 sizeof(struct i387_fxsave_struct)); 48 asm volatile("fxsave %0" : : "m" (fx_scratch));
47 asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); 49 mask = fx_scratch.mxcsr_mask;
48 mask = current->thread.i387.fxsave.mxcsr_mask;
49 if (mask == 0) 50 if (mask == 0)
50 mask = 0x0000ffbf; 51 mask = 0x0000ffbf;
51 } 52 }
@@ -53,6 +54,16 @@ void mxcsr_feature_mask_init(void)
53 stts(); 54 stts();
54} 55}
55 56
57void __init init_thread_xstate(void)
58{
59 if (cpu_has_fxsr)
60 xstate_size = sizeof(struct i387_fxsave_struct);
61#ifdef CONFIG_X86_32
62 else
63 xstate_size = sizeof(struct i387_fsave_struct);
64#endif
65}
66
56#ifdef CONFIG_X86_64 67#ifdef CONFIG_X86_64
57/* 68/*
58 * Called at bootup to set up the initial FPU state that is later cloned 69 * Called at bootup to set up the initial FPU state that is later cloned
@@ -61,10 +72,6 @@ void mxcsr_feature_mask_init(void)
61void __cpuinit fpu_init(void) 72void __cpuinit fpu_init(void)
62{ 73{
63 unsigned long oldcr0 = read_cr0(); 74 unsigned long oldcr0 = read_cr0();
64 extern void __bad_fxsave_alignment(void);
65
66 if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
67 __bad_fxsave_alignment();
68 75
69 set_in_cr4(X86_CR4_OSFXSR); 76 set_in_cr4(X86_CR4_OSFXSR);
70 set_in_cr4(X86_CR4_OSXMMEXCPT); 77 set_in_cr4(X86_CR4_OSXMMEXCPT);
@@ -84,32 +91,44 @@ void __cpuinit fpu_init(void)
84 * value at reset if we support XMM instructions and then 91 * value at reset if we support XMM instructions and then
85 * remeber the current task has used the FPU. 92 * remeber the current task has used the FPU.
86 */ 93 */
87void init_fpu(struct task_struct *tsk) 94int init_fpu(struct task_struct *tsk)
88{ 95{
89 if (tsk_used_math(tsk)) { 96 if (tsk_used_math(tsk)) {
90 if (tsk == current) 97 if (tsk == current)
91 unlazy_fpu(tsk); 98 unlazy_fpu(tsk);
92 return; 99 return 0;
100 }
101
102 /*
103 * Memory allocation at the first usage of the FPU and other state.
104 */
105 if (!tsk->thread.xstate) {
106 tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
107 GFP_KERNEL);
108 if (!tsk->thread.xstate)
109 return -ENOMEM;
93 } 110 }
94 111
95 if (cpu_has_fxsr) { 112 if (cpu_has_fxsr) {
96 memset(&tsk->thread.i387.fxsave, 0, 113 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
97 sizeof(struct i387_fxsave_struct)); 114
98 tsk->thread.i387.fxsave.cwd = 0x37f; 115 memset(fx, 0, xstate_size);
116 fx->cwd = 0x37f;
99 if (cpu_has_xmm) 117 if (cpu_has_xmm)
100 tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; 118 fx->mxcsr = MXCSR_DEFAULT;
101 } else { 119 } else {
102 memset(&tsk->thread.i387.fsave, 0, 120 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
103 sizeof(struct i387_fsave_struct)); 121 memset(fp, 0, xstate_size);
104 tsk->thread.i387.fsave.cwd = 0xffff037fu; 122 fp->cwd = 0xffff037fu;
105 tsk->thread.i387.fsave.swd = 0xffff0000u; 123 fp->swd = 0xffff0000u;
106 tsk->thread.i387.fsave.twd = 0xffffffffu; 124 fp->twd = 0xffffffffu;
107 tsk->thread.i387.fsave.fos = 0xffff0000u; 125 fp->fos = 0xffff0000u;
108 } 126 }
109 /* 127 /*
110 * Only the device not available exception or ptrace can call init_fpu. 128 * Only the device not available exception or ptrace can call init_fpu.
111 */ 129 */
112 set_stopped_child_used_math(tsk); 130 set_stopped_child_used_math(tsk);
131 return 0;
113} 132}
114 133
115int fpregs_active(struct task_struct *target, const struct user_regset *regset) 134int fpregs_active(struct task_struct *target, const struct user_regset *regset)
@@ -126,13 +145,17 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
126 unsigned int pos, unsigned int count, 145 unsigned int pos, unsigned int count,
127 void *kbuf, void __user *ubuf) 146 void *kbuf, void __user *ubuf)
128{ 147{
148 int ret;
149
129 if (!cpu_has_fxsr) 150 if (!cpu_has_fxsr)
130 return -ENODEV; 151 return -ENODEV;
131 152
132 init_fpu(target); 153 ret = init_fpu(target);
154 if (ret)
155 return ret;
133 156
134 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 157 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
135 &target->thread.i387.fxsave, 0, -1); 158 &target->thread.xstate->fxsave, 0, -1);
136} 159}
137 160
138int xfpregs_set(struct task_struct *target, const struct user_regset *regset, 161int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
@@ -144,16 +167,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
144 if (!cpu_has_fxsr) 167 if (!cpu_has_fxsr)
145 return -ENODEV; 168 return -ENODEV;
146 169
147 init_fpu(target); 170 ret = init_fpu(target);
171 if (ret)
172 return ret;
173
148 set_stopped_child_used_math(target); 174 set_stopped_child_used_math(target);
149 175
150 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 176 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
151 &target->thread.i387.fxsave, 0, -1); 177 &target->thread.xstate->fxsave, 0, -1);
152 178
153 /* 179 /*
154 * mxcsr reserved bits must be masked to zero for security reasons. 180 * mxcsr reserved bits must be masked to zero for security reasons.
155 */ 181 */
156 target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; 182 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
157 183
158 return ret; 184 return ret;
159} 185}
@@ -233,7 +259,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
233static void 259static void
234convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) 260convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
235{ 261{
236 struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; 262 struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
237 struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; 263 struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
238 struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; 264 struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
239 int i; 265 int i;
@@ -273,7 +299,7 @@ static void convert_to_fxsr(struct task_struct *tsk,
273 const struct user_i387_ia32_struct *env) 299 const struct user_i387_ia32_struct *env)
274 300
275{ 301{
276 struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; 302 struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
277 struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; 303 struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
278 struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; 304 struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
279 int i; 305 int i;
@@ -302,15 +328,19 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
302 void *kbuf, void __user *ubuf) 328 void *kbuf, void __user *ubuf)
303{ 329{
304 struct user_i387_ia32_struct env; 330 struct user_i387_ia32_struct env;
331 int ret;
305 332
306 if (!HAVE_HWFP) 333 if (!HAVE_HWFP)
307 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); 334 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
308 335
309 init_fpu(target); 336 ret = init_fpu(target);
337 if (ret)
338 return ret;
310 339
311 if (!cpu_has_fxsr) { 340 if (!cpu_has_fxsr) {
312 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 341 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
313 &target->thread.i387.fsave, 0, -1); 342 &target->thread.xstate->fsave, 0,
343 -1);
314 } 344 }
315 345
316 if (kbuf && pos == 0 && count == sizeof(env)) { 346 if (kbuf && pos == 0 && count == sizeof(env)) {
@@ -333,12 +363,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
333 if (!HAVE_HWFP) 363 if (!HAVE_HWFP)
334 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 364 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
335 365
336 init_fpu(target); 366 ret = init_fpu(target);
367 if (ret)
368 return ret;
369
337 set_stopped_child_used_math(target); 370 set_stopped_child_used_math(target);
338 371
339 if (!cpu_has_fxsr) { 372 if (!cpu_has_fxsr) {
340 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 373 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
341 &target->thread.i387.fsave, 0, -1); 374 &target->thread.xstate->fsave, 0, -1);
342 } 375 }
343 376
344 if (pos > 0 || count < sizeof(env)) 377 if (pos > 0 || count < sizeof(env))
@@ -358,11 +391,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
358static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) 391static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
359{ 392{
360 struct task_struct *tsk = current; 393 struct task_struct *tsk = current;
394 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
361 395
362 unlazy_fpu(tsk); 396 unlazy_fpu(tsk);
363 tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; 397 fp->status = fp->swd;
364 if (__copy_to_user(buf, &tsk->thread.i387.fsave, 398 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
365 sizeof(struct i387_fsave_struct)))
366 return -1; 399 return -1;
367 return 1; 400 return 1;
368} 401}
@@ -370,6 +403,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
370static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) 403static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
371{ 404{
372 struct task_struct *tsk = current; 405 struct task_struct *tsk = current;
406 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
373 struct user_i387_ia32_struct env; 407 struct user_i387_ia32_struct env;
374 int err = 0; 408 int err = 0;
375 409
@@ -379,12 +413,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
379 if (__copy_to_user(buf, &env, sizeof(env))) 413 if (__copy_to_user(buf, &env, sizeof(env)))
380 return -1; 414 return -1;
381 415
382 err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); 416 err |= __put_user(fx->swd, &buf->status);
383 err |= __put_user(X86_FXSR_MAGIC, &buf->magic); 417 err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
384 if (err) 418 if (err)
385 return -1; 419 return -1;
386 420
387 if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, 421 if (__copy_to_user(&buf->_fxsr_env[0], fx,
388 sizeof(struct i387_fxsave_struct))) 422 sizeof(struct i387_fxsave_struct)))
389 return -1; 423 return -1;
390 return 1; 424 return 1;
@@ -417,7 +451,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
417 struct task_struct *tsk = current; 451 struct task_struct *tsk = current;
418 452
419 clear_fpu(tsk); 453 clear_fpu(tsk);
420 return __copy_from_user(&tsk->thread.i387.fsave, buf, 454 return __copy_from_user(&tsk->thread.xstate->fsave, buf,
421 sizeof(struct i387_fsave_struct)); 455 sizeof(struct i387_fsave_struct));
422} 456}
423 457
@@ -428,10 +462,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
428 int err; 462 int err;
429 463
430 clear_fpu(tsk); 464 clear_fpu(tsk);
431 err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], 465 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
432 sizeof(struct i387_fxsave_struct)); 466 sizeof(struct i387_fxsave_struct));
433 /* mxcsr reserved bits must be masked to zero for security reasons */ 467 /* mxcsr reserved bits must be masked to zero for security reasons */
434 tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; 468 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
435 if (err || __copy_from_user(&env, buf, sizeof(env))) 469 if (err || __copy_from_user(&env, buf, sizeof(env)))
436 return 1; 470 return 1;
437 convert_to_fxsr(tsk, &env); 471 convert_to_fxsr(tsk, &env);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 24362ecf5f9a..f47f0eb886b8 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,11 +46,7 @@
46#include <asm/apicdef.h> 46#include <asm/apicdef.h>
47#include <asm/system.h> 47#include <asm/system.h>
48 48
49#ifdef CONFIG_X86_32 49#include <mach_ipi.h>
50# include <mach_ipi.h>
51#else
52# include <asm/mach_apic.h>
53#endif
54 50
55/* 51/*
56 * Put the error code here just in case the user cares: 52 * Put the error code here just in case the user cares:
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 8421d0ac6f22..11b14bbaa61e 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
321 321
322extern void die_nmi(struct pt_regs *, const char *msg); 322extern void die_nmi(struct pt_regs *, const char *msg);
323 323
324__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) 324notrace __kprobes int
325nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
325{ 326{
326 327
327 /* 328 /*
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 11f9130ac513..5a29ded994fa 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -313,7 +313,8 @@ void touch_nmi_watchdog(void)
313} 313}
314EXPORT_SYMBOL(touch_nmi_watchdog); 314EXPORT_SYMBOL(touch_nmi_watchdog);
315 315
316int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) 316notrace __kprobes int
317nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
317{ 318{
318 int sum; 319 int sum;
319 int touched = 0; 320 int touched = 0;
@@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
384 385
385static unsigned ignore_nmis; 386static unsigned ignore_nmis;
386 387
387asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) 388asmlinkage notrace __kprobes void
389do_nmi(struct pt_regs *regs, long error_code)
388{ 390{
389 nmi_enter(); 391 nmi_enter();
390 add_pda(__nmi_count,1); 392 add_pda(__nmi_count,1);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1b5464c2434f..adb91e4b62da 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -470,10 +470,11 @@ error:
470 return 0; 470 return 0;
471} 471}
472 472
473static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, 473static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
474 size_t size, int direction) 474 size_t size, int direction)
475{ 475{
476 dma_addr_t dma_handle = bad_dma_address; 476 dma_addr_t dma_handle = bad_dma_address;
477 void *vaddr = phys_to_virt(paddr);
477 unsigned long uaddr; 478 unsigned long uaddr;
478 unsigned int npages; 479 unsigned int npages;
479 struct iommu_table *tbl = find_iommu_table(dev); 480 struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma.c
index ada5a0604992..388b113a7d88 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,61 +1,370 @@
1/* 1#include <linux/dma-mapping.h>
2 * Dynamic DMA mapping support.
3 */
4
5#include <linux/types.h>
6#include <linux/mm.h>
7#include <linux/string.h>
8#include <linux/pci.h>
9#include <linux/module.h>
10#include <linux/dmar.h> 2#include <linux/dmar.h>
11#include <asm/io.h> 3#include <linux/bootmem.h>
4#include <linux/pci.h>
5
6#include <asm/proto.h>
7#include <asm/dma.h>
12#include <asm/gart.h> 8#include <asm/gart.h>
13#include <asm/calgary.h> 9#include <asm/calgary.h>
14 10
15int iommu_merge __read_mostly = 0; 11int forbid_dac __read_mostly;
16 12EXPORT_SYMBOL(forbid_dac);
17dma_addr_t bad_dma_address __read_mostly;
18EXPORT_SYMBOL(bad_dma_address);
19 13
20/* This tells the BIO block layer to assume merging. Default to off 14const struct dma_mapping_ops *dma_ops;
21 because we cannot guarantee merging later. */ 15EXPORT_SYMBOL(dma_ops);
22int iommu_bio_merge __read_mostly = 0;
23EXPORT_SYMBOL(iommu_bio_merge);
24 16
25static int iommu_sac_force __read_mostly = 0; 17int iommu_sac_force __read_mostly = 0;
26 18
27int no_iommu __read_mostly;
28#ifdef CONFIG_IOMMU_DEBUG 19#ifdef CONFIG_IOMMU_DEBUG
29int panic_on_overflow __read_mostly = 1; 20int panic_on_overflow __read_mostly = 1;
30int force_iommu __read_mostly = 1; 21int force_iommu __read_mostly = 1;
31#else 22#else
32int panic_on_overflow __read_mostly = 0; 23int panic_on_overflow __read_mostly = 0;
33int force_iommu __read_mostly= 0; 24int force_iommu __read_mostly = 0;
34#endif 25#endif
35 26
27int iommu_merge __read_mostly = 0;
28
29int no_iommu __read_mostly;
36/* Set this to 1 if there is a HW IOMMU in the system */ 30/* Set this to 1 if there is a HW IOMMU in the system */
37int iommu_detected __read_mostly = 0; 31int iommu_detected __read_mostly = 0;
38 32
33/* This tells the BIO block layer to assume merging. Default to off
34 because we cannot guarantee merging later. */
35int iommu_bio_merge __read_mostly = 0;
36EXPORT_SYMBOL(iommu_bio_merge);
37
38dma_addr_t bad_dma_address __read_mostly = 0;
39EXPORT_SYMBOL(bad_dma_address);
40
39/* Dummy device used for NULL arguments (normally ISA). Better would 41/* Dummy device used for NULL arguments (normally ISA). Better would
40 be probably a smaller DMA mask, but this is bug-to-bug compatible 42 be probably a smaller DMA mask, but this is bug-to-bug compatible
41 to i386. */ 43 to older i386. */
42struct device fallback_dev = { 44struct device fallback_dev = {
43 .bus_id = "fallback device", 45 .bus_id = "fallback device",
44 .coherent_dma_mask = DMA_32BIT_MASK, 46 .coherent_dma_mask = DMA_32BIT_MASK,
45 .dma_mask = &fallback_dev.coherent_dma_mask, 47 .dma_mask = &fallback_dev.coherent_dma_mask,
46}; 48};
47 49
50int dma_set_mask(struct device *dev, u64 mask)
51{
52 if (!dev->dma_mask || !dma_supported(dev, mask))
53 return -EIO;
54
55 *dev->dma_mask = mask;
56
57 return 0;
58}
59EXPORT_SYMBOL(dma_set_mask);
60
61#ifdef CONFIG_X86_64
62static __initdata void *dma32_bootmem_ptr;
63static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
64
65static int __init parse_dma32_size_opt(char *p)
66{
67 if (!p)
68 return -EINVAL;
69 dma32_bootmem_size = memparse(p, &p);
70 return 0;
71}
72early_param("dma32_size", parse_dma32_size_opt);
73
74void __init dma32_reserve_bootmem(void)
75{
76 unsigned long size, align;
77 if (end_pfn <= MAX_DMA32_PFN)
78 return;
79
80 align = 64ULL<<20;
81 size = round_up(dma32_bootmem_size, align);
82 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
83 __pa(MAX_DMA_ADDRESS));
84 if (dma32_bootmem_ptr)
85 dma32_bootmem_size = size;
86 else
87 dma32_bootmem_size = 0;
88}
89static void __init dma32_free_bootmem(void)
90{
91 int node;
92
93 if (end_pfn <= MAX_DMA32_PFN)
94 return;
95
96 if (!dma32_bootmem_ptr)
97 return;
98
99 for_each_online_node(node)
100 free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
101 dma32_bootmem_size);
102
103 dma32_bootmem_ptr = NULL;
104 dma32_bootmem_size = 0;
105}
106
107void __init pci_iommu_alloc(void)
108{
109 /* free the range so iommu could get some range less than 4G */
110 dma32_free_bootmem();
111 /*
112 * The order of these functions is important for
113 * fall-back/fail-over reasons
114 */
115#ifdef CONFIG_GART_IOMMU
116 gart_iommu_hole_init();
117#endif
118
119#ifdef CONFIG_CALGARY_IOMMU
120 detect_calgary();
121#endif
122
123 detect_intel_iommu();
124
125#ifdef CONFIG_SWIOTLB
126 pci_swiotlb_init();
127#endif
128}
129#endif
130
131/*
132 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
133 * documentation.
134 */
135static __init int iommu_setup(char *p)
136{
137 iommu_merge = 1;
138
139 if (!p)
140 return -EINVAL;
141
142 while (*p) {
143 if (!strncmp(p, "off", 3))
144 no_iommu = 1;
145 /* gart_parse_options has more force support */
146 if (!strncmp(p, "force", 5))
147 force_iommu = 1;
148 if (!strncmp(p, "noforce", 7)) {
149 iommu_merge = 0;
150 force_iommu = 0;
151 }
152
153 if (!strncmp(p, "biomerge", 8)) {
154 iommu_bio_merge = 4096;
155 iommu_merge = 1;
156 force_iommu = 1;
157 }
158 if (!strncmp(p, "panic", 5))
159 panic_on_overflow = 1;
160 if (!strncmp(p, "nopanic", 7))
161 panic_on_overflow = 0;
162 if (!strncmp(p, "merge", 5)) {
163 iommu_merge = 1;
164 force_iommu = 1;
165 }
166 if (!strncmp(p, "nomerge", 7))
167 iommu_merge = 0;
168 if (!strncmp(p, "forcesac", 8))
169 iommu_sac_force = 1;
170 if (!strncmp(p, "allowdac", 8))
171 forbid_dac = 0;
172 if (!strncmp(p, "nodac", 5))
173 forbid_dac = -1;
174 if (!strncmp(p, "usedac", 6)) {
175 forbid_dac = -1;
176 return 1;
177 }
178#ifdef CONFIG_SWIOTLB
179 if (!strncmp(p, "soft", 4))
180 swiotlb = 1;
181#endif
182
183#ifdef CONFIG_GART_IOMMU
184 gart_parse_options(p);
185#endif
186
187#ifdef CONFIG_CALGARY_IOMMU
188 if (!strncmp(p, "calgary", 7))
189 use_calgary = 1;
190#endif /* CONFIG_CALGARY_IOMMU */
191
192 p += strcspn(p, ",");
193 if (*p == ',')
194 ++p;
195 }
196 return 0;
197}
198early_param("iommu", iommu_setup);
199
200#ifdef CONFIG_X86_32
201int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
202 dma_addr_t device_addr, size_t size, int flags)
203{
204 void __iomem *mem_base = NULL;
205 int pages = size >> PAGE_SHIFT;
206 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
207
208 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
209 goto out;
210 if (!size)
211 goto out;
212 if (dev->dma_mem)
213 goto out;
214
215 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
216
217 mem_base = ioremap(bus_addr, size);
218 if (!mem_base)
219 goto out;
220
221 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
222 if (!dev->dma_mem)
223 goto out;
224 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
225 if (!dev->dma_mem->bitmap)
226 goto free1_out;
227
228 dev->dma_mem->virt_base = mem_base;
229 dev->dma_mem->device_base = device_addr;
230 dev->dma_mem->size = pages;
231 dev->dma_mem->flags = flags;
232
233 if (flags & DMA_MEMORY_MAP)
234 return DMA_MEMORY_MAP;
235
236 return DMA_MEMORY_IO;
237
238 free1_out:
239 kfree(dev->dma_mem);
240 out:
241 if (mem_base)
242 iounmap(mem_base);
243 return 0;
244}
245EXPORT_SYMBOL(dma_declare_coherent_memory);
246
247void dma_release_declared_memory(struct device *dev)
248{
249 struct dma_coherent_mem *mem = dev->dma_mem;
250
251 if (!mem)
252 return;
253 dev->dma_mem = NULL;
254 iounmap(mem->virt_base);
255 kfree(mem->bitmap);
256 kfree(mem);
257}
258EXPORT_SYMBOL(dma_release_declared_memory);
259
260void *dma_mark_declared_memory_occupied(struct device *dev,
261 dma_addr_t device_addr, size_t size)
262{
263 struct dma_coherent_mem *mem = dev->dma_mem;
264 int pos, err;
265 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
266
267 pages >>= PAGE_SHIFT;
268
269 if (!mem)
270 return ERR_PTR(-EINVAL);
271
272 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
273 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
274 if (err != 0)
275 return ERR_PTR(err);
276 return mem->virt_base + (pos << PAGE_SHIFT);
277}
278EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
279
280static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
281 dma_addr_t *dma_handle, void **ret)
282{
283 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
284 int order = get_order(size);
285
286 if (mem) {
287 int page = bitmap_find_free_region(mem->bitmap, mem->size,
288 order);
289 if (page >= 0) {
290 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
291 *ret = mem->virt_base + (page << PAGE_SHIFT);
292 memset(*ret, 0, size);
293 }
294 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
295 *ret = NULL;
296 }
297 return (mem != NULL);
298}
299
300static int dma_release_coherent(struct device *dev, int order, void *vaddr)
301{
302 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
303
304 if (mem && vaddr >= mem->virt_base && vaddr <
305 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
306 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
307
308 bitmap_release_region(mem->bitmap, page, order);
309 return 1;
310 }
311 return 0;
312}
313#else
314#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
315#define dma_release_coherent(dev, order, vaddr) (0)
316#endif /* CONFIG_X86_32 */
317
318int dma_supported(struct device *dev, u64 mask)
319{
320#ifdef CONFIG_PCI
321 if (mask > 0xffffffff && forbid_dac > 0) {
322 printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
323 dev->bus_id);
324 return 0;
325 }
326#endif
327
328 if (dma_ops->dma_supported)
329 return dma_ops->dma_supported(dev, mask);
330
331 /* Copied from i386. Doesn't make much sense, because it will
332 only work for pci_alloc_coherent.
333 The caller just has to use GFP_DMA in this case. */
334 if (mask < DMA_24BIT_MASK)
335 return 0;
336
337 /* Tell the device to use SAC when IOMMU force is on. This
338 allows the driver to use cheaper accesses in some cases.
339
340 Problem with this is that if we overflow the IOMMU area and
341 return DAC as fallback address the device may not handle it
342 correctly.
343
344 As a special case some controllers have a 39bit address
345 mode that is as efficient as 32bit (aic79xx). Don't force
346 SAC for these. Assume all masks <= 40 bits are of this
347 type. Normally this doesn't make any difference, but gives
348 more gentle handling of IOMMU overflow. */
349 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
350 printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
351 dev->bus_id, mask);
352 return 0;
353 }
354
355 return 1;
356}
357EXPORT_SYMBOL(dma_supported);
358
48/* Allocate DMA memory on node near device */ 359/* Allocate DMA memory on node near device */
49noinline static void * 360noinline struct page *
50dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) 361dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
51{ 362{
52 struct page *page;
53 int node; 363 int node;
54 364
55 node = dev_to_node(dev); 365 node = dev_to_node(dev);
56 366
57 page = alloc_pages_node(node, gfp, order); 367 return alloc_pages_node(node, gfp, order);
58 return page ? page_address(page) : NULL;
59} 368}
60 369
61/* 370/*
@@ -65,9 +374,16 @@ void *
65dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 374dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
66 gfp_t gfp) 375 gfp_t gfp)
67{ 376{
68 void *memory; 377 void *memory = NULL;
378 struct page *page;
69 unsigned long dma_mask = 0; 379 unsigned long dma_mask = 0;
70 u64 bus; 380 dma_addr_t bus;
381
382 /* ignore region specifiers */
383 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
384
385 if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
386 return memory;
71 387
72 if (!dev) 388 if (!dev)
73 dev = &fallback_dev; 389 dev = &fallback_dev;
@@ -82,26 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
82 /* Don't invoke OOM killer */ 398 /* Don't invoke OOM killer */
83 gfp |= __GFP_NORETRY; 399 gfp |= __GFP_NORETRY;
84 400
85 /* Kludge to make it bug-to-bug compatible with i386. i386 401#ifdef CONFIG_X86_64
86 uses the normal dma_mask for alloc_coherent. */
87 dma_mask &= *dev->dma_mask;
88
89 /* Why <=? Even when the mask is smaller than 4GB it is often 402 /* Why <=? Even when the mask is smaller than 4GB it is often
90 larger than 16MB and in this case we have a chance of 403 larger than 16MB and in this case we have a chance of
91 finding fitting memory in the next higher zone first. If 404 finding fitting memory in the next higher zone first. If
92 not retry with true GFP_DMA. -AK */ 405 not retry with true GFP_DMA. -AK */
93 if (dma_mask <= DMA_32BIT_MASK) 406 if (dma_mask <= DMA_32BIT_MASK)
94 gfp |= GFP_DMA32; 407 gfp |= GFP_DMA32;
408#endif
95 409
96 again: 410 again:
97 memory = dma_alloc_pages(dev, gfp, get_order(size)); 411 page = dma_alloc_pages(dev, gfp, get_order(size));
98 if (memory == NULL) 412 if (page == NULL)
99 return NULL; 413 return NULL;
100 414
101 { 415 {
102 int high, mmu; 416 int high, mmu;
103 bus = virt_to_bus(memory); 417 bus = page_to_phys(page);
104 high = (bus + size) >= dma_mask; 418 memory = page_address(page);
419 high = (bus + size) >= dma_mask;
105 mmu = high; 420 mmu = high;
106 if (force_iommu && !(gfp & GFP_DMA)) 421 if (force_iommu && !(gfp & GFP_DMA))
107 mmu = 1; 422 mmu = 1;
@@ -127,7 +442,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
127 442
128 memset(memory, 0, size); 443 memset(memory, 0, size);
129 if (!mmu) { 444 if (!mmu) {
130 *dma_handle = virt_to_bus(memory); 445 *dma_handle = bus;
131 return memory; 446 return memory;
132 } 447 }
133 } 448 }
@@ -139,7 +454,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
139 } 454 }
140 455
141 if (dma_ops->map_simple) { 456 if (dma_ops->map_simple) {
142 *dma_handle = dma_ops->map_simple(dev, memory, 457 *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
143 size, 458 size,
144 PCI_DMA_BIDIRECTIONAL); 459 PCI_DMA_BIDIRECTIONAL);
145 if (*dma_handle != bad_dma_address) 460 if (*dma_handle != bad_dma_address)
@@ -147,7 +462,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
147 } 462 }
148 463
149 if (panic_on_overflow) 464 if (panic_on_overflow)
150 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); 465 panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
466 (unsigned long)size);
151 free_pages((unsigned long)memory, get_order(size)); 467 free_pages((unsigned long)memory, get_order(size));
152 return NULL; 468 return NULL;
153} 469}
@@ -160,153 +476,16 @@ EXPORT_SYMBOL(dma_alloc_coherent);
160void dma_free_coherent(struct device *dev, size_t size, 476void dma_free_coherent(struct device *dev, size_t size,
161 void *vaddr, dma_addr_t bus) 477 void *vaddr, dma_addr_t bus)
162{ 478{
479 int order = get_order(size);
163 WARN_ON(irqs_disabled()); /* for portability */ 480 WARN_ON(irqs_disabled()); /* for portability */
481 if (dma_release_coherent(dev, order, vaddr))
482 return;
164 if (dma_ops->unmap_single) 483 if (dma_ops->unmap_single)
165 dma_ops->unmap_single(dev, bus, size, 0); 484 dma_ops->unmap_single(dev, bus, size, 0);
166 free_pages((unsigned long)vaddr, get_order(size)); 485 free_pages((unsigned long)vaddr, order);
167} 486}
168EXPORT_SYMBOL(dma_free_coherent); 487EXPORT_SYMBOL(dma_free_coherent);
169 488
170static int forbid_dac __read_mostly;
171
172int dma_supported(struct device *dev, u64 mask)
173{
174#ifdef CONFIG_PCI
175 if (mask > 0xffffffff && forbid_dac > 0) {
176
177
178
179 printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id);
180 return 0;
181 }
182#endif
183
184 if (dma_ops->dma_supported)
185 return dma_ops->dma_supported(dev, mask);
186
187 /* Copied from i386. Doesn't make much sense, because it will
188 only work for pci_alloc_coherent.
189 The caller just has to use GFP_DMA in this case. */
190 if (mask < DMA_24BIT_MASK)
191 return 0;
192
193 /* Tell the device to use SAC when IOMMU force is on. This
194 allows the driver to use cheaper accesses in some cases.
195
196 Problem with this is that if we overflow the IOMMU area and
197 return DAC as fallback address the device may not handle it
198 correctly.
199
200 As a special case some controllers have a 39bit address
201 mode that is as efficient as 32bit (aic79xx). Don't force
202 SAC for these. Assume all masks <= 40 bits are of this
203 type. Normally this doesn't make any difference, but gives
204 more gentle handling of IOMMU overflow. */
205 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
206 printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
207 return 0;
208 }
209
210 return 1;
211}
212EXPORT_SYMBOL(dma_supported);
213
214int dma_set_mask(struct device *dev, u64 mask)
215{
216 if (!dev->dma_mask || !dma_supported(dev, mask))
217 return -EIO;
218 *dev->dma_mask = mask;
219 return 0;
220}
221EXPORT_SYMBOL(dma_set_mask);
222
223/*
224 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
225 * documentation.
226 */
227static __init int iommu_setup(char *p)
228{
229 iommu_merge = 1;
230
231 if (!p)
232 return -EINVAL;
233
234 while (*p) {
235 if (!strncmp(p, "off", 3))
236 no_iommu = 1;
237 /* gart_parse_options has more force support */
238 if (!strncmp(p, "force", 5))
239 force_iommu = 1;
240 if (!strncmp(p, "noforce", 7)) {
241 iommu_merge = 0;
242 force_iommu = 0;
243 }
244
245 if (!strncmp(p, "biomerge", 8)) {
246 iommu_bio_merge = 4096;
247 iommu_merge = 1;
248 force_iommu = 1;
249 }
250 if (!strncmp(p, "panic", 5))
251 panic_on_overflow = 1;
252 if (!strncmp(p, "nopanic", 7))
253 panic_on_overflow = 0;
254 if (!strncmp(p, "merge", 5)) {
255 iommu_merge = 1;
256 force_iommu = 1;
257 }
258 if (!strncmp(p, "nomerge", 7))
259 iommu_merge = 0;
260 if (!strncmp(p, "forcesac", 8))
261 iommu_sac_force = 1;
262 if (!strncmp(p, "allowdac", 8))
263 forbid_dac = 0;
264 if (!strncmp(p, "nodac", 5))
265 forbid_dac = -1;
266
267#ifdef CONFIG_SWIOTLB
268 if (!strncmp(p, "soft", 4))
269 swiotlb = 1;
270#endif
271
272#ifdef CONFIG_GART_IOMMU
273 gart_parse_options(p);
274#endif
275
276#ifdef CONFIG_CALGARY_IOMMU
277 if (!strncmp(p, "calgary", 7))
278 use_calgary = 1;
279#endif /* CONFIG_CALGARY_IOMMU */
280
281 p += strcspn(p, ",");
282 if (*p == ',')
283 ++p;
284 }
285 return 0;
286}
287early_param("iommu", iommu_setup);
288
289void __init pci_iommu_alloc(void)
290{
291 /*
292 * The order of these functions is important for
293 * fall-back/fail-over reasons
294 */
295#ifdef CONFIG_GART_IOMMU
296 gart_iommu_hole_init();
297#endif
298
299#ifdef CONFIG_CALGARY_IOMMU
300 detect_calgary();
301#endif
302
303 detect_intel_iommu();
304
305#ifdef CONFIG_SWIOTLB
306 pci_swiotlb_init();
307#endif
308}
309
310static int __init pci_iommu_init(void) 489static int __init pci_iommu_init(void)
311{ 490{
312#ifdef CONFIG_CALGARY_IOMMU 491#ifdef CONFIG_CALGARY_IOMMU
@@ -327,6 +506,8 @@ void pci_iommu_shutdown(void)
327{ 506{
328 gart_iommu_shutdown(); 507 gart_iommu_shutdown();
329} 508}
509/* Must execute after PCI subsystem */
510fs_initcall(pci_iommu_init);
330 511
331#ifdef CONFIG_PCI 512#ifdef CONFIG_PCI
332/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ 513/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
@@ -334,11 +515,10 @@ void pci_iommu_shutdown(void)
334static __devinit void via_no_dac(struct pci_dev *dev) 515static __devinit void via_no_dac(struct pci_dev *dev)
335{ 516{
336 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 517 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
337 printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); 518 printk(KERN_INFO "PCI: VIA PCI bridge detected."
519 "Disabling DAC.\n");
338 forbid_dac = 1; 520 forbid_dac = 1;
339 } 521 }
340} 522}
341DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); 523DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
342#endif 524#endif
343/* Must execute after PCI subsystem */
344fs_initcall(pci_iommu_init);
diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c
deleted file mode 100644
index 51330321a5d3..000000000000
--- a/arch/x86/kernel/pci-dma_32.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * On i386 there is no hardware dynamic DMA address translation,
5 * so consistent alloc/free are merely page allocation/freeing.
6 * The rest of the dynamic DMA mapping interface is implemented
7 * in asm/pci.h.
8 */
9
10#include <linux/types.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/pci.h>
14#include <linux/module.h>
15#include <asm/io.h>
16
17struct dma_coherent_mem {
18 void *virt_base;
19 u32 device_base;
20 int size;
21 int flags;
22 unsigned long *bitmap;
23};
24
25void *dma_alloc_coherent(struct device *dev, size_t size,
26 dma_addr_t *dma_handle, gfp_t gfp)
27{
28 void *ret;
29 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
30 int order = get_order(size);
31 /* ignore region specifiers */
32 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
33
34 if (mem) {
35 int page = bitmap_find_free_region(mem->bitmap, mem->size,
36 order);
37 if (page >= 0) {
38 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
39 ret = mem->virt_base + (page << PAGE_SHIFT);
40 memset(ret, 0, size);
41 return ret;
42 }
43 if (mem->flags & DMA_MEMORY_EXCLUSIVE)
44 return NULL;
45 }
46
47 if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
48 gfp |= GFP_DMA;
49
50 ret = (void *)__get_free_pages(gfp, order);
51
52 if (ret != NULL) {
53 memset(ret, 0, size);
54 *dma_handle = virt_to_phys(ret);
55 }
56 return ret;
57}
58EXPORT_SYMBOL(dma_alloc_coherent);
59
60void dma_free_coherent(struct device *dev, size_t size,
61 void *vaddr, dma_addr_t dma_handle)
62{
63 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
64 int order = get_order(size);
65
66 WARN_ON(irqs_disabled()); /* for portability */
67 if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
68 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
69
70 bitmap_release_region(mem->bitmap, page, order);
71 } else
72 free_pages((unsigned long)vaddr, order);
73}
74EXPORT_SYMBOL(dma_free_coherent);
75
76int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
77 dma_addr_t device_addr, size_t size, int flags)
78{
79 void __iomem *mem_base = NULL;
80 int pages = size >> PAGE_SHIFT;
81 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
82
83 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
84 goto out;
85 if (!size)
86 goto out;
87 if (dev->dma_mem)
88 goto out;
89
90 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
91
92 mem_base = ioremap(bus_addr, size);
93 if (!mem_base)
94 goto out;
95
96 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
97 if (!dev->dma_mem)
98 goto out;
99 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
100 if (!dev->dma_mem->bitmap)
101 goto free1_out;
102
103 dev->dma_mem->virt_base = mem_base;
104 dev->dma_mem->device_base = device_addr;
105 dev->dma_mem->size = pages;
106 dev->dma_mem->flags = flags;
107
108 if (flags & DMA_MEMORY_MAP)
109 return DMA_MEMORY_MAP;
110
111 return DMA_MEMORY_IO;
112
113 free1_out:
114 kfree(dev->dma_mem);
115 out:
116 if (mem_base)
117 iounmap(mem_base);
118 return 0;
119}
120EXPORT_SYMBOL(dma_declare_coherent_memory);
121
122void dma_release_declared_memory(struct device *dev)
123{
124 struct dma_coherent_mem *mem = dev->dma_mem;
125
126 if(!mem)
127 return;
128 dev->dma_mem = NULL;
129 iounmap(mem->virt_base);
130 kfree(mem->bitmap);
131 kfree(mem);
132}
133EXPORT_SYMBOL(dma_release_declared_memory);
134
135void *dma_mark_declared_memory_occupied(struct device *dev,
136 dma_addr_t device_addr, size_t size)
137{
138 struct dma_coherent_mem *mem = dev->dma_mem;
139 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
140 int pos, err;
141
142 if (!mem)
143 return ERR_PTR(-EINVAL);
144
145 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
146 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
147 if (err != 0)
148 return ERR_PTR(err);
149 return mem->virt_base + (pos << PAGE_SHIFT);
150}
151EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
152
153#ifdef CONFIG_PCI
154/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
155
156int forbid_dac;
157EXPORT_SYMBOL(forbid_dac);
158
159static __devinit void via_no_dac(struct pci_dev *dev)
160{
161 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
162 printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
163 forbid_dac = 1;
164 }
165}
166DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
167
168static int check_iommu(char *s)
169{
170 if (!strcmp(s, "usedac")) {
171 forbid_dac = -1;
172 return 1;
173 }
174 return 0;
175}
176__setup("iommu=", check_iommu);
177#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 700e4647dd30..c07455d1695f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
264} 264}
265 265
266static dma_addr_t 266static dma_addr_t
267gart_map_simple(struct device *dev, char *buf, size_t size, int dir) 267gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
268{ 268{
269 dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); 269 dma_addr_t map = dma_map_area(dev, paddr, size, dir);
270 270
271 flush_gart(); 271 flush_gart();
272 272
@@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
275 275
276/* Map a single area into the IOMMU */ 276/* Map a single area into the IOMMU */
277static dma_addr_t 277static dma_addr_t
278gart_map_single(struct device *dev, void *addr, size_t size, int dir) 278gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
279{ 279{
280 unsigned long phys_mem, bus; 280 unsigned long bus;
281 281
282 if (!dev) 282 if (!dev)
283 dev = &fallback_dev; 283 dev = &fallback_dev;
284 284
285 phys_mem = virt_to_phys(addr); 285 if (!need_iommu(dev, paddr, size))
286 if (!need_iommu(dev, phys_mem, size)) 286 return paddr;
287 return phys_mem;
288 287
289 bus = gart_map_simple(dev, addr, size, dir); 288 bus = gart_map_simple(dev, paddr, size, dir);
290 289
291 return bus; 290 return bus;
292} 291}
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu.c
index ab08e1832228..aec43d56f49c 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
14static int 14static int
15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) 15check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
16{ 16{
17 if (hwdev && bus + size > *hwdev->dma_mask) { 17 if (hwdev && bus + size > *hwdev->dma_mask) {
18 if (*hwdev->dma_mask >= DMA_32BIT_MASK) 18 if (*hwdev->dma_mask >= DMA_32BIT_MASK)
19 printk(KERN_ERR 19 printk(KERN_ERR
20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", 20 "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -26,19 +26,17 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
26} 26}
27 27
28static dma_addr_t 28static dma_addr_t
29nommu_map_single(struct device *hwdev, void *ptr, size_t size, 29nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
30 int direction) 30 int direction)
31{ 31{
32 dma_addr_t bus = virt_to_bus(ptr); 32 dma_addr_t bus = paddr;
33 WARN_ON(size == 0);
33 if (!check_addr("map_single", hwdev, bus, size)) 34 if (!check_addr("map_single", hwdev, bus, size))
34 return bad_dma_address; 35 return bad_dma_address;
36 flush_write_buffers();
35 return bus; 37 return bus;
36} 38}
37 39
38static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
39 int direction)
40{
41}
42 40
43/* Map a set of buffers described by scatterlist in streaming 41/* Map a set of buffers described by scatterlist in streaming
44 * mode for DMA. This is the scatter-gather version of the 42 * mode for DMA. This is the scatter-gather version of the
@@ -61,30 +59,34 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
61 struct scatterlist *s; 59 struct scatterlist *s;
62 int i; 60 int i;
63 61
62 WARN_ON(nents == 0 || sg[0].length == 0);
63
64 for_each_sg(sg, s, nents, i) { 64 for_each_sg(sg, s, nents, i) {
65 BUG_ON(!sg_page(s)); 65 BUG_ON(!sg_page(s));
66 s->dma_address = virt_to_bus(sg_virt(s)); 66 s->dma_address = sg_phys(s);
67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) 67 if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
68 return 0; 68 return 0;
69 s->dma_length = s->length; 69 s->dma_length = s->length;
70 } 70 }
71 flush_write_buffers();
71 return nents; 72 return nents;
72} 73}
73 74
74/* Unmap a set of streaming mode DMA translations. 75/* Make sure we keep the same behaviour */
75 * Again, cpu read rules concerning calls here are the same as for 76static int nommu_mapping_error(dma_addr_t dma_addr)
76 * pci_unmap_single() above.
77 */
78static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79 int nents, int dir)
80{ 77{
78#ifdef CONFIG_X86_32
79 return 0;
80#else
81 return (dma_addr == bad_dma_address);
82#endif
81} 83}
82 84
85
83const struct dma_mapping_ops nommu_dma_ops = { 86const struct dma_mapping_ops nommu_dma_ops = {
84 .map_single = nommu_map_single, 87 .map_single = nommu_map_single,
85 .unmap_single = nommu_unmap_single,
86 .map_sg = nommu_map_sg, 88 .map_sg = nommu_map_sg,
87 .unmap_sg = nommu_unmap_sg, 89 .mapping_error = nommu_mapping_error,
88 .is_phys = 1, 90 .is_phys = 1,
89}; 91};
90 92
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82a0a674a003..490da7f4b8d0 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -11,11 +11,18 @@
11 11
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13 13
14static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction)
17{
18 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
19}
20
14const struct dma_mapping_ops swiotlb_dma_ops = { 21const struct dma_mapping_ops swiotlb_dma_ops = {
15 .mapping_error = swiotlb_dma_mapping_error, 22 .mapping_error = swiotlb_dma_mapping_error,
16 .alloc_coherent = swiotlb_alloc_coherent, 23 .alloc_coherent = swiotlb_alloc_coherent,
17 .free_coherent = swiotlb_free_coherent, 24 .free_coherent = swiotlb_free_coherent,
18 .map_single = swiotlb_map_single, 25 .map_single = swiotlb_map_single_phys,
19 .unmap_single = swiotlb_unmap_single, 26 .unmap_single = swiotlb_unmap_single,
20 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 27 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
21 .sync_single_for_device = swiotlb_sync_single_for_device, 28 .sync_single_for_device = swiotlb_sync_single_for_device,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
new file mode 100644
index 000000000000..3004d716539d
--- /dev/null
+++ b/arch/x86/kernel/process.c
@@ -0,0 +1,44 @@
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/smp.h>
5#include <linux/slab.h>
6#include <linux/sched.h>
7
8struct kmem_cache *task_xstate_cachep;
9
10int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
11{
12 *dst = *src;
13 if (src->thread.xstate) {
14 dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
15 GFP_KERNEL);
16 if (!dst->thread.xstate)
17 return -ENOMEM;
18 WARN_ON((unsigned long)dst->thread.xstate & 15);
19 memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
20 }
21 return 0;
22}
23
24void free_thread_xstate(struct task_struct *tsk)
25{
26 if (tsk->thread.xstate) {
27 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
28 tsk->thread.xstate = NULL;
29 }
30}
31
32void free_thread_info(struct thread_info *ti)
33{
34 free_thread_xstate(ti->task);
35 free_pages((unsigned long)ti, get_order(THREAD_SIZE));
36}
37
38void arch_task_cache_init(void)
39{
40 task_xstate_cachep =
41 kmem_cache_create("task_xstate", xstate_size,
42 __alignof__(union thread_xstate),
43 SLAB_PANIC, NULL);
44}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3903a8f2eb97..7adad088e373 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
36#include <linux/personality.h> 36#include <linux/personality.h>
37#include <linux/tick.h> 37#include <linux/tick.h>
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/i387.h> 47#include <asm/i387.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/vm86.h>
49#ifdef CONFIG_MATH_EMULATION 49#ifdef CONFIG_MATH_EMULATION
50#include <asm/math_emu.h> 50#include <asm/math_emu.h>
51#endif 51#endif
@@ -521,14 +521,18 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
521 regs->cs = __USER_CS; 521 regs->cs = __USER_CS;
522 regs->ip = new_ip; 522 regs->ip = new_ip;
523 regs->sp = new_sp; 523 regs->sp = new_sp;
524 /*
525 * Free the old FP and other extended state
526 */
527 free_thread_xstate(current);
524} 528}
525EXPORT_SYMBOL_GPL(start_thread); 529EXPORT_SYMBOL_GPL(start_thread);
526 530
527#ifdef CONFIG_SECCOMP
528static void hard_disable_TSC(void) 531static void hard_disable_TSC(void)
529{ 532{
530 write_cr4(read_cr4() | X86_CR4_TSD); 533 write_cr4(read_cr4() | X86_CR4_TSD);
531} 534}
535
532void disable_TSC(void) 536void disable_TSC(void)
533{ 537{
534 preempt_disable(); 538 preempt_disable();
@@ -540,11 +544,47 @@ void disable_TSC(void)
540 hard_disable_TSC(); 544 hard_disable_TSC();
541 preempt_enable(); 545 preempt_enable();
542} 546}
547
543static void hard_enable_TSC(void) 548static void hard_enable_TSC(void)
544{ 549{
545 write_cr4(read_cr4() & ~X86_CR4_TSD); 550 write_cr4(read_cr4() & ~X86_CR4_TSD);
546} 551}
547#endif /* CONFIG_SECCOMP */ 552
553void enable_TSC(void)
554{
555 preempt_disable();
556 if (test_and_clear_thread_flag(TIF_NOTSC))
557 /*
558 * Must flip the CPU state synchronously with
559 * TIF_NOTSC in the current running context.
560 */
561 hard_enable_TSC();
562 preempt_enable();
563}
564
565int get_tsc_mode(unsigned long adr)
566{
567 unsigned int val;
568
569 if (test_thread_flag(TIF_NOTSC))
570 val = PR_TSC_SIGSEGV;
571 else
572 val = PR_TSC_ENABLE;
573
574 return put_user(val, (unsigned int __user *)adr);
575}
576
577int set_tsc_mode(unsigned int val)
578{
579 if (val == PR_TSC_SIGSEGV)
580 disable_TSC();
581 else if (val == PR_TSC_ENABLE)
582 enable_TSC();
583 else
584 return -EINVAL;
585
586 return 0;
587}
548 588
549static noinline void 589static noinline void
550__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 590__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
@@ -578,7 +618,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
578 set_debugreg(next->debugreg7, 7); 618 set_debugreg(next->debugreg7, 7);
579 } 619 }
580 620
581#ifdef CONFIG_SECCOMP
582 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 621 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
583 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 622 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
584 /* prev and next are different */ 623 /* prev and next are different */
@@ -587,7 +626,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
587 else 626 else
588 hard_enable_TSC(); 627 hard_enable_TSC();
589 } 628 }
590#endif
591 629
592#ifdef X86_BTS 630#ifdef X86_BTS
593 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 631 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
@@ -669,7 +707,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
669 707
670 /* we're going to use this soon, after a few expensive things */ 708 /* we're going to use this soon, after a few expensive things */
671 if (next_p->fpu_counter > 5) 709 if (next_p->fpu_counter > 5)
672 prefetch(&next->i387.fxsave); 710 prefetch(next->xstate);
673 711
674 /* 712 /*
675 * Reload esp0. 713 * Reload esp0.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e75ccc8a2b87..891af1a1b48a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -36,6 +36,7 @@
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/kdebug.h> 37#include <linux/kdebug.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/prctl.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
@@ -532,9 +533,71 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
532 regs->ss = __USER_DS; 533 regs->ss = __USER_DS;
533 regs->flags = 0x200; 534 regs->flags = 0x200;
534 set_fs(USER_DS); 535 set_fs(USER_DS);
536 /*
537 * Free the old FP and other extended state
538 */
539 free_thread_xstate(current);
535} 540}
536EXPORT_SYMBOL_GPL(start_thread); 541EXPORT_SYMBOL_GPL(start_thread);
537 542
543static void hard_disable_TSC(void)
544{
545 write_cr4(read_cr4() | X86_CR4_TSD);
546}
547
548void disable_TSC(void)
549{
550 preempt_disable();
551 if (!test_and_set_thread_flag(TIF_NOTSC))
552 /*
553 * Must flip the CPU state synchronously with
554 * TIF_NOTSC in the current running context.
555 */
556 hard_disable_TSC();
557 preempt_enable();
558}
559
560static void hard_enable_TSC(void)
561{
562 write_cr4(read_cr4() & ~X86_CR4_TSD);
563}
564
565void enable_TSC(void)
566{
567 preempt_disable();
568 if (test_and_clear_thread_flag(TIF_NOTSC))
569 /*
570 * Must flip the CPU state synchronously with
571 * TIF_NOTSC in the current running context.
572 */
573 hard_enable_TSC();
574 preempt_enable();
575}
576
577int get_tsc_mode(unsigned long adr)
578{
579 unsigned int val;
580
581 if (test_thread_flag(TIF_NOTSC))
582 val = PR_TSC_SIGSEGV;
583 else
584 val = PR_TSC_ENABLE;
585
586 return put_user(val, (unsigned int __user *)adr);
587}
588
589int set_tsc_mode(unsigned int val)
590{
591 if (val == PR_TSC_SIGSEGV)
592 disable_TSC();
593 else if (val == PR_TSC_ENABLE)
594 enable_TSC();
595 else
596 return -EINVAL;
597
598 return 0;
599}
600
538/* 601/*
539 * This special macro can be used to load a debugging register 602 * This special macro can be used to load a debugging register
540 */ 603 */
@@ -572,6 +635,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
572 loaddebug(next, 7); 635 loaddebug(next, 7);
573 } 636 }
574 637
638 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
639 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
640 /* prev and next are different */
641 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
642 hard_disable_TSC();
643 else
644 hard_enable_TSC();
645 }
646
575 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 647 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
576 /* 648 /*
577 * Copy the relevant range of the IO bitmap. 649 * Copy the relevant range of the IO bitmap.
@@ -614,7 +686,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
614 686
615 /* we're going to use this soon, after a few expensive things */ 687 /* we're going to use this soon, after a few expensive things */
616 if (next_p->fpu_counter>5) 688 if (next_p->fpu_counter>5)
617 prefetch(&next->i387.fxsave); 689 prefetch(next->xstate);
618 690
619 /* 691 /*
620 * Reload esp0, LDT and the page table pointer: 692 * Reload esp0, LDT and the page table pointer:
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 9042fb0e36f5..aee0e8200777 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -74,8 +74,8 @@ int force_personality32 = 0;
74Control non executable heap for 32bit processes. 74Control non executable heap for 32bit processes.
75To control the stack too use noexec=off 75To control the stack too use noexec=off
76 76
77on PROT_READ does not imply PROT_EXEC for 32bit processes 77on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
78off PROT_READ implies PROT_EXEC (default) 78off PROT_READ implies PROT_EXEC
79*/ 79*/
80static int __init nonx32_setup(char *str) 80static int __init nonx32_setup(char *str)
81{ 81{
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5b0bffb7fcc9..1c4799e68718 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p)
812 efi_init(); 812 efi_init();
813 813
814 /* update e820 for memory not covered by WB MTRRs */ 814 /* update e820 for memory not covered by WB MTRRs */
815 find_max_pfn(); 815 propagate_e820_map();
816 mtrr_bp_init(); 816 mtrr_bp_init();
817 if (mtrr_trim_uncached_memory(max_pfn)) 817 if (mtrr_trim_uncached_memory(max_pfn))
818 find_max_pfn(); 818 propagate_e820_map();
819 819
820 max_low_pfn = setup_memory(); 820 max_low_pfn = setup_memory();
821 821
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 674ef3510cdf..6b8e11f0c15d 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -398,6 +398,8 @@ void __init setup_arch(char **cmdline_p)
398 398
399 early_res_to_bootmem(); 399 early_res_to_bootmem();
400 400
401 dma32_reserve_bootmem();
402
401#ifdef CONFIG_ACPI_SLEEP 403#ifdef CONFIG_ACPI_SLEEP
402 /* 404 /*
403 * Reserve low memory region for sleep support. 405 * Reserve low memory region for sleep support.
@@ -420,11 +422,14 @@ void __init setup_arch(char **cmdline_p)
420 unsigned long end_of_mem = end_pfn << PAGE_SHIFT; 422 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
421 423
422 if (ramdisk_end <= end_of_mem) { 424 if (ramdisk_end <= end_of_mem) {
423 reserve_bootmem_generic(ramdisk_image, ramdisk_size); 425 /*
426 * don't need to reserve again, already reserved early
427 * in x86_64_start_kernel, and early_res_to_bootmem
428 * converts that to reserved in bootmem
429 */
424 initrd_start = ramdisk_image + PAGE_OFFSET; 430 initrd_start = ramdisk_image + PAGE_OFFSET;
425 initrd_end = initrd_start+ramdisk_size; 431 initrd_end = initrd_start+ramdisk_size;
426 } else { 432 } else {
427 /* Assumes everything on node 0 */
428 free_bootmem(ramdisk_image, ramdisk_size); 433 free_bootmem(ramdisk_image, ramdisk_size);
429 printk(KERN_ERR "initrd extends beyond end of memory " 434 printk(KERN_ERR "initrd extends beyond end of memory "
430 "(0x%08lx > 0x%08lx)\ndisabling initrd\n", 435 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e6abe8a49b1f..6a925394bc7e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -61,6 +61,7 @@
61#include <asm/mtrr.h> 61#include <asm/mtrr.h>
62#include <asm/nmi.h> 62#include <asm/nmi.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h>
64#include <linux/mc146818rtc.h> 65#include <linux/mc146818rtc.h>
65 66
66#include <mach_apic.h> 67#include <mach_apic.h>
@@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
677 unsigned long send_status, accept_status = 0; 678 unsigned long send_status, accept_status = 0;
678 int maxlvt, num_starts, j; 679 int maxlvt, num_starts, j;
679 680
681 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
682 send_status = uv_wakeup_secondary(phys_apicid, start_eip);
683 atomic_set(&init_deasserted, 1);
684 return send_status;
685 }
686
680 /* 687 /*
681 * Be paranoid about clearing APIC errors. 688 * Be paranoid about clearing APIC errors.
682 */ 689 */
@@ -918,16 +925,19 @@ do_rest:
918 925
919 atomic_set(&init_deasserted, 0); 926 atomic_set(&init_deasserted, 0);
920 927
921 Dprintk("Setting warm reset code and vector.\n"); 928 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
922 929
923 store_NMI_vector(&nmi_high, &nmi_low); 930 Dprintk("Setting warm reset code and vector.\n");
924 931
925 smpboot_setup_warm_reset_vector(start_ip); 932 store_NMI_vector(&nmi_high, &nmi_low);
926 /* 933
927 * Be paranoid about clearing APIC errors. 934 smpboot_setup_warm_reset_vector(start_ip);
928 */ 935 /*
929 apic_write(APIC_ESR, 0); 936 * Be paranoid about clearing APIC errors.
930 apic_read(APIC_ESR); 937 */
938 apic_write(APIC_ESR, 0);
939 apic_read(APIC_ESR);
940 }
931 941
932 /* 942 /*
933 * Starting actual IPI sequence... 943 * Starting actual IPI sequence...
@@ -966,7 +976,8 @@ do_rest:
966 else 976 else
967 /* trampoline code not run */ 977 /* trampoline code not run */
968 printk(KERN_ERR "Not responding.\n"); 978 printk(KERN_ERR "Not responding.\n");
969 inquire_remote_apic(apicid); 979 if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
980 inquire_remote_apic(apicid);
970 } 981 }
971 } 982 }
972 983
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 65791ca2824a..471e694d6713 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -681,7 +681,7 @@ gp_in_kernel:
681 } 681 }
682} 682}
683 683
684static __kprobes void 684static notrace __kprobes void
685mem_parity_error(unsigned char reason, struct pt_regs *regs) 685mem_parity_error(unsigned char reason, struct pt_regs *regs)
686{ 686{
687 printk(KERN_EMERG 687 printk(KERN_EMERG
@@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
707 clear_mem_error(reason); 707 clear_mem_error(reason);
708} 708}
709 709
710static __kprobes void 710static notrace __kprobes void
711io_check_error(unsigned char reason, struct pt_regs *regs) 711io_check_error(unsigned char reason, struct pt_regs *regs)
712{ 712{
713 unsigned long i; 713 unsigned long i;
@@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
727 outb(reason, 0x61); 727 outb(reason, 0x61);
728} 728}
729 729
730static __kprobes void 730static notrace __kprobes void
731unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 731unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
732{ 732{
733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 733 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
755 755
756static DEFINE_SPINLOCK(nmi_print_lock); 756static DEFINE_SPINLOCK(nmi_print_lock);
757 757
758void __kprobes die_nmi(struct pt_regs *regs, const char *msg) 758void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
759{ 759{
760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) 760 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
761 return; 761 return;
@@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
786 do_exit(SIGSEGV); 786 do_exit(SIGSEGV);
787} 787}
788 788
789static __kprobes void default_do_nmi(struct pt_regs *regs) 789static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
790{ 790{
791 unsigned char reason = 0; 791 unsigned char reason = 0;
792 792
@@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
828 828
829static int ignore_nmis; 829static int ignore_nmis;
830 830
831__kprobes void do_nmi(struct pt_regs *regs, long error_code) 831notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
832{ 832{
833 int cpu; 833 int cpu;
834 834
@@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void)
1148 struct thread_info *thread = current_thread_info(); 1148 struct thread_info *thread = current_thread_info();
1149 struct task_struct *tsk = thread->task; 1149 struct task_struct *tsk = thread->task;
1150 1150
1151 if (!tsk_used_math(tsk)) {
1152 local_irq_enable();
1153 /*
1154 * does a slab alloc which can sleep
1155 */
1156 if (init_fpu(tsk)) {
1157 /*
1158 * ran out of memory!
1159 */
1160 do_group_exit(SIGKILL);
1161 return;
1162 }
1163 local_irq_disable();
1164 }
1165
1151 clts(); /* Allow maths ops (or we recurse) */ 1166 clts(); /* Allow maths ops (or we recurse) */
1152 if (!tsk_used_math(tsk))
1153 init_fpu(tsk);
1154 restore_fpu(tsk); 1167 restore_fpu(tsk);
1155 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 1168 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1156 tsk->fpu_counter++; 1169 tsk->fpu_counter++;
@@ -1208,11 +1221,6 @@ void __init trap_init(void)
1208#endif 1221#endif
1209 set_trap_gate(19, &simd_coprocessor_error); 1222 set_trap_gate(19, &simd_coprocessor_error);
1210 1223
1211 /*
1212 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
1213 * Generate a build-time error if the alignment is wrong.
1214 */
1215 BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
1216 if (cpu_has_fxsr) { 1224 if (cpu_has_fxsr) {
1217 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1225 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1218 set_in_cr4(X86_CR4_OSFXSR); 1226 set_in_cr4(X86_CR4_OSFXSR);
@@ -1233,6 +1241,7 @@ void __init trap_init(void)
1233 1241
1234 set_bit(SYSCALL_VECTOR, used_vectors); 1242 set_bit(SYSCALL_VECTOR, used_vectors);
1235 1243
1244 init_thread_xstate();
1236 /* 1245 /*
1237 * Should be a barrier for any external CPU state: 1246 * Should be a barrier for any external CPU state:
1238 */ 1247 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 79aa6fc0815c..adff76ea97c4 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err)
600 oops_end(flags, regs, SIGSEGV); 600 oops_end(flags, regs, SIGSEGV);
601} 601}
602 602
603void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) 603notrace __kprobes void
604die_nmi(char *str, struct pt_regs *regs, int do_panic)
604{ 605{
605 unsigned long flags; 606 unsigned long flags;
606 607
@@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
772 die("general protection fault", regs, error_code); 773 die("general protection fault", regs, error_code);
773} 774}
774 775
775static __kprobes void 776static notrace __kprobes void
776mem_parity_error(unsigned char reason, struct pt_regs * regs) 777mem_parity_error(unsigned char reason, struct pt_regs * regs)
777{ 778{
778 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 779 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
@@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
796 outb(reason, 0x61); 797 outb(reason, 0x61);
797} 798}
798 799
799static __kprobes void 800static notrace __kprobes void
800io_check_error(unsigned char reason, struct pt_regs * regs) 801io_check_error(unsigned char reason, struct pt_regs * regs)
801{ 802{
802 printk("NMI: IOCK error (debug interrupt?)\n"); 803 printk("NMI: IOCK error (debug interrupt?)\n");
@@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
810 outb(reason, 0x61); 811 outb(reason, 0x61);
811} 812}
812 813
813static __kprobes void 814static notrace __kprobes void
814unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 815unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
815{ 816{
816 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 817 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
827 828
828/* Runs on IST stack. This code must keep interrupts off all the time. 829/* Runs on IST stack. This code must keep interrupts off all the time.
829 Nested NMIs are prevented by the CPU. */ 830 Nested NMIs are prevented by the CPU. */
830asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) 831asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
831{ 832{
832 unsigned char reason = 0; 833 unsigned char reason = 0;
833 int cpu; 834 int cpu;
@@ -1123,11 +1124,24 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1123asmlinkage void math_state_restore(void) 1124asmlinkage void math_state_restore(void)
1124{ 1125{
1125 struct task_struct *me = current; 1126 struct task_struct *me = current;
1126 clts(); /* Allow maths ops (or we recurse) */
1127 1127
1128 if (!used_math()) 1128 if (!used_math()) {
1129 init_fpu(me); 1129 local_irq_enable();
1130 restore_fpu_checking(&me->thread.i387.fxsave); 1130 /*
1131 * does a slab alloc which can sleep
1132 */
1133 if (init_fpu(me)) {
1134 /*
1135 * ran out of memory!
1136 */
1137 do_group_exit(SIGKILL);
1138 return;
1139 }
1140 local_irq_disable();
1141 }
1142
1143 clts(); /* Allow maths ops (or we recurse) */
1144 restore_fpu_checking(&me->thread.xstate->fxsave);
1131 task_thread_info(me)->status |= TS_USEDFPU; 1145 task_thread_info(me)->status |= TS_USEDFPU;
1132 me->fpu_counter++; 1146 me->fpu_counter++;
1133} 1147}
@@ -1163,6 +1177,10 @@ void __init trap_init(void)
1163#endif 1177#endif
1164 1178
1165 /* 1179 /*
1180 * initialize the per thread extended state:
1181 */
1182 init_thread_xstate();
1183 /*
1166 * Should be a barrier for any external CPU state. 1184 * Should be a barrier for any external CPU state.
1167 */ 1185 */
1168 cpu_init(); 1186 cpu_init();
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 3d7e6e9fa6c2..e4790728b224 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
221 * if the CPU frequency is scaled, TSC-based delays will need a different 221 * if the CPU frequency is scaled, TSC-based delays will need a different
222 * loops_per_jiffy value to function properly. 222 * loops_per_jiffy value to function properly.
223 */ 223 */
224static unsigned int ref_freq = 0; 224static unsigned int ref_freq;
225static unsigned long loops_per_jiffy_ref = 0; 225static unsigned long loops_per_jiffy_ref;
226static unsigned long cpu_khz_ref = 0; 226static unsigned long cpu_khz_ref;
227 227
228static int 228static int
229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) 229time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
@@ -283,15 +283,28 @@ core_initcall(cpufreq_tsc);
283 283
284/* clock source code */ 284/* clock source code */
285 285
286static unsigned long current_tsc_khz = 0; 286static unsigned long current_tsc_khz;
287static struct clocksource clocksource_tsc;
287 288
289/*
290 * We compare the TSC to the cycle_last value in the clocksource
291 * structure to avoid a nasty time-warp issue. This can be observed in
292 * a very small window right after one CPU updated cycle_last under
293 * xtime lock and the other CPU reads a TSC value which is smaller
294 * than the cycle_last reference value due to a TSC which is slightly
295 * behind. This delta is nowhere else observable, but in that case it
296 * results in a forward time jump in the range of hours due to the
297 * unsigned delta calculation of the time keeping core code, which is
298 * necessary to support wrapping clocksources like pm timer.
299 */
288static cycle_t read_tsc(void) 300static cycle_t read_tsc(void)
289{ 301{
290 cycle_t ret; 302 cycle_t ret;
291 303
292 rdtscll(ret); 304 rdtscll(ret);
293 305
294 return ret; 306 return ret >= clocksource_tsc.cycle_last ?
307 ret : clocksource_tsc.cycle_last;
295} 308}
296 309
297static struct clocksource clocksource_tsc = { 310static struct clocksource clocksource_tsc = {
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index ceeba01e7f47..fcc16e58609e 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -11,6 +11,7 @@
11#include <asm/hpet.h> 11#include <asm/hpet.h>
12#include <asm/timex.h> 12#include <asm/timex.h>
13#include <asm/timer.h> 13#include <asm/timer.h>
14#include <asm/vgtod.h>
14 15
15static int notsc __initdata = 0; 16static int notsc __initdata = 0;
16 17
@@ -287,18 +288,34 @@ int __init notsc_setup(char *s)
287 288
288__setup("notsc", notsc_setup); 289__setup("notsc", notsc_setup);
289 290
291static struct clocksource clocksource_tsc;
290 292
291/* clock source code: */ 293/*
294 * We compare the TSC to the cycle_last value in the clocksource
295 * structure to avoid a nasty time-warp. This can be observed in a
296 * very small window right after one CPU updated cycle_last under
297 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
298 * is smaller than the cycle_last reference value due to a TSC which
299 * is slightly behind. This delta is nowhere else observable, but in
300 * that case it results in a forward time jump in the range of hours
301 * due to the unsigned delta calculation of the time keeping core
302 * code, which is necessary to support wrapping clocksources like pm
303 * timer.
304 */
292static cycle_t read_tsc(void) 305static cycle_t read_tsc(void)
293{ 306{
294 cycle_t ret = (cycle_t)get_cycles(); 307 cycle_t ret = (cycle_t)get_cycles();
295 return ret; 308
309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
296} 311}
297 312
298static cycle_t __vsyscall_fn vread_tsc(void) 313static cycle_t __vsyscall_fn vread_tsc(void)
299{ 314{
300 cycle_t ret = (cycle_t)vget_cycles(); 315 cycle_t ret = (cycle_t)vget_cycles();
301 return ret; 316
317 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
318 ret : __vsyscall_gtod_data.clock.cycle_last;
302} 319}
303 320
304static struct clocksource clocksource_tsc = { 321static struct clocksource clocksource_tsc = {
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
index 710faf71a650..cef9cb1d15ac 100644
--- a/arch/x86/mach-visws/visws_apic.c
+++ b/arch/x86/mach-visws/visws_apic.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/arch/i386/mach-visws/visws_apic.c
3 *
4 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar 2 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
5 * 3 *
6 * SGI Visual Workstation interrupt controller 4 * SGI Visual Workstation interrupt controller
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
index 6a949e4edde8..46d6f8067690 100644
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ b/arch/x86/mach-voyager/voyager_basic.c
@@ -2,8 +2,6 @@
2 * 2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com 3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 * 4 *
5 * linux/arch/i386/kernel/voyager.c
6 *
7 * This file contains all the voyager specific routines for getting 5 * This file contains all the voyager specific routines for getting
8 * initialisation of the architecture to function. For additional 6 * initialisation of the architecture to function. For additional
9 * features see: 7 * features see:
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
index 17a7904f75b1..ecab9fff0fd1 100644
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ b/arch/x86/mach-voyager/voyager_cat.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_cat.c
8 *
9 * This file contains all the logic for manipulating the CAT bus 7 * This file contains all the logic for manipulating the CAT bus
10 * in a level 5 machine. 8 * in a level 5 machine.
11 * 9 *
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index be7235bf105d..96f60c7cd124 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_smp.c
8 *
9 * This file provides all the same external entries as smp.c but uses 7 * This file provides all the same external entries as smp.c but uses
10 * the voyager hal to provide the functionality 8 * the voyager hal to provide the functionality
11 */ 9 */
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
index c69c931818ed..15464a20fb38 100644
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ b/arch/x86/mach-voyager/voyager_thread.c
@@ -4,8 +4,6 @@
4 * 4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com 5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 * 6 *
7 * linux/arch/i386/kernel/voyager_thread.c
8 *
9 * This module provides the machine status monitor thread for the 7 * This module provides the machine status monitor thread for the
10 * voyager architecture. This allows us to monitor the machine 8 * voyager architecture. This allows us to monitor the machine
11 * environment (temp, voltage, fan function) and the front panel and 9 * environment (temp, voltage, fan function) and the front panel and
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 4bab3b145392..6e38d877ea77 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target,
678 unsigned int pos, unsigned int count, 678 unsigned int pos, unsigned int count,
679 const void *kbuf, const void __user *ubuf) 679 const void *kbuf, const void __user *ubuf)
680{ 680{
681 struct i387_soft_struct *s387 = &target->thread.i387.soft; 681 struct i387_soft_struct *s387 = &target->thread.xstate->soft;
682 void *space = s387->st_space; 682 void *space = s387->st_space;
683 int ret; 683 int ret;
684 int offset, other, i, tags, regnr, tag, newtop; 684 int offset, other, i, tags, regnr, tag, newtop;
@@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
730 unsigned int pos, unsigned int count, 730 unsigned int pos, unsigned int count,
731 void *kbuf, void __user *ubuf) 731 void *kbuf, void __user *ubuf)
732{ 732{
733 struct i387_soft_struct *s387 = &target->thread.i387.soft; 733 struct i387_soft_struct *s387 = &target->thread.xstate->soft;
734 const void *space = s387->st_space; 734 const void *space = s387->st_space;
735 int ret; 735 int ret;
736 int offset = (S387->ftop & 7) * 10, other = 80 - offset; 736 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index a3ae28c49ddd..13488fa153e0 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -35,8 +35,8 @@
35#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ 35#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
36 == (1 << 10)) 36 == (1 << 10))
37 37
38#define I387 (current->thread.i387) 38#define I387 (current->thread.xstate)
39#define FPU_info (I387.soft.info) 39#define FPU_info (I387->soft.info)
40 40
41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) 41#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) 42#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
@@ -46,25 +46,25 @@
46#define FPU_EIP (FPU_info->___eip) 46#define FPU_EIP (FPU_info->___eip)
47#define FPU_ORIG_EIP (FPU_info->___orig_eip) 47#define FPU_ORIG_EIP (FPU_info->___orig_eip)
48 48
49#define FPU_lookahead (I387.soft.lookahead) 49#define FPU_lookahead (I387->soft.lookahead)
50 50
51/* nz if ip_offset and cs_selector are not to be set for the current 51/* nz if ip_offset and cs_selector are not to be set for the current
52 instruction. */ 52 instruction. */
53#define no_ip_update (*(u_char *)&(I387.soft.no_update)) 53#define no_ip_update (*(u_char *)&(I387->soft.no_update))
54#define FPU_rm (*(u_char *)&(I387.soft.rm)) 54#define FPU_rm (*(u_char *)&(I387->soft.rm))
55 55
56/* Number of bytes of data which can be legally accessed by the current 56/* Number of bytes of data which can be legally accessed by the current
57 instruction. This only needs to hold a number <= 108, so a byte will do. */ 57 instruction. This only needs to hold a number <= 108, so a byte will do. */
58#define access_limit (*(u_char *)&(I387.soft.alimit)) 58#define access_limit (*(u_char *)&(I387->soft.alimit))
59 59
60#define partial_status (I387.soft.swd) 60#define partial_status (I387->soft.swd)
61#define control_word (I387.soft.cwd) 61#define control_word (I387->soft.cwd)
62#define fpu_tag_word (I387.soft.twd) 62#define fpu_tag_word (I387->soft.twd)
63#define registers (I387.soft.st_space) 63#define registers (I387->soft.st_space)
64#define top (I387.soft.ftop) 64#define top (I387->soft.ftop)
65 65
66#define instruction_address (*(struct address *)&I387.soft.fip) 66#define instruction_address (*(struct address *)&I387->soft.fip)
67#define operand_address (*(struct address *)&I387.soft.foo) 67#define operand_address (*(struct address *)&I387->soft.foo)
68 68
69#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ 69#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
70 math_abort(FPU_info,SIGSEGV) 70 math_abort(FPU_info,SIGSEGV)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index 02af772a24db..d597fe7423c9 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d)
1180 control_word |= 0xffff0040; 1180 control_word |= 0xffff0040;
1181 partial_status = status_word() | 0xffff0000; 1181 partial_status = status_word() | 0xffff0000;
1182 fpu_tag_word |= 0xffff0000; 1182 fpu_tag_word |= 0xffff0000;
1183 I387.soft.fcs &= ~0xf8000000; 1183 I387->soft.fcs &= ~0xf8000000;
1184 I387.soft.fos |= 0xffff0000; 1184 I387->soft.fos |= 0xffff0000;
1185#endif /* PECULIAR_486 */ 1185#endif /* PECULIAR_486 */
1186 if (__copy_to_user(d, &control_word, 7 * 4)) 1186 if (__copy_to_user(d, &control_word, 7 * 4))
1187 FPU_abort; 1187 FPU_abort;
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index eba0bbede7a6..18378850e25a 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
120 printk("NUMA - single node, flat memory mode\n"); 120 printk("NUMA - single node, flat memory mode\n");
121 121
122 /* Run the memory configuration and find the top of memory. */ 122 /* Run the memory configuration and find the top of memory. */
123 find_max_pfn(); 123 propagate_e820_map();
124 node_start_pfn[0] = 0; 124 node_start_pfn[0] = 0;
125 node_end_pfn[0] = max_pfn; 125 node_end_pfn[0] = max_pfn;
126 memory_present(0, 0, max_pfn); 126 memory_present(0, 0, max_pfn);
@@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void)
134/* 134/*
135 * Find the highest page frame number we have available for the node 135 * Find the highest page frame number we have available for the node
136 */ 136 */
137static void __init find_max_pfn_node(int nid) 137static void __init propagate_e820_map_node(int nid)
138{ 138{
139 if (node_end_pfn[nid] > max_pfn) 139 if (node_end_pfn[nid] > max_pfn)
140 node_end_pfn[nid] = max_pfn; 140 node_end_pfn[nid] = max_pfn;
@@ -379,7 +379,7 @@ unsigned long __init setup_memory(void)
379 printk("High memory starts at vaddr %08lx\n", 379 printk("High memory starts at vaddr %08lx\n",
380 (ulong) pfn_to_kaddr(highstart_pfn)); 380 (ulong) pfn_to_kaddr(highstart_pfn));
381 for_each_online_node(nid) 381 for_each_online_node(nid)
382 find_max_pfn_node(nid); 382 propagate_e820_map_node(nid);
383 383
384 memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); 384 memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
385 NODE_DATA(0)->bdata = &node0_bdata; 385 NODE_DATA(0)->bdata = &node0_bdata;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 1500dc8d63e4..9ec62da85fd7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1,5 +1,4 @@
1/* 1/*
2 * linux/arch/i386/mm/init.c
3 * 2 *
4 * Copyright (C) 1995 Linus Torvalds 3 * Copyright (C) 1995 Linus Torvalds
5 * 4 *
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1076097dcab2..1ff7906a9a4d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -47,9 +47,6 @@
47#include <asm/numa.h> 47#include <asm/numa.h>
48#include <asm/cacheflush.h> 48#include <asm/cacheflush.h>
49 49
50const struct dma_mapping_ops *dma_ops;
51EXPORT_SYMBOL(dma_ops);
52
53static unsigned long dma_reserve __initdata; 50static unsigned long dma_reserve __initdata;
54 51
55DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 52DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c590fd200e29..3a4baf95e24d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
134 134
135 if (!phys_addr_valid(phys_addr)) { 135 if (!phys_addr_valid(phys_addr)) {
136 printk(KERN_WARNING "ioremap: invalid physical address %llx\n", 136 printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
137 phys_addr); 137 (unsigned long long)phys_addr);
138 WARN_ON_ONCE(1); 138 WARN_ON_ONCE(1);
139 return NULL; 139 return NULL;
140 } 140 }
@@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
187 new_prot_val == _PAGE_CACHE_WB)) { 187 new_prot_val == _PAGE_CACHE_WB)) {
188 pr_debug( 188 pr_debug(
189 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", 189 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
190 phys_addr, phys_addr + size, 190 (unsigned long long)phys_addr,
191 (unsigned long long)(phys_addr + size),
191 prot_val, new_prot_val); 192 prot_val, new_prot_val);
192 free_memtype(phys_addr, phys_addr + size); 193 free_memtype(phys_addr, phys_addr + size);
193 return NULL; 194 return NULL;
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 7a2ebce87df5..86808e666f9c 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
164 if (!found) 164 if (!found)
165 return -1; 165 return -1;
166 166
167 memnode_shift = compute_hash_shift(nodes, 8); 167 memnode_shift = compute_hash_shift(nodes, 8, NULL);
168 if (memnode_shift < 0) { 168 if (memnode_shift < 0) {
169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 169 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
170 return -1; 170 return -1;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2ea56f48f29b..cb3170186355 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size;
60 * -1 if node overlap or lost ram (shift too big) 60 * -1 if node overlap or lost ram (shift too big)
61 */ 61 */
62static int __init populate_memnodemap(const struct bootnode *nodes, 62static int __init populate_memnodemap(const struct bootnode *nodes,
63 int numnodes, int shift) 63 int numnodes, int shift, int *nodeids)
64{ 64{
65 unsigned long addr, end; 65 unsigned long addr, end;
66 int i, res = -1; 66 int i, res = -1;
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
76 do { 76 do {
77 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 77 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
78 return -1; 78 return -1;
79 memnodemap[addr >> shift] = i; 79
80 if (!nodeids)
81 memnodemap[addr >> shift] = i;
82 else
83 memnodemap[addr >> shift] = nodeids[i];
84
80 addr += (1UL << shift); 85 addr += (1UL << shift);
81 } while (addr < end); 86 } while (addr < end);
82 res = 1; 87 res = 1;
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
139 return i; 144 return i;
140} 145}
141 146
142int __init compute_hash_shift(struct bootnode *nodes, int numnodes) 147int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
148 int *nodeids)
143{ 149{
144 int shift; 150 int shift;
145 151
@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
149 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 155 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
150 shift); 156 shift);
151 157
152 if (populate_memnodemap(nodes, numnodes, shift) != 1) { 158 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
153 printk(KERN_INFO "Your memory is not aligned you need to " 159 printk(KERN_INFO "Your memory is not aligned you need to "
154 "rebuild your kernel with a bigger NODEMAPSIZE " 160 "rebuild your kernel with a bigger NODEMAPSIZE "
155 "shift=%d\n", shift); 161 "shift=%d\n", shift);
@@ -462,7 +468,7 @@ done:
462 } 468 }
463 } 469 }
464out: 470out:
465 memnode_shift = compute_hash_shift(nodes, num_nodes); 471 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
466 if (memnode_shift < 0) { 472 if (memnode_shift < 0) {
467 memnode_shift = 0; 473 memnode_shift = 0;
468 printk(KERN_ERR "No NUMA hash function found. NUMA emulation " 474 printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 3165ec0672bd..6fb9e7c6893f 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -1,7 +1,3 @@
1/*
2 * linux/arch/i386/mm/pgtable.c
3 */
4
5#include <linux/sched.h> 1#include <linux/sched.h>
6#include <linux/kernel.h> 2#include <linux/kernel.h>
7#include <linux/errno.h> 3#include <linux/errno.h>
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 1bae9c855ceb..fb43d89f46f3 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
32static int found_add_area __initdata; 32static int found_add_area __initdata;
33int hotadd_percent __initdata = 0; 33int hotadd_percent __initdata = 0;
34 34
35static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
35/* Too small nodes confuse the VM badly. Usually they result 39/* Too small nodes confuse the VM badly. Usually they result
36 from BIOS bugs. */ 40 from BIOS bugs. */
37#define NODE_MIN_SIZE (4*1024*1024) 41#define NODE_MIN_SIZE (4*1024*1024)
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm)
41 return acpi_map_pxm_to_node(pxm); 45 return acpi_map_pxm_to_node(pxm);
42} 46}
43 47
44static __init int conflicting_nodes(unsigned long start, unsigned long end) 48static __init int conflicting_memblks(unsigned long start, unsigned long end)
45{ 49{
46 int i; 50 int i;
47 for_each_node_mask(i, nodes_parsed) { 51 for (i = 0; i < num_node_memblks; i++) {
48 struct bootnode *nd = &nodes[i]; 52 struct bootnode *nd = &node_memblk_range[i];
49 if (nd->start == nd->end) 53 if (nd->start == nd->end)
50 continue; 54 continue;
51 if (nd->end > start && nd->start < end) 55 if (nd->end > start && nd->start < end)
52 return i; 56 return memblk_nodeid[i];
53 if (nd->end == end && nd->start == start) 57 if (nd->end == end && nd->start == start)
54 return i; 58 return memblk_nodeid[i];
55 } 59 }
56 return -1; 60 return -1;
57} 61}
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
258 bad_srat(); 262 bad_srat();
259 return; 263 return;
260 } 264 }
261 i = conflicting_nodes(start, end); 265 i = conflicting_memblks(start, end);
262 if (i == node) { 266 if (i == node) {
263 printk(KERN_WARNING 267 printk(KERN_WARNING
264 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", 268 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
283 nd->end = end; 287 nd->end = end;
284 } 288 }
285 289
286 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 290 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
287 nd->start, nd->end); 291 start, end);
288 e820_register_active_regions(node, nd->start >> PAGE_SHIFT, 292 e820_register_active_regions(node, start >> PAGE_SHIFT,
289 nd->end >> PAGE_SHIFT); 293 end >> PAGE_SHIFT);
290 push_node_boundaries(node, nd->start >> PAGE_SHIFT, 294 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
291 nd->end >> PAGE_SHIFT); 295 nd->end >> PAGE_SHIFT);
292 296
@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
298 if ((nd->start | nd->end) == 0) 302 if ((nd->start | nd->end) == 0)
299 node_clear(node, nodes_parsed); 303 node_clear(node, nodes_parsed);
300 } 304 }
305
306 node_memblk_range[num_node_memblks].start = start;
307 node_memblk_range[num_node_memblks].end = end;
308 memblk_nodeid[num_node_memblks] = node;
309 num_node_memblks++;
301} 310}
302 311
303/* Sanity check to catch more bad SRATs (they are amazingly common). 312/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
368 return -1; 377 return -1;
369 } 378 }
370 379
371 memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); 380 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
381 memblk_nodeid);
372 if (memnode_shift < 0) { 382 if (memnode_shift < 0) {
373 printk(KERN_ERR 383 printk(KERN_ERR
374 "SRAT: No NUMA node hash function found. Contact maintainer\n"); 384 "SRAT: No NUMA node hash function found. Contact maintainer\n");
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 17a6b057856b..b7ad9f89d21f 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -37,7 +37,8 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
37$(obj)/%.so: $(obj)/%.so.dbg FORCE 37$(obj)/%.so: $(obj)/%.so.dbg FORCE
38 $(call if_changed,objcopy) 38 $(call if_changed,objcopy)
39 39
40CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 40CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
41 $(filter -g%,$(KBUILD_CFLAGS))
41 42
42$(vobjs): KBUILD_CFLAGS += $(CFL) 43$(vobjs): KBUILD_CFLAGS += $(CFL)
43 44
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
index 48fb38d7d2c0..4db42bff8c60 100644
--- a/arch/x86/video/fbdev.c
+++ b/arch/x86/video/fbdev.c
@@ -1,5 +1,4 @@
1/* 1/*
2 * arch/i386/video/fbdev.c - i386 Framebuffer
3 * 2 *
4 * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> 3 * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com>
5 * 4 *
diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h
index ed8affbf96cb..2faed7ecb092 100644
--- a/include/asm-x86/boot.h
+++ b/include/asm-x86/boot.h
@@ -17,4 +17,12 @@
17 + (CONFIG_PHYSICAL_ALIGN - 1)) \ 17 + (CONFIG_PHYSICAL_ALIGN - 1)) \
18 & ~(CONFIG_PHYSICAL_ALIGN - 1)) 18 & ~(CONFIG_PHYSICAL_ALIGN - 1))
19 19
20#ifdef CONFIG_X86_64
21#define BOOT_HEAP_SIZE 0x7000
22#define BOOT_STACK_SIZE 0x4000
23#else
24#define BOOT_HEAP_SIZE 0x4000
25#define BOOT_STACK_SIZE 0x1000
26#endif
27
20#endif /* _ASM_BOOT_H */ 28#endif /* _ASM_BOOT_H */
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index 58f790f4df52..a1a4dc7fe6ec 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -1,5 +1,237 @@
1#ifndef _ASM_DMA_MAPPING_H_
2#define _ASM_DMA_MAPPING_H_
3
4/*
5 * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
6 * documentation.
7 */
8
9#include <linux/scatterlist.h>
10#include <asm/io.h>
11#include <asm/swiotlb.h>
12
13extern dma_addr_t bad_dma_address;
14extern int iommu_merge;
15extern struct device fallback_dev;
16extern int panic_on_overflow;
17extern int forbid_dac;
18extern int force_iommu;
19
20struct dma_mapping_ops {
21 int (*mapping_error)(dma_addr_t dma_addr);
22 void* (*alloc_coherent)(struct device *dev, size_t size,
23 dma_addr_t *dma_handle, gfp_t gfp);
24 void (*free_coherent)(struct device *dev, size_t size,
25 void *vaddr, dma_addr_t dma_handle);
26 dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
27 size_t size, int direction);
28 /* like map_single, but doesn't check the device mask */
29 dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr,
30 size_t size, int direction);
31 void (*unmap_single)(struct device *dev, dma_addr_t addr,
32 size_t size, int direction);
33 void (*sync_single_for_cpu)(struct device *hwdev,
34 dma_addr_t dma_handle, size_t size,
35 int direction);
36 void (*sync_single_for_device)(struct device *hwdev,
37 dma_addr_t dma_handle, size_t size,
38 int direction);
39 void (*sync_single_range_for_cpu)(struct device *hwdev,
40 dma_addr_t dma_handle, unsigned long offset,
41 size_t size, int direction);
42 void (*sync_single_range_for_device)(struct device *hwdev,
43 dma_addr_t dma_handle, unsigned long offset,
44 size_t size, int direction);
45 void (*sync_sg_for_cpu)(struct device *hwdev,
46 struct scatterlist *sg, int nelems,
47 int direction);
48 void (*sync_sg_for_device)(struct device *hwdev,
49 struct scatterlist *sg, int nelems,
50 int direction);
51 int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
52 int nents, int direction);
53 void (*unmap_sg)(struct device *hwdev,
54 struct scatterlist *sg, int nents,
55 int direction);
56 int (*dma_supported)(struct device *hwdev, u64 mask);
57 int is_phys;
58};
59
60extern const struct dma_mapping_ops *dma_ops;
61
62static inline int dma_mapping_error(dma_addr_t dma_addr)
63{
64 if (dma_ops->mapping_error)
65 return dma_ops->mapping_error(dma_addr);
66
67 return (dma_addr == bad_dma_address);
68}
69
70#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
71#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
72
73void *dma_alloc_coherent(struct device *dev, size_t size,
74 dma_addr_t *dma_handle, gfp_t flag);
75
76void dma_free_coherent(struct device *dev, size_t size,
77 void *vaddr, dma_addr_t dma_handle);
78
79
80extern int dma_supported(struct device *hwdev, u64 mask);
81extern int dma_set_mask(struct device *dev, u64 mask);
82
83static inline dma_addr_t
84dma_map_single(struct device *hwdev, void *ptr, size_t size,
85 int direction)
86{
87 BUG_ON(!valid_dma_direction(direction));
88 return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
89}
90
91static inline void
92dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
93 int direction)
94{
95 BUG_ON(!valid_dma_direction(direction));
96 if (dma_ops->unmap_single)
97 dma_ops->unmap_single(dev, addr, size, direction);
98}
99
100static inline int
101dma_map_sg(struct device *hwdev, struct scatterlist *sg,
102 int nents, int direction)
103{
104 BUG_ON(!valid_dma_direction(direction));
105 return dma_ops->map_sg(hwdev, sg, nents, direction);
106}
107
108static inline void
109dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
110 int direction)
111{
112 BUG_ON(!valid_dma_direction(direction));
113 if (dma_ops->unmap_sg)
114 dma_ops->unmap_sg(hwdev, sg, nents, direction);
115}
116
117static inline void
118dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
119 size_t size, int direction)
120{
121 BUG_ON(!valid_dma_direction(direction));
122 if (dma_ops->sync_single_for_cpu)
123 dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
124 direction);
125 flush_write_buffers();
126}
127
128static inline void
129dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
130 size_t size, int direction)
131{
132 BUG_ON(!valid_dma_direction(direction));
133 if (dma_ops->sync_single_for_device)
134 dma_ops->sync_single_for_device(hwdev, dma_handle, size,
135 direction);
136 flush_write_buffers();
137}
138
139static inline void
140dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
141 unsigned long offset, size_t size, int direction)
142{
143 BUG_ON(!valid_dma_direction(direction));
144 if (dma_ops->sync_single_range_for_cpu)
145 dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
146 size, direction);
147
148 flush_write_buffers();
149}
150
151static inline void
152dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
153 unsigned long offset, size_t size,
154 int direction)
155{
156 BUG_ON(!valid_dma_direction(direction));
157 if (dma_ops->sync_single_range_for_device)
158 dma_ops->sync_single_range_for_device(hwdev, dma_handle,
159 offset, size, direction);
160
161 flush_write_buffers();
162}
163
164static inline void
165dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
166 int nelems, int direction)
167{
168 BUG_ON(!valid_dma_direction(direction));
169 if (dma_ops->sync_sg_for_cpu)
170 dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
171 flush_write_buffers();
172}
173
174static inline void
175dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
176 int nelems, int direction)
177{
178 BUG_ON(!valid_dma_direction(direction));
179 if (dma_ops->sync_sg_for_device)
180 dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
181
182 flush_write_buffers();
183}
184
185static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
186 size_t offset, size_t size,
187 int direction)
188{
189 BUG_ON(!valid_dma_direction(direction));
190 return dma_ops->map_single(dev, page_to_phys(page)+offset,
191 size, direction);
192}
193
194static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
195 size_t size, int direction)
196{
197 dma_unmap_single(dev, addr, size, direction);
198}
199
200static inline void
201dma_cache_sync(struct device *dev, void *vaddr, size_t size,
202 enum dma_data_direction dir)
203{
204 flush_write_buffers();
205}
206
207static inline int dma_get_cache_alignment(void)
208{
209 /* no easy way to get cache size on all x86, so return the
210 * maximum possible, to be safe */
211 return boot_cpu_data.x86_clflush_size;
212}
213
214#define dma_is_consistent(d, h) (1)
215
1#ifdef CONFIG_X86_32 216#ifdef CONFIG_X86_32
2# include "dma-mapping_32.h" 217# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
3#else 218struct dma_coherent_mem {
4# include "dma-mapping_64.h" 219 void *virt_base;
220 u32 device_base;
221 int size;
222 int flags;
223 unsigned long *bitmap;
224};
225
226extern int
227dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
228 dma_addr_t device_addr, size_t size, int flags);
229
230extern void
231dma_release_declared_memory(struct device *dev);
232
233extern void *
234dma_mark_declared_memory_occupied(struct device *dev,
235 dma_addr_t device_addr, size_t size);
236#endif /* CONFIG_X86_32 */
5#endif 237#endif
diff --git a/include/asm-x86/dma-mapping_32.h b/include/asm-x86/dma-mapping_32.h
deleted file mode 100644
index 55f01bd9e556..000000000000
--- a/include/asm-x86/dma-mapping_32.h
+++ /dev/null
@@ -1,187 +0,0 @@
1#ifndef _ASM_I386_DMA_MAPPING_H
2#define _ASM_I386_DMA_MAPPING_H
3
4#include <linux/mm.h>
5#include <linux/scatterlist.h>
6
7#include <asm/cache.h>
8#include <asm/io.h>
9#include <asm/bug.h>
10
11#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
12#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
13
14void *dma_alloc_coherent(struct device *dev, size_t size,
15 dma_addr_t *dma_handle, gfp_t flag);
16
17void dma_free_coherent(struct device *dev, size_t size,
18 void *vaddr, dma_addr_t dma_handle);
19
20static inline dma_addr_t
21dma_map_single(struct device *dev, void *ptr, size_t size,
22 enum dma_data_direction direction)
23{
24 BUG_ON(!valid_dma_direction(direction));
25 WARN_ON(size == 0);
26 flush_write_buffers();
27 return virt_to_phys(ptr);
28}
29
30static inline void
31dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
32 enum dma_data_direction direction)
33{
34 BUG_ON(!valid_dma_direction(direction));
35}
36
37static inline int
38dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
39 enum dma_data_direction direction)
40{
41 struct scatterlist *sg;
42 int i;
43
44 BUG_ON(!valid_dma_direction(direction));
45 WARN_ON(nents == 0 || sglist[0].length == 0);
46
47 for_each_sg(sglist, sg, nents, i) {
48 BUG_ON(!sg_page(sg));
49
50 sg->dma_address = sg_phys(sg);
51 }
52
53 flush_write_buffers();
54 return nents;
55}
56
57static inline dma_addr_t
58dma_map_page(struct device *dev, struct page *page, unsigned long offset,
59 size_t size, enum dma_data_direction direction)
60{
61 BUG_ON(!valid_dma_direction(direction));
62 return page_to_phys(page) + offset;
63}
64
65static inline void
66dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
67 enum dma_data_direction direction)
68{
69 BUG_ON(!valid_dma_direction(direction));
70}
71
72
73static inline void
74dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
75 enum dma_data_direction direction)
76{
77 BUG_ON(!valid_dma_direction(direction));
78}
79
80static inline void
81dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
82 enum dma_data_direction direction)
83{
84}
85
86static inline void
87dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
88 enum dma_data_direction direction)
89{
90 flush_write_buffers();
91}
92
93static inline void
94dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
95 unsigned long offset, size_t size,
96 enum dma_data_direction direction)
97{
98}
99
100static inline void
101dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
102 unsigned long offset, size_t size,
103 enum dma_data_direction direction)
104{
105 flush_write_buffers();
106}
107
108static inline void
109dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
110 enum dma_data_direction direction)
111{
112}
113
114static inline void
115dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
116 enum dma_data_direction direction)
117{
118 flush_write_buffers();
119}
120
121static inline int
122dma_mapping_error(dma_addr_t dma_addr)
123{
124 return 0;
125}
126
127extern int forbid_dac;
128
129static inline int
130dma_supported(struct device *dev, u64 mask)
131{
132 /*
133 * we fall back to GFP_DMA when the mask isn't all 1s,
134 * so we can't guarantee allocations that must be
135 * within a tighter range than GFP_DMA..
136 */
137 if(mask < 0x00ffffff)
138 return 0;
139
140 /* Work around chipset bugs */
141 if (forbid_dac > 0 && mask > 0xffffffffULL)
142 return 0;
143
144 return 1;
145}
146
147static inline int
148dma_set_mask(struct device *dev, u64 mask)
149{
150 if(!dev->dma_mask || !dma_supported(dev, mask))
151 return -EIO;
152
153 *dev->dma_mask = mask;
154
155 return 0;
156}
157
158static inline int
159dma_get_cache_alignment(void)
160{
161 /* no easy way to get cache size on all x86, so return the
162 * maximum possible, to be safe */
163 return (1 << INTERNODE_CACHE_SHIFT);
164}
165
166#define dma_is_consistent(d, h) (1)
167
168static inline void
169dma_cache_sync(struct device *dev, void *vaddr, size_t size,
170 enum dma_data_direction direction)
171{
172 flush_write_buffers();
173}
174
175#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
176extern int
177dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
178 dma_addr_t device_addr, size_t size, int flags);
179
180extern void
181dma_release_declared_memory(struct device *dev);
182
183extern void *
184dma_mark_declared_memory_occupied(struct device *dev,
185 dma_addr_t device_addr, size_t size);
186
187#endif
diff --git a/include/asm-x86/dma-mapping_64.h b/include/asm-x86/dma-mapping_64.h
deleted file mode 100644
index ecd0f6125ba3..000000000000
--- a/include/asm-x86/dma-mapping_64.h
+++ /dev/null
@@ -1,202 +0,0 @@
1#ifndef _X8664_DMA_MAPPING_H
2#define _X8664_DMA_MAPPING_H 1
3
4/*
5 * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
6 * documentation.
7 */
8
9#include <linux/scatterlist.h>
10#include <asm/io.h>
11#include <asm/swiotlb.h>
12
13struct dma_mapping_ops {
14 int (*mapping_error)(dma_addr_t dma_addr);
15 void* (*alloc_coherent)(struct device *dev, size_t size,
16 dma_addr_t *dma_handle, gfp_t gfp);
17 void (*free_coherent)(struct device *dev, size_t size,
18 void *vaddr, dma_addr_t dma_handle);
19 dma_addr_t (*map_single)(struct device *hwdev, void *ptr,
20 size_t size, int direction);
21 /* like map_single, but doesn't check the device mask */
22 dma_addr_t (*map_simple)(struct device *hwdev, char *ptr,
23 size_t size, int direction);
24 void (*unmap_single)(struct device *dev, dma_addr_t addr,
25 size_t size, int direction);
26 void (*sync_single_for_cpu)(struct device *hwdev,
27 dma_addr_t dma_handle, size_t size,
28 int direction);
29 void (*sync_single_for_device)(struct device *hwdev,
30 dma_addr_t dma_handle, size_t size,
31 int direction);
32 void (*sync_single_range_for_cpu)(struct device *hwdev,
33 dma_addr_t dma_handle, unsigned long offset,
34 size_t size, int direction);
35 void (*sync_single_range_for_device)(struct device *hwdev,
36 dma_addr_t dma_handle, unsigned long offset,
37 size_t size, int direction);
38 void (*sync_sg_for_cpu)(struct device *hwdev,
39 struct scatterlist *sg, int nelems,
40 int direction);
41 void (*sync_sg_for_device)(struct device *hwdev,
42 struct scatterlist *sg, int nelems,
43 int direction);
44 int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
45 int nents, int direction);
46 void (*unmap_sg)(struct device *hwdev,
47 struct scatterlist *sg, int nents,
48 int direction);
49 int (*dma_supported)(struct device *hwdev, u64 mask);
50 int is_phys;
51};
52
53extern dma_addr_t bad_dma_address;
54extern const struct dma_mapping_ops* dma_ops;
55extern int iommu_merge;
56
57static inline int dma_mapping_error(dma_addr_t dma_addr)
58{
59 if (dma_ops->mapping_error)
60 return dma_ops->mapping_error(dma_addr);
61
62 return (dma_addr == bad_dma_address);
63}
64
65#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
66#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
67
68#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
69#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
70
71extern void *dma_alloc_coherent(struct device *dev, size_t size,
72 dma_addr_t *dma_handle, gfp_t gfp);
73extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
74 dma_addr_t dma_handle);
75
76static inline dma_addr_t
77dma_map_single(struct device *hwdev, void *ptr, size_t size,
78 int direction)
79{
80 BUG_ON(!valid_dma_direction(direction));
81 return dma_ops->map_single(hwdev, ptr, size, direction);
82}
83
84static inline void
85dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
86 int direction)
87{
88 BUG_ON(!valid_dma_direction(direction));
89 dma_ops->unmap_single(dev, addr, size, direction);
90}
91
92#define dma_map_page(dev,page,offset,size,dir) \
93 dma_map_single((dev), page_address(page)+(offset), (size), (dir))
94
95#define dma_unmap_page dma_unmap_single
96
97static inline void
98dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
99 size_t size, int direction)
100{
101 BUG_ON(!valid_dma_direction(direction));
102 if (dma_ops->sync_single_for_cpu)
103 dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
104 direction);
105 flush_write_buffers();
106}
107
108static inline void
109dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
110 size_t size, int direction)
111{
112 BUG_ON(!valid_dma_direction(direction));
113 if (dma_ops->sync_single_for_device)
114 dma_ops->sync_single_for_device(hwdev, dma_handle, size,
115 direction);
116 flush_write_buffers();
117}
118
119static inline void
120dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
121 unsigned long offset, size_t size, int direction)
122{
123 BUG_ON(!valid_dma_direction(direction));
124 if (dma_ops->sync_single_range_for_cpu) {
125 dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
126 }
127
128 flush_write_buffers();
129}
130
131static inline void
132dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
133 unsigned long offset, size_t size, int direction)
134{
135 BUG_ON(!valid_dma_direction(direction));
136 if (dma_ops->sync_single_range_for_device)
137 dma_ops->sync_single_range_for_device(hwdev, dma_handle,
138 offset, size, direction);
139
140 flush_write_buffers();
141}
142
143static inline void
144dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
145 int nelems, int direction)
146{
147 BUG_ON(!valid_dma_direction(direction));
148 if (dma_ops->sync_sg_for_cpu)
149 dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
150 flush_write_buffers();
151}
152
153static inline void
154dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
155 int nelems, int direction)
156{
157 BUG_ON(!valid_dma_direction(direction));
158 if (dma_ops->sync_sg_for_device) {
159 dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
160 }
161
162 flush_write_buffers();
163}
164
165static inline int
166dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
167{
168 BUG_ON(!valid_dma_direction(direction));
169 return dma_ops->map_sg(hwdev, sg, nents, direction);
170}
171
172static inline void
173dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
174 int direction)
175{
176 BUG_ON(!valid_dma_direction(direction));
177 dma_ops->unmap_sg(hwdev, sg, nents, direction);
178}
179
180extern int dma_supported(struct device *hwdev, u64 mask);
181
182/* same for gart, swiotlb, and nommu */
183static inline int dma_get_cache_alignment(void)
184{
185 return boot_cpu_data.x86_clflush_size;
186}
187
188#define dma_is_consistent(d, h) 1
189
190extern int dma_set_mask(struct device *dev, u64 mask);
191
192static inline void
193dma_cache_sync(struct device *dev, void *vaddr, size_t size,
194 enum dma_data_direction dir)
195{
196 flush_write_buffers();
197}
198
199extern struct device fallback_dev;
200extern int panic_on_overflow;
201
202#endif /* _X8664_DMA_MAPPING_H */
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 43b1a8bd4b34..a9f7c6ec32bf 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -24,7 +24,7 @@ extern void update_e820(void);
24extern int e820_all_mapped(unsigned long start, unsigned long end, 24extern int e820_all_mapped(unsigned long start, unsigned long end,
25 unsigned type); 25 unsigned type);
26extern int e820_any_mapped(u64 start, u64 end, unsigned type); 26extern int e820_any_mapped(u64 start, u64 end, unsigned type);
27extern void find_max_pfn(void); 27extern void propagate_e820_map(void);
28extern void register_bootmem_low_pages(unsigned long max_low_pfn); 28extern void register_bootmem_low_pages(unsigned long max_low_pfn);
29extern void add_memory_region(unsigned long long start, 29extern void add_memory_region(unsigned long long start,
30 unsigned long long size, int type); 30 unsigned long long size, int type);
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index f1b96932746b..b02ea6e17de8 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -117,6 +117,7 @@ extern struct genapic *genapic;
117enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 117enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
118#define get_uv_system_type() UV_NONE 118#define get_uv_system_type() UV_NONE
119#define is_uv_system() 0 119#define is_uv_system() 0
120#define uv_wakeup_secondary(a, b) 1
120 121
121 122
122#endif 123#endif
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 54522b814f1c..da2adb45f6e3 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -21,8 +21,9 @@
21 21
22extern void fpu_init(void); 22extern void fpu_init(void);
23extern void mxcsr_feature_mask_init(void); 23extern void mxcsr_feature_mask_init(void);
24extern void init_fpu(struct task_struct *child); 24extern int init_fpu(struct task_struct *child);
25extern asmlinkage void math_state_restore(void); 25extern asmlinkage void math_state_restore(void);
26extern void init_thread_xstate(void);
26 27
27extern user_regset_active_fn fpregs_active, xfpregs_active; 28extern user_regset_active_fn fpregs_active, xfpregs_active;
28extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; 29extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk)
117 /* Using "fxsaveq %0" would be the ideal choice, but is only supported 118 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
118 starting with gas 2.16. */ 119 starting with gas 2.16. */
119 __asm__ __volatile__("fxsaveq %0" 120 __asm__ __volatile__("fxsaveq %0"
120 : "=m" (tsk->thread.i387.fxsave)); 121 : "=m" (tsk->thread.xstate->fxsave));
121#elif 0 122#elif 0
122 /* Using, as a workaround, the properly prefixed form below isn't 123 /* Using, as a workaround, the properly prefixed form below isn't
123 accepted by any binutils version so far released, complaining that 124 accepted by any binutils version so far released, complaining that
124 the same type of prefix is used twice if an extended register is 125 the same type of prefix is used twice if an extended register is
125 needed for addressing (fix submitted to mainline 2005-11-21). */ 126 needed for addressing (fix submitted to mainline 2005-11-21). */
126 __asm__ __volatile__("rex64/fxsave %0" 127 __asm__ __volatile__("rex64/fxsave %0"
127 : "=m" (tsk->thread.i387.fxsave)); 128 : "=m" (tsk->thread.xstate->fxsave));
128#else 129#else
129 /* This, however, we can work around by forcing the compiler to select 130 /* This, however, we can work around by forcing the compiler to select
130 an addressing mode that doesn't require extended registers. */ 131 an addressing mode that doesn't require extended registers. */
131 __asm__ __volatile__("rex64/fxsave %P2(%1)" 132 __asm__ __volatile__("rex64/fxsave (%1)"
132 : "=m" (tsk->thread.i387.fxsave) 133 : "=m" (tsk->thread.xstate->fxsave)
133 : "cdaSDb" (tsk), 134 : "cdaSDb" (&tsk->thread.xstate->fxsave));
134 "i" (offsetof(__typeof__(*tsk),
135 thread.i387.fxsave)));
136#endif 135#endif
137 clear_fpu_state(&tsk->thread.i387.fxsave); 136 clear_fpu_state(&tsk->thread.xstate->fxsave);
138 task_thread_info(tsk)->status &= ~TS_USEDFPU; 137 task_thread_info(tsk)->status &= ~TS_USEDFPU;
139} 138}
140 139
@@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf)
148 int err = 0; 147 int err = 0;
149 148
150 BUILD_BUG_ON(sizeof(struct user_i387_struct) != 149 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
151 sizeof(tsk->thread.i387.fxsave)); 150 sizeof(tsk->thread.xstate->fxsave));
152 151
153 if ((unsigned long)buf % 16) 152 if ((unsigned long)buf % 16)
154 printk("save_i387: bad fpstate %p\n", buf); 153 printk("save_i387: bad fpstate %p\n", buf);
@@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf)
164 task_thread_info(tsk)->status &= ~TS_USEDFPU; 163 task_thread_info(tsk)->status &= ~TS_USEDFPU;
165 stts(); 164 stts();
166 } else { 165 } else {
167 if (__copy_to_user(buf, &tsk->thread.i387.fxsave, 166 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
168 sizeof(struct i387_fxsave_struct))) 167 sizeof(struct i387_fxsave_struct)))
169 return -1; 168 return -1;
170 } 169 }
@@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk)
201 "nop ; frstor %1", 200 "nop ; frstor %1",
202 "fxrstor %1", 201 "fxrstor %1",
203 X86_FEATURE_FXSR, 202 X86_FEATURE_FXSR,
204 "m" ((tsk)->thread.i387.fxsave)); 203 "m" (tsk->thread.xstate->fxsave));
205} 204}
206 205
207/* We need a safe address that is cheap to find and that is already 206/* We need a safe address that is cheap to find and that is already
@@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk)
225 "fxsave %[fx]\n" 224 "fxsave %[fx]\n"
226 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", 225 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
227 X86_FEATURE_FXSR, 226 X86_FEATURE_FXSR,
228 [fx] "m" (tsk->thread.i387.fxsave), 227 [fx] "m" (tsk->thread.xstate->fxsave),
229 [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); 228 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
230 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 229 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
231 is pending. Clear the x87 state here by setting it to fixed 230 is pending. Clear the x87 state here by setting it to fixed
232 values. safe_address is a random variable that should be in L1 */ 231 values. safe_address is a random variable that should be in L1 */
@@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk)
327static inline unsigned short get_fpu_cwd(struct task_struct *tsk) 326static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
328{ 327{
329 if (cpu_has_fxsr) { 328 if (cpu_has_fxsr) {
330 return tsk->thread.i387.fxsave.cwd; 329 return tsk->thread.xstate->fxsave.cwd;
331 } else { 330 } else {
332 return (unsigned short)tsk->thread.i387.fsave.cwd; 331 return (unsigned short)tsk->thread.xstate->fsave.cwd;
333 } 332 }
334} 333}
335 334
336static inline unsigned short get_fpu_swd(struct task_struct *tsk) 335static inline unsigned short get_fpu_swd(struct task_struct *tsk)
337{ 336{
338 if (cpu_has_fxsr) { 337 if (cpu_has_fxsr) {
339 return tsk->thread.i387.fxsave.swd; 338 return tsk->thread.xstate->fxsave.swd;
340 } else { 339 } else {
341 return (unsigned short)tsk->thread.i387.fsave.swd; 340 return (unsigned short)tsk->thread.xstate->fsave.swd;
342 } 341 }
343} 342}
344 343
345static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) 344static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
346{ 345{
347 if (cpu_has_xmm) { 346 if (cpu_has_xmm) {
348 return tsk->thread.i387.fxsave.mxcsr; 347 return tsk->thread.xstate->fxsave.mxcsr;
349 } else { 348 } else {
350 return MXCSR_DEFAULT; 349 return MXCSR_DEFAULT;
351 } 350 }
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 32c22ae0709f..22e87c9f6a80 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -9,7 +9,8 @@ struct bootnode {
9 u64 end; 9 u64 end;
10}; 10};
11 11
12extern int compute_hash_shift(struct bootnode *nodes, int numnodes); 12extern int compute_hash_shift(struct bootnode *nodes, int numblks,
13 int *nodeids);
13 14
14#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
15 16
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index df867e5d80b1..f330234ffa5c 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -22,6 +22,7 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
22extern int (*pci_config_write)(int seg, int bus, int dev, int fn, 22extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
23 int reg, int len, u32 value); 23 int reg, int len, u32 value);
24 24
25extern void dma32_reserve_bootmem(void);
25extern void pci_iommu_alloc(void); 26extern void pci_iommu_alloc(void);
26 27
27/* The PCI address space does equal the physical memory 28/* The PCI address space does equal the physical memory
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 6e26c7c717a2..e6bf92ddeb21 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -354,7 +354,7 @@ struct i387_soft_struct {
354 u32 entry_eip; 354 u32 entry_eip;
355}; 355};
356 356
357union i387_union { 357union thread_xstate {
358 struct i387_fsave_struct fsave; 358 struct i387_fsave_struct fsave;
359 struct i387_fxsave_struct fxsave; 359 struct i387_fxsave_struct fxsave;
360 struct i387_soft_struct soft; 360 struct i387_soft_struct soft;
@@ -365,6 +365,9 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);
365#endif 365#endif
366 366
367extern void print_cpu_info(struct cpuinfo_x86 *); 367extern void print_cpu_info(struct cpuinfo_x86 *);
368extern unsigned int xstate_size;
369extern void free_thread_xstate(struct task_struct *);
370extern struct kmem_cache *task_xstate_cachep;
368extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 371extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
369extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 372extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
370extern unsigned short num_cache_leaves; 373extern unsigned short num_cache_leaves;
@@ -397,8 +400,8 @@ struct thread_struct {
397 unsigned long cr2; 400 unsigned long cr2;
398 unsigned long trap_no; 401 unsigned long trap_no;
399 unsigned long error_code; 402 unsigned long error_code;
400 /* Floating point info: */ 403 /* floating point and extended processor state */
401 union i387_union i387 __attribute__((aligned(16)));; 404 union thread_xstate *xstate;
402#ifdef CONFIG_X86_32 405#ifdef CONFIG_X86_32
403 /* Virtual 86 mode info */ 406 /* Virtual 86 mode info */
404 struct vm86_struct __user *vm86_info; 407 struct vm86_struct __user *vm86_info;
@@ -918,4 +921,11 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
918 921
919#define KSTK_EIP(task) (task_pt_regs(task)->ip) 922#define KSTK_EIP(task) (task_pt_regs(task)->ip)
920 923
924/* Get/set a process' ability to use the timestamp counter instruction */
925#define GET_TSC_CTL(adr) get_tsc_mode((adr))
926#define SET_TSC_CTL(val) set_tsc_mode((val))
927
928extern int get_tsc_mode(unsigned long adr);
929extern int set_tsc_mode(unsigned int val);
930
921#endif 931#endif
diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index d13c197866d6..c0432061f81a 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h
@@ -11,9 +11,7 @@ struct scatterlist {
11 unsigned int offset; 11 unsigned int offset;
12 unsigned int length; 12 unsigned int length;
13 dma_addr_t dma_address; 13 dma_addr_t dma_address;
14#ifdef CONFIG_X86_64
15 unsigned int dma_length; 14 unsigned int dma_length;
16#endif
17}; 15};
18 16
19#define ARCH_HAS_SG_CHAIN 17#define ARCH_HAS_SG_CHAIN
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index d5fd12f2abdb..77244f17993f 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -1,5 +1,14 @@
1#ifndef _ASM_X86_THREAD_INFO_H
1#ifdef CONFIG_X86_32 2#ifdef CONFIG_X86_32
2# include "thread_info_32.h" 3# include "thread_info_32.h"
3#else 4#else
4# include "thread_info_64.h" 5# include "thread_info_64.h"
5#endif 6#endif
7
8#ifndef __ASSEMBLY__
9extern void arch_task_cache_init(void);
10extern void free_thread_info(struct thread_info *ti);
11extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
12#define arch_task_cache_init arch_task_cache_init
13#endif
14#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index 4e053fa561a9..531859962096 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void)
102 __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) 102 __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
103#endif 103#endif
104 104
105#define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE))
106
107#else /* !__ASSEMBLY__ */ 105#else /* !__ASSEMBLY__ */
108 106
109/* how to get the thread information struct from ASM */ 107/* how to get the thread information struct from ASM */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index 1e5c6f6152cd..ed664e874dec 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void)
85#define alloc_thread_info(tsk) \ 85#define alloc_thread_info(tsk) \
86 ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) 86 ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
87 87
88#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
89
90#else /* !__ASSEMBLY__ */ 88#else /* !__ASSEMBLY__ */
91 89
92/* how to get the thread information struct from ASM */ 90/* how to get the thread information struct from ASM */
@@ -126,6 +124,7 @@ static inline struct thread_info *stack_thread_info(void)
126#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 124#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
127#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 125#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
128#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ 126#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
127#define TIF_NOTSC 28 /* TSC is not accessible in userland */
129 128
130#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 129#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
131#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 130#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -147,6 +146,7 @@ static inline struct thread_info *stack_thread_info(void)
147#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 146#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
148#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 147#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
149#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) 148#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
149#define _TIF_NOTSC (1 << TIF_NOTSC)
150 150
151/* work to do on interrupt/exception return */ 151/* work to do on interrupt/exception return */
152#define _TIF_WORK_MASK \ 152#define _TIF_WORK_MASK \
@@ -160,7 +160,7 @@ static inline struct thread_info *stack_thread_info(void)
160 160
161/* flags to check in __switch_to() */ 161/* flags to check in __switch_to() */
162#define _TIF_WORK_CTXSW \ 162#define _TIF_WORK_CTXSW \
163 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS) 163 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC)
164#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW 164#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
165#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 165#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
166 166
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index d2d8eb5b55f5..0434bd8349a7 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -18,6 +18,7 @@ extern unsigned int cpu_khz;
18extern unsigned int tsc_khz; 18extern unsigned int tsc_khz;
19 19
20extern void disable_TSC(void); 20extern void disable_TSC(void);
21extern void enable_TSC(void);
21 22
22static inline cycles_t get_cycles(void) 23static inline cycles_t get_cycles(void)
23{ 24{
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 14813b595802..a5f359a7ad0e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -18,6 +18,7 @@
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/rtc.h> 19#include <linux/rtc.h>
20#include <linux/ioport.h> 20#include <linux/ioport.h>
21#include <linux/pfn.h>
21 22
22#include <asm/page.h> 23#include <asm/page.h>
23#include <asm/system.h> 24#include <asm/system.h>
@@ -394,4 +395,10 @@ struct efi_generic_dev_path {
394 u16 length; 395 u16 length;
395} __attribute ((packed)); 396} __attribute ((packed));
396 397
398static inline void memrange_efi_to_native(u64 *addr, u64 *npages)
399{
400 *npages = PFN_UP(*addr + (*npages<<EFI_PAGE_SHIFT)) - PFN_DOWN(*addr);
401 *addr &= PAGE_MASK;
402}
403
397#endif /* _LINUX_EFI_H */ 404#endif /* _LINUX_EFI_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 412e025bc5c7..e600c4e9b8c5 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -84,10 +84,10 @@
84 84
85#define irqs_disabled() \ 85#define irqs_disabled() \
86({ \ 86({ \
87 unsigned long flags; \ 87 unsigned long _flags; \
88 \ 88 \
89 raw_local_save_flags(flags); \ 89 raw_local_save_flags(_flags); \
90 raw_irqs_disabled_flags(flags); \ 90 raw_irqs_disabled_flags(_flags); \
91}) 91})
92 92
93#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) 93#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 3800639775ae..5c80b1939636 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -67,4 +67,10 @@
67#define PR_CAPBSET_READ 23 67#define PR_CAPBSET_READ 23
68#define PR_CAPBSET_DROP 24 68#define PR_CAPBSET_DROP 24
69 69
70/* Get/set the process' ability to use the timestamp counter instruction */
71#define PR_GET_TSC 25
72#define PR_SET_TSC 26
73# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
74# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
75
70#endif /* _LINUX_PRCTL_H */ 76#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c042f901570..89fe414645e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -132,6 +132,14 @@ void __put_task_struct(struct task_struct *tsk)
132 free_task(tsk); 132 free_task(tsk);
133} 133}
134 134
135/*
136 * macro override instead of weak attribute alias, to workaround
137 * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
138 */
139#ifndef arch_task_cache_init
140#define arch_task_cache_init()
141#endif
142
135void __init fork_init(unsigned long mempages) 143void __init fork_init(unsigned long mempages)
136{ 144{
137#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 145#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -144,6 +152,9 @@ void __init fork_init(unsigned long mempages)
144 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); 152 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
145#endif 153#endif
146 154
155 /* do the arch specific task caches init */
156 arch_task_cache_init();
157
147 /* 158 /*
148 * The default maximum number of threads is set to a safe 159 * The default maximum number of threads is set to a safe
149 * value: the thread structures can take up at most half 160 * value: the thread structures can take up at most half
@@ -163,6 +174,13 @@ void __init fork_init(unsigned long mempages)
163 init_task.signal->rlim[RLIMIT_NPROC]; 174 init_task.signal->rlim[RLIMIT_NPROC];
164} 175}
165 176
177int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
178 struct task_struct *src)
179{
180 *dst = *src;
181 return 0;
182}
183
166static struct task_struct *dup_task_struct(struct task_struct *orig) 184static struct task_struct *dup_task_struct(struct task_struct *orig)
167{ 185{
168 struct task_struct *tsk; 186 struct task_struct *tsk;
@@ -181,15 +199,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
181 return NULL; 199 return NULL;
182 } 200 }
183 201
184 *tsk = *orig; 202 err = arch_dup_task_struct(tsk, orig);
203 if (err)
204 goto out;
205
185 tsk->stack = ti; 206 tsk->stack = ti;
186 207
187 err = prop_local_init_single(&tsk->dirties); 208 err = prop_local_init_single(&tsk->dirties);
188 if (err) { 209 if (err)
189 free_thread_info(ti); 210 goto out;
190 free_task_struct(tsk);
191 return NULL;
192 }
193 211
194 setup_thread_stack(tsk, orig); 212 setup_thread_stack(tsk, orig);
195 213
@@ -205,6 +223,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
205#endif 223#endif
206 tsk->splice_pipe = NULL; 224 tsk->splice_pipe = NULL;
207 return tsk; 225 return tsk;
226
227out:
228 free_thread_info(ti);
229 free_task_struct(tsk);
230 return NULL;
208} 231}
209 232
210#ifdef CONFIG_MMU 233#ifdef CONFIG_MMU
diff --git a/kernel/sys.c b/kernel/sys.c
index a626116af5db..6a0cc71ee88d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -67,6 +67,12 @@
67#ifndef SET_ENDIAN 67#ifndef SET_ENDIAN
68# define SET_ENDIAN(a,b) (-EINVAL) 68# define SET_ENDIAN(a,b) (-EINVAL)
69#endif 69#endif
70#ifndef GET_TSC_CTL
71# define GET_TSC_CTL(a) (-EINVAL)
72#endif
73#ifndef SET_TSC_CTL
74# define SET_TSC_CTL(a) (-EINVAL)
75#endif
70 76
71/* 77/*
72 * this is where the system-wide overflow UID and GID are defined, for 78 * this is where the system-wide overflow UID and GID are defined, for
@@ -1737,7 +1743,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1737#else 1743#else
1738 return -EINVAL; 1744 return -EINVAL;
1739#endif 1745#endif
1740 1746 case PR_GET_TSC:
1747 error = GET_TSC_CTL(arg2);
1748 break;
1749 case PR_SET_TSC:
1750 error = SET_TSC_CTL(arg2);
1751 break;
1741 default: 1752 default:
1742 error = -EINVAL; 1753 error = -EINVAL;
1743 break; 1754 break;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index a3fa587c350c..2d6087c7cf98 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -178,6 +178,7 @@ static void change_clocksource(void)
178 if (clock == new) 178 if (clock == new)
179 return; 179 return;
180 180
181 new->cycle_last = 0;
181 now = clocksource_read(new); 182 now = clocksource_read(new);
182 nsec = __get_nsec_offset(); 183 nsec = __get_nsec_offset();
183 timespec_add_ns(&xtime, nsec); 184 timespec_add_ns(&xtime, nsec);
@@ -295,6 +296,7 @@ static int timekeeping_resume(struct sys_device *dev)
295 timespec_add_ns(&xtime, timekeeping_suspend_nsecs); 296 timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
296 update_xtime_cache(0); 297 update_xtime_cache(0);
297 /* re-base the last cycle value */ 298 /* re-base the last cycle value */
299 clock->cycle_last = 0;
298 clock->cycle_last = clocksource_read(clock); 300 clock->cycle_last = clocksource_read(clock);
299 clock->error = 0; 301 clock->error = 0;
300 timekeeping_suspended = 0; 302 timekeeping_suspended = 0;