aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/Makefile18
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/boot/cpu.c26
-rw-r--r--arch/x86/boot/mkcpustr.c49
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c10
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/feature_names.c83
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c107
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c16
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h4
-rw-r--r--arch/x86/kernel/cpu/proc.c74
-rw-r--r--arch/x86/kernel/cpuid.c52
-rw-r--r--arch/x86/kernel/efi.c57
-rw-r--r--arch/x86/kernel/efi_64.c22
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/ldt.c3
-rw-r--r--arch/x86/kernel/msr.c14
-rw-r--r--arch/x86/kernel/pci-gart_64.c5
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/setup_64.c76
-rw-r--r--arch/x86/kernel/test_nx.c12
-rw-r--r--arch/x86/kernel/trampoline_32.S7
-rw-r--r--arch/x86/kernel/trampoline_64.S3
-rw-r--r--arch/x86/kernel/vmi_32.c6
-rw-r--r--arch/x86/lib/mmx_32.c31
-rw-r--r--arch/x86/lib/usercopy_32.c12
-rw-r--r--arch/x86/lib/usercopy_64.c12
-rw-r--r--arch/x86/mm/fault.c34
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c49
-rw-r--r--arch/x86/mm/ioremap.c41
-rw-r--r--arch/x86/mm/numa_64.c7
-rw-r--r--arch/x86/mm/pageattr-test.c3
-rw-r--r--arch/x86/mm/pageattr.c400
-rw-r--r--arch/x86/mm/pgtable_32.c61
-rw-r--r--arch/x86/pci/numa.c52
42 files changed, 707 insertions, 679 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 99af1272ab1d..59eef1c7fdaa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -306,6 +306,7 @@ config X86_RDC321X
306 select M486 306 select M486
307 select X86_REBOOTFIXUPS 307 select X86_REBOOTFIXUPS
308 select GENERIC_GPIO 308 select GENERIC_GPIO
309 select LEDS_CLASS
309 select LEDS_GPIO 310 select LEDS_GPIO
310 help 311 help
311 This option is needed for RDC R-321x system-on-chip, also known 312 This option is needed for RDC R-321x system-on-chip, also known
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98bed5b..364865b1b08d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
92KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) 92KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
93 93
94LDFLAGS := -m elf_$(UTS_MACHINE) 94LDFLAGS := -m elf_$(UTS_MACHINE)
95OBJCOPYFLAGS := -O binary -R .note -R .comment -S
96 95
97# Speed up the build 96# Speed up the build
98KBUILD_CFLAGS += -pipe 97KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a39c40..f88458e83ef0 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
26#RAMDISK := -DRAMDISK=512 26#RAMDISK := -DRAMDISK=512
27 27
28targets := vmlinux.bin setup.bin setup.elf zImage bzImage 28targets := vmlinux.bin setup.bin setup.elf zImage bzImage
29subdir- := compressed 29subdir- := compressed
30 30
31setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o 31setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
32setup-y += header.o main.o mca.o memory.o pm.o pmjump.o 32setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y += video-vesa.o
43setup-y += video-bios.o 43setup-y += video-bios.o
44 44
45targets += $(setup-y) 45targets += $(setup-y)
46hostprogs-y := tools/build 46hostprogs-y := mkcpustr tools/build
47 47
48HOSTCFLAGS_build.o := $(LINUXINCLUDE) 48HOST_EXTRACFLAGS += $(LINUXINCLUDE)
49
50$(obj)/cpu.o: $(obj)/cpustr.h
51
52quiet_cmd_cpustr = CPUSTR $@
53 cmd_cpustr = $(obj)/mkcpustr > $@
54targets += cpustr.h
55$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
56 $(call if_changed,cpustr)
49 57
50# --------------------------------------------------------------------------- 58# ---------------------------------------------------------------------------
51 59
@@ -80,6 +88,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
80 $(call if_changed,image) 88 $(call if_changed,image)
81 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' 89 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
82 90
91OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
83$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE 92$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
84 $(call if_changed,objcopy) 93 $(call if_changed,objcopy)
85 94
@@ -90,7 +99,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
90 $(call if_changed,ld) 99 $(call if_changed,ld)
91 100
92OBJCOPYFLAGS_setup.bin := -O binary 101OBJCOPYFLAGS_setup.bin := -O binary
93
94$(obj)/setup.bin: $(obj)/setup.elf FORCE 102$(obj)/setup.bin: $(obj)/setup.elf FORCE
95 $(call if_changed,objcopy) 103 $(call if_changed,objcopy)
96 104
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
98 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ 106 $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
99 107
100# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel 108# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
101FDARGS = 109FDARGS =
102# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel 110# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
103FDINITRD = 111FDINITRD =
104 112
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24ceabd909..d2b9f3bb87c0 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
22 $(call if_changed,ld) 22 $(call if_changed,ld)
23 @: 23 @:
24 24
25OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
25$(obj)/vmlinux.bin: vmlinux FORCE 26$(obj)/vmlinux.bin: vmlinux FORCE
26 $(call if_changed,objcopy) 27 $(call if_changed,objcopy)
27 28
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a7f0d2..e8657b98c902 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
80 80
81#ifdef CONFIG_RELOCATABLE 81#ifdef CONFIG_RELOCATABLE
82 movl %ebp, %ebx 82 movl %ebp, %ebx
83 addl $(LARGE_PAGE_SIZE -1), %ebx 83 addl $(PMD_PAGE_SIZE -1), %ebx
84 andl $LARGE_PAGE_MASK, %ebx 84 andl $PMD_PAGE_MASK, %ebx
85#else 85#else
86 movl $CONFIG_PHYSICAL_START, %ebx 86 movl $CONFIG_PHYSICAL_START, %ebx
87#endif 87#endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
220 /* Start with the delta to where the kernel will run at. */ 220 /* Start with the delta to where the kernel will run at. */
221#ifdef CONFIG_RELOCATABLE 221#ifdef CONFIG_RELOCATABLE
222 leaq startup_32(%rip) /* - $startup_32 */, %rbp 222 leaq startup_32(%rip) /* - $startup_32 */, %rbp
223 addq $(LARGE_PAGE_SIZE - 1), %rbp 223 addq $(PMD_PAGE_SIZE - 1), %rbp
224 andq $LARGE_PAGE_MASK, %rbp 224 andq $PMD_PAGE_MASK, %rbp
225 movq %rbp, %rbx 225 movq %rbp, %rbx
226#else 226#else
227 movq $CONFIG_PHYSICAL_START, %rbp 227 movq $CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32da5852..00e19edd852c 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
1/* -*- linux-c -*- ------------------------------------------------------- * 1/* -*- linux-c -*- ------------------------------------------------------- *
2 * 2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved 4 * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
5 * 5 *
6 * This file is part of the Linux kernel, and is made available under 6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2. 7 * the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11/* 11/*
12 * arch/i386/boot/cpu.c 12 * arch/x86/boot/cpu.c
13 * 13 *
14 * Check for obligatory CPU features and abort if the features are not 14 * Check for obligatory CPU features and abort if the features are not
15 * present. 15 * present.
@@ -19,6 +19,8 @@
19#include "bitops.h" 19#include "bitops.h"
20#include <asm/cpufeature.h> 20#include <asm/cpufeature.h>
21 21
22#include "cpustr.h"
23
22static char *cpu_name(int level) 24static char *cpu_name(int level)
23{ 25{
24 static char buf[6]; 26 static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
35{ 37{
36 u32 *err_flags; 38 u32 *err_flags;
37 int cpu_level, req_level; 39 int cpu_level, req_level;
40 const unsigned char *msg_strs;
38 41
39 check_cpu(&cpu_level, &req_level, &err_flags); 42 check_cpu(&cpu_level, &req_level, &err_flags);
40 43
@@ -51,13 +54,26 @@ int validate_cpu(void)
51 puts("This kernel requires the following features " 54 puts("This kernel requires the following features "
52 "not present on the CPU:\n"); 55 "not present on the CPU:\n");
53 56
57 msg_strs = (const unsigned char *)x86_cap_strs;
58
54 for (i = 0; i < NCAPINTS; i++) { 59 for (i = 0; i < NCAPINTS; i++) {
55 u32 e = err_flags[i]; 60 u32 e = err_flags[i];
56 61
57 for (j = 0; j < 32; j++) { 62 for (j = 0; j < 32; j++) {
58 if (e & 1) 63 int n = (i << 5)+j;
59 printf("%d:%d ", i, j); 64 if (*msg_strs < n) {
60 65 /* Skip to the next string */
66 do {
67 msg_strs++;
68 } while (*msg_strs);
69 msg_strs++;
70 }
71 if (e & 1) {
72 if (*msg_strs == n && msg_strs[1])
73 printf("%s ", msg_strs+1);
74 else
75 printf("%d:%d ", i, j);
76 }
61 e >>= 1; 77 e >>= 1;
62 } 78 }
63 } 79 }
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 000000000000..bbe76953bae9
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2008 rPath, Inc. - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * This is a host program to preprocess the CPU strings into a
13 * compact format suitable for the setup code.
14 */
15
16#include <stdio.h>
17
18#include "../kernel/cpu/feature_names.c"
19
20#if NCAPFLAGS > 8
21# error "Need to adjust the boot code handling of CPUID strings"
22#endif
23
24int main(void)
25{
26 int i;
27 const char *str;
28
29 printf("static const char x86_cap_strs[] = \n");
30
31 for (i = 0; i < NCAPINTS*32; i++) {
32 str = x86_cap_flags[i];
33
34 if (i == NCAPINTS*32-1) {
35 /* The last entry must be unconditional; this
36 also consumes the compiler-added null character */
37 if (!str)
38 str = "";
39 printf("\t\"\\x%02x\"\"%s\"\n", i, str);
40 } else if (str) {
41 printf("#if REQUIRED_MASK%d & (1 << %d)\n"
42 "\t\"\\x%02x\"\"%s\\0\"\n"
43 "#endif\n",
44 i >> 5, i & 31, i, str);
45 }
46 }
47 printf("\t;\n");
48 return 0;
49}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f813009d44b..21dc1a061bf1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
37obj-$(CONFIG_X86_CPUID) += cpuid.o 37obj-$(CONFIG_X86_CPUID) += cpuid.o
38obj-$(CONFIG_MICROCODE) += microcode.o 38obj-$(CONFIG_MICROCODE) += microcode.o
39obj-$(CONFIG_PCI) += early-quirks.o 39obj-$(CONFIG_PCI) += early-quirks.o
40obj-$(CONFIG_APM) += apm_32.o 40apm-y := apm_32.o
41obj-$(CONFIG_APM) += apm.o
41obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o 42obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
42obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o 43obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
43obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o 44obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
@@ -74,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
74obj-y += pcspeaker.o 75obj-y += pcspeaker.o
75endif 76endif
76 77
77obj-$(CONFIG_SCx200) += scx200_32.o 78obj-$(CONFIG_SCx200) += scx200.o
79scx200-y += scx200_32.o
78 80
79### 81###
80# 64 bit specific files 82# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3bd763..a0c4d7c5dbd7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
3# 3#
4 4
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 5obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += feature_names.o
6 7
7obj-$(CONFIG_X86_32) += common.o proc.o bugs.o 8obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
8obj-$(CONFIG_X86_32) += amd.o 9obj-$(CONFIG_X86_32) += amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142b58e7..d9313d9adced 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
623 * They will insert themselves into the cpu_devs structure. 623 * They will insert themselves into the cpu_devs structure.
624 * Then, when cpu_init() is called, we can just iterate over that array. 624 * Then, when cpu_init() is called, we can just iterate over that array.
625 */ 625 */
626
627extern int intel_cpu_init(void);
628extern int cyrix_init_cpu(void);
629extern int nsc_init_cpu(void);
630extern int amd_init_cpu(void);
631extern int centaur_init_cpu(void);
632extern int transmeta_init_cpu(void);
633extern int nexgen_init_cpu(void);
634extern int umc_init_cpu(void);
635
636void __init early_cpu_init(void) 626void __init early_cpu_init(void)
637{ 627{
638 intel_cpu_init(); 628 intel_cpu_init();
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a5beb1..e0b38c33d842 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
27extern void early_init_intel(struct cpuinfo_x86 *c); 27extern void early_init_intel(struct cpuinfo_x86 *c);
28extern void early_init_amd(struct cpuinfo_x86 *c); 28extern void early_init_amd(struct cpuinfo_x86 *c);
29 29
30/* Specific CPU type init functions */
31int intel_cpu_init(void);
32int amd_init_cpu(void);
33int cyrix_init_cpu(void);
34int nsc_init_cpu(void);
35int centaur_init_cpu(void);
36int transmeta_init_cpu(void);
37int nexgen_init_cpu(void);
38int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 000000000000..ee975ac6bbcb
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
1/*
2 * Strings for the various x86 capability flags.
3 *
4 * This file must not contain any executable code.
5 */
6
7#include "asm/cpufeature.h"
8
9/*
10 * These flag bits must match the definitions in <asm/cpufeature.h>.
11 * NULL means this bit is undefined or reserved; either way it doesn't
12 * have meaning as far as Linux is concerned. Note that it's important
13 * to realize there is a difference between this table and CPUID -- if
14 * applications want to get the raw CPUID data, they should access
15 * /dev/cpu/<cpu_nr>/cpuid instead.
16 */
17const char * const x86_cap_flags[NCAPINTS*32] = {
18 /* Intel-defined */
19 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
20 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
21 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
22 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
23
24 /* AMD-defined */
25 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
26 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
27 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
28 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
29 "3dnowext", "3dnow",
30
31 /* Transmeta-defined */
32 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
33 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
34 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
35 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
36
37 /* Other (Linux-defined) */
38 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
39 NULL, NULL, NULL, NULL,
40 "constant_tsc", "up", NULL, "arch_perfmon",
41 "pebs", "bts", NULL, NULL,
42 "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
43 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
44
45 /* Intel-defined (#2) */
46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
48 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
50
51 /* VIA/Cyrix/Centaur-defined */
52 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
53 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
54 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
55 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
56
57 /* AMD-defined (#2) */
58 "lahf_lm", "cmp_legacy", "svm", "extapic",
59 "cr8_legacy", "abm", "sse4a", "misalignsse",
60 "3dnowprefetch", "osvw", "ibs", "sse5",
61 "skinit", "wdt", NULL, NULL,
62 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64
65 /* Auxiliary (Linux-defined) */
66 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
67 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
69 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
70};
71
72const char *const x86_power_flags[32] = {
73 "ts", /* temperature sensor */
74 "fid", /* frequency id control */
75 "vid", /* voltage id control */
76 "ttp", /* thermal trip */
77 "tm",
78 "stc",
79 "100mhzsteps",
80 "hwpstate",
81 "", /* tsc invariant mapped to constant_tsc */
82 /* nothing */
83};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b018db..fae31ce747bd 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h>
16 17
17#include "cpu.h" 18#include "cpu.h"
18 19
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c70f888..ff14c320040c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
7#include <asm/processor-flags.h> 7#include <asm/processor-flags.h>
8#include "mtrr.h" 8#include "mtrr.h"
9 9
10int arr3_protected;
11
12static void 10static void
13cyrix_get_arr(unsigned int reg, unsigned long *base, 11cyrix_get_arr(unsigned int reg, unsigned long *base,
14 unsigned long *size, mtrr_type * type) 12 unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
99 case 4: 97 case 4:
100 return replace_reg; 98 return replace_reg;
101 case 3: 99 case 3:
102 if (arr3_protected)
103 break;
104 case 2: 100 case 2:
105 case 1: 101 case 1:
106 case 0: 102 case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
115 } else { 111 } else {
116 for (i = 0; i < 7; i++) { 112 for (i = 0; i < 7; i++) {
117 cyrix_get_arr(i, &lbase, &lsize, &ltype); 113 cyrix_get_arr(i, &lbase, &lsize, &ltype);
118 if ((i == 3) && arr3_protected)
119 continue;
120 if (lsize == 0) 114 if (lsize == 0)
121 return i; 115 return i;
122 } 116 }
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
260 post_set(); 254 post_set();
261} 255}
262 256
263#if 0
264/*
265 * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
266 * with the SMM (System Management Mode) mode. So we need the following:
267 * Check whether SMI_LOCK (CCR3 bit 0) is set
268 * if it is set, write a warning message: ARR3 cannot be changed!
269 * (it cannot be changed until the next processor reset)
270 * if it is reset, then we can change it, set all the needed bits:
271 * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
272 * - disable access to SMM memory (CCR1 bit 2 reset)
273 * - disable SMM mode (CCR1 bit 1 reset)
274 * - disable write protection of ARR3 (CCR6 bit 1 reset)
275 * - (maybe) disable ARR3
276 * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
277 */
278static void __init
279cyrix_arr_init(void)
280{
281 struct set_mtrr_context ctxt;
282 unsigned char ccr[7];
283 int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
284#ifdef CONFIG_SMP
285 int i;
286#endif
287
288 /* flush cache and enable MAPEN */
289 set_mtrr_prepare_save(&ctxt);
290 set_mtrr_cache_disable(&ctxt);
291
292 /* Save all CCRs locally */
293 ccr[0] = getCx86(CX86_CCR0);
294 ccr[1] = getCx86(CX86_CCR1);
295 ccr[2] = getCx86(CX86_CCR2);
296 ccr[3] = ctxt.ccr3;
297 ccr[4] = getCx86(CX86_CCR4);
298 ccr[5] = getCx86(CX86_CCR5);
299 ccr[6] = getCx86(CX86_CCR6);
300
301 if (ccr[3] & 1) {
302 ccrc[3] = 1;
303 arr3_protected = 1;
304 } else {
305 /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
306 * access to SMM memory through ARR3 (bit 7).
307 */
308 if (ccr[1] & 0x80) {
309 ccr[1] &= 0x7f;
310 ccrc[1] |= 0x80;
311 }
312 if (ccr[1] & 0x04) {
313 ccr[1] &= 0xfb;
314 ccrc[1] |= 0x04;
315 }
316 if (ccr[1] & 0x02) {
317 ccr[1] &= 0xfd;
318 ccrc[1] |= 0x02;
319 }
320 arr3_protected = 0;
321 if (ccr[6] & 0x02) {
322 ccr[6] &= 0xfd;
323 ccrc[6] = 1; /* Disable write protection of ARR3 */
324 setCx86(CX86_CCR6, ccr[6]);
325 }
326 /* Disable ARR3. This is safe now that we disabled SMM. */
327 /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
328 }
329 /* If we changed CCR1 in memory, change it in the processor, too. */
330 if (ccrc[1])
331 setCx86(CX86_CCR1, ccr[1]);
332
333 /* Enable ARR usage by the processor */
334 if (!(ccr[5] & 0x20)) {
335 ccr[5] |= 0x20;
336 ccrc[5] = 1;
337 setCx86(CX86_CCR5, ccr[5]);
338 }
339#ifdef CONFIG_SMP
340 for (i = 0; i < 7; i++)
341 ccr_state[i] = ccr[i];
342 for (i = 0; i < 8; i++)
343 cyrix_get_arr(i,
344 &arr_state[i].base, &arr_state[i].size,
345 &arr_state[i].type);
346#endif
347
348 set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
349
350 if (ccrc[5])
351 printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
352 if (ccrc[3])
353 printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
354/*
355 if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
356 if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
357 if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
358*/
359 if (ccrc[6])
360 printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
361}
362#endif
363
364static struct mtrr_ops cyrix_mtrr_ops = { 257static struct mtrr_ops cyrix_mtrr_ops = {
365 .vendor = X86_VENDOR_CYRIX, 258 .vendor = X86_VENDOR_CYRIX,
366// .init = cyrix_arr_init, 259// .init = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 715919582657..1e27b69a7a0e 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
59static void set_mtrr(unsigned int reg, unsigned long base, 59static void set_mtrr(unsigned int reg, unsigned long base,
60 unsigned long size, mtrr_type type); 60 unsigned long size, mtrr_type type);
61 61
62#ifndef CONFIG_X86_64
63extern int arr3_protected;
64#else
65#define arr3_protected 0
66#endif
67
68void set_mtrr_ops(struct mtrr_ops * ops) 62void set_mtrr_ops(struct mtrr_ops * ops)
69{ 63{
70 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 64 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
513 printk(KERN_WARNING "mtrr: register: %d too big\n", reg); 507 printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
514 goto out; 508 goto out;
515 } 509 }
516 if (is_cpu(CYRIX) && !use_intel()) {
517 if ((reg == 3) && arr3_protected) {
518 printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
519 goto out;
520 }
521 }
522 mtrr_if->get(reg, &lbase, &lsize, &ltype); 510 mtrr_if->get(reg, &lbase, &lsize, &ltype);
523 if (lsize < 1) { 511 if (lsize < 1) {
524 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); 512 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
@@ -566,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
566 * These should be called implicitly, but we can't yet until all the initcall 554 * These should be called implicitly, but we can't yet until all the initcall
567 * stuff is done... 555 * stuff is done...
568 */ 556 */
569extern void amd_init_mtrr(void);
570extern void cyrix_init_mtrr(void);
571extern void centaur_init_mtrr(void);
572
573static void __init init_ifs(void) 557static void __init init_ifs(void)
574{ 558{
575#ifndef CONFIG_X86_64 559#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c20814..2cc77eb6fea3 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
97const char *mtrr_attrib_to_str(int x); 97const char *mtrr_attrib_to_str(int x);
98void mtrr_wrmsr(unsigned, unsigned, unsigned); 98void mtrr_wrmsr(unsigned, unsigned, unsigned);
99 99
100/* CPU specific mtrr init functions */
101int amd_init_mtrr(void);
102int cyrix_init_mtrr(void);
103int centaur_init_mtrr(void);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 028213260148..af11d31dce0a 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
10 */ 10 */
11static int show_cpuinfo(struct seq_file *m, void *v) 11static int show_cpuinfo(struct seq_file *m, void *v)
12{ 12{
13 /*
14 * These flag bits must match the definitions in <asm/cpufeature.h>.
15 * NULL means this bit is undefined or reserved; either way it doesn't
16 * have meaning as far as Linux is concerned. Note that it's important
17 * to realize there is a difference between this table and CPUID -- if
18 * applications want to get the raw CPUID data, they should access
19 * /dev/cpu/<cpu_nr>/cpuid instead.
20 */
21 static const char * const x86_cap_flags[] = {
22 /* Intel-defined */
23 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
24 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
25 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
26 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
27
28 /* AMD-defined */
29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
32 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
33 "3dnowext", "3dnow",
34
35 /* Transmeta-defined */
36 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
37 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
38 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
39 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
40
41 /* Other (Linux-defined) */
42 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
43 NULL, NULL, NULL, NULL,
44 "constant_tsc", "up", NULL, "arch_perfmon",
45 "pebs", "bts", NULL, "sync_rdtsc",
46 "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
47 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
48
49 /* Intel-defined (#2) */
50 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
51 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
52 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
53 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
54
55 /* VIA/Cyrix/Centaur-defined */
56 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
57 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
58 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
59 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
60
61 /* AMD-defined (#2) */
62 "lahf_lm", "cmp_legacy", "svm", "extapic",
63 "cr8_legacy", "abm", "sse4a", "misalignsse",
64 "3dnowprefetch", "osvw", "ibs", "sse5",
65 "skinit", "wdt", NULL, NULL,
66 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
67 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68
69 /* Auxiliary (Linux-defined) */
70 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
71 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
72 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
73 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
74 };
75 static const char * const x86_power_flags[] = {
76 "ts", /* temperature sensor */
77 "fid", /* frequency id control */
78 "vid", /* voltage id control */
79 "ttp", /* thermal trip */
80 "tm",
81 "stc",
82 "100mhzsteps",
83 "hwpstate",
84 "", /* constant_tsc - moved to flags */
85 /* nothing */
86 };
87 struct cpuinfo_x86 *c = v; 13 struct cpuinfo_x86 *c = v;
88 int i, n = 0; 14 int i, n = 0;
89 int fpu_exception; 15 int fpu_exception;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d800f9..288e7a6598ac 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
1/* ----------------------------------------------------------------------- * 1/* ----------------------------------------------------------------------- *
2 * 2 *
3 * Copyright 2000 H. Peter Anvin - All Rights Reserved 3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
17 * and then read in chunks of 16 bytes. A larger size means multiple 17 * and then read in chunks of 16 bytes. A larger size means multiple
18 * reads of consecutive levels. 18 * reads of consecutive levels.
19 * 19 *
20 * The lower 32 bits of the file position is used as the incoming %eax,
21 * and the upper 32 bits of the file position as the incoming %ecx,
22 * the latter intended for "counting" eax levels like eax=4.
23 *
20 * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on 24 * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
21 * an SMP box will direct the access to CPU %d. 25 * an SMP box will direct the access to CPU %d.
22 */ 26 */
@@ -43,35 +47,24 @@
43 47
44static struct class *cpuid_class; 48static struct class *cpuid_class;
45 49
46struct cpuid_command { 50struct cpuid_regs {
47 u32 reg; 51 u32 eax, ebx, ecx, edx;
48 u32 *data;
49}; 52};
50 53
51static void cpuid_smp_cpuid(void *cmd_block) 54static void cpuid_smp_cpuid(void *cmd_block)
52{ 55{
53 struct cpuid_command *cmd = cmd_block; 56 struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
54
55 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
56 &cmd->data[3]);
57}
58
59static inline void do_cpuid(int cpu, u32 reg, u32 * data)
60{
61 struct cpuid_command cmd;
62
63 cmd.reg = reg;
64 cmd.data = data;
65 57
66 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); 58 cpuid_count(cmd->eax, cmd->ecx,
59 &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
67} 60}
68 61
69static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) 62static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
70{ 63{
71 loff_t ret; 64 loff_t ret;
65 struct inode *inode = file->f_mapping->host;
72 66
73 lock_kernel(); 67 mutex_lock(&inode->i_mutex);
74
75 switch (orig) { 68 switch (orig) {
76 case 0: 69 case 0:
77 file->f_pos = offset; 70 file->f_pos = offset;
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
84 default: 77 default:
85 ret = -EINVAL; 78 ret = -EINVAL;
86 } 79 }
87 80 mutex_unlock(&inode->i_mutex);
88 unlock_kernel();
89 return ret; 81 return ret;
90} 82}
91 83
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
93 size_t count, loff_t * ppos) 85 size_t count, loff_t * ppos)
94{ 86{
95 char __user *tmp = buf; 87 char __user *tmp = buf;
96 u32 data[4]; 88 struct cpuid_regs cmd;
97 u32 reg = *ppos;
98 int cpu = iminor(file->f_path.dentry->d_inode); 89 int cpu = iminor(file->f_path.dentry->d_inode);
90 u64 pos = *ppos;
99 91
100 if (count % 16) 92 if (count % 16)
101 return -EINVAL; /* Invalid chunk size */ 93 return -EINVAL; /* Invalid chunk size */
102 94
103 for (; count; count -= 16) { 95 for (; count; count -= 16) {
104 do_cpuid(cpu, reg, data); 96 cmd.eax = pos;
105 if (copy_to_user(tmp, &data, 16)) 97 cmd.ecx = pos >> 32;
98 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
99 if (copy_to_user(tmp, &cmd, 16))
106 return -EFAULT; 100 return -EFAULT;
107 tmp += 16; 101 tmp += 16;
108 *ppos = reg++; 102 *ppos = ++pos;
109 } 103 }
110 104
111 return tmp - buf; 105 return tmp - buf;
@@ -193,7 +187,7 @@ static int __init cpuid_init(void)
193 } 187 }
194 for_each_online_cpu(i) { 188 for_each_online_cpu(i) {
195 err = cpuid_device_create(i); 189 err = cpuid_device_create(i);
196 if (err != 0) 190 if (err != 0)
197 goto out_class; 191 goto out_class;
198 } 192 }
199 register_hotcpu_notifier(&cpuid_class_cpu_notifier); 193 register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +202,7 @@ out_class:
208 } 202 }
209 class_destroy(cpuid_class); 203 class_destroy(cpuid_class);
210out_chrdev: 204out_chrdev:
211 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); 205 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
212out: 206out:
213 return err; 207 return err;
214} 208}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324a625c..32dd62b36ff7 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
379#endif 379#endif
380} 380}
381 381
382#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
383static void __init runtime_code_page_mkexec(void) 382static void __init runtime_code_page_mkexec(void)
384{ 383{
385 efi_memory_desc_t *md; 384 efi_memory_desc_t *md;
386 unsigned long end;
387 void *p; 385 void *p;
388 386
389 if (!(__supported_pte_mask & _PAGE_NX)) 387 if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
392 /* Make EFI runtime service code area executable */ 390 /* Make EFI runtime service code area executable */
393 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 391 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
394 md = p; 392 md = p;
395 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); 393
396 if (md->type == EFI_RUNTIME_SERVICES_CODE && 394 if (md->type != EFI_RUNTIME_SERVICES_CODE)
397 (end >> PAGE_SHIFT) <= max_pfn_mapped) { 395 continue;
398 set_memory_x(md->virt_addr, md->num_pages); 396
399 set_memory_uc(md->virt_addr, md->num_pages); 397 set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
400 }
401 } 398 }
402 __flush_tlb_all();
403} 399}
404#else
405static inline void __init runtime_code_page_mkexec(void) { }
406#endif
407 400
408/* 401/*
409 * This function will switch the EFI runtime services to virtual mode. 402 * This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
417{ 410{
418 efi_memory_desc_t *md; 411 efi_memory_desc_t *md;
419 efi_status_t status; 412 efi_status_t status;
420 unsigned long end; 413 unsigned long size;
421 void *p; 414 u64 end, systab;
415 void *p, *va;
422 416
423 efi.systab = NULL; 417 efi.systab = NULL;
424 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 418 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
425 md = p; 419 md = p;
426 if (!(md->attribute & EFI_MEMORY_RUNTIME)) 420 if (!(md->attribute & EFI_MEMORY_RUNTIME))
427 continue; 421 continue;
428 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); 422
429 if ((md->attribute & EFI_MEMORY_WB) && 423 size = md->num_pages << EFI_PAGE_SHIFT;
430 ((end >> PAGE_SHIFT) <= max_pfn_mapped)) 424 end = md->phys_addr + size;
431 md->virt_addr = (unsigned long)__va(md->phys_addr); 425
426 if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
427 va = __va(md->phys_addr);
432 else 428 else
433 md->virt_addr = (unsigned long) 429 va = efi_ioremap(md->phys_addr, size);
434 efi_ioremap(md->phys_addr, 430
435 md->num_pages << EFI_PAGE_SHIFT); 431 if (md->attribute & EFI_MEMORY_WB)
436 if (!md->virt_addr) 432 set_memory_uc(md->virt_addr, size);
433
434 md->virt_addr = (u64) (unsigned long) va;
435
436 if (!va) {
437 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", 437 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
438 (unsigned long long)md->phys_addr); 438 (unsigned long long)md->phys_addr);
439 if ((md->phys_addr <= (unsigned long)efi_phys.systab) && 439 continue;
440 ((unsigned long)efi_phys.systab < end)) 440 }
441 efi.systab = (efi_system_table_t *)(unsigned long) 441
442 (md->virt_addr - md->phys_addr + 442 systab = (u64) (unsigned long) efi_phys.systab;
443 (unsigned long)efi_phys.systab); 443 if (md->phys_addr <= systab && systab < end) {
444 systab += md->virt_addr - md->phys_addr;
445 efi.systab = (efi_system_table_t *) (unsigned long) systab;
446 }
444 } 447 }
445 448
446 BUG_ON(!efi.systab); 449 BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f2379480f..09d5c2330934 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
54 else 54 else
55 set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \ 55 set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
56 __supported_pte_mask)); 56 __supported_pte_mask));
57 if (level == 4) 57 if (level == PG_LEVEL_4K)
58 start = (start + PMD_SIZE) & PMD_MASK;
59 else
60 start = (start + PAGE_SIZE) & PAGE_MASK; 58 start = (start + PAGE_SIZE) & PAGE_MASK;
59 else
60 start = (start + PMD_SIZE) & PMD_MASK;
61 } 61 }
62} 62}
63 63
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
109 memmap.nr_map * memmap.desc_size); 109 memmap.nr_map * memmap.desc_size);
110} 110}
111 111
112void __iomem * __init efi_ioremap(unsigned long offset, 112void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
113 unsigned long size)
114{ 113{
115 static unsigned pages_mapped; 114 static unsigned pages_mapped;
116 unsigned long last_addr;
117 unsigned i, pages; 115 unsigned i, pages;
118 116
119 last_addr = offset + size - 1; 117 /* phys_addr and size must be page aligned */
120 offset &= PAGE_MASK; 118 if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
121 pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT; 119 return NULL;
120
121 pages = size >> PAGE_SHIFT;
122 if (pages_mapped + pages > MAX_EFI_IO_PAGES) 122 if (pages_mapped + pages > MAX_EFI_IO_PAGES)
123 return NULL; 123 return NULL;
124 124
125 for (i = 0; i < pages; i++) { 125 for (i = 0; i < pages; i++) {
126 __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, 126 __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
127 offset, PAGE_KERNEL_EXEC_NOCACHE); 127 phys_addr, PAGE_KERNEL);
128 offset += PAGE_SIZE; 128 phys_addr += PAGE_SIZE;
129 pages_mapped++; 129 pages_mapped++;
130 } 130 }
131 131
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a361200..4f283ad215ec 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
63 63
64 /* Is the address not 2M aligned? */ 64 /* Is the address not 2M aligned? */
65 movq %rbp, %rax 65 movq %rbp, %rax
66 andl $~LARGE_PAGE_MASK, %eax 66 andl $~PMD_PAGE_MASK, %eax
67 testl %eax, %eax 67 testl %eax, %eax
68 jnz bad_address 68 jnz bad_address
69 69
@@ -88,7 +88,7 @@ startup_64:
88 88
89 /* Add an Identity mapping if I am above 1G */ 89 /* Add an Identity mapping if I am above 1G */
90 leaq _text(%rip), %rdi 90 leaq _text(%rip), %rdi
91 andq $LARGE_PAGE_MASK, %rdi 91 andq $PMD_PAGE_MASK, %rdi
92 92
93 movq %rdi, %rax 93 movq %rdi, %rax
94 shrq $PUD_SHIFT, %rax 94 shrq $PUD_SHIFT, %rax
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c8394a..0224c3637c73 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
35 if (mincount <= pc->size) 35 if (mincount <= pc->size)
36 return 0; 36 return 0;
37 oldsize = pc->size; 37 oldsize = pc->size;
38 mincount = (mincount + 511) & (~511); 38 mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
39 (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
39 if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) 40 if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
40 newldt = vmalloc(mincount * LDT_ENTRY_SIZE); 41 newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
41 else 42 else
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850e6519..af51ea8400b2 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
1/* ----------------------------------------------------------------------- * 1/* ----------------------------------------------------------------------- *
2 * 2 *
3 * Copyright 2000 H. Peter Anvin - All Rights Reserved 3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
45 45
46static loff_t msr_seek(struct file *file, loff_t offset, int orig) 46static loff_t msr_seek(struct file *file, loff_t offset, int orig)
47{ 47{
48 loff_t ret = -EINVAL; 48 loff_t ret;
49 struct inode *inode = file->f_mapping->host;
49 50
50 lock_kernel(); 51 mutex_lock(&inode->i_mutex);
51 switch (orig) { 52 switch (orig) {
52 case 0: 53 case 0:
53 file->f_pos = offset; 54 file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
56 case 1: 57 case 1:
57 file->f_pos += offset; 58 file->f_pos += offset;
58 ret = file->f_pos; 59 ret = file->f_pos;
60 break;
61 default:
62 ret = -EINVAL;
59 } 63 }
60 unlock_kernel(); 64 mutex_unlock(&inode->i_mutex);
61 return ret; 65 return ret;
62} 66}
63 67
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc7181982..845cbecd68e9 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
501 } 501 }
502 502
503 a = aper + iommu_size; 503 a = aper + iommu_size;
504 iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; 504 iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
505 505
506 if (iommu_size < 64*1024*1024) { 506 if (iommu_size < 64*1024*1024) {
507 printk(KERN_WARNING 507 printk(KERN_WARNING
@@ -731,7 +731,8 @@ void __init gart_iommu_init(void)
731 * the backing memory. The GART address is only used by PCI 731 * the backing memory. The GART address is only used by PCI
732 * devices. 732 * devices.
733 */ 733 */
734 clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); 734 set_memory_np((unsigned long)__va(iommu_bus_base),
735 iommu_size >> PAGE_SHIFT);
735 736
736 /* 737 /*
737 * Try to workaround a bug (thanks to BenH) 738 * Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371ab223a..dabdbeff1f77 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
251 * because it has nothing to do. 251 * because it has nothing to do.
252 * Give all the remaining CPUS a kick. 252 * Give all the remaining CPUS a kick.
253 */ 253 */
254 smp_call_function_mask(map, do_nothing, 0, 0); 254 smp_call_function_mask(map, do_nothing, NULL, 0);
255 } while (!cpus_empty(map)); 255 } while (!cpus_empty(map));
256 256
257 set_cpus_allowed(current, tmp); 257 set_cpus_allowed(current, tmp);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c534b9..c8939dfddfba 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1068 struct cpuinfo_x86 *c = v; 1068 struct cpuinfo_x86 *c = v;
1069 int cpu = 0, i; 1069 int cpu = 0, i;
1070 1070
1071 /*
1072 * These flag bits must match the definitions in <asm/cpufeature.h>.
1073 * NULL means this bit is undefined or reserved; either way it doesn't
1074 * have meaning as far as Linux is concerned. Note that it's important
1075 * to realize there is a difference between this table and CPUID -- if
1076 * applications want to get the raw CPUID data, they should access
1077 * /dev/cpu/<cpu_nr>/cpuid instead.
1078 */
1079 static const char *const x86_cap_flags[] = {
1080 /* Intel-defined */
1081 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1082 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1083 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1084 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
1085
1086 /* AMD-defined */
1087 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1088 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1089 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1090 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
1091 "3dnowext", "3dnow",
1092
1093 /* Transmeta-defined */
1094 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1095 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1096 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1097 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1098
1099 /* Other (Linux-defined) */
1100 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
1101 NULL, NULL, NULL, NULL,
1102 "constant_tsc", "up", NULL, "arch_perfmon",
1103 "pebs", "bts", NULL, "sync_rdtsc",
1104 "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1105 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1106
1107 /* Intel-defined (#2) */
1108 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
1109 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
1110 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
1111 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1112
1113 /* VIA/Cyrix/Centaur-defined */
1114 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1115 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
1116 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1117 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1118
1119 /* AMD-defined (#2) */
1120 "lahf_lm", "cmp_legacy", "svm", "extapic",
1121 "cr8_legacy", "abm", "sse4a", "misalignsse",
1122 "3dnowprefetch", "osvw", "ibs", "sse5",
1123 "skinit", "wdt", NULL, NULL,
1124 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1125 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1126
1127 /* Auxiliary (Linux-defined) */
1128 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1129 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1130 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1131 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1132 };
1133 static const char *const x86_power_flags[] = {
1134 "ts", /* temperature sensor */
1135 "fid", /* frequency id control */
1136 "vid", /* voltage id control */
1137 "ttp", /* thermal trip */
1138 "tm",
1139 "stc",
1140 "100mhzsteps",
1141 "hwpstate",
1142 "", /* tsc invariant mapped to constant_tsc */
1143 /* nothing */
1144 };
1145
1146
1147#ifdef CONFIG_SMP 1071#ifdef CONFIG_SMP
1148 cpu = c->cpu_index; 1072 cpu = c->cpu_index;
1149#endif 1073#endif
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e304c7..36c100c323aa 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/sort.h> 13#include <linux/sort.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/asm.h>
15 16
16extern int rodata_test_data; 17extern int rodata_test_data;
17 18
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
89 "2: mov %[zero], %[rslt]\n" 90 "2: mov %[zero], %[rslt]\n"
90 " ret\n" 91 " ret\n"
91 ".previous\n" 92 ".previous\n"
92 ".section __ex_table,\"a\"\n" 93 _ASM_EXTABLE(0b,2b)
93 " .align 8\n"
94#ifdef CONFIG_X86_32
95 " .long 0b\n"
96 " .long 2b\n"
97#else
98 " .quad 0b\n"
99 " .quad 2b\n"
100#endif
101 ".previous\n"
102 : [rslt] "=r" (result) 94 : [rslt] "=r" (result)
103 : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) 95 : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
104 ); 96 );
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6aca3d..64580679861e 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
11 * trampoline page to make our stack and everything else 11 * trampoline page to make our stack and everything else
12 * is a mystery. 12 * is a mystery.
13 * 13 *
14 * In fact we don't actually need a stack so we don't 14 * We jump into arch/x86/kernel/head_32.S.
15 * set one up.
16 *
17 * We jump into the boot/compressed/head.S code. So you'd
18 * better be running a compressed kernel image or you
19 * won't get very far.
20 * 15 *
21 * On entry to trampoline_data, the processor is in real mode 16 * On entry to trampoline_data, the processor is in real mode
22 * with 16-bit addressing and 16-bit data. CS has some value 17 * with 16-bit addressing and 16-bit data. CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c6a9f5..4aedd0bcee4c 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
10 * trampoline page to make our stack and everything else 10 * trampoline page to make our stack and everything else
11 * is a mystery. 11 * is a mystery.
12 * 12 *
13 * In fact we don't actually need a stack so we don't
14 * set one up.
15 *
16 * On entry to trampoline_data, the processor is in real mode 13 * On entry to trampoline_data, the processor is in real mode
17 * with 16-bit addressing and 16-bit data. CS has some value 14 * with 16-bit addressing and 16-bit data. CS has some value
18 * and IP is zero. Thus, data addresses need to be absolute 15 * and IP is zero. Thus, data addresses need to be absolute
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2c2e19..12affe1f9bce 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
220static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) 220static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
221{ 221{
222 u32 *idt_entry = (u32 *)g; 222 u32 *idt_entry = (u32 *)g;
223 vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]); 223 vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
224} 224}
225 225
226static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, 226static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
227 const void *desc, int type) 227 const void *desc, int type)
228{ 228{
229 u32 *gdt_entry = (u32 *)desc; 229 u32 *gdt_entry = (u32 *)desc;
230 vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]); 230 vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
231} 231}
232 232
233static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, 233static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
234 const void *desc) 234 const void *desc)
235{ 235{
236 u32 *ldt_entry = (u32 *)desc; 236 u32 *ldt_entry = (u32 *)desc;
237 vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]); 237 vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
238} 238}
239 239
240static void vmi_load_sp0(struct tss_struct *tss, 240static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2e8dd4..cc9b4a4450f3 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
4#include <linux/hardirq.h> 4#include <linux/hardirq.h>
5#include <linux/module.h> 5#include <linux/module.h>
6 6
7#include <asm/asm.h>
7#include <asm/i387.h> 8#include <asm/i387.h>
8 9
9 10
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
50 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 51 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
51 " jmp 2b\n" 52 " jmp 2b\n"
52 ".previous\n" 53 ".previous\n"
53 ".section __ex_table,\"a\"\n" 54 _ASM_EXTABLE(1b,3b)
54 " .align 4\n"
55 " .long 1b, 3b\n"
56 ".previous"
57 : : "r" (from) ); 55 : : "r" (from) );
58 56
59 57
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
81 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 79 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
82 " jmp 2b\n" 80 " jmp 2b\n"
83 ".previous\n" 81 ".previous\n"
84 ".section __ex_table,\"a\"\n" 82 _ASM_EXTABLE(1b,3b)
85 " .align 4\n"
86 " .long 1b, 3b\n"
87 ".previous"
88 : : "r" (from), "r" (to) : "memory"); 83 : : "r" (from), "r" (to) : "memory");
89 from+=64; 84 from+=64;
90 to+=64; 85 to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
181 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 176 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
182 " jmp 2b\n" 177 " jmp 2b\n"
183 ".previous\n" 178 ".previous\n"
184 ".section __ex_table,\"a\"\n" 179 _ASM_EXTABLE(1b,3b)
185 " .align 4\n"
186 " .long 1b, 3b\n"
187 ".previous"
188 : : "r" (from) ); 180 : : "r" (from) );
189 181
190 for(i=0; i<(4096-320)/64; i++) 182 for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
211 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 203 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
212 " jmp 2b\n" 204 " jmp 2b\n"
213 ".previous\n" 205 ".previous\n"
214 ".section __ex_table,\"a\"\n" 206 _ASM_EXTABLE(1b,3b)
215 " .align 4\n"
216 " .long 1b, 3b\n"
217 ".previous"
218 : : "r" (from), "r" (to) : "memory"); 207 : : "r" (from), "r" (to) : "memory");
219 from+=64; 208 from+=64;
220 to+=64; 209 to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
311 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 300 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
312 " jmp 2b\n" 301 " jmp 2b\n"
313 ".previous\n" 302 ".previous\n"
314 ".section __ex_table,\"a\"\n" 303 _ASM_EXTABLE(1b,3b)
315 " .align 4\n"
316 " .long 1b, 3b\n"
317 ".previous"
318 : : "r" (from) ); 304 : : "r" (from) );
319 305
320 for(i=0; i<4096/64; i++) 306 for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
341 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 327 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
342 " jmp 2b\n" 328 " jmp 2b\n"
343 ".previous\n" 329 ".previous\n"
344 ".section __ex_table,\"a\"\n" 330 _ASM_EXTABLE(1b,3b)
345 " .align 4\n"
346 " .long 1b, 3b\n"
347 ".previous"
348 : : "r" (from), "r" (to) : "memory"); 331 : : "r" (from), "r" (to) : "memory");
349 from+=64; 332 from+=64;
350 to+=64; 333 to+=64;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5bedb2..e849b9998b0e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do { \
48 "3: movl %5,%0\n" \ 48 "3: movl %5,%0\n" \
49 " jmp 2b\n" \ 49 " jmp 2b\n" \
50 ".previous\n" \ 50 ".previous\n" \
51 ".section __ex_table,\"a\"\n" \ 51 _ASM_EXTABLE(0b,3b) \
52 " .align 4\n" \
53 " .long 0b,3b\n" \
54 ".previous" \
55 : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 52 : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
56 "=&D" (__d2) \ 53 "=&D" (__d2) \
57 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 54 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do { \
132 "3: lea 0(%2,%0,4),%0\n" \ 129 "3: lea 0(%2,%0,4),%0\n" \
133 " jmp 2b\n" \ 130 " jmp 2b\n" \
134 ".previous\n" \ 131 ".previous\n" \
135 ".section __ex_table,\"a\"\n" \ 132 _ASM_EXTABLE(0b,3b) \
136 " .align 4\n" \ 133 _ASM_EXTABLE(1b,2b) \
137 " .long 0b,3b\n" \
138 " .long 1b,2b\n" \
139 ".previous" \
140 : "=&c"(size), "=&D" (__d0) \ 134 : "=&c"(size), "=&D" (__d0) \
141 : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ 135 : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
142} while (0) 136} while (0)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f838cc..0c89d1bb0287 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do { \
31 "3: movq %5,%0\n" \ 31 "3: movq %5,%0\n" \
32 " jmp 2b\n" \ 32 " jmp 2b\n" \
33 ".previous\n" \ 33 ".previous\n" \
34 ".section __ex_table,\"a\"\n" \ 34 _ASM_EXTABLE(0b,3b) \
35 " .align 8\n" \
36 " .quad 0b,3b\n" \
37 ".previous" \
38 : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 35 : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
39 "=&D" (__d2) \ 36 "=&D" (__d2) \
40 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
87 "3: lea 0(%[size1],%[size8],8),%[size8]\n" 84 "3: lea 0(%[size1],%[size8],8),%[size8]\n"
88 " jmp 2b\n" 85 " jmp 2b\n"
89 ".previous\n" 86 ".previous\n"
90 ".section __ex_table,\"a\"\n" 87 _ASM_EXTABLE(0b,3b)
91 " .align 8\n" 88 _ASM_EXTABLE(1b,2b)
92 " .quad 0b,3b\n"
93 " .quad 1b,2b\n"
94 ".previous"
95 : [size8] "=c"(size), [dst] "=&D" (__d0) 89 : [size8] "=c"(size), [dst] "=&D" (__d0)
96 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), 90 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
97 [zero] "r" (0UL), [eight] "r" (8UL)); 91 [zero] "r" (0UL), [eight] "r" (8UL));
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0abf81..ad8b9733d6b3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
240 pud = pud_offset(pgd, address); 240 pud = pud_offset(pgd, address);
241 if (bad_address(pud)) goto bad; 241 if (bad_address(pud)) goto bad;
242 printk("PUD %lx ", pud_val(*pud)); 242 printk("PUD %lx ", pud_val(*pud));
243 if (!pud_present(*pud)) goto ret; 243 if (!pud_present(*pud) || pud_large(*pud))
244 goto ret;
244 245
245 pmd = pmd_offset(pud, address); 246 pmd = pmd_offset(pud, address);
246 if (bad_address(pmd)) goto bad; 247 if (bad_address(pmd)) goto bad;
@@ -508,6 +509,10 @@ static int vmalloc_fault(unsigned long address)
508 pmd_t *pmd, *pmd_ref; 509 pmd_t *pmd, *pmd_ref;
509 pte_t *pte, *pte_ref; 510 pte_t *pte, *pte_ref;
510 511
512 /* Make sure we are in vmalloc area */
513 if (!(address >= VMALLOC_START && address < VMALLOC_END))
514 return -1;
515
511 /* Copy kernel mappings over when needed. This can also 516 /* Copy kernel mappings over when needed. This can also
512 happen within a race in page table update. In the later 517 happen within a race in page table update. In the later
513 case just flush. */ 518 case just flush. */
@@ -603,6 +608,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
603 */ 608 */
604#ifdef CONFIG_X86_32 609#ifdef CONFIG_X86_32
605 if (unlikely(address >= TASK_SIZE)) { 610 if (unlikely(address >= TASK_SIZE)) {
611#else
612 if (unlikely(address >= TASK_SIZE64)) {
613#endif
606 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && 614 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
607 vmalloc_fault(address) >= 0) 615 vmalloc_fault(address) >= 0)
608 return; 616 return;
@@ -618,6 +626,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
618 goto bad_area_nosemaphore; 626 goto bad_area_nosemaphore;
619 } 627 }
620 628
629
630#ifdef CONFIG_X86_32
621 /* It's safe to allow irq's after cr2 has been saved and the vmalloc 631 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
622 fault has been handled. */ 632 fault has been handled. */
623 if (regs->flags & (X86_EFLAGS_IF|VM_MASK)) 633 if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +640,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
630 if (in_atomic() || !mm) 640 if (in_atomic() || !mm)
631 goto bad_area_nosemaphore; 641 goto bad_area_nosemaphore;
632#else /* CONFIG_X86_64 */ 642#else /* CONFIG_X86_64 */
633 if (unlikely(address >= TASK_SIZE64)) {
634 /*
635 * Don't check for the module range here: its PML4
636 * is always initialized because it's shared with the main
637 * kernel text. Only vmalloc may need PML4 syncups.
638 */
639 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
640 ((address >= VMALLOC_START && address < VMALLOC_END))) {
641 if (vmalloc_fault(address) >= 0)
642 return;
643 }
644
645 /* Can handle a stale RO->RW TLB */
646 if (spurious_fault(address, error_code))
647 return;
648
649 /*
650 * Don't take the mm semaphore here. If we fixup a prefetch
651 * fault we could otherwise deadlock.
652 */
653 goto bad_area_nosemaphore;
654 }
655 if (likely(regs->flags & X86_EFLAGS_IF)) 643 if (likely(regs->flags & X86_EFLAGS_IF))
656 local_irq_enable(); 644 local_irq_enable();
657 645
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f2f36f8dae52..d1bc04006d16 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
31#include <linux/initrd.h> 31#include <linux/initrd.h>
32#include <linux/cpumask.h> 32#include <linux/cpumask.h>
33 33
34#include <asm/asm.h>
34#include <asm/processor.h> 35#include <asm/processor.h>
35#include <asm/system.h> 36#include <asm/system.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
718 "1: movb %1, %0 \n" 719 "1: movb %1, %0 \n"
719 " xorl %2, %2 \n" 720 " xorl %2, %2 \n"
720 "2: \n" 721 "2: \n"
721 ".section __ex_table, \"a\"\n" 722 _ASM_EXTABLE(1b,2b)
722 " .align 4 \n"
723 " .long 1b, 2b \n"
724 ".previous \n"
725 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), 723 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
726 "=q" (tmp_reg), 724 "=q" (tmp_reg),
727 "=r" (flag) 725 "=r" (flag)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed76c28..3a98d6f724ab 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
273 int i = pmd_index(address); 273 int i = pmd_index(address);
274 274
275 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { 275 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
276 unsigned long entry;
277 pmd_t *pmd = pmd_page + pmd_index(address); 276 pmd_t *pmd = pmd_page + pmd_index(address);
278 277
279 if (address >= end) { 278 if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
287 if (pmd_val(*pmd)) 286 if (pmd_val(*pmd))
288 continue; 287 continue;
289 288
290 entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address; 289 set_pte((pte_t *)pmd,
291 entry &= __supported_pte_mask; 290 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
292 set_pmd(pmd, __pmd(entry));
293 } 291 }
294} 292}
295 293
@@ -435,49 +433,6 @@ void __init paging_init(void)
435#endif 433#endif
436 434
437/* 435/*
438 * Unmap a kernel mapping if it exists. This is useful to avoid
439 * prefetches from the CPU leading to inconsistent cache lines.
440 * address and size must be aligned to 2MB boundaries.
441 * Does nothing when the mapping doesn't exist.
442 */
443void __init clear_kernel_mapping(unsigned long address, unsigned long size)
444{
445 unsigned long end = address + size;
446
447 BUG_ON(address & ~LARGE_PAGE_MASK);
448 BUG_ON(size & ~LARGE_PAGE_MASK);
449
450 for (; address < end; address += LARGE_PAGE_SIZE) {
451 pgd_t *pgd = pgd_offset_k(address);
452 pud_t *pud;
453 pmd_t *pmd;
454
455 if (pgd_none(*pgd))
456 continue;
457
458 pud = pud_offset(pgd, address);
459 if (pud_none(*pud))
460 continue;
461
462 pmd = pmd_offset(pud, address);
463 if (!pmd || pmd_none(*pmd))
464 continue;
465
466 if (!(pmd_val(*pmd) & _PAGE_PSE)) {
467 /*
468 * Could handle this, but it should not happen
469 * currently:
470 */
471 printk(KERN_ERR "clear_kernel_mapping: "
472 "mapping has been split. will leak memory\n");
473 pmd_ERROR(*pmd);
474 }
475 set_pmd(pmd, __pmd(0));
476 }
477 __flush_tlb_all();
478}
479
480/*
481 * Memory hotplug specific functions 436 * Memory hotplug specific functions
482 */ 437 */
483void online_page(struct page *page) 438void online_page(struct page *page)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94608fd..ee6648fe6b15 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
70 * Fix up the linear direct mapping of the kernel to avoid cache attribute 70 * Fix up the linear direct mapping of the kernel to avoid cache attribute
71 * conflicts. 71 * conflicts.
72 */ 72 */
73static int ioremap_change_attr(unsigned long paddr, unsigned long size, 73static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
74 enum ioremap_mode mode) 74 enum ioremap_mode mode)
75{ 75{
76 unsigned long vaddr = (unsigned long)__va(paddr);
77 unsigned long nrpages = size >> PAGE_SHIFT; 76 unsigned long nrpages = size >> PAGE_SHIFT;
78 unsigned int level;
79 int err; 77 int err;
80 78
81 /* No change for pages after the last mapping */
82 if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
83 return 0;
84
85 /*
86 * If there is no identity map for this address,
87 * change_page_attr_addr is unnecessary
88 */
89 if (!lookup_address(vaddr, &level))
90 return 0;
91
92 switch (mode) { 79 switch (mode) {
93 case IOR_MODE_UNCACHED: 80 case IOR_MODE_UNCACHED:
94 default: 81 default:
@@ -114,9 +101,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
114static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, 101static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
115 enum ioremap_mode mode) 102 enum ioremap_mode mode)
116{ 103{
117 void __iomem *addr; 104 unsigned long pfn, offset, last_addr, vaddr;
118 struct vm_struct *area; 105 struct vm_struct *area;
119 unsigned long offset, last_addr;
120 pgprot_t prot; 106 pgprot_t prot;
121 107
122 /* Don't allow wraparound or zero size */ 108 /* Don't allow wraparound or zero size */
@@ -133,9 +119,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
133 /* 119 /*
134 * Don't allow anybody to remap normal RAM that we're using.. 120 * Don't allow anybody to remap normal RAM that we're using..
135 */ 121 */
136 for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped && 122 for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
137 (offset << PAGE_SHIFT) < last_addr; offset++) { 123 (pfn << PAGE_SHIFT) < last_addr; pfn++) {
138 if (page_is_ram(offset)) 124 if (page_is_ram(pfn) && pfn_valid(pfn) &&
125 !PageReserved(pfn_to_page(pfn)))
139 return NULL; 126 return NULL;
140 } 127 }
141 128
@@ -163,19 +150,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
163 if (!area) 150 if (!area)
164 return NULL; 151 return NULL;
165 area->phys_addr = phys_addr; 152 area->phys_addr = phys_addr;
166 addr = (void __iomem *) area->addr; 153 vaddr = (unsigned long) area->addr;
167 if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, 154 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
168 phys_addr, prot)) { 155 remove_vm_area((void *)(vaddr & PAGE_MASK));
169 remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
170 return NULL; 156 return NULL;
171 } 157 }
172 158
173 if (ioremap_change_attr(phys_addr, size, mode) < 0) { 159 if (ioremap_change_attr(vaddr, size, mode) < 0) {
174 vunmap(addr); 160 vunmap(area->addr);
175 return NULL; 161 return NULL;
176 } 162 }
177 163
178 return (void __iomem *) (offset + (char __iomem *)addr); 164 return (void __iomem *) (vaddr + offset);
179} 165}
180 166
181/** 167/**
@@ -254,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
254 return; 240 return;
255 } 241 }
256 242
257 /* Reset the direct mapping. Can block */
258 ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
259
260 /* Finally remove it */ 243 /* Finally remove it */
261 o = remove_vm_area((void *)addr); 244 o = remove_vm_area((void *)addr);
262 BUG_ON(p != o || o == NULL); 245 BUG_ON(p != o || o == NULL);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09b9194..5a02bf4c91ec 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
202 if (node_data[nodeid] == NULL) 202 if (node_data[nodeid] == NULL)
203 return; 203 return;
204 nodedata_phys = __pa(node_data[nodeid]); 204 nodedata_phys = __pa(node_data[nodeid]);
205 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
206 nodedata_phys + pgdat_size - 1);
205 207
206 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 208 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
207 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; 209 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
225 return; 227 return;
226 } 228 }
227 bootmap_start = __pa(bootmap); 229 bootmap_start = __pa(bootmap);
228 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
229 230
230 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 231 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
231 bootmap_start >> PAGE_SHIFT, 232 bootmap_start >> PAGE_SHIFT,
232 start_pfn, end_pfn); 233 start_pfn, end_pfn);
233 234
235 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
236 bootmap_start, bootmap_start + bootmap_size - 1,
237 bootmap_pages);
238
234 free_bootmem_with_active_regions(nodeid, end); 239 free_bootmem_with_active_regions(nodeid, end);
235 240
236 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); 241 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e786d2f2..398f3a578dde 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -137,7 +137,8 @@ static __init int exercise_pageattr(void)
137 137
138 for (k = 0; k < len[i]; k++) { 138 for (k = 0; k < len[i]; k++) {
139 pte = lookup_address(addr[i] + k*PAGE_SIZE, &level); 139 pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
140 if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) { 140 if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
141 !(pte_val(*pte) & _PAGE_PRESENT)) {
141 addr[i] = 0; 142 addr[i] = 0;
142 break; 143 break;
143 } 144 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e513..bb55a78dcd62 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
18 18
19/*
20 * The current flushing context - we pass it instead of 5 arguments:
21 */
22struct cpa_data {
23 unsigned long vaddr;
24 pgprot_t mask_set;
25 pgprot_t mask_clr;
26 int numpages;
27 int flushtlb;
28};
29
19static inline int 30static inline int
20within(unsigned long addr, unsigned long start, unsigned long end) 31within(unsigned long addr, unsigned long start, unsigned long end)
21{ 32{
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
52 63
53static void __cpa_flush_all(void *arg) 64static void __cpa_flush_all(void *arg)
54{ 65{
66 unsigned long cache = (unsigned long)arg;
67
55 /* 68 /*
56 * Flush all to work around Errata in early athlons regarding 69 * Flush all to work around Errata in early athlons regarding
57 * large page flushing. 70 * large page flushing.
58 */ 71 */
59 __flush_tlb_all(); 72 __flush_tlb_all();
60 73
61 if (boot_cpu_data.x86_model >= 4) 74 if (cache && boot_cpu_data.x86_model >= 4)
62 wbinvd(); 75 wbinvd();
63} 76}
64 77
65static void cpa_flush_all(void) 78static void cpa_flush_all(unsigned long cache)
66{ 79{
67 BUG_ON(irqs_disabled()); 80 BUG_ON(irqs_disabled());
68 81
69 on_each_cpu(__cpa_flush_all, NULL, 1, 1); 82 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
70} 83}
71 84
72static void __cpa_flush_range(void *arg) 85static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
79 __flush_tlb_all(); 92 __flush_tlb_all();
80} 93}
81 94
82static void cpa_flush_range(unsigned long start, int numpages) 95static void cpa_flush_range(unsigned long start, int numpages, int cache)
83{ 96{
84 unsigned int i, level; 97 unsigned int i, level;
85 unsigned long addr; 98 unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
89 102
90 on_each_cpu(__cpa_flush_range, NULL, 1, 1); 103 on_each_cpu(__cpa_flush_range, NULL, 1, 1);
91 104
105 if (!cache)
106 return;
107
92 /* 108 /*
93 * We only need to flush on one CPU, 109 * We only need to flush on one CPU,
94 * clflush is a MESI-coherent instruction that 110 * clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
101 /* 117 /*
102 * Only flush present addresses: 118 * Only flush present addresses:
103 */ 119 */
104 if (pte && pte_present(*pte)) 120 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
105 clflush_cache_range((void *) addr, PAGE_SIZE); 121 clflush_cache_range((void *) addr, PAGE_SIZE);
106 } 122 }
107} 123}
108 124
125#define HIGH_MAP_START __START_KERNEL_map
126#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
127
128
129/*
130 * Converts a virtual address to a X86-64 highmap address
131 */
132static unsigned long virt_to_highmap(void *address)
133{
134#ifdef CONFIG_X86_64
135 return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
136#else
137 return (unsigned long)address;
138#endif
139}
140
109/* 141/*
110 * Certain areas of memory on x86 require very specific protection flags, 142 * Certain areas of memory on x86 require very specific protection flags,
111 * for example the BIOS area or kernel text. Callers don't always get this 143 * for example the BIOS area or kernel text. Callers don't always get this
@@ -129,12 +161,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
129 */ 161 */
130 if (within(address, (unsigned long)_text, (unsigned long)_etext)) 162 if (within(address, (unsigned long)_text, (unsigned long)_etext))
131 pgprot_val(forbidden) |= _PAGE_NX; 163 pgprot_val(forbidden) |= _PAGE_NX;
164 /*
165 * Do the same for the x86-64 high kernel mapping
166 */
167 if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
168 pgprot_val(forbidden) |= _PAGE_NX;
169
132 170
133#ifdef CONFIG_DEBUG_RODATA 171#ifdef CONFIG_DEBUG_RODATA
134 /* The .rodata section needs to be read-only */ 172 /* The .rodata section needs to be read-only */
135 if (within(address, (unsigned long)__start_rodata, 173 if (within(address, (unsigned long)__start_rodata,
136 (unsigned long)__end_rodata)) 174 (unsigned long)__end_rodata))
137 pgprot_val(forbidden) |= _PAGE_RW; 175 pgprot_val(forbidden) |= _PAGE_RW;
176 /*
177 * Do the same for the x86-64 high kernel mapping
178 */
179 if (within(address, virt_to_highmap(__start_rodata),
180 virt_to_highmap(__end_rodata)))
181 pgprot_val(forbidden) |= _PAGE_RW;
138#endif 182#endif
139 183
140 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 184 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -142,6 +186,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
142 return prot; 186 return prot;
143} 187}
144 188
189/*
190 * Lookup the page table entry for a virtual address. Return a pointer
191 * to the entry and the level of the mapping.
192 *
193 * Note: We return pud and pmd either when the entry is marked large
194 * or when the present bit is not set. Otherwise we would return a
195 * pointer to a nonexisting mapping.
196 */
145pte_t *lookup_address(unsigned long address, int *level) 197pte_t *lookup_address(unsigned long address, int *level)
146{ 198{
147 pgd_t *pgd = pgd_offset_k(address); 199 pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +204,31 @@ pte_t *lookup_address(unsigned long address, int *level)
152 204
153 if (pgd_none(*pgd)) 205 if (pgd_none(*pgd))
154 return NULL; 206 return NULL;
207
155 pud = pud_offset(pgd, address); 208 pud = pud_offset(pgd, address);
156 if (pud_none(*pud)) 209 if (pud_none(*pud))
157 return NULL; 210 return NULL;
211
212 *level = PG_LEVEL_1G;
213 if (pud_large(*pud) || !pud_present(*pud))
214 return (pte_t *)pud;
215
158 pmd = pmd_offset(pud, address); 216 pmd = pmd_offset(pud, address);
159 if (pmd_none(*pmd)) 217 if (pmd_none(*pmd))
160 return NULL; 218 return NULL;
161 219
162 *level = PG_LEVEL_2M; 220 *level = PG_LEVEL_2M;
163 if (pmd_large(*pmd)) 221 if (pmd_large(*pmd) || !pmd_present(*pmd))
164 return (pte_t *)pmd; 222 return (pte_t *)pmd;
165 223
166 *level = PG_LEVEL_4K; 224 *level = PG_LEVEL_4K;
225
167 return pte_offset_kernel(pmd, address); 226 return pte_offset_kernel(pmd, address);
168} 227}
169 228
229/*
230 * Set the new pmd in all the pgds we know about:
231 */
170static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 232static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
171{ 233{
172 /* change init_mm */ 234 /* change init_mm */
@@ -175,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
175 if (!SHARED_KERNEL_PMD) { 237 if (!SHARED_KERNEL_PMD) {
176 struct page *page; 238 struct page *page;
177 239
240 address = __pa(address);
178 list_for_each_entry(page, &pgd_list, lru) { 241 list_for_each_entry(page, &pgd_list, lru) {
179 pgd_t *pgd; 242 pgd_t *pgd;
180 pud_t *pud; 243 pud_t *pud;
@@ -189,18 +252,114 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
189#endif 252#endif
190} 253}
191 254
255static int
256try_preserve_large_page(pte_t *kpte, unsigned long address,
257 struct cpa_data *cpa)
258{
259 unsigned long nextpage_addr, numpages, pmask, psize, flags;
260 pte_t new_pte, old_pte, *tmp;
261 pgprot_t old_prot, new_prot;
262 int level, do_split = 1;
263
264 /*
265 * An Athlon 64 X2 showed hard hangs if we tried to preserve
266 * largepages and changed the PSE entry from RW to RO.
267 *
268 * As AMD CPUs have a long series of erratas in this area,
269 * (and none of the known ones seem to explain this hang),
270 * disable this code until the hang can be debugged:
271 */
272 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
273 return 1;
274
275 spin_lock_irqsave(&pgd_lock, flags);
276 /*
277 * Check for races, another CPU might have split this page
278 * up already:
279 */
280 tmp = lookup_address(address, &level);
281 if (tmp != kpte)
282 goto out_unlock;
283
284 switch (level) {
285 case PG_LEVEL_2M:
286 psize = PMD_PAGE_SIZE;
287 pmask = PMD_PAGE_MASK;
288 break;
289#ifdef CONFIG_X86_64
290 case PG_LEVEL_1G:
291 psize = PMD_PAGE_SIZE;
292 pmask = PMD_PAGE_MASK;
293 break;
294#endif
295 default:
296 do_split = -EINVAL;
297 goto out_unlock;
298 }
299
300 /*
301 * Calculate the number of pages, which fit into this large
302 * page starting at address:
303 */
304 nextpage_addr = (address + psize) & pmask;
305 numpages = (nextpage_addr - address) >> PAGE_SHIFT;
306 if (numpages < cpa->numpages)
307 cpa->numpages = numpages;
308
309 /*
310 * We are safe now. Check whether the new pgprot is the same:
311 */
312 old_pte = *kpte;
313 old_prot = new_prot = pte_pgprot(old_pte);
314
315 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
316 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
317 new_prot = static_protections(new_prot, address);
318
319 /*
320 * If there are no changes, return. maxpages has been updated
321 * above:
322 */
323 if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
324 do_split = 0;
325 goto out_unlock;
326 }
327
328 /*
329 * We need to change the attributes. Check, whether we can
330 * change the large page in one go. We request a split, when
331 * the address is not aligned and the number of pages is
332 * smaller than the number of pages in the large page. Note
333 * that we limited the number of possible pages already to
334 * the number of pages in the large page.
335 */
336 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
337 /*
338 * The address is aligned and the number of pages
339 * covers the full page.
340 */
341 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
342 __set_pmd_pte(kpte, address, new_pte);
343 cpa->flushtlb = 1;
344 do_split = 0;
345 }
346
347out_unlock:
348 spin_unlock_irqrestore(&pgd_lock, flags);
349
350 return do_split;
351}
352
192static int split_large_page(pte_t *kpte, unsigned long address) 353static int split_large_page(pte_t *kpte, unsigned long address)
193{ 354{
194 pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 355 unsigned long flags, pfn, pfninc = 1;
195 gfp_t gfp_flags = GFP_KERNEL; 356 gfp_t gfp_flags = GFP_KERNEL;
196 unsigned long flags; 357 unsigned int i, level;
197 unsigned long addr;
198 pte_t *pbase, *tmp; 358 pte_t *pbase, *tmp;
359 pgprot_t ref_prot;
199 struct page *base; 360 struct page *base;
200 unsigned int i, level;
201 361
202#ifdef CONFIG_DEBUG_PAGEALLOC 362#ifdef CONFIG_DEBUG_PAGEALLOC
203 gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
204 gfp_flags = GFP_ATOMIC | __GFP_NOWARN; 363 gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
205#endif 364#endif
206 base = alloc_pages(gfp_flags, 0); 365 base = alloc_pages(gfp_flags, 0);
@@ -213,30 +372,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
213 * up for us already: 372 * up for us already:
214 */ 373 */
215 tmp = lookup_address(address, &level); 374 tmp = lookup_address(address, &level);
216 if (tmp != kpte) { 375 if (tmp != kpte)
217 WARN_ON_ONCE(1);
218 goto out_unlock; 376 goto out_unlock;
219 }
220 377
221 address = __pa(address);
222 addr = address & LARGE_PAGE_MASK;
223 pbase = (pte_t *)page_address(base); 378 pbase = (pte_t *)page_address(base);
224#ifdef CONFIG_X86_32 379#ifdef CONFIG_X86_32
225 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); 380 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
226#endif 381#endif
382 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
383
384#ifdef CONFIG_X86_64
385 if (level == PG_LEVEL_1G) {
386 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
387 pgprot_val(ref_prot) |= _PAGE_PSE;
388 }
389#endif
227 390
228 pgprot_val(ref_prot) &= ~_PAGE_NX; 391 /*
229 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) 392 * Get the target pfn from the original entry:
230 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot)); 393 */
394 pfn = pte_pfn(*kpte);
395 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
396 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
231 397
232 /* 398 /*
233 * Install the new, split up pagetable. Important detail here: 399 * Install the new, split up pagetable. Important details here:
234 * 400 *
235 * On Intel the NX bit of all levels must be cleared to make a 401 * On Intel the NX bit of all levels must be cleared to make a
236 * page executable. See section 4.13.2 of Intel 64 and IA-32 402 * page executable. See section 4.13.2 of Intel 64 and IA-32
237 * Architectures Software Developer's Manual). 403 * Architectures Software Developer's Manual).
404 *
405 * Mark the entry present. The current mapping might be
406 * set to not present, which we preserved above.
238 */ 407 */
239 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); 408 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
409 pgprot_val(ref_prot) |= _PAGE_PRESENT;
240 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); 410 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
241 base = NULL; 411 base = NULL;
242 412
@@ -249,18 +419,12 @@ out_unlock:
249 return 0; 419 return 0;
250} 420}
251 421
252static int 422static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
253__change_page_attr(unsigned long address, unsigned long pfn,
254 pgprot_t mask_set, pgprot_t mask_clr)
255{ 423{
424 int level, do_split, err;
256 struct page *kpte_page; 425 struct page *kpte_page;
257 int level, err = 0;
258 pte_t *kpte; 426 pte_t *kpte;
259 427
260#ifdef CONFIG_X86_32
261 BUG_ON(pfn > max_low_pfn);
262#endif
263
264repeat: 428repeat:
265 kpte = lookup_address(address, &level); 429 kpte = lookup_address(address, &level);
266 if (!kpte) 430 if (!kpte)
@@ -271,23 +435,62 @@ repeat:
271 BUG_ON(PageCompound(kpte_page)); 435 BUG_ON(PageCompound(kpte_page));
272 436
273 if (level == PG_LEVEL_4K) { 437 if (level == PG_LEVEL_4K) {
274 pgprot_t new_prot = pte_pgprot(*kpte);
275 pte_t new_pte, old_pte = *kpte; 438 pte_t new_pte, old_pte = *kpte;
439 pgprot_t new_prot = pte_pgprot(old_pte);
440
441 if(!pte_val(old_pte)) {
442 printk(KERN_WARNING "CPA: called for zero pte. "
443 "vaddr = %lx cpa->vaddr = %lx\n", address,
444 cpa->vaddr);
445 WARN_ON(1);
446 return -EINVAL;
447 }
276 448
277 pgprot_val(new_prot) &= ~pgprot_val(mask_clr); 449 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
278 pgprot_val(new_prot) |= pgprot_val(mask_set); 450 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
279 451
280 new_prot = static_protections(new_prot, address); 452 new_prot = static_protections(new_prot, address);
281 453
282 new_pte = pfn_pte(pfn, canon_pgprot(new_prot)); 454 /*
283 BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte)); 455 * We need to keep the pfn from the existing PTE,
456 * after all we're only going to change it's attributes
457 * not the memory it points to
458 */
459 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
460
461 /*
462 * Do we really change anything ?
463 */
464 if (pte_val(old_pte) != pte_val(new_pte)) {
465 set_pte_atomic(kpte, new_pte);
466 cpa->flushtlb = 1;
467 }
468 cpa->numpages = 1;
469 return 0;
470 }
471
472 /*
473 * Check, whether we can keep the large page intact
474 * and just change the pte:
475 */
476 do_split = try_preserve_large_page(kpte, address, cpa);
477 /*
478 * When the range fits into the existing large page,
479 * return. cp->numpages and cpa->tlbflush have been updated in
480 * try_large_page:
481 */
482 if (do_split <= 0)
483 return do_split;
284 484
285 set_pte_atomic(kpte, new_pte); 485 /*
286 } else { 486 * We have to split the large page:
287 err = split_large_page(kpte, address); 487 */
288 if (!err) 488 err = split_large_page(kpte, address);
289 goto repeat; 489 if (!err) {
490 cpa->flushtlb = 1;
491 goto repeat;
290 } 492 }
493
291 return err; 494 return err;
292} 495}
293 496
@@ -304,19 +507,14 @@ repeat:
304 * 507 *
305 * Modules and drivers should use the set_memory_* APIs instead. 508 * Modules and drivers should use the set_memory_* APIs instead.
306 */ 509 */
307 510static int change_page_attr_addr(struct cpa_data *cpa)
308#define HIGH_MAP_START __START_KERNEL_map
309#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
310
311static int
312change_page_attr_addr(unsigned long address, pgprot_t mask_set,
313 pgprot_t mask_clr)
314{ 511{
315 unsigned long phys_addr = __pa(address);
316 unsigned long pfn = phys_addr >> PAGE_SHIFT;
317 int err; 512 int err;
513 unsigned long address = cpa->vaddr;
318 514
319#ifdef CONFIG_X86_64 515#ifdef CONFIG_X86_64
516 unsigned long phys_addr = __pa(address);
517
320 /* 518 /*
321 * If we are inside the high mapped kernel range, then we 519 * If we are inside the high mapped kernel range, then we
322 * fixup the low mapping first. __va() returns the virtual 520 * fixup the low mapping first. __va() returns the virtual
@@ -326,7 +524,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
326 address = (unsigned long) __va(phys_addr); 524 address = (unsigned long) __va(phys_addr);
327#endif 525#endif
328 526
329 err = __change_page_attr(address, pfn, mask_set, mask_clr); 527 err = __change_page_attr(address, cpa);
330 if (err) 528 if (err)
331 return err; 529 return err;
332 530
@@ -339,42 +537,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
339 /* 537 /*
340 * Calc the high mapping address. See __phys_addr() 538 * Calc the high mapping address. See __phys_addr()
341 * for the non obvious details. 539 * for the non obvious details.
540 *
541 * Note that NX and other required permissions are
542 * checked in static_protections().
342 */ 543 */
343 address = phys_addr + HIGH_MAP_START - phys_base; 544 address = phys_addr + HIGH_MAP_START - phys_base;
344 /* Make sure the kernel mappings stay executable */
345 pgprot_val(mask_clr) |= _PAGE_NX;
346 545
347 /* 546 /*
348 * Our high aliases are imprecise, because we check 547 * Our high aliases are imprecise, because we check
349 * everything between 0 and KERNEL_TEXT_SIZE, so do 548 * everything between 0 and KERNEL_TEXT_SIZE, so do
350 * not propagate lookup failures back to users: 549 * not propagate lookup failures back to users:
351 */ 550 */
352 __change_page_attr(address, pfn, mask_set, mask_clr); 551 __change_page_attr(address, cpa);
353 } 552 }
354#endif 553#endif
355 return err; 554 return err;
356} 555}
357 556
358static int __change_page_attr_set_clr(unsigned long addr, int numpages, 557static int __change_page_attr_set_clr(struct cpa_data *cpa)
359 pgprot_t mask_set, pgprot_t mask_clr)
360{ 558{
361 unsigned int i; 559 int ret, numpages = cpa->numpages;
362 int ret;
363 560
364 for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) { 561 while (numpages) {
365 ret = change_page_attr_addr(addr, mask_set, mask_clr); 562 /*
563 * Store the remaining nr of pages for the large page
564 * preservation check.
565 */
566 cpa->numpages = numpages;
567 ret = change_page_attr_addr(cpa);
366 if (ret) 568 if (ret)
367 return ret; 569 return ret;
368 }
369 570
571 /*
572 * Adjust the number of pages with the result of the
573 * CPA operation. Either a large page has been
574 * preserved or a single page update happened.
575 */
576 BUG_ON(cpa->numpages > numpages);
577 numpages -= cpa->numpages;
578 cpa->vaddr += cpa->numpages * PAGE_SIZE;
579 }
370 return 0; 580 return 0;
371} 581}
372 582
583static inline int cache_attr(pgprot_t attr)
584{
585 return pgprot_val(attr) &
586 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
587}
588
373static int change_page_attr_set_clr(unsigned long addr, int numpages, 589static int change_page_attr_set_clr(unsigned long addr, int numpages,
374 pgprot_t mask_set, pgprot_t mask_clr) 590 pgprot_t mask_set, pgprot_t mask_clr)
375{ 591{
376 int ret = __change_page_attr_set_clr(addr, numpages, mask_set, 592 struct cpa_data cpa;
377 mask_clr); 593 int ret, cache;
594
595 /*
596 * Check, if we are requested to change a not supported
597 * feature:
598 */
599 mask_set = canon_pgprot(mask_set);
600 mask_clr = canon_pgprot(mask_clr);
601 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
602 return 0;
603
604 cpa.vaddr = addr;
605 cpa.numpages = numpages;
606 cpa.mask_set = mask_set;
607 cpa.mask_clr = mask_clr;
608 cpa.flushtlb = 0;
609
610 ret = __change_page_attr_set_clr(&cpa);
611
612 /*
613 * Check whether we really changed something:
614 */
615 if (!cpa.flushtlb)
616 return ret;
617
618 /*
619 * No need to flush, when we did not set any of the caching
620 * attributes:
621 */
622 cache = cache_attr(mask_set);
378 623
379 /* 624 /*
380 * On success we use clflush, when the CPU supports it to 625 * On success we use clflush, when the CPU supports it to
@@ -383,9 +628,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
383 * wbindv): 628 * wbindv):
384 */ 629 */
385 if (!ret && cpu_has_clflush) 630 if (!ret && cpu_has_clflush)
386 cpa_flush_range(addr, numpages); 631 cpa_flush_range(addr, numpages, cache);
387 else 632 else
388 cpa_flush_all(); 633 cpa_flush_all(cache);
389 634
390 return ret; 635 return ret;
391} 636}
@@ -489,37 +734,26 @@ int set_pages_rw(struct page *page, int numpages)
489 return set_memory_rw(addr, numpages); 734 return set_memory_rw(addr, numpages);
490} 735}
491 736
492
493#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
494static inline int __change_page_attr_set(unsigned long addr, int numpages,
495 pgprot_t mask)
496{
497 return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
498}
499
500static inline int __change_page_attr_clear(unsigned long addr, int numpages,
501 pgprot_t mask)
502{
503 return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
504}
505#endif
506
507#ifdef CONFIG_DEBUG_PAGEALLOC 737#ifdef CONFIG_DEBUG_PAGEALLOC
508 738
509static int __set_pages_p(struct page *page, int numpages) 739static int __set_pages_p(struct page *page, int numpages)
510{ 740{
511 unsigned long addr = (unsigned long)page_address(page); 741 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
742 .numpages = numpages,
743 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
744 .mask_clr = __pgprot(0)};
512 745
513 return __change_page_attr_set(addr, numpages, 746 return __change_page_attr_set_clr(&cpa);
514 __pgprot(_PAGE_PRESENT | _PAGE_RW));
515} 747}
516 748
517static int __set_pages_np(struct page *page, int numpages) 749static int __set_pages_np(struct page *page, int numpages)
518{ 750{
519 unsigned long addr = (unsigned long)page_address(page); 751 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
752 .numpages = numpages,
753 .mask_set = __pgprot(0),
754 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
520 755
521 return __change_page_attr_clear(addr, numpages, 756 return __change_page_attr_set_clr(&cpa);
522 __pgprot(_PAGE_PRESENT));
523} 757}
524 758
525void kernel_map_pages(struct page *page, int numpages, int enable) 759void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa470249b..c7db504be1ea 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
219 list_del(&page->lru); 219 list_del(&page->lru);
220} 220}
221 221
222#define UNSHARED_PTRS_PER_PGD \
223 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
222 224
223 225static void pgd_ctor(void *p)
224#if (PTRS_PER_PMD == 1)
225/* Non-PAE pgd constructor */
226static void pgd_ctor(void *pgd)
227{ 226{
227 pgd_t *pgd = p;
228 unsigned long flags; 228 unsigned long flags;
229 229
230 /* !PAE, no pagetable sharing */ 230 /* Clear usermode parts of PGD */
231 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 231 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
232 232
233 spin_lock_irqsave(&pgd_lock, flags); 233 spin_lock_irqsave(&pgd_lock, flags);
234 234
235 /* must happen under lock */ 235 /* If the pgd points to a shared pagetable level (either the
236 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 236 ptes in non-PAE, or shared PMD in PAE), then just copy the
237 swapper_pg_dir + USER_PTRS_PER_PGD, 237 references from swapper_pg_dir. */
238 KERNEL_PGD_PTRS); 238 if (PAGETABLE_LEVELS == 2 ||
239 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, 239 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
240 __pa(swapper_pg_dir) >> PAGE_SHIFT, 240 clone_pgd_range(pgd + USER_PTRS_PER_PGD,
241 USER_PTRS_PER_PGD,
242 KERNEL_PGD_PTRS);
243 pgd_list_add(pgd);
244 spin_unlock_irqrestore(&pgd_lock, flags);
245}
246#else /* PTRS_PER_PMD > 1 */
247/* PAE pgd constructor */
248static void pgd_ctor(void *pgd)
249{
250 /* PAE, kernel PMD may be shared */
251
252 if (SHARED_KERNEL_PMD) {
253 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
254 swapper_pg_dir + USER_PTRS_PER_PGD, 241 swapper_pg_dir + USER_PTRS_PER_PGD,
255 KERNEL_PGD_PTRS); 242 KERNEL_PGD_PTRS);
256 } else { 243 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
257 unsigned long flags; 244 __pa(swapper_pg_dir) >> PAGE_SHIFT,
245 USER_PTRS_PER_PGD,
246 KERNEL_PGD_PTRS);
247 }
258 248
259 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 249 /* list required to sync kernel mapping updates */
260 spin_lock_irqsave(&pgd_lock, flags); 250 if (!SHARED_KERNEL_PMD)
261 pgd_list_add(pgd); 251 pgd_list_add(pgd);
262 spin_unlock_irqrestore(&pgd_lock, flags); 252
263 } 253 spin_unlock_irqrestore(&pgd_lock, flags);
264} 254}
265#endif /* PTRS_PER_PMD */
266 255
267static void pgd_dtor(void *pgd) 256static void pgd_dtor(void *pgd)
268{ 257{
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
276 spin_unlock_irqrestore(&pgd_lock, flags); 265 spin_unlock_irqrestore(&pgd_lock, flags);
277} 266}
278 267
279#define UNSHARED_PTRS_PER_PGD \
280 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
281
282#ifdef CONFIG_X86_PAE 268#ifdef CONFIG_X86_PAE
283/* 269/*
284 * Mop up any pmd pages which may still be attached to the pgd. 270 * Mop up any pmd pages which may still be attached to the pgd.
@@ -387,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
387 373
388void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) 374void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
389{ 375{
390 /* This is called just after the pmd has been detached from
391 the pgd, which requires a full tlb flush to be recognized
392 by the CPU. Rather than incurring multiple tlb flushes
393 while the address space is being pulled down, make the tlb
394 gathering machinery do a full flush when we're done. */
395 tlb->fullmm = 1;
396
397 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 376 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
398 tlb_remove_page(tlb, virt_to_page(pmd)); 377 tlb_remove_page(tlb, virt_to_page(pmd));
399} 378}
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f69e0c..55270c26237c 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <mach_apic.h>
8#include "pci.h" 9#include "pci.h"
9 10
11#define XQUAD_PORTIO_BASE 0xfe400000
12#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
13
10#define BUS2QUAD(global) (mp_bus_id_to_node[global]) 14#define BUS2QUAD(global) (mp_bus_id_to_node[global])
11#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) 15#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
12#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) 16#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
13 17
18extern void *xquad_portio; /* Where the IO area was mapped */
19#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
20
14#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ 21#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
15 (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) 22 (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
16 23
24static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
25{
26 unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
27 if (xquad_portio)
28 writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
29 else
30 outl(val, 0xCF8);
31}
32
17static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, 33static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
18 unsigned int devfn, int reg, int len, u32 *value) 34 unsigned int devfn, int reg, int len, u32 *value)
19{ 35{
20 unsigned long flags; 36 unsigned long flags;
37 void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
21 38
22 if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 39 if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
23 return -EINVAL; 40 return -EINVAL;
24 41
25 spin_lock_irqsave(&pci_config_lock, flags); 42 spin_lock_irqsave(&pci_config_lock, flags);
26 43
27 outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); 44 write_cf8(bus, devfn, reg);
28 45
29 switch (len) { 46 switch (len) {
30 case 1: 47 case 1:
31 *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); 48 if (xquad_portio)
49 *value = readb(adr + (reg & 3));
50 else
51 *value = inb(0xCFC + (reg & 3));
32 break; 52 break;
33 case 2: 53 case 2:
34 *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); 54 if (xquad_portio)
55 *value = readw(adr + (reg & 2));
56 else
57 *value = inw(0xCFC + (reg & 2));
35 break; 58 break;
36 case 4: 59 case 4:
37 *value = inl_quad(0xCFC, BUS2QUAD(bus)); 60 if (xquad_portio)
61 *value = readl(adr);
62 else
63 *value = inl(0xCFC);
38 break; 64 break;
39 } 65 }
40 66
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
47 unsigned int devfn, int reg, int len, u32 value) 73 unsigned int devfn, int reg, int len, u32 value)
48{ 74{
49 unsigned long flags; 75 unsigned long flags;
76 void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
50 77
51 if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 78 if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
52 return -EINVAL; 79 return -EINVAL;
53 80
54 spin_lock_irqsave(&pci_config_lock, flags); 81 spin_lock_irqsave(&pci_config_lock, flags);
55 82
56 outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); 83 write_cf8(bus, devfn, reg);
57 84
58 switch (len) { 85 switch (len) {
59 case 1: 86 case 1:
60 outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); 87 if (xquad_portio)
88 writeb(value, adr + (reg & 3));
89 else
90 outb((u8)value, 0xCFC + (reg & 3));
61 break; 91 break;
62 case 2: 92 case 2:
63 outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); 93 if (xquad_portio)
94 writew(value, adr + (reg & 2));
95 else
96 outw((u16)value, 0xCFC + (reg & 2));
64 break; 97 break;
65 case 4: 98 case 4:
66 outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); 99 if (xquad_portio)
100 writel(value, adr + reg);
101 else
102 outl((u32)value, 0xCFC);
67 break; 103 break;
68 } 104 }
69 105