Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                          6
-rw-r--r--  arch/x86/Kconfig.debug                    2
-rw-r--r--  arch/x86/Makefile                         2
-rw-r--r--  arch/x86/kernel/Makefile                 10
-rw-r--r--  arch/x86/kernel/acpi/boot.c               3
-rw-r--r--  arch/x86/kernel/acpi/cstate.c             2
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c          6
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                8
-rw-r--r--  arch/x86/kernel/efi.c                    16
-rw-r--r--  arch/x86/kernel/efi_32.c                  1
-rw-r--r--  arch/x86/kernel/efi_64.c                 32
-rw-r--r--  arch/x86/kernel/entry_32.S                2
-rw-r--r--  arch/x86/kernel/entry_64.S                2
-rw-r--r--  arch/x86/kernel/head64.c                  3
-rw-r--r--  arch/x86/kernel/head_32.S                 2
-rw-r--r--  arch/x86/kernel/head_64.S                 9
-rw-r--r--  arch/x86/kernel/i387.c                    2
-rw-r--r--  arch/x86/kernel/i8253.c                   2
-rw-r--r--  arch/x86/kernel/i8259_32.c               25
-rw-r--r--  arch/x86/kernel/io_delay.c                1
-rw-r--r--  arch/x86/kernel/kprobes.c                 4
-rw-r--r--  arch/x86/kernel/nmi_32.c                 21
-rw-r--r--  arch/x86/kernel/nmi_64.c                 21
-rw-r--r--  arch/x86/kernel/pci-gart_64.c             9
-rw-r--r--  arch/x86/kernel/ptrace.c                  4
-rw-r--r--  arch/x86/kernel/reboot.c                 46
-rw-r--r--  arch/x86/kernel/setup_32.c                1
-rw-r--r--  arch/x86/kernel/setup_64.c                6
-rw-r--r--  arch/x86/kernel/test_rodata.c             2
-rw-r--r--  arch/x86/kernel/topology.c                2
-rw-r--r--  arch/x86/kernel/traps_64.c                4
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S         26
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S         30
-rw-r--r--  arch/x86/lib/csum-wrappers_64.c         147
-rw-r--r--  arch/x86/lib/io_64.c                     18
-rw-r--r--  arch/x86/mm/fault.c                       2
-rw-r--r--  arch/x86/mm/init_32.c                     1
-rw-r--r--  arch/x86/mm/init_64.c                    28
-rw-r--r--  arch/x86/mm/ioremap.c                    30
-rw-r--r--  arch/x86/mm/numa_64.c                     5
-rw-r--r--  arch/x86/mm/pageattr.c                  253
-rw-r--r--  arch/x86/mm/srat_64.c                     3
-rw-r--r--  arch/x86/pci/common.c                     2
-rw-r--r--  arch/x86/pci/irq.c                        4
-rw-r--r--  arch/x86/power/hibernate_asm_64.S         5
-rw-r--r--  arch/x86/xen/enlighten.c                  4
46 files changed, 470 insertions(+), 344 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index aaed1a3b92d6..4a88cf7695b4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,8 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KVM
+
 
 config GENERIC_LOCKBREAK
 	def_bool n
@@ -119,8 +121,6 @@ config ARCH_HAS_CPU_RELAX
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool X86_64
 
-select HAVE_KVM
-
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 	depends on !SMP || !X86_VOYAGER
@@ -1054,7 +1054,7 @@ config SECCOMP
 
 config CC_STACKPROTECTOR
 	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-	depends on X86_64 && EXPERIMENTAL
+	depends on X86_64 && EXPERIMENTAL && BROKEN
 	help
 	  This option turns on the -fstack-protector GCC feature. This
 	  feature puts, at the beginning of critical functions, a canary
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 864affc9a7b0..702eb39901ca 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -156,7 +156,7 @@ config IO_DELAY_TYPE_NONE
 
 choice
 	prompt "IO delay type"
-	default IO_DELAY_0XED
+	default IO_DELAY_0X80
 
 config IO_DELAY_0X80
 	bool "port 0x80 based port-IO delay [recommended]"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 204af43535c5..f1e739a43d41 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -229,7 +229,7 @@ zdisk bzdisk: vmlinux
 fdimage fdimage144 fdimage288 isoimage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
 
-install: vdso_install
+install:
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
 
 PHONY += vdso_install
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 76ec0f8f138a..4eb5ce841106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,7 +6,15 @@ extra-y := head_$(BITS).o init_task.o vmlinux.lds
 extra-$(CONFIG_X86_64) += head64.o
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0
+
+#
+# vsyscalls (which work on the user stack) should have
+# no stack-protector checks:
+#
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
+CFLAGS_hpet.o	:= $(nostackp)
+CFLAGS_tsc_64.o	:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 680b7300a489..2cdc9de9371d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,7 +72,8 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return
 #define PREFIX			"ACPI: "
 
 int acpi_noirq;				/* skip ACPI IRQ initialization */
-int acpi_pci_disabled __initdata;	/* skip ACPI PCI scan and IRQ initialization */
+int acpi_pci_disabled;		/* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
 int acpi_ht __initdata = 1;	/* enable HT */
 
 int acpi_lapic;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 10b67170b133..8ca3557a6d59 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -126,6 +126,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
 		       "state\n", cx->type);
 	}
+	snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+		 cx->address);
 
 out:
 	set_cpus_allowed(current, saved_mask);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index afd84463b712..a33d53017997 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -20,10 +20,8 @@
 
 #include <xen/interface/xen.h>
 
-#ifdef CONFIG_LGUEST_GUEST
 #include <linux/lguest.h>
 #include "../../../drivers/lguest/lg.h"
-#endif
 
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -134,6 +132,10 @@ void foo(void)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
 	OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
+#endif
+
+#ifdef CONFIG_LGUEST
+	BLANK();
 	OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
 	OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
 	OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9b95edcfc6ae..027e5c003b16 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -25,14 +25,6 @@ static int __init no_halt(char *s)
 
 __setup("no-hlt", no_halt);
 
-static int __init mca_pentium(char *s)
-{
-	mca_pentium_flag = 1;
-	return 1;
-}
-
-__setup("mca-pentium", mca_pentium);
-
 static int __init no_387(char *s)
 {
 	boot_cpu_data.hard_math = 0;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 32dd62b36ff7..759e02bec070 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(efi);
 
 struct efi_memory_map memmap;
 
-struct efi efi_phys __initdata;
+static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
 static int __init setup_noefi(char *arg)
@@ -384,9 +384,6 @@ static void __init runtime_code_page_mkexec(void)
 	efi_memory_desc_t *md;
 	void *p;
 
-	if (!(__supported_pte_mask & _PAGE_NX))
-		return;
-
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
@@ -394,7 +391,7 @@ static void __init runtime_code_page_mkexec(void)
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
-		set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
+		set_memory_x(md->virt_addr, md->num_pages);
 	}
 }
 
@@ -428,9 +425,6 @@ void __init efi_enter_virtual_mode(void)
 		else
 			va = efi_ioremap(md->phys_addr, size);
 
-		if (md->attribute & EFI_MEMORY_WB)
-			set_memory_uc(md->virt_addr, size);
-
 		md->virt_addr = (u64) (unsigned long) va;
 
 		if (!va) {
@@ -439,6 +433,9 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 
+		if (!(md->attribute & EFI_MEMORY_WB))
+			set_memory_uc(md->virt_addr, md->num_pages);
+
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
@@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void)
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
 	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
-	runtime_code_page_mkexec();
+	if (__supported_pte_mask & _PAGE_NX)
+		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 	memmap.map = NULL;
 }
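
The efi.c hunks above revolve around one unit convention: set_memory_x() and set_memory_uc() take a *page count*, while the old code passed `md->num_pages << EFI_PAGE_SHIFT`, which is a byte count. A minimal standalone C sketch of the conversion (the helper and constants are illustrative, not from the patch):

#include <stdint.h>

#define EFI_PAGE_SHIFT	12	/* EFI descriptors count 4KB pages */
#define PAGE_SHIFT	12	/* x86 CPU pages are also 4KB */

/* bytes covered by an EFI descriptor, re-expressed as CPU pages */
static inline uint64_t efi_pages_to_cpu_pages(uint64_t num_pages)
{
	uint64_t bytes = num_pages << EFI_PAGE_SHIFT;

	return bytes >> PAGE_SHIFT;	/* equals num_pages when the shifts match */
}

Since both shifts are 12 on x86, the patch can pass md->num_pages straight through.
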
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index cb91f985b4a1..5d23d85624d4 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -28,6 +28,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/efi.h>
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 09d5c2330934..d143a1e76b30 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/efi.h>
+#include <asm/cacheflush.h>
 
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
@@ -43,22 +44,15 @@ static void __init early_mapping_set_exec(unsigned long start,
 					  unsigned long end,
 					  int executable)
 {
-	pte_t *kpte;
-	unsigned int level;
-
-	while (start < end) {
-		kpte = lookup_address((unsigned long)__va(start), &level);
-		BUG_ON(!kpte);
-		if (executable)
-			set_pte(kpte, pte_mkexec(*kpte));
-		else
-			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
-					    __supported_pte_mask));
-		if (level == PG_LEVEL_4K)
-			start = (start + PAGE_SIZE) & PAGE_MASK;
-		else
-			start = (start + PMD_SIZE) & PMD_MASK;
-	}
+	unsigned long num_pages;
+
+	start &= PMD_MASK;
+	end = (end + PMD_SIZE - 1) & PMD_MASK;
+	num_pages = (end - start) >> PAGE_SHIFT;
+	if (executable)
+		set_memory_x((unsigned long)__va(start), num_pages);
+	else
+		set_memory_nx((unsigned long)__va(start), num_pages);
 }
 
 static void __init early_runtime_code_mapping_set_exec(int executable)
@@ -74,7 +68,7 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
 		md = p;
 		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
 			unsigned long end;
-			end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
 			early_mapping_set_exec(md->phys_addr, end, executable);
 		}
 	}
@@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
 
-	local_irq_save(efi_flags);
 	early_runtime_code_mapping_set_exec(1);
+	local_irq_save(efi_flags);
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
 	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void)
 	 * After the lock is released, the original page table is restored.
 	 */
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
-	early_runtime_code_mapping_set_exec(0);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
+	early_runtime_code_mapping_set_exec(0);
 }
 
 void __init efi_reserve_bootmem(void)
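
The rewritten early_mapping_set_exec() rounds the byte range out to whole 2MB PMDs before handing a 4KB page count to set_memory_x()/set_memory_nx(). A standalone sketch of that arithmetic under the same assumptions (illustrative only, not kernel code):

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

/* widen [start, end) to whole 2MB mappings, return the 4KB page count */
static unsigned long range_to_pages(unsigned long start, unsigned long end)
{
	start &= PMD_MASK;			/* round down to a PMD boundary */
	end = (end + PMD_SIZE - 1) & PMD_MASK;	/* round up to a PMD boundary */

	return (end - start) >> PAGE_SHIFT;
}
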
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 824e21b80aad..4b87c32b639f 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
-ENTRY(irq_return)
+irq_return:
 	INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6be39a387c5a..2ad9a1bc6a73 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -583,7 +583,7 @@ retint_restore_args:	/* return to kernel space */
 restore_args:
 	RESTORE_ARGS 0,8,0
 
-ENTRY(irq_return)
+irq_return:
 	INTERRUPT_RETURN
 
 	.section __ex_table, "a"
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 24dbf56928d7..ad2440832de0 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,9 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* Make NULL pointers segfault */
 	zap_identity_mappings();
 
+	/* Cleanup the over mapped high alias */
+	cleanup_highmap();
+
 	for (i = 0; i < IDT_ENTRIES; i++) {
 #ifdef CONFIG_EARLY_PRINTK
 		set_intr_gate(i, &early_idt_handlers[i]);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 74ef4a41f224..25eb98540a41 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -612,7 +612,7 @@ ENTRY(swapper_pg_pmd)
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
 #endif
-ENTRY(swapper_pg_fixmap)
+swapper_pg_fixmap:
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 09b38d539b09..eb415043a929 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -107,8 +107,13 @@ startup_64:
 	movq	%rdx, 0(%rbx, %rax, 8)
 ident_complete:
 
-	/* Fixup the kernel text+data virtual addresses
+	/*
+	 * Fixup the kernel text+data virtual addresses. Note that
+	 * we might write invalid pmds, when the kernel is relocated
+	 * cleanup_highmap() fixes this up along with the mappings
+	 * beyond _end.
 	 */
+
 	leaq	level2_kernel_pgt(%rip), %rdi
 	leaq	4096(%rdi), %r8
 	/* See if it is a valid page table entry */
@@ -250,7 +255,7 @@ ENTRY(secondary_startup_64)
 	lretq
 
 	/* SMP bootup changes these two */
-	__CPUINITDATA
+	__REFDATA
 	.align	8
 	ENTRY(initial_code)
 	.quad	x86_64_start_kernel
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 26719bd2c77c..763dfc407232 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -39,7 +39,7 @@
 #define HAVE_HWFP	1
 #endif
 
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
 
 void mxcsr_feature_mask_init(void)
 {
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index ef62b07b2b48..8540abe86ade 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
 * !using_apic_timer decisions in do_timer_interrupt_hook()
 */
-struct clock_event_device pit_clockevent = {
+static struct clock_event_device pit_clockevent = {
	.name		= "pit",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= init_pit_timer,
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 2d25b77102fe..fe631967d625 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -26,8 +26,6 @@
 * present in the majority of PC/AT boxes.
 * plus some generic x86 specific things if generic specifics makes
 * any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
 */
 
 static int i8259A_auto_eoi;
@@ -362,23 +360,12 @@ void __init init_ISA_irqs (void)
 #endif
	init_8259A(0);
 
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = NULL;
-		irq_desc[i].depth = 1;
-
-		if (i < 16) {
-			/*
-			 * 16 old-style INTA-cycle interrupts:
-			 */
-			set_irq_chip_and_handler_name(i, &i8259A_chip,
-				handle_level_irq, "XT");
-		} else {
-			/*
-			 * 'high' PCI IRQs filled in on demand
-			 */
-			irq_desc[i].chip = &no_irq_chip;
-		}
+	/*
+	 * 16 old-style INTA-cycle interrupts:
+	 */
+	for (i = 0; i < 16; i++) {
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
+					      handle_level_irq, "XT");
	}
 }
 
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index bd49321034db..c706a3061553 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -13,7 +13,6 @@
 #include <asm/io.h>
 
 int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
-EXPORT_SYMBOL_GPL(io_delay_type);
 
 static int __initdata io_delay_override;
 
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index a99e764fd66a..34a591283f5d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -581,7 +581,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 * When a retprobed function returns, this code saves registers and
 * calls trampoline_handler() runs, which calls the kretprobe's handler.
 */
-void __kprobes kretprobe_trampoline_holder(void)
+static void __used __kprobes kretprobe_trampoline_holder(void)
 {
	asm volatile (
			".global kretprobe_trampoline\n"
@@ -673,7 +673,7 @@ void __kprobes kretprobe_trampoline_holder(void)
 /*
 * Called from kretprobe_trampoline
 */
-void * __kprobes trampoline_handler(struct pt_regs *regs)
+static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 {
	struct kretprobe_instance *ri = NULL;
	struct hlist_head *head, empty_rp;
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index edd413650b3b..6a0aa7038685 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -46,9 +46,6 @@ static unsigned int nmi_hz = HZ;
 
 static DEFINE_PER_CPU(short, wd_enabled);
 
-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
 static int endflag __initdata = 0;
 
 #ifdef CONFIG_SMP
@@ -391,15 +388,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
	return rc;
 }
 
-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
 #ifdef CONFIG_SYSCTL
 
 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
@@ -453,6 +441,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+int do_nmi_callback(struct pt_regs *regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+	if (unknown_nmi_panic)
+		return unknown_nmi_panic_callback(regs, cpu);
+#endif
+	return 0;
+}
+
 void __trigger_all_cpu_backtrace(void)
 {
	int i;
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index fb99484d21cf..9a4fde74bee1 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -46,9 +46,6 @@ static unsigned int nmi_hz = HZ;
 
 static DEFINE_PER_CPU(short, wd_enabled);
 
-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
 /* Run after command line and cpu_init init, but before all other checks */
 void nmi_watchdog_default(void)
 {
@@ -394,15 +391,6 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
	nmi_exit();
 }
 
-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
 void stop_nmi(void)
 {
	acpi_nmi_disable();
@@ -464,6 +452,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+int do_nmi_callback(struct pt_regs *regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+	if (unknown_nmi_panic)
+		return unknown_nmi_panic_callback(regs, cpu);
+#endif
+	return 0;
+}
+
 void __trigger_all_cpu_backtrace(void)
 {
	int i;
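
Both NMI files apply the same reordering: do_nmi_callback() moves below the CONFIG_SYSCTL block, so the forward prototype of a conditionally compiled static function can go away. A generic sketch of the pattern (hypothetical names and bodies, not the kernel code):

#define CONFIG_SYSCTL 1		/* stand-in for the Kconfig option */

static int unknown_nmi_panic;	/* set via sysctl in the real code */

#ifdef CONFIG_SYSCTL
static int unknown_nmi_panic_callback(void)
{
	return 1;		/* placeholder body */
}
#endif

/* defined after its callee: no forward declaration needed */
int do_nmi_callback(void)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback();
#endif
	return 0;
}
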
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 65f6acb025c8..faf3229f8fb3 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -749,6 +749,15 @@ void __init gart_iommu_init(void)
	 */
	set_memory_np((unsigned long)__va(iommu_bus_base),
		      iommu_size >> PAGE_SHIFT);
+	/*
+	 * Tricky. The GART table remaps the physical memory range,
+	 * so the CPU wont notice potential aliases and if the memory
+	 * is remapped to UC later on, we might surprise the PCI devices
+	 * with a stray writeout of a cacheline. So play it sure and
+	 * do an explicit, full-scale wbinvd() _after_ having marked all
+	 * the pages as Not-Present:
+	 */
+	wbinvd();
 
	/*
	 * Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 702c33efea84..d862e396b099 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1160,7 +1160,7 @@ static int genregs32_set(struct task_struct *target,
	if (kbuf) {
		const compat_ulong_t *k = kbuf;
		while (count > 0 && !ret) {
-			ret = putreg(target, pos, *k++);
+			ret = putreg32(target, pos, *k++);
			count -= sizeof(*k);
			pos += sizeof(*k);
		}
@@ -1171,7 +1171,7 @@ static int genregs32_set(struct task_struct *target,
			ret = __get_user(word, u++);
			if (ret)
				break;
-			ret = putreg(target, pos, word);
+			ret = putreg32(target, pos, word);
			count -= sizeof(*u);
			pos += sizeof(*u);
		}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5818dc28167d..7fd6ac43e4a1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,7 +326,7 @@ static inline void kb_wait(void)
	}
 }
 
-void machine_emergency_restart(void)
+static void native_machine_emergency_restart(void)
 {
	int i;
 
@@ -376,7 +376,7 @@ void machine_emergency_restart(void)
	}
 }
 
-void machine_shutdown(void)
+static void native_machine_shutdown(void)
 {
	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
@@ -420,7 +420,7 @@ void machine_shutdown(void)
 #endif
 }
 
-void machine_restart(char *__unused)
+static void native_machine_restart(char *__unused)
 {
	printk("machine restart\n");
 
@@ -429,11 +429,11 @@ void machine_restart(char *__unused)
	machine_emergency_restart();
 }
 
-void machine_halt(void)
+static void native_machine_halt(void)
 {
 }
 
-void machine_power_off(void)
+static void native_machine_power_off(void)
 {
	if (pm_power_off) {
		if (!reboot_force)
@@ -443,9 +443,35 @@ void machine_power_off(void)
 }
 
 struct machine_ops machine_ops = {
-	.power_off = machine_power_off,
-	.shutdown = machine_shutdown,
-	.emergency_restart = machine_emergency_restart,
-	.restart = machine_restart,
-	.halt = machine_halt
+	.power_off = native_machine_power_off,
+	.shutdown = native_machine_shutdown,
+	.emergency_restart = native_machine_emergency_restart,
+	.restart = native_machine_restart,
+	.halt = native_machine_halt
 };
+
+void machine_power_off(void)
+{
+	machine_ops.power_off();
+}
+
+void machine_shutdown(void)
+{
+	machine_ops.shutdown();
+}
+
+void machine_emergency_restart(void)
+{
+	machine_ops.emergency_restart();
+}
+
+void machine_restart(char *cmd)
+{
+	machine_ops.restart(cmd);
+}
+
+void machine_halt(void)
+{
+	machine_ops.halt();
+}
+
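
The reboot.c change turns the machine_*() entry points into thin wrappers around the machine_ops table, so paravirt code can override individual operations while the native_* implementations remain the defaults. A reduced sketch of the indirection with two ops only (the bodies are placeholders, not the patch's code):

struct machine_ops {
	void (*restart)(char *cmd);
	void (*halt)(void);
};

static void native_machine_restart(char *cmd) { /* native reboot path */ }
static void native_machine_halt(void)         { /* native halt path */ }

struct machine_ops machine_ops = {
	.restart	= native_machine_restart,
	.halt		= native_machine_halt,
};

/* generic entry points always dispatch through the table */
void machine_restart(char *cmd)
{
	machine_ops.restart(cmd);
}

A guest such as Xen can then assign its own handlers into machine_ops at init time instead of replacing the global functions.
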
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 691ab4cb167b..a1d7071a51c9 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -164,7 +164,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
 unsigned int machine_id;
 unsigned int machine_submodel_id;
 unsigned int BIOS_revision;
-unsigned int mca_pentium_flag;
 
 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
 int bootloader_type;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c0d8208af12a..6fd804f07821 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -518,7 +518,7 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_NUMA
-static int nearby_node(int apicid)
+static int __cpuinit nearby_node(int apicid)
 {
	int i, node;
 
@@ -791,7 +791,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
	return 1;
 }
 
-static void srat_detect_node(void)
+static void __cpuinit srat_detect_node(void)
 {
 #ifdef CONFIG_NUMA
	unsigned node;
@@ -1046,7 +1046,7 @@ __setup("noclflush", setup_noclflush);
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
	if (c->x86_model_id[0])
-		printk(KERN_INFO "%s", c->x86_model_id);
+		printk(KERN_CONT "%s", c->x86_model_id);
 
	if (c->x86_mask || c->cpuid_level >= 0)
		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 4c163772000e..c29e235792af 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -10,8 +10,8 @@
 * of the License.
 */
 #include <linux/module.h>
+#include <asm/cacheflush.h>
 #include <asm/sections.h>
-extern int rodata_test_data;
 
 int rodata_test(void)
 {
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index a40051b71d9b..0fcc95a354f7 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -34,7 +34,7 @@
 static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
 #ifdef CONFIG_HOTPLUG_CPU
-int arch_register_cpu(int num)
+int __ref arch_register_cpu(int num)
 {
	/*
	 * CPU0 cannot be offlined due to several
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index efc66df728b6..045466681911 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -84,7 +84,7 @@ static inline void conditional_sti(struct pt_regs *regs)
 
 static inline void preempt_conditional_sti(struct pt_regs *regs)
 {
-	preempt_disable();
+	inc_preempt_count();
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
 }
@@ -95,7 +95,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
		local_irq_disable();
	/* Make sure to not schedule here because we could be running
	   on an exception stack. */
-	preempt_enable_no_resched();
+	dec_preempt_count();
 }
 
 int kstack_depth_to_print = 12;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index f1148ac8abe3..2ffa9656fe7a 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -38,7 +38,7 @@ SECTIONS
 
  /* read-only */
  .text : AT(ADDR(.text) - LOAD_OFFSET) {
-	. = ALIGN(4096); /* not really needed, already page aligned */
+	. = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
	*(.text.page_aligned)
	TEXT_TEXT
	SCHED_TEXT
@@ -70,21 +70,21 @@ SECTIONS
  RODATA
 
  /* writeable */
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
	DATA_DATA
	CONSTRUCTORS
	} :data
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
	__nosave_begin = .;
	*(.data.nosave)
-	. = ALIGN(4096);
+	. = ALIGN(PAGE_SIZE);
	__nosave_end = .;
  }
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
	*(.data.page_aligned)
	*(.data.idt)
@@ -108,7 +108,7 @@ SECTIONS
  }
 
  /* might get freed after init */
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
	__smp_locks = .;
	*(.smp_locks)
@@ -120,10 +120,10 @@ SECTIONS
   * after boot. Always make sure that ALIGN() directive is present after
   * the section which contains __smp_alt_end.
   */
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
 
  /* will be freed after init */
-  . = ALIGN(4096); /* Init code and data */
+  . = ALIGN(PAGE_SIZE); /* Init code and data */
  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
	__init_begin = .;
	_sinittext = .;
@@ -174,23 +174,23 @@ SECTIONS
	EXIT_DATA
  }
 #if defined(CONFIG_BLK_DEV_INITRD)
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
	__initramfs_start = .;
	*(.init.ramfs)
	__initramfs_end = .;
  }
 #endif
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
	__per_cpu_start = .;
	*(.data.percpu)
	*(.data.percpu.shared_aligned)
	__per_cpu_end = .;
  }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  /* freed after init ends here */
 
  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
	__init_end = .;
	__bss_start = .;		/* BSS */
@@ -200,7 +200,7 @@ SECTIONS
	__bss_stop = .;
	_end = . ;
	/* This is where the kernel creates the early boot page tables */
-	. = ALIGN(4096);
+	. = ALIGN(PAGE_SIZE);
	pg0 = . ;
  }
 
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 0992b9946c6f..fab132299735 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -37,7 +37,7 @@ SECTIONS
  KPROBES_TEXT
  *(.fixup)
  *(.gnu.warning)
-  _etext = .;			/* End of text section */
+  _etext = .;		/* End of text section */
  } :text = 0x9090
 
  . = ALIGN(16);		/* Exception table */
@@ -60,7 +60,7 @@ SECTIONS
  __tracedata_end = .;
  }
 
-  . = ALIGN(PAGE_SIZE);		/* Align data segment to page size boundary */
+  . = ALIGN(PAGE_SIZE);	/* Align data segment to page size boundary */
  /* Data */
  .data : AT(ADDR(.data) - LOAD_OFFSET) {
	DATA_DATA
@@ -119,7 +119,7 @@ SECTIONS
  .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
		{ *(.vsyscall_3) }
 
-  . = VSYSCALL_VIRT_ADDR + 4096;
+  . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
 #undef VSYSCALL_PHYS_ADDR
@@ -129,28 +129,28 @@ SECTIONS
 #undef VVIRT_OFFSET
 #undef VVIRT
 
-  . = ALIGN(8192);		/* init_task */
+  . = ALIGN(THREAD_SIZE);	/* init_task */
  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
	*(.data.init_task)
  }:data.init
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
	*(.data.page_aligned)
  }
 
  /* might get freed after init */
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __smp_alt_begin = .;
  __smp_locks = .;
  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
	*(.smp_locks)
  }
  __smp_locks_end = .;
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __smp_alt_end = .;
 
-  . = ALIGN(4096);		/* Init code and data */
+  . = ALIGN(PAGE_SIZE);		/* Init code and data */
  __init_begin = .;
  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
	_sinittext = .;
@@ -191,7 +191,7 @@ SECTIONS
  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
	*(.altinstructions)
  }
-  __alt_instructions_end = .;
+  __alt_instructions_end = .;
  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
	*(.altinstr_replacement)
  }
@@ -207,25 +207,25 @@ SECTIONS
 /* vdso blob that is mapped into user space */
  vdso_start = . ;
  .vdso  : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  vdso_end = .;
 
 #ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __initramfs_start = .;
  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
  __initramfs_end = .;
 #endif
 
-  PERCPU(4096)
+  PERCPU(PAGE_SIZE)
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __init_end = .;
 
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __nosave_begin = .;
  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
-  . = ALIGN(4096);
+  . = ALIGN(PAGE_SIZE);
  __nosave_end = .;
 
  __bss_start = .;		/* BSS */
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index fd42a4a095fc..459b58a8a15c 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -1,117 +1,129 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v.2
 *
 * Wrappers of assembly checksum functions for x86-64.
 */
-
 #include <asm/checksum.h>
 #include <linux/module.h>
 
 /**
 * csum_partial_copy_from_user - Copy and checksum from user space.
 * @src: source address (user space)
 * @dst: destination address
 * @len: number of bytes to be copied.
 * @isum: initial sum that is added into the result (32bit unfolded)
 * @errp: set to -EFAULT for an bad source address.
 *
 * Returns an 32bit unfolded checksum of the buffer.
 * src and dst are best aligned to 64bits.
 */
 __wsum
 csum_partial_copy_from_user(const void __user *src, void *dst,
			    int len, __wsum isum, int *errp)
 {
	might_sleep();
	*errp = 0;
-	if (likely(access_ok(VERIFY_READ,src, len))) {
-		/* Why 6, not 7? To handle odd addresses aligned we
-		   would need to do considerable complications to fix the
-		   checksum which is defined as an 16bit accumulator. The
-		   fix alignment code is primarily for performance
-		   compatibility with 32bit and that will handle odd
-		   addresses slowly too. */
-		if (unlikely((unsigned long)src & 6)) {
-			while (((unsigned long)src & 6) && len >= 2) {
-				__u16 val16;
-				*errp = __get_user(val16, (const __u16 __user *)src);
-				if (*errp)
-					return isum;
-				*(__u16 *)dst = val16;
-				isum = (__force __wsum)add32_with_carry(
-						(__force unsigned)isum, val16);
-				src += 2;
-				dst += 2;
-				len -= 2;
-			}
+
+	if (!likely(access_ok(VERIFY_READ, src, len)))
+		goto out_err;
+
+	/*
+	 * Why 6, not 7? To handle odd addresses aligned we
+	 * would need to do considerable complications to fix the
+	 * checksum which is defined as an 16bit accumulator. The
+	 * fix alignment code is primarily for performance
+	 * compatibility with 32bit and that will handle odd
+	 * addresses slowly too.
+	 */
+	if (unlikely((unsigned long)src & 6)) {
+		while (((unsigned long)src & 6) && len >= 2) {
+			__u16 val16;
+
+			*errp = __get_user(val16, (const __u16 __user *)src);
+			if (*errp)
+				return isum;
+
+			*(__u16 *)dst = val16;
+			isum = (__force __wsum)add32_with_carry(
+					(__force unsigned)isum, val16);
+			src += 2;
+			dst += 2;
+			len -= 2;
		}
-		isum = csum_partial_copy_generic((__force const void *)src,
-					dst, len, isum, errp, NULL);
-		if (likely(*errp == 0))
-			return isum;
	}
+	isum = csum_partial_copy_generic((__force const void *)src,
+					 dst, len, isum, errp, NULL);
+	if (unlikely(*errp))
+		goto out_err;
+
+	return isum;
+
+out_err:
	*errp = -EFAULT;
-	memset(dst,0,len);
-	return isum;
-}
+	memset(dst, 0, len);
 
+	return isum;
+}
 EXPORT_SYMBOL(csum_partial_copy_from_user);
 
 /**
 * csum_partial_copy_to_user - Copy and checksum to user space.
 * @src: source address
 * @dst: destination address (user space)
 * @len: number of bytes to be copied.
 * @isum: initial sum that is added into the result (32bit unfolded)
 * @errp: set to -EFAULT for an bad destination address.
 *
 * Returns an 32bit unfolded checksum of the buffer.
 * src and dst are best aligned to 64bits.
 */
 __wsum
 csum_partial_copy_to_user(const void *src, void __user *dst,
			  int len, __wsum isum, int *errp)
 {
	might_sleep();
+
	if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
		*errp = -EFAULT;
		return 0;
	}
 
	if (unlikely((unsigned long)dst & 6)) {
		while (((unsigned long)dst & 6) && len >= 2) {
			__u16 val16 = *(__u16 *)src;
+
			isum = (__force __wsum)add32_with_carry(
					(__force unsigned)isum, val16);
			*errp = __put_user(val16, (__u16 __user *)dst);
			if (*errp)
				return isum;
			src += 2;
			dst += 2;
			len -= 2;
		}
	}
 
	*errp = 0;
-	return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp);
-}
-
+	return csum_partial_copy_generic(src, (void __force *)dst,
+					 len, isum, NULL, errp);
+}
 EXPORT_SYMBOL(csum_partial_copy_to_user);
 
 /**
 * csum_partial_copy_nocheck - Copy and checksum.
 * @src: source address
 * @dst: destination address
 * @len: number of bytes to be copied.
 * @isum: initial sum that is added into the result (32bit unfolded)
 *
 * Returns an 32bit unfolded checksum of the buffer.
 */
 __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
 {
-	return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
+	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
 }
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
 
 __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
@@ -119,17 +131,20 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			__u32 len, unsigned short proto, __wsum sum)
 {
	__u64 rest, sum64;
 
	rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
		(__force __u64)sum;
-	asm("  addq (%[saddr]),%[sum]\n"
-	    "  adcq 8(%[saddr]),%[sum]\n"
-	    "  adcq (%[daddr]),%[sum]\n"
-	    "  adcq 8(%[daddr]),%[sum]\n"
-	    "  adcq $0,%[sum]\n"
-	    : [sum] "=r" (sum64)
-	    : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
-	return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
-}
 
+	asm("	addq (%[saddr]),%[sum]\n"
+	    "	adcq 8(%[saddr]),%[sum]\n"
+	    "	adcq (%[daddr]),%[sum]\n"
+	    "	adcq 8(%[daddr]),%[sum]\n"
+	    "	adcq $0,%[sum]\n"
+
+	    : [sum] "=r" (sum64)
+	    : "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
+
+	return csum_fold(
+	       (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
+}
 EXPORT_SYMBOL(csum_ipv6_magic);
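
The final lines of csum_ipv6_magic() reduce a 64-bit accumulator to a 16-bit ones'-complement checksum in two steps. A standalone C sketch with plain stand-ins for add32_with_carry() and csum_fold() (illustrative, not the kernel implementations):

#include <stdint.h>

/* 32-bit add with end-around carry, like the kernel's addl/adcl pair */
static uint32_t add32_with_carry(uint32_t a, uint32_t b)
{
	uint64_t s = (uint64_t)a + b;

	return (uint32_t)s + (uint32_t)(s >> 32);
}

/* fold 32 bits to 16 and complement, like csum_fold() */
static uint16_t csum_fold(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);	/* absorb any new carry */

	return (uint16_t)~sum;
}

/* the wrapper's final expression: fold the 64-bit running sum */
static uint16_t fold64(uint64_t sum64)
{
	return csum_fold(add32_with_carry((uint32_t)sum64, sum64 >> 32));
}
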
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
index 87b4a4e18039..3f1eb59b5f08 100644
--- a/arch/x86/lib/io_64.c
+++ b/arch/x86/lib/io_64.c
@@ -1,23 +1,25 @@
 #include <linux/string.h>
-#include <asm/io.h>
 #include <linux/module.h>
+#include <asm/io.h>
 
-void __memcpy_toio(unsigned long dst,const void*src,unsigned len)
+void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
 {
-	__inline_memcpy((void *) dst,src,len);
+	__inline_memcpy((void *)dst, src, len);
 }
 EXPORT_SYMBOL(__memcpy_toio);
 
-void __memcpy_fromio(void *dst,unsigned long src,unsigned len)
+void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
 {
-	__inline_memcpy(dst,(const void *) src,len);
+	__inline_memcpy(dst, (const void *)src, len);
 }
 EXPORT_SYMBOL(__memcpy_fromio);
 
 void memset_io(volatile void __iomem *a, int b, size_t c)
 {
-	/* XXX: memset can mangle the IO patterns quite a bit.
-	   perhaps it would be better to use a dumb one */
-	memset((void *)a,b,c);
+	/*
+	 * TODO: memset can mangle the IO patterns quite a bit.
+	 * perhaps it would be better to use a dumb one:
+	 */
+	memset((void *)a, b, c);
 }
 EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 621afb6343dc..fdc667422df9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -186,7 +186,7 @@ static int bad_address(void *p)
 }
 #endif
 
-void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address)
 {
 #ifdef CONFIG_X86_32
	__typeof__(pte_val(__pte(0))) page;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8106bba41ecb..ee1091a46964 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
 #include <asm/sections.h>
 #include <asm/paravirt.h>
 #include <asm/setup.h>
+#include <asm/cacheflush.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b59fc238151f..bb652f5a93fb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -45,6 +45,7 @@
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/numa.h>
+#include <asm/cacheflush.h>
 
 const struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
@@ -170,6 +171,33 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
	__flush_tlb_one(vaddr);
 }
 
+/*
+ * The head.S code sets up the kernel high mapping from:
+ * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ *
+ * phys_addr holds the negative offset to the kernel, which is added
+ * to the compile time generated pmds. This results in invalid pmds up
+ * to the point where we hit the physaddr 0 mapping.
+ *
+ * We limit the mappings to the region from _text to _end.  _end is
+ * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * well, as they are located before _text:
+ */
+void __init cleanup_highmap(void)
+{
+	unsigned long vaddr = __START_KERNEL_map;
+	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+	pmd_t *pmd = level2_kernel_pgt;
+	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
+
+	for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+		if (!pmd_present(*pmd))
+			continue;
+		if (vaddr < (unsigned long) _text || vaddr > end)
+			set_pmd(pmd, __pmd(0));
+	}
+}
+
 /* NOTE: this is meant to be run only at boot */
 void __init
 __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
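
cleanup_highmap() keeps every 2MB mapping that overlaps [_text, round_up(_end, PMD_SIZE)) and clears the rest. A small standalone check of the bounds arithmetic (the address is hypothetical):

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* the high map uses 2MB pages */

static unsigned long round_up_pmd(unsigned long x)
{
	return (x + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
}

int main(void)
{
	unsigned long end = 0xffffffff805f1234UL;	/* hypothetical _end */

	/* matches the patch: last valid vaddr is the rounded end minus one */
	printf("sweep keeps mappings up to %#lx\n", round_up_pmd(end) - 1);

	return 0;
}
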
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a4897a85268a..882328efc3db 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -42,6 +42,22 @@ int page_is_ram(unsigned long pagenr)
42 unsigned long addr, end; 42 unsigned long addr, end;
43 int i; 43 int i;
44 44
45 /*
46 * A special case is the first 4Kb of memory;
47 * This is a BIOS owned area, not kernel ram, but generally
48 * not listed as such in the E820 table.
49 */
50 if (pagenr == 0)
51 return 0;
52
53 /*
54 * Second special case: Some BIOSen report the PC BIOS
55 * area (640->1Mb) as ram even though it is not.
56 */
57 if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
58 pagenr < (BIOS_END >> PAGE_SHIFT))
59 return 0;
60
45 for (i = 0; i < e820.nr_map; i++) { 61 for (i = 0; i < e820.nr_map; i++) {
46 /* 62 /*
47 * Not usable memory: 63 * Not usable memory:
@@ -51,14 +67,6 @@ int page_is_ram(unsigned long pagenr)
51 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; 67 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
52 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; 68 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
53 69
54 /*
55 * Sanity check: Some BIOSen report areas as RAM that
56 * are not. Notably the 640->1Mb area, which is the
57 * PCI BIOS area.
58 */
59 if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
60 end < (BIOS_END >> PAGE_SHIFT))
61 continue;
62 70
63 if ((pagenr >= addr) && (pagenr < end)) 71 if ((pagenr >= addr) && (pagenr < end))
64 return 1; 72 return 1;
@@ -126,6 +134,8 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
126 return NULL; 134 return NULL;
127 } 135 }
128 136
137 WARN_ON_ONCE(page_is_ram(pfn));
138
129 switch (mode) { 139 switch (mode) {
130 case IOR_MODE_UNCACHED: 140 case IOR_MODE_UNCACHED:
131 default: 141 default:
@@ -265,7 +275,9 @@ static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
265 275
266static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) 276static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
267{ 277{
268 pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; 278 /* Don't assume we're using swapper_pg_dir at this point */
279 pgd_t *base = __va(read_cr3());
280 pgd_t *pgd = &base[pgd_index(addr)];
269 pud_t *pud = pud_offset(pgd, addr); 281 pud_t *pud = pud_offset(pgd, addr);
270 pmd_t *pmd = pmd_offset(pud, addr); 282 pmd_t *pmd = pmd_offset(pud, addr);
271 283
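
The two new early-exit checks in page_is_ram() are self-contained enough to mirror in a standalone sketch; the BIOS_BEGIN/BIOS_END values below are the conventional 640K/1MB PC constants, assumed here rather than taken from kernel headers:

#include <stdio.h>

#define PAGE_SHIFT 12
#define BIOS_BEGIN 0xa0000UL   /* 640K */
#define BIOS_END   0x100000UL  /* 1MB */

/* Only the special-case filtering; the e820 walk is omitted. */
static int page_is_definitely_not_ram(unsigned long pagenr)
{
	/* Page 0 is BIOS-owned even when E820 lists it as usable. */
	if (pagenr == 0)
		return 1;

	/* The 640K-1MB PC BIOS area is never normal RAM. */
	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
	    pagenr < (BIOS_END >> PAGE_SHIFT))
		return 1;

	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       page_is_definitely_not_ram(0),      /* 1 */
	       page_is_definitely_not_ram(0xa0),   /* 1: 640K */
	       page_is_definitely_not_ram(0x100)); /* 0: first page at 1MB */
	return 0;
}
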
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 1aecc658cd7d..59898fb0a4aa 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -494,11 +494,13 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
494 int i; 494 int i;
495 495
496 nodes_clear(node_possible_map); 496 nodes_clear(node_possible_map);
497 nodes_clear(node_online_map);
497 498
498#ifdef CONFIG_NUMA_EMU 499#ifdef CONFIG_NUMA_EMU
499 if (cmdline && !numa_emulation(start_pfn, end_pfn)) 500 if (cmdline && !numa_emulation(start_pfn, end_pfn))
500 return; 501 return;
501 nodes_clear(node_possible_map); 502 nodes_clear(node_possible_map);
503 nodes_clear(node_online_map);
502#endif 504#endif
503 505
504#ifdef CONFIG_ACPI_NUMA 506#ifdef CONFIG_ACPI_NUMA
@@ -506,6 +508,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
506 end_pfn << PAGE_SHIFT)) 508 end_pfn << PAGE_SHIFT))
507 return; 509 return;
508 nodes_clear(node_possible_map); 510 nodes_clear(node_possible_map);
511 nodes_clear(node_online_map);
509#endif 512#endif
510 513
511#ifdef CONFIG_K8_NUMA 514#ifdef CONFIG_K8_NUMA
@@ -513,6 +516,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
513 end_pfn<<PAGE_SHIFT)) 516 end_pfn<<PAGE_SHIFT))
514 return; 517 return;
515 nodes_clear(node_possible_map); 518 nodes_clear(node_possible_map);
519 nodes_clear(node_online_map);
516#endif 520#endif
517 printk(KERN_INFO "%s\n", 521 printk(KERN_INFO "%s\n",
518 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 522 numa_off ? "NUMA turned off" : "No NUMA configuration found");
@@ -524,7 +528,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
524 memnode_shift = 63; 528 memnode_shift = 63;
525 memnodemap = memnode.embedded_map; 529 memnodemap = memnode.embedded_map;
526 memnodemap[0] = 0; 530 memnodemap[0] = 0;
527 nodes_clear(node_online_map);
528 node_set_online(0); 531 node_set_online(0);
529 node_set(0, node_possible_map); 532 node_set(0, node_possible_map);
530 for (i = 0; i < NR_CPUS; i++) 533 for (i = 0; i < NR_CPUS; i++)
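
The point of adding nodes_clear(node_online_map) to every fallback path is that a failed probe may leave stale online bits behind. A toy model with a plain bitmask (all names below are hypothetical stand-ins for the nodemask helpers) shows the hazard:

#include <stdio.h>

static unsigned long node_online_map;   /* toy nodemask */

static void nodes_clear(unsigned long *map) { *map = 0; }
static void node_set(int n, unsigned long *map) { *map |= 1UL << n; }

static int probe_that_fails(void)
{
	node_set(1, &node_online_map);  /* side effect of a partial probe */
	return -1;                      /* probe failed */
}

int main(void)
{
	nodes_clear(&node_online_map);

	if (probe_that_fails() == 0)
		return 0;

	/* Without this, the stale bit for node 1 would leak into the
	 * single-node fallback below. */
	nodes_clear(&node_online_map);

	node_set(0, &node_online_map);  /* fallback: node 0 only */
	printf("online map = %#lx\n", node_online_map);
	return 0;
}
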
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 440210a2277d..464d8fc21ce6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/sections.h> 16#include <asm/sections.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/pgalloc.h> 18#include <asm/pgalloc.h>
19#include <asm/proto.h>
19 20
20/* 21/*
21 * The current flushing context - we pass it instead of 5 arguments: 22 * The current flushing context - we pass it instead of 5 arguments:
@@ -26,8 +27,23 @@ struct cpa_data {
26 pgprot_t mask_clr; 27 pgprot_t mask_clr;
27 int numpages; 28 int numpages;
28 int flushtlb; 29 int flushtlb;
30 unsigned long pfn;
29}; 31};
30 32
33#ifdef CONFIG_X86_64
34
35static inline unsigned long highmap_start_pfn(void)
36{
37 return __pa(_text) >> PAGE_SHIFT;
38}
39
40static inline unsigned long highmap_end_pfn(void)
41{
42 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
43}
44
45#endif
46
31static inline int 47static inline int
32within(unsigned long addr, unsigned long start, unsigned long end) 48within(unsigned long addr, unsigned long start, unsigned long end)
33{ 49{
@@ -123,29 +139,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
123 } 139 }
124} 140}
125 141
126#define HIGH_MAP_START __START_KERNEL_map
127#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
128
129
130/*
131 * Converts a virtual address to a X86-64 highmap address
132 */
133static unsigned long virt_to_highmap(void *address)
134{
135#ifdef CONFIG_X86_64
136 return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
137#else
138 return (unsigned long)address;
139#endif
140}
141
142/* 142/*
143 * Certain areas of memory on x86 require very specific protection flags, 143 * Certain areas of memory on x86 require very specific protection flags,
144 * for example the BIOS area or kernel text. Callers don't always get this 144 * for example the BIOS area or kernel text. Callers don't always get this
145 * right (again, ioremap() on BIOS memory is not uncommon) so this function 145 * right (again, ioremap() on BIOS memory is not uncommon) so this function
146 * checks and fixes these known static required protection bits. 146 * checks and fixes these known static required protection bits.
147 */ 147 */
148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) 148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
149 unsigned long pfn)
149{ 150{
150 pgprot_t forbidden = __pgprot(0); 151 pgprot_t forbidden = __pgprot(0);
151 152
@@ -153,30 +154,23 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
153 * The BIOS area between 640k and 1Mb needs to be executable for 154 * The BIOS area between 640k and 1Mb needs to be executable for
154 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 155 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
155 */ 156 */
156 if (within(__pa(address), BIOS_BEGIN, BIOS_END)) 157 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
157 pgprot_val(forbidden) |= _PAGE_NX; 158 pgprot_val(forbidden) |= _PAGE_NX;
158 159
159 /* 160 /*
160 * The kernel text needs to be executable for obvious reasons. 161 * The kernel text needs to be executable for obvious reasons.
161 * Does not cover __inittext since that is gone later on 162 * Does not cover __inittext since that is gone later on. On
163 * 64-bit we do not enforce !NX on the low mapping.
162 */ 164 */
163 if (within(address, (unsigned long)_text, (unsigned long)_etext)) 165 if (within(address, (unsigned long)_text, (unsigned long)_etext))
164 pgprot_val(forbidden) |= _PAGE_NX; 166 pgprot_val(forbidden) |= _PAGE_NX;
165 /*
166 * Do the same for the x86-64 high kernel mapping
167 */
168 if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
169 pgprot_val(forbidden) |= _PAGE_NX;
170 167
171 /* The .rodata section needs to be read-only */
172 if (within(address, (unsigned long)__start_rodata,
173 (unsigned long)__end_rodata))
174 pgprot_val(forbidden) |= _PAGE_RW;
175 /* 168 /*
176 * Do the same for the x86-64 high kernel mapping 169 * The .rodata section needs to be read-only. Using the pfn
170 * catches all aliases.
177 */ 171 */
178 if (within(address, virt_to_highmap(__start_rodata), 172 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
179 virt_to_highmap(__end_rodata))) 173 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
180 pgprot_val(forbidden) |= _PAGE_RW; 174 pgprot_val(forbidden) |= _PAGE_RW;
181 175
182 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 176 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
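
Filtering by pfn rather than by virtual address is what lets static_protections() catch every alias of a protected physical page. A simplified analogue with plain flag bits (the rodata pfn range is invented for the sketch):

#include <stdio.h>

#define _PAGE_RW 0x2UL

/* Hypothetical physical range of .rodata for the sketch. */
static const unsigned long rodata_start_pfn = 0x400;
static const unsigned long rodata_end_pfn   = 0x500;

static unsigned long static_protections(unsigned long prot, unsigned long pfn)
{
	unsigned long forbidden = 0;

	/* Any virtual alias of a rodata page loses write permission,
	 * not just the canonical kernel address. */
	if (pfn >= rodata_start_pfn && pfn < rodata_end_pfn)
		forbidden |= _PAGE_RW;

	return prot & ~forbidden;
}

int main(void)
{
	printf("%#lx\n", static_protections(_PAGE_RW, 0x450)); /* 0: stripped */
	printf("%#lx\n", static_protections(_PAGE_RW, 0x600)); /* 0x2: kept */
	return 0;
}
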
@@ -253,7 +247,7 @@ static int
253try_preserve_large_page(pte_t *kpte, unsigned long address, 247try_preserve_large_page(pte_t *kpte, unsigned long address,
254 struct cpa_data *cpa) 248 struct cpa_data *cpa)
255{ 249{
256 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr; 250 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
257 pte_t new_pte, old_pte, *tmp; 251 pte_t new_pte, old_pte, *tmp;
258 pgprot_t old_prot, new_prot; 252 pgprot_t old_prot, new_prot;
259 int i, do_split = 1; 253 int i, do_split = 1;
@@ -275,8 +269,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
275 break; 269 break;
276#ifdef CONFIG_X86_64 270#ifdef CONFIG_X86_64
277 case PG_LEVEL_1G: 271 case PG_LEVEL_1G:
278 psize = PMD_PAGE_SIZE; 272 psize = PUD_PAGE_SIZE;
279 pmask = PMD_PAGE_MASK; 273 pmask = PUD_PAGE_MASK;
280 break; 274 break;
281#endif 275#endif
282 default: 276 default:
@@ -301,7 +295,15 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
301 295
302 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); 296 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
303 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 297 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
304 new_prot = static_protections(new_prot, address); 298
299 /*
300 * old_pte points to the large page base address, so we need
301 * to add the page offset taken from the virtual address:
302 */
303 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
304 cpa->pfn = pfn;
305
306 new_prot = static_protections(new_prot, address, pfn);
305 307
306 /* 308 /*
307 * We need to check the full range, whether 309 * We need to check the full range, whether
@@ -309,8 +311,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
309 * the pages in the range we try to preserve: 311 * the pages in the range we try to preserve:
310 */ 312 */
311 addr = address + PAGE_SIZE; 313 addr = address + PAGE_SIZE;
312 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) { 314 pfn++;
313 pgprot_t chk_prot = static_protections(new_prot, addr); 315 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
316 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
314 317
315 if (pgprot_val(chk_prot) != pgprot_val(new_prot)) 318 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
316 goto out_unlock; 319 goto out_unlock;
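
The pfn arithmetic added above offsets the large page's base pfn by the position of the 4K page inside the mapping. Worked standalone (the sample values are arbitrary):

#include <stdio.h>

#define PAGE_SHIFT    12
#define PMD_PAGE_SIZE (1ULL << 21)   /* 2MB */

int main(void)
{
	unsigned long long base_pfn = 0x1000;  /* pfn in the large pte */
	unsigned long long address = 0x7f0000345000ULL;
	unsigned long long psize = PMD_PAGE_SIZE;

	/* Offset of the 4K page within the 2MB mapping, added to the
	 * base pfn, as in try_preserve_large_page(): */
	unsigned long long pfn =
		base_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);

	printf("pfn = %#llx\n", pfn);  /* 0x1000 + 0x145 = 0x1145 */
	return 0;
}
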
@@ -505,46 +508,46 @@ out_unlock:
505 return 0; 508 return 0;
506} 509}
507 510
508static int __change_page_attr(unsigned long address, struct cpa_data *cpa) 511static int __change_page_attr(struct cpa_data *cpa, int primary)
509{ 512{
513 unsigned long address = cpa->vaddr;
510 int do_split, err; 514 int do_split, err;
511 unsigned int level; 515 unsigned int level;
512 struct page *kpte_page; 516 pte_t *kpte, old_pte;
513 pte_t *kpte;
514 517
515repeat: 518repeat:
516 kpte = lookup_address(address, &level); 519 kpte = lookup_address(address, &level);
517 if (!kpte) 520 if (!kpte)
518 return -EINVAL; 521 return primary ? -EINVAL : 0;
519 522
520 kpte_page = virt_to_page(kpte); 523 old_pte = *kpte;
521 BUG_ON(PageLRU(kpte_page)); 524 if (!pte_val(old_pte)) {
522 BUG_ON(PageCompound(kpte_page)); 525 if (!primary)
526 return 0;
527 printk(KERN_WARNING "CPA: called for zero pte. "
528 "vaddr = %lx cpa->vaddr = %lx\n", address,
529 cpa->vaddr);
530 WARN_ON(1);
531 return -EINVAL;
532 }
523 533
524 if (level == PG_LEVEL_4K) { 534 if (level == PG_LEVEL_4K) {
525 pte_t new_pte, old_pte = *kpte; 535 pte_t new_pte;
526 pgprot_t new_prot = pte_pgprot(old_pte); 536 pgprot_t new_prot = pte_pgprot(old_pte);
527 537 unsigned long pfn = pte_pfn(old_pte);
528 if(!pte_val(old_pte)) {
529 printk(KERN_WARNING "CPA: called for zero pte. "
530 "vaddr = %lx cpa->vaddr = %lx\n", address,
531 cpa->vaddr);
532 WARN_ON(1);
533 return -EINVAL;
534 }
535 538
536 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); 539 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
537 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 540 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
538 541
539 new_prot = static_protections(new_prot, address); 542 new_prot = static_protections(new_prot, address, pfn);
540 543
541 /* 544 /*
542 * We need to keep the pfn from the existing PTE, 545 * We need to keep the pfn from the existing PTE,
543 * after all we're only going to change its attributes, 546 * after all we're only going to change its attributes,
544 * not the memory it points to 547 * not the memory it points to
545 */ 548 */
546 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); 549 new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
547 550 cpa->pfn = pfn;
548 /* 551 /*
549 * Do we really change anything? 552 * Do we really change anything?
550 */ 553 */
@@ -581,67 +584,59 @@ repeat:
581 return err; 584 return err;
582} 585}
583 586
584/** 587static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
585 * change_page_attr_addr - Change page table attributes in linear mapping 588
586 * @address: Virtual address in linear mapping. 589static int cpa_process_alias(struct cpa_data *cpa)
587 * @prot: New page table attribute (PAGE_*)
588 *
589 * Change page attributes of a page in the direct mapping. This is a variant
590 * of change_page_attr() that also works on memory holes that do not have
591 * mem_map entry (pfn_valid() is false).
592 *
593 * See change_page_attr() documentation for more details.
594 *
595 * Modules and drivers should use the set_memory_* APIs instead.
596 */
597static int change_page_attr_addr(struct cpa_data *cpa)
598{ 590{
599 int err; 591 struct cpa_data alias_cpa;
600 unsigned long address = cpa->vaddr; 592 int ret = 0;
601 593
602#ifdef CONFIG_X86_64 594 if (cpa->pfn > max_pfn_mapped)
603 unsigned long phys_addr = __pa(address); 595 return 0;
604 596
605 /* 597 /*
606 * If we are inside the high mapped kernel range, then we 598 * No need to redo, when the primary call touched the direct
607 * fixup the low mapping first. __va() returns the virtual 599 * mapping already:
608 * address in the linear mapping:
609 */ 600 */
610 if (within(address, HIGH_MAP_START, HIGH_MAP_END)) 601 if (!within(cpa->vaddr, PAGE_OFFSET,
611 address = (unsigned long) __va(phys_addr); 602 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
612#endif
613 603
614 err = __change_page_attr(address, cpa); 604 alias_cpa = *cpa;
615 if (err) 605 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
616 return err; 606
607 ret = __change_page_attr_set_clr(&alias_cpa, 0);
608 }
617 609
618#ifdef CONFIG_X86_64 610#ifdef CONFIG_X86_64
611 if (ret)
612 return ret;
613 /*
614 * No need to redo when the primary call already
615 * touched the high mapping:
616 */
617 if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
618 return 0;
619
619 /* 620 /*
620 * If the physical address is inside the kernel map, we need 621 * If the physical address is inside the kernel map, we need
621 * to touch the high mapped kernel as well: 622 * to touch the high mapped kernel as well:
622 */ 623 */
623 if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) { 624 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
624 /* 625 return 0;
625 * Calc the high mapping address. See __phys_addr()
626 * for the non obvious details.
627 *
628 * Note that NX and other required permissions are
629 * checked in static_protections().
630 */
631 address = phys_addr + HIGH_MAP_START - phys_base;
632 626
633 /* 627 alias_cpa = *cpa;
634 * Our high aliases are imprecise, because we check 628 alias_cpa.vaddr =
635 * everything between 0 and KERNEL_TEXT_SIZE, so do 629 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
636 * not propagate lookup failures back to users: 630
637 */ 631 /*
638 __change_page_attr(address, cpa); 632 * The high mapping range is imprecise, so ignore the return value.
639 } 633 */
634 __change_page_attr_set_clr(&alias_cpa, 0);
640#endif 635#endif
641 return err; 636 return ret;
642} 637}
643 638
644static int __change_page_attr_set_clr(struct cpa_data *cpa) 639static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
645{ 640{
646 int ret, numpages = cpa->numpages; 641 int ret, numpages = cpa->numpages;
647 642
@@ -651,10 +646,17 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa)
651 * preservation check. 646 * preservation check.
652 */ 647 */
653 cpa->numpages = numpages; 648 cpa->numpages = numpages;
654 ret = change_page_attr_addr(cpa); 649
650 ret = __change_page_attr(cpa, checkalias);
655 if (ret) 651 if (ret)
656 return ret; 652 return ret;
657 653
654 if (checkalias) {
655 ret = cpa_process_alias(cpa);
656 if (ret)
657 return ret;
658 }
659
658 /* 660 /*
659 * Adjust the number of pages with the result of the 661 * Adjust the number of pages with the result of the
660 * CPA operation. Either a large page has been 662 * CPA operation. Either a large page has been
@@ -677,7 +679,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
677 pgprot_t mask_set, pgprot_t mask_clr) 679 pgprot_t mask_set, pgprot_t mask_clr)
678{ 680{
679 struct cpa_data cpa; 681 struct cpa_data cpa;
680 int ret, cache; 682 int ret, cache, checkalias;
681 683
682 /* 684 /*
683 * Check whether we are requested to change an unsupported 685 * Check whether we are requested to change an unsupported
@@ -688,13 +690,25 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
688 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) 690 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
689 return 0; 691 return 0;
690 692
693 /* Ensure we are PAGE_SIZE aligned */
694 if (addr & ~PAGE_MASK) {
695 addr &= PAGE_MASK;
696 /*
697 * People should not be passing in unaligned addresses:
698 */
699 WARN_ON_ONCE(1);
700 }
701
691 cpa.vaddr = addr; 702 cpa.vaddr = addr;
692 cpa.numpages = numpages; 703 cpa.numpages = numpages;
693 cpa.mask_set = mask_set; 704 cpa.mask_set = mask_set;
694 cpa.mask_clr = mask_clr; 705 cpa.mask_clr = mask_clr;
695 cpa.flushtlb = 0; 706 cpa.flushtlb = 0;
696 707
697 ret = __change_page_attr_set_clr(&cpa); 708 /* No alias checking for _NX bit modifications */
709 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
710
711 ret = __change_page_attr_set_clr(&cpa, checkalias);
698 712
699 /* 713 /*
700 * Check whether we really changed something: 714 * Check whether we really changed something:
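
The checkalias expression deserves a second look: alias fixup is skipped only when the call touches nothing but _PAGE_NX, since NX is deliberately not enforced on the aliases. A quick standalone demonstration of the test itself:

#include <stdio.h>

#define _PAGE_RW 0x002ULL
#define _PAGE_NX (1ULL << 63)

int main(void)
{
	unsigned long long mask_set, mask_clr;

	/* set_memory_nx()-style call: only NX changes, skip aliases. */
	mask_set = _PAGE_NX; mask_clr = 0;
	printf("checkalias = %d\n",
	       (mask_set | mask_clr) != _PAGE_NX);  /* 0 */

	/* set_memory_ro()-style call: RW changes, aliases must follow. */
	mask_set = 0; mask_clr = _PAGE_RW;
	printf("checkalias = %d\n",
	       (mask_set | mask_clr) != _PAGE_NX);  /* 1 */
	return 0;
}
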
@@ -832,7 +846,7 @@ static int __set_pages_p(struct page *page, int numpages)
832 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), 846 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
833 .mask_clr = __pgprot(0)}; 847 .mask_clr = __pgprot(0)};
834 848
835 return __change_page_attr_set_clr(&cpa); 849 return __change_page_attr_set_clr(&cpa, 1);
836} 850}
837 851
838static int __set_pages_np(struct page *page, int numpages) 852static int __set_pages_np(struct page *page, int numpages)
@@ -842,7 +856,7 @@ static int __set_pages_np(struct page *page, int numpages)
842 .mask_set = __pgprot(0), 856 .mask_set = __pgprot(0),
843 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; 857 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
844 858
845 return __change_page_attr_set_clr(&cpa); 859 return __change_page_attr_set_clr(&cpa, 1);
846} 860}
847 861
848void kernel_map_pages(struct page *page, int numpages, int enable) 862void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -861,8 +875,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
861 return; 875 return;
862 876
863 /* 877 /*
864 * The return value is ignored - the calls cannot fail, 878 * The return value is ignored as the calls cannot fail.
865 * large pages are disabled at boot time: 879 * Large pages are kept enabled at boot time, and are
880 * split up quickly with DEBUG_PAGEALLOC. If a split-up
881 * fails here (due to a temporary memory shortage), no damage
882 * is done because we just keep the large page intact up
883 * to the next attempt, when it will likely be split up:
866 */ 884 */
867 if (enable) 885 if (enable)
868 __set_pages_p(page, numpages); 886 __set_pages_p(page, numpages);
@@ -881,7 +899,24 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
881 */ 899 */
882 cpa_fill_pool(); 900 cpa_fill_pool();
883} 901}
884#endif 902
903#ifdef CONFIG_HIBERNATION
904
905bool kernel_page_present(struct page *page)
906{
907 unsigned int level;
908 pte_t *pte;
909
910 if (PageHighMem(page))
911 return false;
912
913 pte = lookup_address((unsigned long)page_address(page), &level);
914 return (pte_val(*pte) & _PAGE_PRESENT);
915}
916
917#endif /* CONFIG_HIBERNATION */
918
919#endif /* CONFIG_DEBUG_PAGEALLOC */
885 920
886/* 921/*
887 * The testcases use internal knowledge of the implementation that shouldn't 922 * The testcases use internal knowledge of the implementation that shouldn't
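
kernel_page_present() reduces to a page-table lookup plus a present-bit test. The test itself, modelled with a toy pte (the real code obtains the pte via lookup_address()):

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_PRESENT 0x001UL

struct toy_pte { unsigned long val; };

static bool page_present(const struct toy_pte *pte)
{
	return pte->val & _PAGE_PRESENT;
}

int main(void)
{
	struct toy_pte mapped   = { .val = 0x1234000UL | _PAGE_PRESENT };
	struct toy_pte unmapped = { .val = 0x1234000UL }; /* cleared, e.g.
							     by __set_pages_np() */

	printf("%d %d\n", page_present(&mapped), page_present(&unmapped));
	return 0;
}
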
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index ecd91ea8a8ae..845001c617cc 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -166,7 +166,8 @@ static inline int save_add_info(void) {return 0;}
166 * Both SPARSE and RESERVE need nodes_add information. 166 * Both SPARSE and RESERVE need nodes_add information.
167 * This code supports one contiguous hot add area per node. 167 * This code supports one contiguous hot add area per node.
168 */ 168 */
169static int reserve_hotadd(int node, unsigned long start, unsigned long end) 169static int __init
170reserve_hotadd(int node, unsigned long start, unsigned long end)
170{ 171{
171 unsigned long s_pfn = start >> PAGE_SHIFT; 172 unsigned long s_pfn = start >> PAGE_SHIFT;
172 unsigned long e_pfn = end >> PAGE_SHIFT; 173 unsigned long e_pfn = end >> PAGE_SHIFT;
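
Tagging reserve_hotadd() with __init moves it into the kernel's discardable .init.text section, since SRAT parsing happens only at boot. A userspace approximation of the mechanism (the macro below mimics, rather than reuses, the kernel's definition):

#include <stdio.h>

#define my_init __attribute__((__section__(".init.text")))

/* In the kernel, .init.text is freed after boot, so boot-only
 * helpers must never be called later. */
static int my_init boot_only_helper(int node)
{
	return node >= 0;
}

int main(void)
{
	printf("%d\n", boot_only_helper(0));
	return 0;
}
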
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index b7c67a187b6b..7b6e3bb9b28c 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -541,7 +541,7 @@ void pcibios_disable_device (struct pci_dev *dev)
541 pcibios_disable_irq(dev); 541 pcibios_disable_irq(dev);
542} 542}
543 543
544struct pci_bus *pci_scan_bus_with_sysdata(int busno) 544struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno)
545{ 545{
546 struct pci_bus *bus = NULL; 546 struct pci_bus *bus = NULL;
547 struct pci_sysdata *sd; 547 struct pci_sysdata *sd;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index ed07ce6c171b..a8715861877e 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -583,6 +583,10 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
583 case PCI_DEVICE_ID_INTEL_ICH9_4: 583 case PCI_DEVICE_ID_INTEL_ICH9_4:
584 case PCI_DEVICE_ID_INTEL_ICH9_5: 584 case PCI_DEVICE_ID_INTEL_ICH9_5:
585 case PCI_DEVICE_ID_INTEL_TOLAPAI_0: 585 case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
586 case PCI_DEVICE_ID_INTEL_ICH10_0:
587 case PCI_DEVICE_ID_INTEL_ICH10_1:
588 case PCI_DEVICE_ID_INTEL_ICH10_2:
589 case PCI_DEVICE_ID_INTEL_ICH10_3:
586 r->name = "PIIX/ICH"; 590 r->name = "PIIX/ICH";
587 r->get = pirq_piix_get; 591 r->get = pirq_piix_get;
588 r->set = pirq_piix_set; 592 r->set = pirq_piix_set;
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 1deb3244b99b..000415947d93 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -20,6 +20,7 @@
20#include <asm/segment.h> 20#include <asm/segment.h>
21#include <asm/page.h> 21#include <asm/page.h>
22#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
23#include <asm/processor-flags.h>
23 24
24ENTRY(swsusp_arch_suspend) 25ENTRY(swsusp_arch_suspend)
25 movq $saved_context, %rax 26 movq $saved_context, %rax
@@ -60,7 +61,7 @@ ENTRY(restore_image)
60 /* Flush TLB */ 61 /* Flush TLB */
61 movq mmu_cr4_features(%rip), %rax 62 movq mmu_cr4_features(%rip), %rax
62 movq %rax, %rdx 63 movq %rax, %rdx
63 andq $~(1<<7), %rdx # PGE 64 andq $~(X86_CR4_PGE), %rdx
64 movq %rdx, %cr4; # turn off PGE 65 movq %rdx, %cr4; # turn off PGE
65 movq %cr3, %rcx; # flush TLB 66 movq %cr3, %rcx; # flush TLB
66 movq %rcx, %cr3; 67 movq %rcx, %cr3;
@@ -112,7 +113,7 @@ ENTRY(restore_registers)
112 /* Flush TLB, including "global" things (vmalloc) */ 113 /* Flush TLB, including "global" things (vmalloc) */
113 movq mmu_cr4_features(%rip), %rax 114 movq mmu_cr4_features(%rip), %rax
114 movq %rax, %rdx 115 movq %rax, %rdx
115 andq $~(1<<7), %rdx; # PGE 116 andq $~(X86_CR4_PGE), %rdx
116 movq %rdx, %cr4; # turn off PGE 117 movq %rdx, %cr4; # turn off PGE
117 movq %cr3, %rcx; # flush TLB 118 movq %cr3, %rcx; # flush TLB
118 movq %rcx, %cr3 119 movq %rcx, %cr3
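
Replacing the magic 1<<7 with X86_CR4_PGE is purely cosmetic: CR4 bit 7 is the Page Global Enable flag. The mask operation the assembly performs, restated in C:

#include <assert.h>

#define X86_CR4_PGE (1UL << 7)   /* CR4.PGE: Page Global Enable */

int main(void)
{
	unsigned long cr4 = 0x630 | X86_CR4_PGE;  /* pretend CR4 snapshot */

	cr4 &= ~X86_CR4_PGE;   /* turn off global pages -> full TLB flush
				  on the next CR3 write */
	assert(!(cr4 & X86_CR4_PGE));
	return 0;
}
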
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index de647bc6e74d..49e5358f481a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -798,6 +798,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
798 * added to the table can be prepared properly for Xen. 798 * added to the table can be prepared properly for Xen.
799 */ 799 */
800 xen_write_cr3(__pa(base)); 800 xen_write_cr3(__pa(base));
801
802 /* Unpin initial Xen pagetable */
803 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
804 PFN_DOWN(__pa(xen_start_info->pt_base)));
801} 805}
802 806
803static __init void xen_pagetable_setup_done(pgd_t *base) 807static __init void xen_pagetable_setup_done(pgd_t *base)
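
pin_pagetable_pfn() presumably wraps a single mmuext hypercall. A userspace mock that keeps the control flow but stubs out the hypercall (the op layout and the MMUEXT_UNPIN_TABLE value are illustrative, patterned on the Xen interface headers):

#include <stdio.h>

enum { MMUEXT_UNPIN_TABLE = 4 };  /* illustrative value */

struct mmuext_op {
	unsigned int cmd;
	unsigned long mfn;
};

static int fake_mmuext_op(const struct mmuext_op *op)
{
	printf("hypercall: cmd=%u mfn=%#lx\n", op->cmd, op->mfn);
	return 0;  /* success */
}

int main(void)
{
	unsigned long pt_base_phys = 0x1000000; /* pretend pt_base */
	struct mmuext_op op = {
		.cmd = MMUEXT_UNPIN_TABLE,
		.mfn = pt_base_phys >> 12,      /* cf. PFN_DOWN(__pa(...)) */
	};

	if (fake_mmuext_op(&op))
		return 1;
	return 0;
}
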