diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-06-17 06:52:15 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-17 06:56:49 -0400 |
commit | eadb8a091b27a840de7450f84ecff5ef13476424 (patch) | |
tree | 58c3782d40def63baa8167f3d31e3048cb4c7660 /arch/x86/mm | |
parent | 73874005cd8800440be4299bd095387fff4b90ac (diff) | |
parent | 65795efbd380a832ae508b04dba8f8e53f0b84d9 (diff) |
Merge branch 'linus' into tracing/hw-breakpoints
Conflicts:
arch/x86/Kconfig
arch/x86/kernel/traps.c
arch/x86/power/cpu.c
arch/x86/power/cpu_32.c
kernel/Makefile
Semantic conflict:
arch/x86/kernel/hw_breakpoint.c
Merge reason: Resolve the conflicts, move from put_cpu_no_sched() to
put_cpu() in arch/x86/kernel/hw_breakpoint.c.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 87 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 80 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 73 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 51 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/error.c | 228 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/error.h | 15 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/kmemcheck.c | 640 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/opcode.c | 106 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/opcode.h | 9 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/pte.c | 22 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/pte.h | 10 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/selftest.c | 69 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/selftest.h | 6 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/shadow.c | 162 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/shadow.h | 16 | ||||
-rw-r--r-- | arch/x86/mm/memtest.c | 17 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 33 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 29 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 12 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 98 |
26 files changed, 1466 insertions, 316 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab52..eefdeee8a871 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
10 | 10 | ||
11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
12 | 12 | ||
13 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
14 | |||
13 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 15 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | 16 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 17 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cbcfb40..a725b7f760ae 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
161 | st->current_address >= st->marker[1].start_address) { | 161 | st->current_address >= st->marker[1].start_address) { |
162 | const char *unit = units; | 162 | const char *unit = units; |
163 | unsigned long delta; | 163 | unsigned long delta; |
164 | int width = sizeof(unsigned long) * 2; | ||
164 | 165 | ||
165 | /* | 166 | /* |
166 | * Now print the actual finished series | 167 | * Now print the actual finished series |
167 | */ | 168 | */ |
168 | seq_printf(m, "0x%p-0x%p ", | 169 | seq_printf(m, "0x%0*lx-0x%0*lx ", |
169 | (void *)st->start_address, | 170 | width, st->start_address, |
170 | (void *)st->current_address); | 171 | width, st->current_address); |
171 | 172 | ||
172 | delta = (st->current_address - st->start_address) >> 10; | 173 | delta = (st->current_address - st->start_address) >> 10; |
173 | while (!(delta & 1023) && unit[1]) { | 174 | while (!(delta & 1023) && unit[1]) { |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..baa0e86adfbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -3,40 +3,18 @@ | |||
3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. |
4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar | 4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar |
5 | */ | 5 | */ |
6 | #include <linux/interrupt.h> | 6 | #include <linux/magic.h> /* STACK_END_MAGIC */ |
7 | #include <linux/mmiotrace.h> | 7 | #include <linux/sched.h> /* test_thread_flag(), ... */ |
8 | #include <linux/bootmem.h> | 8 | #include <linux/kdebug.h> /* oops_begin/end, ... */ |
9 | #include <linux/compiler.h> | 9 | #include <linux/module.h> /* search_exception_table */ |
10 | #include <linux/highmem.h> | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/uaccess.h> | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/vmalloc.h> | 13 | #include <linux/perf_counter.h> /* perf_swcounter_event */ |
14 | #include <linux/vt_kern.h> | 14 | |
15 | #include <linux/signal.h> | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
16 | #include <linux/kernel.h> | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
17 | #include <linux/ptrace.h> | 17 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ |
18 | #include <linux/string.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/magic.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/mman.h> | ||
27 | #include <linux/tty.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/mm.h> | ||
30 | |||
31 | #include <asm-generic/sections.h> | ||
32 | |||
33 | #include <asm/tlbflush.h> | ||
34 | #include <asm/pgalloc.h> | ||
35 | #include <asm/segment.h> | ||
36 | #include <asm/system.h> | ||
37 | #include <asm/proto.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/desc.h> | ||
40 | 18 | ||
41 | /* | 19 | /* |
42 | * Page fault error code bits: | 20 | * Page fault error code bits: |
@@ -225,12 +203,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
225 | if (!pmd_present(*pmd_k)) | 203 | if (!pmd_present(*pmd_k)) |
226 | return NULL; | 204 | return NULL; |
227 | 205 | ||
228 | if (!pmd_present(*pmd)) { | 206 | if (!pmd_present(*pmd)) |
229 | set_pmd(pmd, *pmd_k); | 207 | set_pmd(pmd, *pmd_k); |
230 | arch_flush_lazy_mmu_mode(); | 208 | else |
231 | } else { | ||
232 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | 209 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); |
233 | } | ||
234 | 210 | ||
235 | return pmd_k; | 211 | return pmd_k; |
236 | } | 212 | } |
@@ -538,8 +514,6 @@ bad: | |||
538 | static int is_errata93(struct pt_regs *regs, unsigned long address) | 514 | static int is_errata93(struct pt_regs *regs, unsigned long address) |
539 | { | 515 | { |
540 | #ifdef CONFIG_X86_64 | 516 | #ifdef CONFIG_X86_64 |
541 | static int once; | ||
542 | |||
543 | if (address != regs->ip) | 517 | if (address != regs->ip) |
544 | return 0; | 518 | return 0; |
545 | 519 | ||
@@ -549,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) | |||
549 | address |= 0xffffffffUL << 32; | 523 | address |= 0xffffffffUL << 32; |
550 | if ((address >= (u64)_stext && address <= (u64)_etext) || | 524 | if ((address >= (u64)_stext && address <= (u64)_etext) || |
551 | (address >= MODULES_VADDR && address <= MODULES_END)) { | 525 | (address >= MODULES_VADDR && address <= MODULES_END)) { |
552 | if (!once) { | 526 | printk_once(errata93_warning); |
553 | printk(errata93_warning); | ||
554 | once = 1; | ||
555 | } | ||
556 | regs->ip = address; | 527 | regs->ip = address; |
557 | return 1; | 528 | return 1; |
558 | } | 529 | } |
@@ -986,6 +957,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
986 | /* Get the faulting address: */ | 957 | /* Get the faulting address: */ |
987 | address = read_cr2(); | 958 | address = read_cr2(); |
988 | 959 | ||
960 | /* | ||
961 | * Detect and handle instructions that would cause a page fault for | ||
962 | * both a tracked kernel page and a userspace page. | ||
963 | */ | ||
964 | if (kmemcheck_active(regs)) | ||
965 | kmemcheck_hide(regs); | ||
966 | |||
989 | if (unlikely(kmmio_fault(regs, address))) | 967 | if (unlikely(kmmio_fault(regs, address))) |
990 | return; | 968 | return; |
991 | 969 | ||
@@ -1003,9 +981,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1003 | * protection error (error_code & 9) == 0. | 981 | * protection error (error_code & 9) == 0. |
1004 | */ | 982 | */ |
1005 | if (unlikely(fault_in_kernel_space(address))) { | 983 | if (unlikely(fault_in_kernel_space(address))) { |
1006 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 984 | if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { |
1007 | vmalloc_fault(address) >= 0) | 985 | if (vmalloc_fault(address) >= 0) |
1008 | return; | 986 | return; |
987 | |||
988 | if (kmemcheck_fault(regs, address, error_code)) | ||
989 | return; | ||
990 | } | ||
1009 | 991 | ||
1010 | /* Can handle a stale RO->RW TLB: */ | 992 | /* Can handle a stale RO->RW TLB: */ |
1011 | if (spurious_fault(error_code, address)) | 993 | if (spurious_fault(error_code, address)) |
@@ -1044,6 +1026,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1044 | if (unlikely(error_code & PF_RSVD)) | 1026 | if (unlikely(error_code & PF_RSVD)) |
1045 | pgtable_bad(regs, error_code, address); | 1027 | pgtable_bad(regs, error_code, address); |
1046 | 1028 | ||
1029 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | ||
1030 | |||
1047 | /* | 1031 | /* |
1048 | * If we're in an interrupt, have no user context or are running | 1032 | * If we're in an interrupt, have no user context or are running |
1049 | * in an atomic region then we must not take the fault: | 1033 | * in an atomic region then we must not take the fault: |
@@ -1137,10 +1121,15 @@ good_area: | |||
1137 | return; | 1121 | return; |
1138 | } | 1122 | } |
1139 | 1123 | ||
1140 | if (fault & VM_FAULT_MAJOR) | 1124 | if (fault & VM_FAULT_MAJOR) { |
1141 | tsk->maj_flt++; | 1125 | tsk->maj_flt++; |
1142 | else | 1126 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, |
1127 | regs, address); | ||
1128 | } else { | ||
1143 | tsk->min_flt++; | 1129 | tsk->min_flt++; |
1130 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | ||
1131 | regs, address); | ||
1132 | } | ||
1144 | 1133 | ||
1145 | check_v8086_mode(regs, address, tsk); | 1134 | check_v8086_mode(regs, address, tsk); |
1146 | 1135 | ||
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 8126e8d1a2a4..58f621e81919 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | |||
44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
45 | BUG_ON(!pte_none(*(kmap_pte-idx))); | 45 | BUG_ON(!pte_none(*(kmap_pte-idx))); |
46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); | 46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); |
47 | arch_flush_lazy_mmu_mode(); | ||
48 | 47 | ||
49 | return (void *)vaddr; | 48 | return (void *)vaddr; |
50 | } | 49 | } |
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) | |||
74 | #endif | 73 | #endif |
75 | } | 74 | } |
76 | 75 | ||
77 | arch_flush_lazy_mmu_mode(); | ||
78 | pagefault_enable(); | 76 | pagefault_enable(); |
79 | } | 77 | } |
80 | 78 | ||
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8f307d914c2e..f46c340727b8 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, | |||
26 | unsigned long sbase = saddr & PUD_MASK; | 26 | unsigned long sbase = saddr & PUD_MASK; |
27 | unsigned long s_end = sbase + PUD_SIZE; | 27 | unsigned long s_end = sbase + PUD_SIZE; |
28 | 28 | ||
29 | /* Allow segments to share if only one is marked locked */ | ||
30 | unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; | ||
31 | unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; | ||
32 | |||
29 | /* | 33 | /* |
30 | * match the virtual addresses, permission and the alignment of the | 34 | * match the virtual addresses, permission and the alignment of the |
31 | * page table page. | 35 | * page table page. |
32 | */ | 36 | */ |
33 | if (pmd_index(addr) != pmd_index(saddr) || | 37 | if (pmd_index(addr) != pmd_index(saddr) || |
34 | vma->vm_flags != svma->vm_flags || | 38 | vm_flags != svm_flags || |
35 | sbase < svma->vm_start || svma->vm_end < s_end) | 39 | sbase < svma->vm_start || svma->vm_end < s_end) |
36 | return 0; | 40 | return 0; |
37 | 41 | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ae4f7b5d7104..f53b57e4086f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/initrd.h> | ||
1 | #include <linux/ioport.h> | 2 | #include <linux/ioport.h> |
2 | #include <linux/swap.h> | 3 | #include <linux/swap.h> |
3 | 4 | ||
@@ -10,6 +11,9 @@ | |||
10 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
11 | #include <asm/system.h> | 12 | #include <asm/system.h> |
12 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
14 | #include <asm/tlb.h> | ||
15 | |||
16 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
13 | 17 | ||
14 | unsigned long __initdata e820_table_start; | 18 | unsigned long __initdata e820_table_start; |
15 | unsigned long __meminitdata e820_table_end; | 19 | unsigned long __meminitdata e820_table_end; |
@@ -23,6 +27,69 @@ int direct_gbpages | |||
23 | #endif | 27 | #endif |
24 | ; | 28 | ; |
25 | 29 | ||
30 | int nx_enabled; | ||
31 | |||
32 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
33 | static int disable_nx __cpuinitdata; | ||
34 | |||
35 | /* | ||
36 | * noexec = on|off | ||
37 | * | ||
38 | * Control non-executable mappings for processes. | ||
39 | * | ||
40 | * on Enable | ||
41 | * off Disable | ||
42 | */ | ||
43 | static int __init noexec_setup(char *str) | ||
44 | { | ||
45 | if (!str) | ||
46 | return -EINVAL; | ||
47 | if (!strncmp(str, "on", 2)) { | ||
48 | __supported_pte_mask |= _PAGE_NX; | ||
49 | disable_nx = 0; | ||
50 | } else if (!strncmp(str, "off", 3)) { | ||
51 | disable_nx = 1; | ||
52 | __supported_pte_mask &= ~_PAGE_NX; | ||
53 | } | ||
54 | return 0; | ||
55 | } | ||
56 | early_param("noexec", noexec_setup); | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_X86_PAE | ||
60 | static void __init set_nx(void) | ||
61 | { | ||
62 | unsigned int v[4], l, h; | ||
63 | |||
64 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
65 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
66 | |||
67 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
68 | rdmsr(MSR_EFER, l, h); | ||
69 | l |= EFER_NX; | ||
70 | wrmsr(MSR_EFER, l, h); | ||
71 | nx_enabled = 1; | ||
72 | __supported_pte_mask |= _PAGE_NX; | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | #else | ||
77 | static inline void set_nx(void) | ||
78 | { | ||
79 | } | ||
80 | #endif | ||
81 | |||
82 | #ifdef CONFIG_X86_64 | ||
83 | void __cpuinit check_efer(void) | ||
84 | { | ||
85 | unsigned long efer; | ||
86 | |||
87 | rdmsrl(MSR_EFER, efer); | ||
88 | if (!(efer & EFER_NX) || disable_nx) | ||
89 | __supported_pte_mask &= ~_PAGE_NX; | ||
90 | } | ||
91 | #endif | ||
92 | |||
26 | static void __init find_early_table_space(unsigned long end, int use_pse, | 93 | static void __init find_early_table_space(unsigned long end, int use_pse, |
27 | int use_gbpages) | 94 | int use_gbpages) |
28 | { | 95 | { |
@@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
66 | */ | 133 | */ |
67 | #ifdef CONFIG_X86_32 | 134 | #ifdef CONFIG_X86_32 |
68 | start = 0x7000; | 135 | start = 0x7000; |
69 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 136 | #else |
70 | tables, PAGE_SIZE); | ||
71 | #else /* CONFIG_X86_64 */ | ||
72 | start = 0x8000; | 137 | start = 0x8000; |
73 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
74 | #endif | 138 | #endif |
139 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
140 | tables, PAGE_SIZE); | ||
75 | if (e820_table_start == -1UL) | 141 | if (e820_table_start == -1UL) |
76 | panic("Cannot find space for the kernel page tables"); | 142 | panic("Cannot find space for the kernel page tables"); |
77 | 143 | ||
@@ -147,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
147 | if (!after_bootmem) | 213 | if (!after_bootmem) |
148 | init_gbpages(); | 214 | init_gbpages(); |
149 | 215 | ||
150 | #ifdef CONFIG_DEBUG_PAGEALLOC | 216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
151 | /* | 217 | /* |
152 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
153 | * This will simplify cpa(), which otherwise needs to support splitting | 219 | * This will simplify cpa(), which otherwise needs to support splitting |
@@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
159 | use_gbpages = direct_gbpages; | 225 | use_gbpages = direct_gbpages; |
160 | #endif | 226 | #endif |
161 | 227 | ||
162 | #ifdef CONFIG_X86_32 | ||
163 | #ifdef CONFIG_X86_PAE | ||
164 | set_nx(); | 228 | set_nx(); |
165 | if (nx_enabled) | 229 | if (nx_enabled) |
166 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | 230 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); |
167 | #endif | ||
168 | 231 | ||
169 | /* Enable PSE if available */ | 232 | /* Enable PSE if available */ |
170 | if (cpu_has_pse) | 233 | if (cpu_has_pse) |
@@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
175 | set_in_cr4(X86_CR4_PGE); | 238 | set_in_cr4(X86_CR4_PGE); |
176 | __supported_pte_mask |= _PAGE_GLOBAL; | 239 | __supported_pte_mask |= _PAGE_GLOBAL; |
177 | } | 240 | } |
178 | #endif | ||
179 | 241 | ||
180 | if (use_gbpages) | 242 | if (use_gbpages) |
181 | page_size_mask |= 1 << PG_LEVEL_1G; | 243 | page_size_mask |= 1 << PG_LEVEL_1G; |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559ed80f5..3cd7711bb949 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -49,12 +49,9 @@ | |||
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/page_types.h> | ||
52 | #include <asm/init.h> | 53 | #include <asm/init.h> |
53 | 54 | ||
54 | unsigned long max_low_pfn_mapped; | ||
55 | unsigned long max_pfn_mapped; | ||
56 | |||
57 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
58 | unsigned long highstart_pfn, highend_pfn; | 55 | unsigned long highstart_pfn, highend_pfn; |
59 | 56 | ||
60 | static noinline int do_test_wp_bit(void); | 57 | static noinline int do_test_wp_bit(void); |
@@ -114,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
114 | pte_t *page_table = NULL; | 111 | pte_t *page_table = NULL; |
115 | 112 | ||
116 | if (after_bootmem) { | 113 | if (after_bootmem) { |
117 | #ifdef CONFIG_DEBUG_PAGEALLOC | 114 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
118 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
119 | #endif | 116 | #endif |
120 | if (!page_table) | 117 | if (!page_table) |
@@ -567,7 +564,7 @@ static inline void save_pg_dir(void) | |||
567 | } | 564 | } |
568 | #endif /* !CONFIG_ACPI_SLEEP */ | 565 | #endif /* !CONFIG_ACPI_SLEEP */ |
569 | 566 | ||
570 | void zap_low_mappings(void) | 567 | void zap_low_mappings(bool early) |
571 | { | 568 | { |
572 | int i; | 569 | int i; |
573 | 570 | ||
@@ -584,64 +581,16 @@ void zap_low_mappings(void) | |||
584 | set_pgd(swapper_pg_dir+i, __pgd(0)); | 581 | set_pgd(swapper_pg_dir+i, __pgd(0)); |
585 | #endif | 582 | #endif |
586 | } | 583 | } |
587 | flush_tlb_all(); | ||
588 | } | ||
589 | 584 | ||
590 | int nx_enabled; | 585 | if (early) |
586 | __flush_tlb(); | ||
587 | else | ||
588 | flush_tlb_all(); | ||
589 | } | ||
591 | 590 | ||
592 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); | 591 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); |
593 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 592 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
594 | 593 | ||
595 | #ifdef CONFIG_X86_PAE | ||
596 | |||
597 | static int disable_nx __initdata; | ||
598 | |||
599 | /* | ||
600 | * noexec = on|off | ||
601 | * | ||
602 | * Control non executable mappings. | ||
603 | * | ||
604 | * on Enable | ||
605 | * off Disable | ||
606 | */ | ||
607 | static int __init noexec_setup(char *str) | ||
608 | { | ||
609 | if (!str || !strcmp(str, "on")) { | ||
610 | if (cpu_has_nx) { | ||
611 | __supported_pte_mask |= _PAGE_NX; | ||
612 | disable_nx = 0; | ||
613 | } | ||
614 | } else { | ||
615 | if (!strcmp(str, "off")) { | ||
616 | disable_nx = 1; | ||
617 | __supported_pte_mask &= ~_PAGE_NX; | ||
618 | } else { | ||
619 | return -EINVAL; | ||
620 | } | ||
621 | } | ||
622 | |||
623 | return 0; | ||
624 | } | ||
625 | early_param("noexec", noexec_setup); | ||
626 | |||
627 | void __init set_nx(void) | ||
628 | { | ||
629 | unsigned int v[4], l, h; | ||
630 | |||
631 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
632 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
633 | |||
634 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
635 | rdmsr(MSR_EFER, l, h); | ||
636 | l |= EFER_NX; | ||
637 | wrmsr(MSR_EFER, l, h); | ||
638 | nx_enabled = 1; | ||
639 | __supported_pte_mask |= _PAGE_NX; | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | #endif | ||
644 | |||
645 | /* user-defined highmem size */ | 594 | /* user-defined highmem size */ |
646 | static unsigned int highmem_pages = -1; | 595 | static unsigned int highmem_pages = -1; |
647 | 596 | ||
@@ -761,15 +710,15 @@ void __init initmem_init(unsigned long start_pfn, | |||
761 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
762 | if (max_pfn > max_low_pfn) | 711 | if (max_pfn > max_low_pfn) |
763 | highstart_pfn = max_low_pfn; | 712 | highstart_pfn = max_low_pfn; |
764 | memory_present(0, 0, highend_pfn); | ||
765 | e820_register_active_regions(0, 0, highend_pfn); | 713 | e820_register_active_regions(0, 0, highend_pfn); |
714 | sparse_memory_present_with_active_regions(0); | ||
766 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 715 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
767 | pages_to_mb(highend_pfn - highstart_pfn)); | 716 | pages_to_mb(highend_pfn - highstart_pfn)); |
768 | num_physpages = highend_pfn; | 717 | num_physpages = highend_pfn; |
769 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 718 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
770 | #else | 719 | #else |
771 | memory_present(0, 0, max_low_pfn); | ||
772 | e820_register_active_regions(0, 0, max_low_pfn); | 720 | e820_register_active_regions(0, 0, max_low_pfn); |
721 | sparse_memory_present_with_active_regions(0); | ||
773 | num_physpages = max_low_pfn; | 722 | num_physpages = max_low_pfn; |
774 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 723 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
775 | #endif | 724 | #endif |
@@ -1011,7 +960,7 @@ void __init mem_init(void) | |||
1011 | test_wp_bit(); | 960 | test_wp_bit(); |
1012 | 961 | ||
1013 | save_pg_dir(); | 962 | save_pg_dir(); |
1014 | zap_low_mappings(); | 963 | zap_low_mappings(true); |
1015 | } | 964 | } |
1016 | 965 | ||
1017 | #ifdef CONFIG_MEMORY_HOTPLUG | 966 | #ifdef CONFIG_MEMORY_HOTPLUG |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e8020df6..9c543290a813 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -50,18 +50,8 @@ | |||
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 51 | #include <asm/init.h> |
52 | 52 | ||
53 | /* | ||
54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
55 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
56 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
57 | */ | ||
58 | unsigned long max_low_pfn_mapped; | ||
59 | unsigned long max_pfn_mapped; | ||
60 | |||
61 | static unsigned long dma_reserve __initdata; | 53 | static unsigned long dma_reserve __initdata; |
62 | 54 | ||
63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
64 | |||
65 | static int __init parse_direct_gbpages_off(char *arg) | 55 | static int __init parse_direct_gbpages_off(char *arg) |
66 | { | 56 | { |
67 | direct_gbpages = 0; | 57 | direct_gbpages = 0; |
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 75 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 76 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
87 | 77 | ||
88 | static int disable_nx __cpuinitdata; | ||
89 | |||
90 | /* | ||
91 | * noexec=on|off | ||
92 | * Control non-executable mappings for 64-bit processes. | ||
93 | * | ||
94 | * on Enable (default) | ||
95 | * off Disable | ||
96 | */ | ||
97 | static int __init nonx_setup(char *str) | ||
98 | { | ||
99 | if (!str) | ||
100 | return -EINVAL; | ||
101 | if (!strncmp(str, "on", 2)) { | ||
102 | __supported_pte_mask |= _PAGE_NX; | ||
103 | disable_nx = 0; | ||
104 | } else if (!strncmp(str, "off", 3)) { | ||
105 | disable_nx = 1; | ||
106 | __supported_pte_mask &= ~_PAGE_NX; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | early_param("noexec", nonx_setup); | ||
111 | |||
112 | void __cpuinit check_efer(void) | ||
113 | { | ||
114 | unsigned long efer; | ||
115 | |||
116 | rdmsrl(MSR_EFER, efer); | ||
117 | if (!(efer & EFER_NX) || disable_nx) | ||
118 | __supported_pte_mask &= ~_PAGE_NX; | ||
119 | } | ||
120 | |||
121 | int force_personality32; | 78 | int force_personality32; |
122 | 79 | ||
123 | /* | 80 | /* |
@@ -147,7 +104,7 @@ static __ref void *spp_getpage(void) | |||
147 | void *ptr; | 104 | void *ptr; |
148 | 105 | ||
149 | if (after_bootmem) | 106 | if (after_bootmem) |
150 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); | 107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
151 | else | 108 | else |
152 | ptr = alloc_bootmem_pages(PAGE_SIZE); | 109 | ptr = alloc_bootmem_pages(PAGE_SIZE); |
153 | 110 | ||
@@ -324,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
324 | void *adr; | 281 | void *adr; |
325 | 282 | ||
326 | if (after_bootmem) { | 283 | if (after_bootmem) { |
327 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | 284 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
328 | *phys = __pa(adr); | 285 | *phys = __pa(adr); |
329 | 286 | ||
330 | return adr; | 287 | return adr; |
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
628 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 585 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); |
629 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 586 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
630 | } | 587 | } |
588 | #endif | ||
631 | 589 | ||
632 | void __init paging_init(void) | 590 | void __init paging_init(void) |
633 | { | 591 | { |
@@ -638,11 +596,10 @@ void __init paging_init(void) | |||
638 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 596 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
639 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 597 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
640 | 598 | ||
641 | memory_present(0, 0, max_pfn); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
642 | sparse_init(); | 600 | sparse_init(); |
643 | free_area_init_nodes(max_zone_pfns); | 601 | free_area_init_nodes(max_zone_pfns); |
644 | } | 602 | } |
645 | #endif | ||
646 | 603 | ||
647 | /* | 604 | /* |
648 | * Memory hotplug specific functions | 605 | * Memory hotplug specific functions |
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 8056545e2d39..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) | |||
82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | 82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
83 | kpte_clear_flush(kmap_pte-idx, vaddr); | 83 | kpte_clear_flush(kmap_pte-idx, vaddr); |
84 | 84 | ||
85 | arch_flush_lazy_mmu_mode(); | ||
86 | pagefault_enable(); | 85 | pagefault_enable(); |
87 | } | 86 | } |
88 | EXPORT_SYMBOL_GPL(iounmap_atomic); | 87 | EXPORT_SYMBOL_GPL(iounmap_atomic); |
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 000000000000..520b3bce4095 --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o | |||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 000000000000..4901d0dafda6 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -0,0 +1,228 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/kdebug.h> | ||
3 | #include <linux/kmemcheck.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/ptrace.h> | ||
7 | #include <linux/stacktrace.h> | ||
8 | #include <linux/string.h> | ||
9 | |||
10 | #include "error.h" | ||
11 | #include "shadow.h" | ||
12 | |||
13 | enum kmemcheck_error_type { | ||
14 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
15 | KMEMCHECK_ERROR_BUG, | ||
16 | }; | ||
17 | |||
18 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
19 | |||
20 | struct kmemcheck_error { | ||
21 | enum kmemcheck_error_type type; | ||
22 | |||
23 | union { | ||
24 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
25 | struct { | ||
26 | /* Kind of access that caused the error */ | ||
27 | enum kmemcheck_shadow state; | ||
28 | /* Address and size of the erroneous read */ | ||
29 | unsigned long address; | ||
30 | unsigned int size; | ||
31 | }; | ||
32 | }; | ||
33 | |||
34 | struct pt_regs regs; | ||
35 | struct stack_trace trace; | ||
36 | unsigned long trace_entries[32]; | ||
37 | |||
38 | /* We compress it to a char. */ | ||
39 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
40 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Create a ring queue of errors to output. We can't call printk() directly | ||
45 | * from the kmemcheck traps, since this may call the console drivers and | ||
46 | * result in a recursive fault. | ||
47 | */ | ||
48 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
49 | static unsigned int error_count; | ||
50 | static unsigned int error_rd; | ||
51 | static unsigned int error_wr; | ||
52 | static unsigned int error_missed_count; | ||
53 | |||
54 | static struct kmemcheck_error *error_next_wr(void) | ||
55 | { | ||
56 | struct kmemcheck_error *e; | ||
57 | |||
58 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
59 | ++error_missed_count; | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | e = &error_fifo[error_wr]; | ||
64 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
65 | error_wr = 0; | ||
66 | ++error_count; | ||
67 | return e; | ||
68 | } | ||
69 | |||
70 | static struct kmemcheck_error *error_next_rd(void) | ||
71 | { | ||
72 | struct kmemcheck_error *e; | ||
73 | |||
74 | if (error_count == 0) | ||
75 | return NULL; | ||
76 | |||
77 | e = &error_fifo[error_rd]; | ||
78 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
79 | error_rd = 0; | ||
80 | --error_count; | ||
81 | return e; | ||
82 | } | ||
83 | |||
84 | void kmemcheck_error_recall(void) | ||
85 | { | ||
86 | static const char *desc[] = { | ||
87 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
88 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
89 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
90 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
91 | }; | ||
92 | |||
93 | static const char short_desc[] = { | ||
94 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
95 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
96 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
97 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
98 | }; | ||
99 | |||
100 | struct kmemcheck_error *e; | ||
101 | unsigned int i; | ||
102 | |||
103 | e = error_next_rd(); | ||
104 | if (!e) | ||
105 | return; | ||
106 | |||
107 | switch (e->type) { | ||
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | ||
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
112 | desc[e->state] : "(invalid shadow state)", | ||
113 | (void *) e->address); | ||
114 | |||
115 | printk(KERN_INFO); | ||
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
117 | printk("%02x", e->memory_copy[i]); | ||
118 | printk("\n"); | ||
119 | |||
120 | printk(KERN_INFO); | ||
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | ||
124 | else | ||
125 | printk(" ?"); | ||
126 | } | ||
127 | printk("\n"); | ||
128 | printk(KERN_INFO "%*c\n", 2 + 2 | ||
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
130 | break; | ||
131 | case KMEMCHECK_ERROR_BUG: | ||
132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | __show_regs(&e->regs, 1); | ||
137 | print_stack_trace(&e->trace, 0); | ||
138 | } | ||
139 | |||
140 | static void do_wakeup(unsigned long data) | ||
141 | { | ||
142 | while (error_count > 0) | ||
143 | kmemcheck_error_recall(); | ||
144 | |||
145 | if (error_missed_count > 0) { | ||
146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
147 | "the queue was too small\n", error_missed_count); | ||
148 | error_missed_count = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
153 | |||
154 | /* | ||
155 | * Save the context of an error report. | ||
156 | */ | ||
157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
159 | { | ||
160 | static unsigned long prev_ip; | ||
161 | |||
162 | struct kmemcheck_error *e; | ||
163 | void *shadow_copy; | ||
164 | void *memory_copy; | ||
165 | |||
166 | /* Don't report several adjacent errors from the same EIP. */ | ||
167 | if (regs->ip == prev_ip) | ||
168 | return; | ||
169 | prev_ip = regs->ip; | ||
170 | |||
171 | e = error_next_wr(); | ||
172 | if (!e) | ||
173 | return; | ||
174 | |||
175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
176 | |||
177 | e->state = state; | ||
178 | e->address = address; | ||
179 | e->size = size; | ||
180 | |||
181 | /* Save regs */ | ||
182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
183 | |||
184 | /* Save stack trace */ | ||
185 | e->trace.nr_entries = 0; | ||
186 | e->trace.entries = e->trace_entries; | ||
187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
188 | e->trace.skip = 0; | ||
189 | save_stack_trace_bp(&e->trace, regs->bp); | ||
190 | |||
191 | /* Round address down to nearest 16 bytes */ | ||
192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
194 | BUG_ON(!shadow_copy); | ||
195 | |||
196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
197 | |||
198 | kmemcheck_show_addr(address); | ||
199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
201 | kmemcheck_hide_addr(address); | ||
202 | |||
203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Save the context of a kmemcheck bug. | ||
208 | */ | ||
209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
210 | { | ||
211 | struct kmemcheck_error *e; | ||
212 | |||
213 | e = error_next_wr(); | ||
214 | if (!e) | ||
215 | return; | ||
216 | |||
217 | e->type = KMEMCHECK_ERROR_BUG; | ||
218 | |||
219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
220 | |||
221 | e->trace.nr_entries = 0; | ||
222 | e->trace.entries = e->trace_entries; | ||
223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
224 | e->trace.skip = 1; | ||
225 | save_stack_trace(&e->trace); | ||
226 | |||
227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
228 | } | ||
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 000000000000..0efc2e8d0a20 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h | |||
@@ -0,0 +1,15 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
3 | |||
4 | #include <linux/ptrace.h> | ||
5 | |||
6 | #include "shadow.h" | ||
7 | |||
8 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
9 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
10 | |||
11 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
12 | |||
13 | void kmemcheck_error_recall(void); | ||
14 | |||
15 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 000000000000..2c55ed098654 --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /** | ||
2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2) as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kmemcheck.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/page-flags.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/ptrace.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/types.h> | ||
23 | |||
24 | #include <asm/cacheflush.h> | ||
25 | #include <asm/kmemcheck.h> | ||
26 | #include <asm/pgtable.h> | ||
27 | #include <asm/tlbflush.h> | ||
28 | |||
29 | #include "error.h" | ||
30 | #include "opcode.h" | ||
31 | #include "pte.h" | ||
32 | #include "selftest.h" | ||
33 | #include "shadow.h" | ||
34 | |||
35 | |||
36 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
37 | # define KMEMCHECK_ENABLED 0 | ||
38 | #endif | ||
39 | |||
40 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
41 | # define KMEMCHECK_ENABLED 1 | ||
42 | #endif | ||
43 | |||
44 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
45 | # define KMEMCHECK_ENABLED 2 | ||
46 | #endif | ||
47 | |||
48 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
49 | |||
50 | int __init kmemcheck_init(void) | ||
51 | { | ||
52 | #ifdef CONFIG_SMP | ||
53 | /* | ||
54 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
55 | * runs before SMP is set up. | ||
56 | */ | ||
57 | if (setup_max_cpus > 1) { | ||
58 | printk(KERN_INFO | ||
59 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
60 | setup_max_cpus = 1; | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | if (!kmemcheck_selftest()) { | ||
65 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
66 | kmemcheck_enabled = 0; | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | early_initcall(kmemcheck_init); | ||
75 | |||
76 | /* | ||
77 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
78 | */ | ||
79 | static int __init param_kmemcheck(char *str) | ||
80 | { | ||
81 | if (!str) | ||
82 | return -EINVAL; | ||
83 | |||
84 | sscanf(str, "%d", &kmemcheck_enabled); | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | early_param("kmemcheck", param_kmemcheck); | ||
89 | |||
90 | int kmemcheck_show_addr(unsigned long address) | ||
91 | { | ||
92 | pte_t *pte; | ||
93 | |||
94 | pte = kmemcheck_pte_lookup(address); | ||
95 | if (!pte) | ||
96 | return 0; | ||
97 | |||
98 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
99 | __flush_tlb_one(address); | ||
100 | return 1; | ||
101 | } | ||
102 | |||
103 | int kmemcheck_hide_addr(unsigned long address) | ||
104 | { | ||
105 | pte_t *pte; | ||
106 | |||
107 | pte = kmemcheck_pte_lookup(address); | ||
108 | if (!pte) | ||
109 | return 0; | ||
110 | |||
111 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
112 | __flush_tlb_one(address); | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | struct kmemcheck_context { | ||
117 | bool busy; | ||
118 | int balance; | ||
119 | |||
120 | /* | ||
121 | * There can be at most two memory operands to an instruction, but | ||
122 | * each address can cross a page boundary -- so we may need up to | ||
123 | * four addresses that must be hidden/revealed for each fault. | ||
124 | */ | ||
125 | unsigned long addr[4]; | ||
126 | unsigned long n_addrs; | ||
127 | unsigned long flags; | ||
128 | |||
129 | /* Data size of the instruction that caused a fault. */ | ||
130 | unsigned int size; | ||
131 | }; | ||
132 | |||
133 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
134 | |||
135 | bool kmemcheck_active(struct pt_regs *regs) | ||
136 | { | ||
137 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
138 | |||
139 | return data->balance > 0; | ||
140 | } | ||
141 | |||
142 | /* Save an address that needs to be shown/hidden */ | ||
143 | static void kmemcheck_save_addr(unsigned long addr) | ||
144 | { | ||
145 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
146 | |||
147 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
148 | data->addr[data->n_addrs++] = addr; | ||
149 | } | ||
150 | |||
151 | static unsigned int kmemcheck_show_all(void) | ||
152 | { | ||
153 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
154 | unsigned int i; | ||
155 | unsigned int n; | ||
156 | |||
157 | n = 0; | ||
158 | for (i = 0; i < data->n_addrs; ++i) | ||
159 | n += kmemcheck_show_addr(data->addr[i]); | ||
160 | |||
161 | return n; | ||
162 | } | ||
163 | |||
164 | static unsigned int kmemcheck_hide_all(void) | ||
165 | { | ||
166 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
167 | unsigned int i; | ||
168 | unsigned int n; | ||
169 | |||
170 | n = 0; | ||
171 | for (i = 0; i < data->n_addrs; ++i) | ||
172 | n += kmemcheck_hide_addr(data->addr[i]); | ||
173 | |||
174 | return n; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Called from the #PF handler. | ||
179 | */ | ||
180 | void kmemcheck_show(struct pt_regs *regs) | ||
181 | { | ||
182 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
183 | |||
184 | BUG_ON(!irqs_disabled()); | ||
185 | |||
186 | if (unlikely(data->balance != 0)) { | ||
187 | kmemcheck_show_all(); | ||
188 | kmemcheck_error_save_bug(regs); | ||
189 | data->balance = 0; | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * None of the addresses actually belonged to kmemcheck. Note that | ||
195 | * this is not an error. | ||
196 | */ | ||
197 | if (kmemcheck_show_all() == 0) | ||
198 | return; | ||
199 | |||
200 | ++data->balance; | ||
201 | |||
202 | /* | ||
203 | * The IF needs to be cleared as well, so that the faulting | ||
204 | * instruction can run "uninterrupted". Otherwise, we might take | ||
205 | * an interrupt and start executing that before we've had a chance | ||
206 | * to hide the page again. | ||
207 | * | ||
208 | * NOTE: In the rare case of multiple faults, we must not override | ||
209 | * the original flags: | ||
210 | */ | ||
211 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
212 | data->flags = regs->flags; | ||
213 | |||
214 | regs->flags |= X86_EFLAGS_TF; | ||
215 | regs->flags &= ~X86_EFLAGS_IF; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Called from the #DB handler. | ||
220 | */ | ||
221 | void kmemcheck_hide(struct pt_regs *regs) | ||
222 | { | ||
223 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
224 | int n; | ||
225 | |||
226 | BUG_ON(!irqs_disabled()); | ||
227 | |||
228 | if (data->balance == 0) | ||
229 | return; | ||
230 | |||
231 | if (unlikely(data->balance != 1)) { | ||
232 | kmemcheck_show_all(); | ||
233 | kmemcheck_error_save_bug(regs); | ||
234 | data->n_addrs = 0; | ||
235 | data->balance = 0; | ||
236 | |||
237 | if (!(data->flags & X86_EFLAGS_TF)) | ||
238 | regs->flags &= ~X86_EFLAGS_TF; | ||
239 | if (data->flags & X86_EFLAGS_IF) | ||
240 | regs->flags |= X86_EFLAGS_IF; | ||
241 | return; | ||
242 | } | ||
243 | |||
244 | if (kmemcheck_enabled) | ||
245 | n = kmemcheck_hide_all(); | ||
246 | else | ||
247 | n = kmemcheck_show_all(); | ||
248 | |||
249 | if (n == 0) | ||
250 | return; | ||
251 | |||
252 | --data->balance; | ||
253 | |||
254 | data->n_addrs = 0; | ||
255 | |||
256 | if (!(data->flags & X86_EFLAGS_TF)) | ||
257 | regs->flags &= ~X86_EFLAGS_TF; | ||
258 | if (data->flags & X86_EFLAGS_IF) | ||
259 | regs->flags |= X86_EFLAGS_IF; | ||
260 | } | ||
261 | |||
262 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
263 | { | ||
264 | unsigned int i; | ||
265 | |||
266 | for (i = 0; i < n; ++i) { | ||
267 | unsigned long address; | ||
268 | pte_t *pte; | ||
269 | unsigned int level; | ||
270 | |||
271 | address = (unsigned long) page_address(&p[i]); | ||
272 | pte = lookup_address(address, &level); | ||
273 | BUG_ON(!pte); | ||
274 | BUG_ON(level != PG_LEVEL_4K); | ||
275 | |||
276 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
277 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
278 | __flush_tlb_one(address); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | bool kmemcheck_page_is_tracked(struct page *p) | ||
283 | { | ||
284 | /* This will also check the "hidden" flag of the PTE. */ | ||
285 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
286 | } | ||
287 | |||
288 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
289 | { | ||
290 | unsigned int i; | ||
291 | |||
292 | for (i = 0; i < n; ++i) { | ||
293 | unsigned long address; | ||
294 | pte_t *pte; | ||
295 | unsigned int level; | ||
296 | |||
297 | address = (unsigned long) page_address(&p[i]); | ||
298 | pte = lookup_address(address, &level); | ||
299 | BUG_ON(!pte); | ||
300 | BUG_ON(level != PG_LEVEL_4K); | ||
301 | |||
302 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
303 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
304 | __flush_tlb_one(address); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | /* Access may NOT cross page boundary */ | ||
309 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
310 | unsigned long addr, unsigned int size) | ||
311 | { | ||
312 | void *shadow; | ||
313 | enum kmemcheck_shadow status; | ||
314 | |||
315 | shadow = kmemcheck_shadow_lookup(addr); | ||
316 | if (!shadow) | ||
317 | return; | ||
318 | |||
319 | kmemcheck_save_addr(addr); | ||
320 | status = kmemcheck_shadow_test(shadow, size); | ||
321 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
322 | return; | ||
323 | |||
324 | if (kmemcheck_enabled) | ||
325 | kmemcheck_error_save(status, addr, size, regs); | ||
326 | |||
327 | if (kmemcheck_enabled == 2) | ||
328 | kmemcheck_enabled = 0; | ||
329 | |||
330 | /* Don't warn about it again. */ | ||
331 | kmemcheck_shadow_set(shadow, size); | ||
332 | } | ||
333 | |||
334 | /* Access may cross page boundary */ | ||
335 | static void kmemcheck_read(struct pt_regs *regs, | ||
336 | unsigned long addr, unsigned int size) | ||
337 | { | ||
338 | unsigned long page = addr & PAGE_MASK; | ||
339 | unsigned long next_addr = addr + size - 1; | ||
340 | unsigned long next_page = next_addr & PAGE_MASK; | ||
341 | |||
342 | if (likely(page == next_page)) { | ||
343 | kmemcheck_read_strict(regs, addr, size); | ||
344 | return; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * What we do is basically to split the access across the | ||
349 | * two pages and handle each part separately. Yes, this means | ||
350 | * that we may now see reads that are 3 + 5 bytes, for | ||
351 | * example (and if both are uninitialized, there will be two | ||
352 | * reports), but it makes the code a lot simpler. | ||
353 | */ | ||
354 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
355 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
356 | } | ||
357 | |||
358 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
359 | unsigned long addr, unsigned int size) | ||
360 | { | ||
361 | void *shadow; | ||
362 | |||
363 | shadow = kmemcheck_shadow_lookup(addr); | ||
364 | if (!shadow) | ||
365 | return; | ||
366 | |||
367 | kmemcheck_save_addr(addr); | ||
368 | kmemcheck_shadow_set(shadow, size); | ||
369 | } | ||
370 | |||
371 | static void kmemcheck_write(struct pt_regs *regs, | ||
372 | unsigned long addr, unsigned int size) | ||
373 | { | ||
374 | unsigned long page = addr & PAGE_MASK; | ||
375 | unsigned long next_addr = addr + size - 1; | ||
376 | unsigned long next_page = next_addr & PAGE_MASK; | ||
377 | |||
378 | if (likely(page == next_page)) { | ||
379 | kmemcheck_write_strict(regs, addr, size); | ||
380 | return; | ||
381 | } | ||
382 | |||
383 | /* See comment in kmemcheck_read(). */ | ||
384 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
385 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Copying is hard. We have two addresses, each of which may be split across | ||
390 | * a page (and each page will have different shadow addresses). | ||
391 | */ | ||
392 | static void kmemcheck_copy(struct pt_regs *regs, | ||
393 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
394 | { | ||
395 | uint8_t shadow[8]; | ||
396 | enum kmemcheck_shadow status; | ||
397 | |||
398 | unsigned long page; | ||
399 | unsigned long next_addr; | ||
400 | unsigned long next_page; | ||
401 | |||
402 | uint8_t *x; | ||
403 | unsigned int i; | ||
404 | unsigned int n; | ||
405 | |||
406 | BUG_ON(size > sizeof(shadow)); | ||
407 | |||
408 | page = src_addr & PAGE_MASK; | ||
409 | next_addr = src_addr + size - 1; | ||
410 | next_page = next_addr & PAGE_MASK; | ||
411 | |||
412 | if (likely(page == next_page)) { | ||
413 | /* Same page */ | ||
414 | x = kmemcheck_shadow_lookup(src_addr); | ||
415 | if (x) { | ||
416 | kmemcheck_save_addr(src_addr); | ||
417 | for (i = 0; i < size; ++i) | ||
418 | shadow[i] = x[i]; | ||
419 | } else { | ||
420 | for (i = 0; i < size; ++i) | ||
421 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
422 | } | ||
423 | } else { | ||
424 | n = next_page - src_addr; | ||
425 | BUG_ON(n > sizeof(shadow)); | ||
426 | |||
427 | /* First page */ | ||
428 | x = kmemcheck_shadow_lookup(src_addr); | ||
429 | if (x) { | ||
430 | kmemcheck_save_addr(src_addr); | ||
431 | for (i = 0; i < n; ++i) | ||
432 | shadow[i] = x[i]; | ||
433 | } else { | ||
434 | /* Not tracked */ | ||
435 | for (i = 0; i < n; ++i) | ||
436 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
437 | } | ||
438 | |||
439 | /* Second page */ | ||
440 | x = kmemcheck_shadow_lookup(next_page); | ||
441 | if (x) { | ||
442 | kmemcheck_save_addr(next_page); | ||
443 | for (i = n; i < size; ++i) | ||
444 | shadow[i] = x[i - n]; | ||
445 | } else { | ||
446 | /* Not tracked */ | ||
447 | for (i = n; i < size; ++i) | ||
448 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | page = dst_addr & PAGE_MASK; | ||
453 | next_addr = dst_addr + size - 1; | ||
454 | next_page = next_addr & PAGE_MASK; | ||
455 | |||
456 | if (likely(page == next_page)) { | ||
457 | /* Same page */ | ||
458 | x = kmemcheck_shadow_lookup(dst_addr); | ||
459 | if (x) { | ||
460 | kmemcheck_save_addr(dst_addr); | ||
461 | for (i = 0; i < size; ++i) { | ||
462 | x[i] = shadow[i]; | ||
463 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
464 | } | ||
465 | } | ||
466 | } else { | ||
467 | n = next_page - dst_addr; | ||
468 | BUG_ON(n > sizeof(shadow)); | ||
469 | |||
470 | /* First page */ | ||
471 | x = kmemcheck_shadow_lookup(dst_addr); | ||
472 | if (x) { | ||
473 | kmemcheck_save_addr(dst_addr); | ||
474 | for (i = 0; i < n; ++i) { | ||
475 | x[i] = shadow[i]; | ||
476 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | /* Second page */ | ||
481 | x = kmemcheck_shadow_lookup(next_page); | ||
482 | if (x) { | ||
483 | kmemcheck_save_addr(next_page); | ||
484 | for (i = n; i < size; ++i) { | ||
485 | x[i - n] = shadow[i]; | ||
486 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
487 | } | ||
488 | } | ||
489 | } | ||
490 | |||
491 | status = kmemcheck_shadow_test(shadow, size); | ||
492 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
493 | return; | ||
494 | |||
495 | if (kmemcheck_enabled) | ||
496 | kmemcheck_error_save(status, src_addr, size, regs); | ||
497 | |||
498 | if (kmemcheck_enabled == 2) | ||
499 | kmemcheck_enabled = 0; | ||
500 | } | ||
501 | |||
502 | enum kmemcheck_method { | ||
503 | KMEMCHECK_READ, | ||
504 | KMEMCHECK_WRITE, | ||
505 | }; | ||
506 | |||
507 | static void kmemcheck_access(struct pt_regs *regs, | ||
508 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
509 | { | ||
510 | const uint8_t *insn; | ||
511 | const uint8_t *insn_primary; | ||
512 | unsigned int size; | ||
513 | |||
514 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
515 | |||
516 | /* Recursive fault -- ouch. */ | ||
517 | if (data->busy) { | ||
518 | kmemcheck_show_addr(fallback_address); | ||
519 | kmemcheck_error_save_bug(regs); | ||
520 | return; | ||
521 | } | ||
522 | |||
523 | data->busy = true; | ||
524 | |||
525 | insn = (const uint8_t *) regs->ip; | ||
526 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
527 | |||
528 | kmemcheck_opcode_decode(insn, &size); | ||
529 | |||
530 | switch (insn_primary[0]) { | ||
531 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
532 | /* AND, OR, XOR */ | ||
533 | /* | ||
534 | * Unfortunately, these instructions have to be excluded from | ||
535 | * our regular checking since they access only some (and not | ||
536 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
537 | */ | ||
538 | case 0x80: | ||
539 | case 0x81: | ||
540 | case 0x82: | ||
541 | case 0x83: | ||
542 | switch ((insn_primary[1] >> 3) & 7) { | ||
543 | /* OR */ | ||
544 | case 1: | ||
545 | /* AND */ | ||
546 | case 4: | ||
547 | /* XOR */ | ||
548 | case 6: | ||
549 | kmemcheck_write(regs, fallback_address, size); | ||
550 | goto out; | ||
551 | |||
552 | /* ADD */ | ||
553 | case 0: | ||
554 | /* ADC */ | ||
555 | case 2: | ||
556 | /* SBB */ | ||
557 | case 3: | ||
558 | /* SUB */ | ||
559 | case 5: | ||
560 | /* CMP */ | ||
561 | case 7: | ||
562 | break; | ||
563 | } | ||
564 | break; | ||
565 | #endif | ||
566 | |||
567 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
568 | case 0xa4: | ||
569 | case 0xa5: | ||
570 | /* | ||
571 | * These instructions are special because they take two | ||
572 | * addresses, but we only get one page fault. | ||
573 | */ | ||
574 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
575 | goto out; | ||
576 | |||
577 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
578 | case 0xa6: | ||
579 | case 0xa7: | ||
580 | kmemcheck_read(regs, regs->si, size); | ||
581 | kmemcheck_read(regs, regs->di, size); | ||
582 | goto out; | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * If the opcode isn't special in any way, we use the data from the | ||
587 | * page fault handler to determine the address and type of memory | ||
588 | * access. | ||
589 | */ | ||
590 | switch (fallback_method) { | ||
591 | case KMEMCHECK_READ: | ||
592 | kmemcheck_read(regs, fallback_address, size); | ||
593 | goto out; | ||
594 | case KMEMCHECK_WRITE: | ||
595 | kmemcheck_write(regs, fallback_address, size); | ||
596 | goto out; | ||
597 | } | ||
598 | |||
599 | out: | ||
600 | data->busy = false; | ||
601 | } | ||
602 | |||
603 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
604 | unsigned long error_code) | ||
605 | { | ||
606 | pte_t *pte; | ||
607 | |||
608 | /* | ||
609 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
610 | * mode or non-kernel code segments will _never_ access kernel | ||
611 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
612 | * invoking kmemcheck for PnP BIOS calls. | ||
613 | */ | ||
614 | if (regs->flags & X86_VM_MASK) | ||
615 | return false; | ||
616 | if (regs->cs != __KERNEL_CS) | ||
617 | return false; | ||
618 | |||
619 | pte = kmemcheck_pte_lookup(address); | ||
620 | if (!pte) | ||
621 | return false; | ||
622 | |||
623 | if (error_code & 2) | ||
624 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
625 | else | ||
626 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
627 | |||
628 | kmemcheck_show(regs); | ||
629 | return true; | ||
630 | } | ||
631 | |||
632 | bool kmemcheck_trap(struct pt_regs *regs) | ||
633 | { | ||
634 | if (!kmemcheck_active(regs)) | ||
635 | return false; | ||
636 | |||
637 | /* We're done. */ | ||
638 | kmemcheck_hide(regs); | ||
639 | return true; | ||
640 | } | ||
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 000000000000..63c19e27aa6f --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
@@ -0,0 +1,106 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | |||
5 | static bool opcode_is_prefix(uint8_t b) | ||
6 | { | ||
7 | return | ||
8 | /* Group 1 */ | ||
9 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
10 | /* Group 2 */ | ||
11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
12 | || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e | ||
13 | /* Group 3 */ | ||
14 | || b == 0x66 | ||
15 | /* Group 4 */ | ||
16 | || b == 0x67; | ||
17 | } | ||
18 | |||
19 | #ifdef CONFIG_X86_64 | ||
20 | static bool opcode_is_rex_prefix(uint8_t b) | ||
21 | { | ||
22 | return (b & 0xf0) == 0x40; | ||
23 | } | ||
24 | #else | ||
25 | static bool opcode_is_rex_prefix(uint8_t b) | ||
26 | { | ||
27 | return false; | ||
28 | } | ||
29 | #endif | ||
30 | |||
31 | #define REX_W (1 << 3) | ||
32 | |||
33 | /* | ||
34 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
35 | * load/store that caused our #PF and this should work for all the opcodes | ||
36 | * that we care about. Moreover, the ones who invented this instruction set | ||
37 | * should be shot. | ||
38 | */ | ||
39 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
40 | { | ||
41 | /* Default operand size */ | ||
42 | int operand_size_override = 4; | ||
43 | |||
44 | /* prefixes */ | ||
45 | for (; opcode_is_prefix(*op); ++op) { | ||
46 | if (*op == 0x66) | ||
47 | operand_size_override = 2; | ||
48 | } | ||
49 | |||
50 | /* REX prefix */ | ||
51 | if (opcode_is_rex_prefix(*op)) { | ||
52 | uint8_t rex = *op; | ||
53 | |||
54 | ++op; | ||
55 | if (rex & REX_W) { | ||
56 | switch (*op) { | ||
57 | case 0x63: | ||
58 | *size = 4; | ||
59 | return; | ||
60 | case 0x0f: | ||
61 | ++op; | ||
62 | |||
63 | switch (*op) { | ||
64 | case 0xb6: | ||
65 | case 0xbe: | ||
66 | *size = 1; | ||
67 | return; | ||
68 | case 0xb7: | ||
69 | case 0xbf: | ||
70 | *size = 2; | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | |||
77 | *size = 8; | ||
78 | return; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* escape opcode */ | ||
83 | if (*op == 0x0f) { | ||
84 | ++op; | ||
85 | |||
86 | /* | ||
87 | * This is move with zero-extend and sign-extend, respectively; | ||
88 | * we don't have to think about 0xb6/0xbe, because this is | ||
89 | * already handled in the conditional below. | ||
90 | */ | ||
91 | if (*op == 0xb7 || *op == 0xbf) | ||
92 | operand_size_override = 2; | ||
93 | } | ||
94 | |||
95 | *size = (*op & 1) ? operand_size_override : 1; | ||
96 | } | ||
97 | |||
98 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
99 | { | ||
100 | /* skip prefixes */ | ||
101 | while (opcode_is_prefix(*op)) | ||
102 | ++op; | ||
103 | if (opcode_is_rex_prefix(*op)) | ||
104 | ++op; | ||
105 | return op; | ||
106 | } | ||
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 000000000000..6956aad66b5b --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
7 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
8 | |||
9 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 000000000000..4ead26eeaf96 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c | |||
@@ -0,0 +1,22 @@ | |||
1 | #include <linux/mm.h> | ||
2 | |||
3 | #include <asm/pgtable.h> | ||
4 | |||
5 | #include "pte.h" | ||
6 | |||
7 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
8 | { | ||
9 | pte_t *pte; | ||
10 | unsigned int level; | ||
11 | |||
12 | pte = lookup_address(address, &level); | ||
13 | if (!pte) | ||
14 | return NULL; | ||
15 | if (level != PG_LEVEL_4K) | ||
16 | return NULL; | ||
17 | if (!pte_hidden(*pte)) | ||
18 | return NULL; | ||
19 | |||
20 | return pte; | ||
21 | } | ||
22 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 000000000000..9f5966456492 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h | |||
@@ -0,0 +1,10 @@ | |||
#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
#define ARCH__X86__MM__KMEMCHECK__PTE_H

#include <linux/mm.h>

#include <asm/pgtable.h>

/* Return the PTE for @address if it is a kmemcheck-hidden 4k page, else NULL. */
pte_t *kmemcheck_pte_lookup(unsigned long address);

#endif
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c new file mode 100644 index 000000000000..036efbea8b28 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -0,0 +1,69 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | #include "selftest.h" | ||
5 | |||
/* One decoder test case: an instruction and the operand size we expect. */
struct selftest_opcode {
	unsigned int expected_size;	/* expected memory-operand size, in bytes */
	const uint8_t *insn;		/* raw instruction bytes to decode */
	const char *desc;		/* human-readable label, used only for log output */
};
11 | |||
/*
 * Table of instructions together with the memory-operand size that
 * kmemcheck_opcode_decode() is expected to report for each one.
 *
 * NOTE(review): some desc strings look inconsistent with the expected
 * sizes (e.g. "movzwq <mem8>" for a 1-byte operand); they are only
 * used for log output, but worth double-checking.
 */
static const struct selftest_opcode selftest_opcodes[] = {
	/* REP MOVS */
	{1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"},
	{4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"},

	/* MOVZX / MOVZXD */
	{1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"},
	{1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"},

	/* MOVSX / MOVSXD */
	{1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"},
	{1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"},

#ifdef CONFIG_X86_64
	/* MOVZX / MOVZXD */
	{1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"},
	{2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"},

	/* MOVSX / MOVSXD */
	{1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"},
	{2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"},
	{4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"},
#endif
};
36 | |||
37 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
38 | { | ||
39 | unsigned size; | ||
40 | |||
41 | kmemcheck_opcode_decode(op->insn, &size); | ||
42 | |||
43 | if (size == op->expected_size) | ||
44 | return true; | ||
45 | |||
46 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
47 | op->desc, op->expected_size, size); | ||
48 | return false; | ||
49 | } | ||
50 | |||
51 | static bool selftest_opcodes_all(void) | ||
52 | { | ||
53 | bool pass = true; | ||
54 | unsigned int i; | ||
55 | |||
56 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
57 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
58 | |||
59 | return pass; | ||
60 | } | ||
61 | |||
62 | bool kmemcheck_selftest(void) | ||
63 | { | ||
64 | bool pass = true; | ||
65 | |||
66 | pass = pass && selftest_opcodes_all(); | ||
67 | |||
68 | return pass; | ||
69 | } | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h new file mode 100644 index 000000000000..8fed4fe11f95 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
@@ -0,0 +1,6 @@ | |||
#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H
#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H

/* Run the kmemcheck opcode-decoder self-tests; true iff all passed. */
bool kmemcheck_selftest(void);

#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 000000000000..e773b6bd0079 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
@@ -0,0 +1,162 @@ | |||
1 | #include <linux/kmemcheck.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/mm.h> | ||
4 | #include <linux/module.h> | ||
5 | |||
6 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | #include "pte.h" | ||
10 | #include "shadow.h" | ||
11 | |||
/*
 * Return the shadow address for the given address. Returns NULL if the
 * address is not tracked.
 *
 * We need to be extremely careful not to follow any invalid pointers,
 * because this function can be called for *any* possible address.
 */
void *kmemcheck_shadow_lookup(unsigned long address)
{
	pte_t *pte;
	struct page *page;

	/* Reject anything outside the valid directly-mapped range. */
	if (!virt_addr_valid(address))
		return NULL;

	/* Only 4k pages marked hidden by kmemcheck are tracked. */
	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return NULL;

	page = virt_to_page(address);
	if (!page->shadow)
		return NULL;
	/* Shadow is stored per page; add the offset within the page. */
	return page->shadow + (address & (PAGE_SIZE - 1));
}
36 | |||
/*
 * Set the shadow bytes covering [address, address + n) to @status.
 * The range may span several pages; each page's shadow is looked up
 * separately, and pages without shadow are silently skipped.
 */
static void mark_shadow(void *address, unsigned int n,
	enum kmemcheck_shadow status)
{
	unsigned long addr = (unsigned long) address;
	unsigned long last_addr = addr + n - 1;
	unsigned long page = addr & PAGE_MASK;
	unsigned long last_page = last_addr & PAGE_MASK;
	unsigned int first_n;
	void *shadow;

	/* If the memory range crosses a page boundary, stop there. */
	if (page == last_page)
		first_n = n;
	else
		first_n = page + PAGE_SIZE - addr;

	/* Head: the (possibly partial) first page. */
	shadow = kmemcheck_shadow_lookup(addr);
	if (shadow)
		memset(shadow, status, first_n);

	addr += first_n;
	n -= first_n;

	/* Do full-page memset()s. */
	while (n >= PAGE_SIZE) {
		shadow = kmemcheck_shadow_lookup(addr);
		if (shadow)
			memset(shadow, status, PAGE_SIZE);

		addr += PAGE_SIZE;
		n -= PAGE_SIZE;
	}

	/* Do the remaining page, if any. */
	if (n > 0) {
		shadow = kmemcheck_shadow_lookup(addr);
		if (shadow)
			memset(shadow, status, n);
	}
}
77 | |||
/* Mark n bytes at address as unallocated in the shadow memory. */
void kmemcheck_mark_unallocated(void *address, unsigned int n)
{
	mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
}
82 | |||
/* Mark n bytes at address as allocated but uninitialized in the shadow. */
void kmemcheck_mark_uninitialized(void *address, unsigned int n)
{
	mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
}
87 | |||
/*
 * Fill the shadow memory of the given address such that the memory at that
 * address is marked as being initialized.
 */
void kmemcheck_mark_initialized(void *address, unsigned int n)
{
	mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
}
/* Exported: the only mark_* helper usable from modules. */
EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
97 | |||
/* Mark n bytes at address as freed in the shadow memory. */
void kmemcheck_mark_freed(void *address, unsigned int n)
{
	mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
}
102 | |||
103 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
104 | { | ||
105 | unsigned int i; | ||
106 | |||
107 | for (i = 0; i < n; ++i) | ||
108 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
109 | } | ||
110 | |||
111 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
112 | { | ||
113 | unsigned int i; | ||
114 | |||
115 | for (i = 0; i < n; ++i) | ||
116 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
117 | } | ||
118 | |||
119 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
120 | { | ||
121 | unsigned int i; | ||
122 | |||
123 | for (i = 0; i < n; ++i) | ||
124 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
125 | } | ||
126 | |||
/*
 * Check the shadow status of @size bytes starting at @shadow.
 *
 * With CONFIG_KMEMCHECK_PARTIAL_OK the access is accepted when *any*
 * byte is initialized; otherwise *all* bytes must be initialized.
 * Returns the shadow byte that decided the outcome (so callers can
 * report whether the memory was unallocated, uninitialized or freed).
 */
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
{
	uint8_t *x;
	unsigned int i;

	x = shadow;

#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
	/*
	 * Make sure _some_ bytes are initialized. Gcc frequently generates
	 * code to access neighboring bytes.
	 */
	for (i = 0; i < size; ++i) {
		if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
			return x[i];
	}
#else
	/* All bytes must be initialized. */
	for (i = 0; i < size; ++i) {
		if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
			return x[i];
	}
#endif

	/* Loop fell through: report the (passing) status of the first byte. */
	return x[0];
}
153 | |||
154 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
155 | { | ||
156 | uint8_t *x; | ||
157 | unsigned int i; | ||
158 | |||
159 | x = shadow; | ||
160 | for (i = 0; i < size; ++i) | ||
161 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
162 | } | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 000000000000..af46d9ab9d86 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -0,0 +1,16 @@ | |||
#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
#define ARCH__X86__MM__KMEMCHECK__SHADOW_H

/* Per-byte shadow state tracked by kmemcheck. */
enum kmemcheck_shadow {
	KMEMCHECK_SHADOW_UNALLOCATED,	/* never handed out by the allocator */
	KMEMCHECK_SHADOW_UNINITIALIZED,	/* allocated but not yet written */
	KMEMCHECK_SHADOW_INITIALIZED,	/* written at least once; reads OK */
	KMEMCHECK_SHADOW_FREED,		/* returned to the allocator */
};

/* Shadow address for a tracked byte, or NULL if untracked. */
void *kmemcheck_shadow_lookup(unsigned long address);

/* Test size shadow bytes; returns the status byte deciding the outcome. */
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
/* Mark size shadow bytes as initialized. */
void kmemcheck_shadow_set(void *shadow, unsigned int size);

#endif
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 605c8be06217..18d244f70205 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -40,23 +40,22 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
40 | 40 | ||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) |
42 | { | 42 | { |
43 | u64 i, count; | 43 | u64 *p, *start, *end; |
44 | u64 *start; | ||
45 | u64 start_bad, last_bad; | 44 | u64 start_bad, last_bad; |
46 | u64 start_phys_aligned; | 45 | u64 start_phys_aligned; |
47 | size_t incr; | 46 | const size_t incr = sizeof(pattern); |
48 | 47 | ||
49 | incr = sizeof(pattern); | ||
50 | start_phys_aligned = ALIGN(start_phys, incr); | 48 | start_phys_aligned = ALIGN(start_phys, incr); |
51 | count = (size - (start_phys_aligned - start_phys))/incr; | ||
52 | start = __va(start_phys_aligned); | 49 | start = __va(start_phys_aligned); |
50 | end = start + (size - (start_phys_aligned - start_phys)) / incr; | ||
53 | start_bad = 0; | 51 | start_bad = 0; |
54 | last_bad = 0; | 52 | last_bad = 0; |
55 | 53 | ||
56 | for (i = 0; i < count; i++) | 54 | for (p = start; p < end; p++) |
57 | start[i] = pattern; | 55 | *p = pattern; |
58 | for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { | 56 | |
59 | if (*start == pattern) | 57 | for (p = start; p < end; p++, start_phys_aligned += incr) { |
58 | if (*p == pattern) | ||
60 | continue; | 59 | continue; |
61 | if (start_phys_aligned == last_bad + incr) { | 60 | if (start_phys_aligned == last_bad + incr) { |
62 | last_bad += incr; | 61 | last_bad += incr; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2d05a12029dc..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* Initialize bootmem allocator for a node */ | 181 | /* Initialize bootmem allocator for a node */ |
182 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 182 | void __init |
183 | unsigned long end) | 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 184 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
186 | unsigned long bootmap_start, nodedata_phys; | 187 | unsigned long bootmap_start, nodedata_phys; |
187 | void *bootmap; | 188 | void *bootmap; |
188 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
189 | int nid; | 189 | int nid; |
190 | 190 | ||
191 | if (!end) | 191 | if (!end) |
192 | return; | 192 | return; |
193 | 193 | ||
194 | /* | ||
195 | * Don't confuse VM with a node that doesn't have the | ||
196 | * minimum amount of memory: | ||
197 | */ | ||
198 | if (end && (end - start) < NODE_MIN_SIZE) | ||
199 | return; | ||
200 | |||
194 | start = roundup(start, ZONE_ALIGN); | 201 | start = roundup(start, ZONE_ALIGN); |
195 | 202 | ||
196 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, |
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
272 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, |
273 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); |
274 | 281 | ||
275 | #ifdef CONFIG_ACPI_NUMA | ||
276 | srat_reserve_add_area(nodeid); | ||
277 | #endif | ||
278 | node_set_online(nodeid); | 282 | node_set_online(nodeid); |
279 | } | 283 | } |
280 | 284 | ||
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
578 | return pages; | 582 | return pages; |
579 | } | 583 | } |
580 | 584 | ||
581 | void __init paging_init(void) | ||
582 | { | ||
583 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
584 | |||
585 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
586 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
587 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
588 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
589 | |||
590 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
591 | sparse_init(); | ||
592 | |||
593 | free_area_init_nodes(max_zone_pfns); | ||
594 | } | ||
595 | |||
596 | static __init int numa_setup(char *opt) | 585 | static __init int numa_setup(char *opt) |
597 | { | 586 | { |
598 | if (!opt) | 587 | if (!opt) |
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt) | |||
606 | #ifdef CONFIG_ACPI_NUMA | 595 | #ifdef CONFIG_ACPI_NUMA |
607 | if (!strncmp(opt, "noacpi", 6)) | 596 | if (!strncmp(opt, "noacpi", 6)) |
608 | acpi_numa = -1; | 597 | acpi_numa = -1; |
609 | if (!strncmp(opt, "hotadd=", 7)) | ||
610 | hotadd_percent = simple_strtoul(opt+7, NULL, 10); | ||
611 | #endif | 598 | #endif |
612 | return 0; | 599 | return 0; |
613 | } | 600 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 797f9f107cb6..3cfe9ced8a4c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg) | |||
153 | */ | 153 | */ |
154 | __flush_tlb_all(); | 154 | __flush_tlb_all(); |
155 | 155 | ||
156 | if (cache && boot_cpu_data.x86_model >= 4) | 156 | if (cache && boot_cpu_data.x86 >= 4) |
157 | wbinvd(); | 157 | wbinvd(); |
158 | } | 158 | } |
159 | 159 | ||
@@ -208,20 +208,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, | |||
208 | int in_flags, struct page **pages) | 208 | int in_flags, struct page **pages) |
209 | { | 209 | { |
210 | unsigned int i, level; | 210 | unsigned int i, level; |
211 | unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ | ||
211 | 212 | ||
212 | BUG_ON(irqs_disabled()); | 213 | BUG_ON(irqs_disabled()); |
213 | 214 | ||
214 | on_each_cpu(__cpa_flush_range, NULL, 1); | 215 | on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); |
215 | 216 | ||
216 | if (!cache) | 217 | if (!cache || do_wbinvd) |
217 | return; | 218 | return; |
218 | 219 | ||
219 | /* 4M threshold */ | ||
220 | if (numpages >= 1024) { | ||
221 | if (boot_cpu_data.x86_model >= 4) | ||
222 | wbinvd(); | ||
223 | return; | ||
224 | } | ||
225 | /* | 220 | /* |
226 | * We only need to flush on one CPU, | 221 | * We only need to flush on one CPU, |
227 | * clflush is a MESI-coherent instruction that | 222 | * clflush is a MESI-coherent instruction that |
@@ -475,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
475 | 470 | ||
476 | if (!debug_pagealloc) | 471 | if (!debug_pagealloc) |
477 | spin_unlock(&cpa_lock); | 472 | spin_unlock(&cpa_lock); |
478 | base = alloc_pages(GFP_KERNEL, 0); | 473 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); |
479 | if (!debug_pagealloc) | 474 | if (!debug_pagealloc) |
480 | spin_lock(&cpa_lock); | 475 | spin_lock(&cpa_lock); |
481 | if (!base) | 476 | if (!base) |
@@ -844,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
844 | 839 | ||
845 | vm_unmap_aliases(); | 840 | vm_unmap_aliases(); |
846 | 841 | ||
847 | /* | ||
848 | * If we're called with lazy mmu updates enabled, the | ||
849 | * in-memory pte state may be stale. Flush pending updates to | ||
850 | * bring them up to date. | ||
851 | */ | ||
852 | arch_flush_lazy_mmu_mode(); | ||
853 | |||
854 | cpa.vaddr = addr; | 842 | cpa.vaddr = addr; |
855 | cpa.pages = pages; | 843 | cpa.pages = pages; |
856 | cpa.numpages = numpages; | 844 | cpa.numpages = numpages; |
@@ -895,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
895 | } else | 883 | } else |
896 | cpa_flush_all(cache); | 884 | cpa_flush_all(cache); |
897 | 885 | ||
898 | /* | ||
899 | * If we've been called with lazy mmu updates enabled, then | ||
900 | * make sure that everything gets flushed out before we | ||
901 | * return. | ||
902 | */ | ||
903 | arch_flush_lazy_mmu_mode(); | ||
904 | |||
905 | out: | 886 | out: |
906 | return ret; | 887 | return ret; |
907 | } | 888 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7aa03a5389f5..8e43bdd45456 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -4,9 +4,11 @@ | |||
4 | #include <asm/tlb.h> | 4 | #include <asm/tlb.h> |
5 | #include <asm/fixmap.h> | 5 | #include <asm/fixmap.h> |
6 | 6 | ||
7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | ||
8 | |||
7 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
8 | { | 10 | { |
9 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 11 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
10 | } | 12 | } |
11 | 13 | ||
12 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | 14 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) |
@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
14 | struct page *pte; | 16 | struct page *pte; |
15 | 17 | ||
16 | #ifdef CONFIG_HIGHPTE | 18 | #ifdef CONFIG_HIGHPTE |
17 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | 19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); |
18 | #else | 20 | #else |
19 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | 21 | pte = alloc_pages(PGALLOC_GFP, 0); |
20 | #endif | 22 | #endif |
21 | if (pte) | 23 | if (pte) |
22 | pgtable_page_ctor(pte); | 24 | pgtable_page_ctor(pte); |
@@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[]) | |||
161 | bool failed = false; | 163 | bool failed = false; |
162 | 164 | ||
163 | for(i = 0; i < PREALLOCATED_PMDS; i++) { | 165 | for(i = 0; i < PREALLOCATED_PMDS; i++) { |
164 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | 166 | pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); |
165 | if (pmd == NULL) | 167 | if (pmd == NULL) |
166 | failed = true; | 168 | failed = true; |
167 | pmds[i] = pmd; | 169 | pmds[i] = pmd; |
@@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
228 | pmd_t *pmds[PREALLOCATED_PMDS]; | 230 | pmd_t *pmds[PREALLOCATED_PMDS]; |
229 | unsigned long flags; | 231 | unsigned long flags; |
230 | 232 | ||
231 | pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | 233 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
232 | 234 | ||
233 | if (pgd == NULL) | 235 | if (pgd == NULL) |
234 | goto out; | 236 | goto out; |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 01765955baaf..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata; | |||
31 | static nodemask_t cpu_nodes_parsed __initdata; | 31 | static nodemask_t cpu_nodes_parsed __initdata; |
32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
33 | static struct bootnode nodes_add[MAX_NUMNODES]; | 33 | static struct bootnode nodes_add[MAX_NUMNODES]; |
34 | static int found_add_area __initdata; | ||
35 | int hotadd_percent __initdata = 0; | ||
36 | 34 | ||
37 | static int num_node_memblks __initdata; | 35 | static int num_node_memblks __initdata; |
38 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | 36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; |
39 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | 37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; |
40 | 38 | ||
41 | /* Too small nodes confuse the VM badly. Usually they result | ||
42 | from BIOS bugs. */ | ||
43 | #define NODE_MIN_SIZE (4*1024*1024) | ||
44 | |||
45 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
46 | { | 40 | { |
47 | return acpi_map_pxm_to_node(pxm); | 41 | return acpi_map_pxm_to_node(pxm); |
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
66 | { | 60 | { |
67 | struct bootnode *nd = &nodes[i]; | 61 | struct bootnode *nd = &nodes[i]; |
68 | 62 | ||
69 | if (found_add_area) | ||
70 | return; | ||
71 | |||
72 | if (nd->start < start) { | 63 | if (nd->start < start) { |
73 | nd->start = start; | 64 | nd->start = start; |
74 | if (nd->end < nd->start) | 65 | if (nd->end < nd->start) |
@@ -86,7 +77,6 @@ static __init void bad_srat(void) | |||
86 | int i; | 77 | int i; |
87 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 78 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
88 | acpi_numa = -1; | 79 | acpi_numa = -1; |
89 | found_add_area = 0; | ||
90 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 80 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
91 | apicid_to_node[i] = NUMA_NO_NODE; | 81 | apicid_to_node[i] = NUMA_NO_NODE; |
92 | for (i = 0; i < MAX_NUMNODES; i++) | 82 | for (i = 0; i < MAX_NUMNODES; i++) |
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
182 | pxm, apic_id, node); | 172 | pxm, apic_id, node); |
183 | } | 173 | } |
184 | 174 | ||
185 | static int update_end_of_memory(unsigned long end) {return -1;} | ||
186 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | ||
187 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 175 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
188 | static inline int save_add_info(void) {return 1;} | 176 | static inline int save_add_info(void) {return 1;} |
189 | #else | 177 | #else |
190 | static inline int save_add_info(void) {return 0;} | 178 | static inline int save_add_info(void) {return 0;} |
191 | #endif | 179 | #endif |
192 | /* | 180 | /* |
193 | * Update nodes_add and decide if to include add are in the zone. | 181 | * Update nodes_add[] |
194 | * Both SPARSE and RESERVE need nodes_add information. | 182 | * This code supports one contiguous hot add area per node |
195 | * This code supports one contiguous hot add area per node. | ||
196 | */ | 183 | */ |
197 | static int __init | 184 | static void __init |
198 | reserve_hotadd(int node, unsigned long start, unsigned long end) | 185 | update_nodes_add(int node, unsigned long start, unsigned long end) |
199 | { | 186 | { |
200 | unsigned long s_pfn = start >> PAGE_SHIFT; | 187 | unsigned long s_pfn = start >> PAGE_SHIFT; |
201 | unsigned long e_pfn = end >> PAGE_SHIFT; | 188 | unsigned long e_pfn = end >> PAGE_SHIFT; |
202 | int ret = 0, changed = 0; | 189 | int changed = 0; |
203 | struct bootnode *nd = &nodes_add[node]; | 190 | struct bootnode *nd = &nodes_add[node]; |
204 | 191 | ||
205 | /* I had some trouble with strange memory hotadd regions breaking | 192 | /* I had some trouble with strange memory hotadd regions breaking |
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
210 | mistakes */ | 197 | mistakes */ |
211 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | 198 | if ((signed long)(end - start) < NODE_MIN_SIZE) { |
212 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | 199 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); |
213 | return -1; | 200 | return; |
214 | } | 201 | } |
215 | 202 | ||
216 | /* This check might be a bit too strict, but I'm keeping it for now. */ | 203 | /* This check might be a bit too strict, but I'm keeping it for now. */ |
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
218 | printk(KERN_ERR | 205 | printk(KERN_ERR |
219 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | 206 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", |
220 | s_pfn, e_pfn); | 207 | s_pfn, e_pfn); |
221 | return -1; | 208 | return; |
222 | } | ||
223 | |||
224 | if (!hotadd_enough_memory(&nodes_add[node])) { | ||
225 | printk(KERN_ERR "SRAT: Hotplug area too large\n"); | ||
226 | return -1; | ||
227 | } | 209 | } |
228 | 210 | ||
229 | /* Looks good */ | 211 | /* Looks good */ |
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
245 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 227 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
246 | } | 228 | } |
247 | 229 | ||
248 | ret = update_end_of_memory(nd->end); | ||
249 | |||
250 | if (changed) | 230 | if (changed) |
251 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); | 231 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
252 | return ret; | 232 | nd->start, nd->end); |
253 | } | 233 | } |
254 | 234 | ||
255 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 235 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
310 | start, end); | 290 | start, end); |
311 | e820_register_active_regions(node, start >> PAGE_SHIFT, | 291 | e820_register_active_regions(node, start >> PAGE_SHIFT, |
312 | end >> PAGE_SHIFT); | 292 | end >> PAGE_SHIFT); |
313 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, | ||
314 | nd->end >> PAGE_SHIFT); | ||
315 | 293 | ||
316 | if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
317 | (reserve_hotadd(node, start, end) < 0)) { | 295 | update_nodes_add(node, start, end); |
318 | /* Ignore hotadd region. Undo damage */ | 296 | /* restore nodes[node] */ |
319 | printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); | ||
320 | *nd = oldnode; | 297 | *nd = oldnode; |
321 | if ((nd->start | nd->end) == 0) | 298 | if ((nd->start | nd->end) == 0) |
322 | node_clear(node, nodes_parsed); | 299 | node_clear(node, nodes_parsed); |
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
345 | pxmram = 0; | 322 | pxmram = 0; |
346 | } | 323 | } |
347 | 324 | ||
348 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); | 325 | e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); |
349 | /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ | 326 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
350 | if ((long)(e820ram - pxmram) >= 1*1024*1024) { | 327 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { |
351 | printk(KERN_ERR | 328 | printk(KERN_ERR |
352 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | 329 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", |
353 | (pxmram << PAGE_SHIFT) >> 20, | 330 | (pxmram << PAGE_SHIFT) >> 20, |
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
357 | return 1; | 334 | return 1; |
358 | } | 335 | } |
359 | 336 | ||
360 | static void __init unparse_node(int node) | ||
361 | { | ||
362 | int i; | ||
363 | node_clear(node, nodes_parsed); | ||
364 | node_clear(node, cpu_nodes_parsed); | ||
365 | for (i = 0; i < MAX_LOCAL_APIC; i++) { | ||
366 | if (apicid_to_node[i] == node) | ||
367 | apicid_to_node[i] = NUMA_NO_NODE; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
372 | 338 | ||
373 | /* Use the information discovered above to actually set up the nodes. */ | 339 | /* Use the information discovered above to actually set up the nodes. */ |
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
379 | return -1; | 345 | return -1; |
380 | 346 | ||
381 | /* First clean up the node list */ | 347 | /* First clean up the node list */ |
382 | for (i = 0; i < MAX_NUMNODES; i++) { | 348 | for (i = 0; i < MAX_NUMNODES; i++) |
383 | cutoff_node(i, start, end); | 349 | cutoff_node(i, start, end); |
384 | /* | ||
385 | * don't confuse VM with a node that doesn't have the | ||
386 | * minimum memory. | ||
387 | */ | ||
388 | if (nodes[i].end && | ||
389 | (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { | ||
390 | unparse_node(i); | ||
391 | node_set_offline(i); | ||
392 | } | ||
393 | } | ||
394 | 350 | ||
395 | if (!nodes_cover_memory(nodes)) { | 351 | if (!nodes_cover_memory(nodes)) { |
396 | bad_srat(); | 352 | bad_srat(); |
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
423 | 379 | ||
424 | if (node == NUMA_NO_NODE) | 380 | if (node == NUMA_NO_NODE) |
425 | continue; | 381 | continue; |
426 | if (!node_isset(node, node_possible_map)) | 382 | if (!node_online(node)) |
427 | numa_clear_node(i); | 383 | numa_clear_node(i); |
428 | } | 384 | } |
429 | numa_init_array(); | 385 | numa_init_array(); |
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b) | |||
510 | } | 466 | } |
511 | #endif /* CONFIG_NUMA_EMU */ | 467 | #endif /* CONFIG_NUMA_EMU */ |
512 | 468 | ||
513 | void __init srat_reserve_add_area(int nodeid) | ||
514 | { | ||
515 | if (found_add_area && nodes_add[nodeid].end) { | ||
516 | u64 total_mb; | ||
517 | |||
518 | printk(KERN_INFO "SRAT: Reserving hot-add memory space " | ||
519 | "for node %d at %Lx-%Lx\n", | ||
520 | nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); | ||
521 | total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) | ||
522 | >> PAGE_SHIFT; | ||
523 | total_mb *= sizeof(struct page); | ||
524 | total_mb >>= 20; | ||
525 | printk(KERN_INFO "SRAT: This will cost you %Lu MB of " | ||
526 | "pre-allocated memory.\n", (unsigned long long)total_mb); | ||
527 | reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, | ||
528 | nodes_add[nodeid].end - nodes_add[nodeid].start, | ||
529 | BOOTMEM_DEFAULT); | ||
530 | } | ||
531 | } | ||
532 | |||
533 | int __node_distance(int a, int b) | 469 | int __node_distance(int a, int b) |
534 | { | 470 | { |
535 | int index; | 471 | int index; |