author | Ingo Molnar <mingo@elte.hu> | 2009-06-11 11:55:42 -0400
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-11 11:55:42 -0400
commit | 940010c5a314a7bd9b498593bc6ba1718ac5aec5 (patch)
tree | d141e08ced08c40c6a8e3ab2cdecde5ff14e560f /arch/x86/mm
parent | 8dc8e5e8bc0ce00b0f656bf972f67cd8a72759e5 (diff)
parent | 991ec02cdca33b03a132a0cacfe6f0aa0be9aa8d (diff)
Merge branch 'linus' into perfcounters/core
Conflicts:
arch/x86/kernel/irqinit.c
arch/x86/kernel/irqinit_64.c
arch/x86/kernel/traps.c
arch/x86/mm/fault.c
include/linux/sched.h
kernel/exit.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 7
-rw-r--r-- | arch/x86/mm/fault.c | 59
-rw-r--r-- | arch/x86/mm/highmem_32.c | 2
-rw-r--r-- | arch/x86/mm/init.c | 78
-rw-r--r-- | arch/x86/mm/init_32.c | 61
-rw-r--r-- | arch/x86/mm/init_64.c | 47
-rw-r--r-- | arch/x86/mm/iomap_32.c | 1
-rw-r--r-- | arch/x86/mm/kmmio.c | 104
-rw-r--r-- | arch/x86/mm/memtest.c | 14
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 2
-rw-r--r-- | arch/x86/mm/numa_64.c | 33
-rw-r--r-- | arch/x86/mm/pageattr.c | 14
-rw-r--r-- | arch/x86/mm/srat_64.c | 98
13 files changed, 190 insertions, 330 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e7277cbcfb40..a725b7f760ae 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		    st->current_address >= st->marker[1].start_address) {
 			const char *unit = units;
 			unsigned long delta;
+			int width = sizeof(unsigned long) * 2;
 
 			/*
 			 * Now print the actual finished series
 			 */
-			seq_printf(m, "0x%p-0x%p   ",
-				   (void *)st->start_address,
-				   (void *)st->current_address);
+			seq_printf(m, "0x%0*lx-0x%0*lx   ",
+				   width, st->start_address,
+				   width, st->current_address);
 
 			delta = (st->current_address - st->start_address) >> 10;
 			while (!(delta & 1023) && unit[1]) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5c6d816f30b4..c6acc6326374 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,41 +3,17 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-#include <linux/perf_counter.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h>		/* STACK_END_MAGIC		*/
+#include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
+#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+
+#include <asm/traps.h>			/* dotraplinkage, ...		*/
+#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 
 /*
  * Page fault error code bits:
@@ -226,12 +202,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
@@ -539,8 +513,6 @@ bad:
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	static int once;
-
 	if (address != regs->ip)
 		return 0;
 
@@ -550,10 +522,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 		address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!once) {
-			printk(errata93_warning);
-			once = 1;
-		}
+		printk_once(errata93_warning);
 		regs->ip = address;
 		return 1;
 	}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 8126e8d1a2a4..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
 	}
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ae4f7b5d7104..34c1bfb64f1c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 
@@ -10,6 +11,9 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __initdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
@@ -23,6 +27,69 @@ int direct_gbpages
 #endif
 ;
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
@@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 	 */
 #ifdef CONFIG_X86_32
 	start = 0x7000;
-	e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
-					tables, PAGE_SIZE);
-#else /* CONFIG_X86_64 */
+#else
 	start = 0x8000;
-	e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
 #endif
+	e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+					tables, PAGE_SIZE);
 	if (e820_table_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
@@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
@@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 		set_in_cr4(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
-#endif
 
 	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 749559ed80f5..949708d7a481 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,12 +49,9 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
@@ -587,61 +584,9 @@ void zap_low_mappings(void)
 	flush_tlb_all();
 }
 
-int nx_enabled;
-
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str || !strcmp(str, "on")) {
-		if (cpu_has_nx) {
-			__supported_pte_mask |= _PAGE_NX;
-			disable_nx = 0;
-		}
-	} else {
-		if (!strcmp(str, "off")) {
-			disable_nx = 1;
-			__supported_pte_mask &= ~_PAGE_NX;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn,
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memory_present(0, 0, highend_pfn);
 	e820_register_active_regions(0, 0, highend_pfn);
+	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memory_present(0, 0, max_low_pfn);
 	e820_register_active_regions(0, 0, max_low_pfn);
+	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1753e8020df6..52bb9519bb86 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,18 +50,8 @@
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 static unsigned long dma_reserve __initdata;
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static int __init parse_direct_gbpages_off(char *arg)
 {
 	direct_gbpages = 0;
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on);
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on	Enable (default)
- * off	Disable
- */
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -638,11 +596,10 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-	memory_present(0, 0, max_pfn);
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
 }
-#endif
 
 /*
  * Memory hotplug specific functions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 8056545e2d39..fe6f84ca121e 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 50dc802a1c46..16ccbd77917f 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
 	unsigned long page; /* location of the fault page */
-	bool old_presence; /* page presence prior to arming */
+	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
 	/*
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
 	struct list_head *head;
-	struct kmmio_fault_page *p;
+	struct kmmio_fault_page *f;
 
 	page &= PAGE_MASK;
 	head = kmmio_page_list(page);
-	list_for_each_entry_rcu(p, head, list) {
-		if (p->page == page)
-			return p;
+	list_for_each_entry_rcu(f, head, list) {
+		if (f->page == page)
+			return f;
 	}
 	return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
 	pmdval_t v = pmd_val(*pmd);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(addr, &level);
+	pte_t *pte = lookup_address(f->page, &level);
 
 	if (!pte) {
-		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+		pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
 		return -1;
 	}
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		set_pmd_presence((pmd_t *)pte, present, old);
+		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
 		break;
 	case PG_LEVEL_4K:
-		set_pte_presence(pte, present, old);
+		clear_pte_presence(pte, clear, &f->old_presence);
 		break;
 	default:
 		pr_err("kmmio: unexpected page level 0x%x.\n", level);
 		return -1;
 	}
 
-	__flush_tlb_one(addr);
+	__flush_tlb_one(f->page);
 	return 0;
 }
 
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
 	if (f->armed) {
 		pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-			f->page, f->count, f->old_presence);
+			f->page, f->count, !!f->old_presence);
 	}
-	ret = set_page_presence(f->page, false, &f->old_presence);
+	ret = clear_page_presence(f, true);
 	WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
 	f->armed = true;
 	return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	bool tmp;
-	int ret = set_page_presence(f->page, f->old_presence, &tmp);
+	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
 			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
 	f->armed = false;
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
 							smp_processor_id());
 		goto out;
 	}
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
 						head,
 						struct kmmio_delayed_release,
 						rcu);
-	struct kmmio_fault_page *p = dr->release_list;
-	while (p) {
-		struct kmmio_fault_page *next = p->release_next;
-		BUG_ON(p->count);
-		kfree(p);
-		p = next;
+	struct kmmio_fault_page *f = dr->release_list;
+	while (f) {
+		struct kmmio_fault_page *next = f->release_next;
+		BUG_ON(f->count);
+		kfree(f);
+		f = next;
 	}
 	kfree(dr);
 }
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 {
 	struct kmmio_delayed_release *dr =
 		container_of(head, struct kmmio_delayed_release, rcu);
-	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page *f = dr->release_list;
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	while (p) {
-		if (!p->count) {
-			list_del_rcu(&p->list);
-			prevp = &p->release_next;
+	while (f) {
+		if (!f->count) {
+			list_del_rcu(&f->list);
+			prevp = &f->release_next;
 		} else {
-			*prevp = p->release_next;
+			*prevp = f->release_next;
 		}
-		p = p->release_next;
+		f = f->release_next;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-								void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
 	struct die_args *arg = args;
 
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
 	int i;
+
 	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
 	return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+	}
+}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 605c8be06217..c0bedcd10f97 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -40,23 +40,23 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 {
-	u64 i, count;
-	u64 *start;
+	u64 *p;
+	void *start, *end;
 	u64 start_bad, last_bad;
 	u64 start_phys_aligned;
 	size_t incr;
 
 	incr = sizeof(pattern);
 	start_phys_aligned = ALIGN(start_phys, incr);
-	count = (size - (start_phys_aligned - start_phys))/incr;
 	start = __va(start_phys_aligned);
+	end = start + size - (start_phys_aligned - start_phys);
 	start_bad = 0;
 	last_bad = 0;
 
-	for (i = 0; i < count; i++)
-		start[i] = pattern;
-	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-		if (*start == pattern)
+	for (p = start; p < end; p++)
+		*p = pattern;
+	for (p = start; p < end; p++, start_phys_aligned += incr) {
+		if (*p == pattern)
 			continue;
 		if (start_phys_aligned == last_bad + incr) {
 			last_bad += incr;
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed8b402..132772a8ec57 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@
 
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
+	kmmio_init();
 	enter_uniprocessor();
 	spin_lock_irq(&trace_lock);
 	atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
+	kmmio_cleanup();
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d05a12029dc..459913beac71 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end)
+void __init
+setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
 	if (!end)
 		return;
 
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 			bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-void __init paging_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
-	sparse_memory_present_with_active_regions(MAX_NUMNODES);
-	sparse_init();
-
-	free_area_init_nodes(max_zone_pfns);
-}
-
 static __init int numa_setup(char *opt)
 {
 	if (!opt)
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e17efed088c5..6ce9518fe2ac 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -839,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -890,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 out:
 	return ret;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 01765955baaf..2dfcbf9df2ae 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +77,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node])) {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-					nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		pxmram = 0;
 	}
 
-	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
-	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
-	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+	e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
 		printk(KERN_ERR
 		"SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
 			(pxmram << PAGE_SHIFT) >> 20,
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-		    (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;