diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-10 19:13:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-10 19:13:20 -0400 |
commit | bb7762961d3ce745688e9050e914c1d3f980268d (patch) | |
tree | e841f58cd6188cc44583cd055798b4475a4d68f0 /arch/x86/mm | |
parent | 48c72d1ab4ec86789a23aed0b0b5f31ac083c0c6 (diff) | |
parent | 35d5a9a61490bf39d2e48d7f499c8c801a39ebe9 (diff) |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (22 commits)
x86: fix system without memory on node0
x86, mm: Fix node_possible_map logic
mm, x86: remove MEMORY_HOTPLUG_RESERVE related code
x86: make sparse mem work in non-NUMA mode
x86: process.c, remove useless headers
x86: merge process.c a bit
x86: use sparse_memory_present_with_active_regions() on UMA
x86: unify 64-bit UMA and NUMA paging_init()
x86: Allow 1MB of slack between the e820 map and SRAT, not 4GB
x86: Sanity check the e820 against the SRAT table using e820 map only
x86: clean up and print out initial max_pfn_mapped
x86/pci: remove rounding quirk from e820_setup_gap()
x86, e820, pci: reserve extra free space near end of RAM
x86: fix typo in address space documentation
x86: 46 bit physical address support on 64 bits
x86, mm: fault.c, use printk_once() in is_errata93()
x86: move per-cpu mmu_gathers to mm/init.c
x86: move max_pfn_mapped and max_low_pfn_mapped to setup.c
x86: unify noexec handling
x86: remove (null) in /sys kernel_page_tables
...
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 51 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 77 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 61 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 47 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 33 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 98 |
7 files changed, 116 insertions, 258 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cbcfb40..a725b7f760ae 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
161 | st->current_address >= st->marker[1].start_address) { | 161 | st->current_address >= st->marker[1].start_address) { |
162 | const char *unit = units; | 162 | const char *unit = units; |
163 | unsigned long delta; | 163 | unsigned long delta; |
164 | int width = sizeof(unsigned long) * 2; | ||
164 | 165 | ||
165 | /* | 166 | /* |
166 | * Now print the actual finished series | 167 | * Now print the actual finished series |
167 | */ | 168 | */ |
168 | seq_printf(m, "0x%p-0x%p ", | 169 | seq_printf(m, "0x%0*lx-0x%0*lx ", |
169 | (void *)st->start_address, | 170 | width, st->start_address, |
170 | (void *)st->current_address); | 171 | width, st->current_address); |
171 | 172 | ||
172 | delta = (st->current_address - st->start_address) >> 10; | 173 | delta = (st->current_address - st->start_address) >> 10; |
173 | while (!(delta & 1023) && unit[1]) { | 174 | while (!(delta & 1023) && unit[1]) { |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..b9ca6d767dbb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -3,40 +3,16 @@ | |||
3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. |
4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar | 4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar |
5 | */ | 5 | */ |
6 | #include <linux/interrupt.h> | 6 | #include <linux/magic.h> /* STACK_END_MAGIC */ |
7 | #include <linux/mmiotrace.h> | 7 | #include <linux/sched.h> /* test_thread_flag(), ... */ |
8 | #include <linux/bootmem.h> | 8 | #include <linux/kdebug.h> /* oops_begin/end, ... */ |
9 | #include <linux/compiler.h> | 9 | #include <linux/module.h> /* search_exception_table */ |
10 | #include <linux/highmem.h> | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/uaccess.h> | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/vmalloc.h> | 13 | |
14 | #include <linux/vt_kern.h> | 14 | #include <asm/traps.h> /* dotraplinkage, ... */ |
15 | #include <linux/signal.h> | 15 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ptrace.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/magic.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/mman.h> | ||
27 | #include <linux/tty.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/mm.h> | ||
30 | |||
31 | #include <asm-generic/sections.h> | ||
32 | |||
33 | #include <asm/tlbflush.h> | ||
34 | #include <asm/pgalloc.h> | ||
35 | #include <asm/segment.h> | ||
36 | #include <asm/system.h> | ||
37 | #include <asm/proto.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/desc.h> | ||
40 | 16 | ||
41 | /* | 17 | /* |
42 | * Page fault error code bits: | 18 | * Page fault error code bits: |
@@ -538,8 +514,6 @@ bad: | |||
538 | static int is_errata93(struct pt_regs *regs, unsigned long address) | 514 | static int is_errata93(struct pt_regs *regs, unsigned long address) |
539 | { | 515 | { |
540 | #ifdef CONFIG_X86_64 | 516 | #ifdef CONFIG_X86_64 |
541 | static int once; | ||
542 | |||
543 | if (address != regs->ip) | 517 | if (address != regs->ip) |
544 | return 0; | 518 | return 0; |
545 | 519 | ||
@@ -549,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) | |||
549 | address |= 0xffffffffUL << 32; | 523 | address |= 0xffffffffUL << 32; |
550 | if ((address >= (u64)_stext && address <= (u64)_etext) || | 524 | if ((address >= (u64)_stext && address <= (u64)_etext) || |
551 | (address >= MODULES_VADDR && address <= MODULES_END)) { | 525 | (address >= MODULES_VADDR && address <= MODULES_END)) { |
552 | if (!once) { | 526 | printk_once(errata93_warning); |
553 | printk(errata93_warning); | ||
554 | once = 1; | ||
555 | } | ||
556 | regs->ip = address; | 527 | regs->ip = address; |
557 | return 1; | 528 | return 1; |
558 | } | 529 | } |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4b98df0973b9..34c1bfb64f1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -11,6 +11,9 @@ | |||
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
14 | #include <asm/tlb.h> | ||
15 | |||
16 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
14 | 17 | ||
15 | unsigned long __initdata e820_table_start; | 18 | unsigned long __initdata e820_table_start; |
16 | unsigned long __meminitdata e820_table_end; | 19 | unsigned long __meminitdata e820_table_end; |
@@ -24,6 +27,69 @@ int direct_gbpages | |||
24 | #endif | 27 | #endif |
25 | ; | 28 | ; |
26 | 29 | ||
30 | int nx_enabled; | ||
31 | |||
32 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
33 | static int disable_nx __cpuinitdata; | ||
34 | |||
35 | /* | ||
36 | * noexec = on|off | ||
37 | * | ||
38 | * Control non-executable mappings for processes. | ||
39 | * | ||
40 | * on Enable | ||
41 | * off Disable | ||
42 | */ | ||
43 | static int __init noexec_setup(char *str) | ||
44 | { | ||
45 | if (!str) | ||
46 | return -EINVAL; | ||
47 | if (!strncmp(str, "on", 2)) { | ||
48 | __supported_pte_mask |= _PAGE_NX; | ||
49 | disable_nx = 0; | ||
50 | } else if (!strncmp(str, "off", 3)) { | ||
51 | disable_nx = 1; | ||
52 | __supported_pte_mask &= ~_PAGE_NX; | ||
53 | } | ||
54 | return 0; | ||
55 | } | ||
56 | early_param("noexec", noexec_setup); | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_X86_PAE | ||
60 | static void __init set_nx(void) | ||
61 | { | ||
62 | unsigned int v[4], l, h; | ||
63 | |||
64 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
65 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
66 | |||
67 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
68 | rdmsr(MSR_EFER, l, h); | ||
69 | l |= EFER_NX; | ||
70 | wrmsr(MSR_EFER, l, h); | ||
71 | nx_enabled = 1; | ||
72 | __supported_pte_mask |= _PAGE_NX; | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | #else | ||
77 | static inline void set_nx(void) | ||
78 | { | ||
79 | } | ||
80 | #endif | ||
81 | |||
82 | #ifdef CONFIG_X86_64 | ||
83 | void __cpuinit check_efer(void) | ||
84 | { | ||
85 | unsigned long efer; | ||
86 | |||
87 | rdmsrl(MSR_EFER, efer); | ||
88 | if (!(efer & EFER_NX) || disable_nx) | ||
89 | __supported_pte_mask &= ~_PAGE_NX; | ||
90 | } | ||
91 | #endif | ||
92 | |||
27 | static void __init find_early_table_space(unsigned long end, int use_pse, | 93 | static void __init find_early_table_space(unsigned long end, int use_pse, |
28 | int use_gbpages) | 94 | int use_gbpages) |
29 | { | 95 | { |
@@ -67,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
67 | */ | 133 | */ |
68 | #ifdef CONFIG_X86_32 | 134 | #ifdef CONFIG_X86_32 |
69 | start = 0x7000; | 135 | start = 0x7000; |
70 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 136 | #else |
71 | tables, PAGE_SIZE); | ||
72 | #else /* CONFIG_X86_64 */ | ||
73 | start = 0x8000; | 137 | start = 0x8000; |
74 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
75 | #endif | 138 | #endif |
139 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
140 | tables, PAGE_SIZE); | ||
76 | if (e820_table_start == -1UL) | 141 | if (e820_table_start == -1UL) |
77 | panic("Cannot find space for the kernel page tables"); | 142 | panic("Cannot find space for the kernel page tables"); |
78 | 143 | ||
@@ -160,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
160 | use_gbpages = direct_gbpages; | 225 | use_gbpages = direct_gbpages; |
161 | #endif | 226 | #endif |
162 | 227 | ||
163 | #ifdef CONFIG_X86_32 | ||
164 | #ifdef CONFIG_X86_PAE | ||
165 | set_nx(); | 228 | set_nx(); |
166 | if (nx_enabled) | 229 | if (nx_enabled) |
167 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | 230 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); |
168 | #endif | ||
169 | 231 | ||
170 | /* Enable PSE if available */ | 232 | /* Enable PSE if available */ |
171 | if (cpu_has_pse) | 233 | if (cpu_has_pse) |
@@ -176,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
176 | set_in_cr4(X86_CR4_PGE); | 238 | set_in_cr4(X86_CR4_PGE); |
177 | __supported_pte_mask |= _PAGE_GLOBAL; | 239 | __supported_pte_mask |= _PAGE_GLOBAL; |
178 | } | 240 | } |
179 | #endif | ||
180 | 241 | ||
181 | if (use_gbpages) | 242 | if (use_gbpages) |
182 | page_size_mask |= 1 << PG_LEVEL_1G; | 243 | page_size_mask |= 1 << PG_LEVEL_1G; |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559ed80f5..949708d7a481 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -49,12 +49,9 @@ | |||
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/page_types.h> | ||
52 | #include <asm/init.h> | 53 | #include <asm/init.h> |
53 | 54 | ||
54 | unsigned long max_low_pfn_mapped; | ||
55 | unsigned long max_pfn_mapped; | ||
56 | |||
57 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
58 | unsigned long highstart_pfn, highend_pfn; | 55 | unsigned long highstart_pfn, highend_pfn; |
59 | 56 | ||
60 | static noinline int do_test_wp_bit(void); | 57 | static noinline int do_test_wp_bit(void); |
@@ -587,61 +584,9 @@ void zap_low_mappings(void) | |||
587 | flush_tlb_all(); | 584 | flush_tlb_all(); |
588 | } | 585 | } |
589 | 586 | ||
590 | int nx_enabled; | ||
591 | |||
592 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); | 587 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); |
593 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 588 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
594 | 589 | ||
595 | #ifdef CONFIG_X86_PAE | ||
596 | |||
597 | static int disable_nx __initdata; | ||
598 | |||
599 | /* | ||
600 | * noexec = on|off | ||
601 | * | ||
602 | * Control non executable mappings. | ||
603 | * | ||
604 | * on Enable | ||
605 | * off Disable | ||
606 | */ | ||
607 | static int __init noexec_setup(char *str) | ||
608 | { | ||
609 | if (!str || !strcmp(str, "on")) { | ||
610 | if (cpu_has_nx) { | ||
611 | __supported_pte_mask |= _PAGE_NX; | ||
612 | disable_nx = 0; | ||
613 | } | ||
614 | } else { | ||
615 | if (!strcmp(str, "off")) { | ||
616 | disable_nx = 1; | ||
617 | __supported_pte_mask &= ~_PAGE_NX; | ||
618 | } else { | ||
619 | return -EINVAL; | ||
620 | } | ||
621 | } | ||
622 | |||
623 | return 0; | ||
624 | } | ||
625 | early_param("noexec", noexec_setup); | ||
626 | |||
627 | void __init set_nx(void) | ||
628 | { | ||
629 | unsigned int v[4], l, h; | ||
630 | |||
631 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
632 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
633 | |||
634 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
635 | rdmsr(MSR_EFER, l, h); | ||
636 | l |= EFER_NX; | ||
637 | wrmsr(MSR_EFER, l, h); | ||
638 | nx_enabled = 1; | ||
639 | __supported_pte_mask |= _PAGE_NX; | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | #endif | ||
644 | |||
645 | /* user-defined highmem size */ | 590 | /* user-defined highmem size */ |
646 | static unsigned int highmem_pages = -1; | 591 | static unsigned int highmem_pages = -1; |
647 | 592 | ||
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn, | |||
761 | highstart_pfn = highend_pfn = max_pfn; | 706 | highstart_pfn = highend_pfn = max_pfn; |
762 | if (max_pfn > max_low_pfn) | 707 | if (max_pfn > max_low_pfn) |
763 | highstart_pfn = max_low_pfn; | 708 | highstart_pfn = max_low_pfn; |
764 | memory_present(0, 0, highend_pfn); | ||
765 | e820_register_active_regions(0, 0, highend_pfn); | 709 | e820_register_active_regions(0, 0, highend_pfn); |
710 | sparse_memory_present_with_active_regions(0); | ||
766 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 711 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
767 | pages_to_mb(highend_pfn - highstart_pfn)); | 712 | pages_to_mb(highend_pfn - highstart_pfn)); |
768 | num_physpages = highend_pfn; | 713 | num_physpages = highend_pfn; |
769 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 714 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
770 | #else | 715 | #else |
771 | memory_present(0, 0, max_low_pfn); | ||
772 | e820_register_active_regions(0, 0, max_low_pfn); | 716 | e820_register_active_regions(0, 0, max_low_pfn); |
717 | sparse_memory_present_with_active_regions(0); | ||
773 | num_physpages = max_low_pfn; | 718 | num_physpages = max_low_pfn; |
774 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 719 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
775 | #endif | 720 | #endif |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e8020df6..52bb9519bb86 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -50,18 +50,8 @@ | |||
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 51 | #include <asm/init.h> |
52 | 52 | ||
53 | /* | ||
54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
55 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
56 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
57 | */ | ||
58 | unsigned long max_low_pfn_mapped; | ||
59 | unsigned long max_pfn_mapped; | ||
60 | |||
61 | static unsigned long dma_reserve __initdata; | 53 | static unsigned long dma_reserve __initdata; |
62 | 54 | ||
63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
64 | |||
65 | static int __init parse_direct_gbpages_off(char *arg) | 55 | static int __init parse_direct_gbpages_off(char *arg) |
66 | { | 56 | { |
67 | direct_gbpages = 0; | 57 | direct_gbpages = 0; |
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 75 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 76 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
87 | 77 | ||
88 | static int disable_nx __cpuinitdata; | ||
89 | |||
90 | /* | ||
91 | * noexec=on|off | ||
92 | * Control non-executable mappings for 64-bit processes. | ||
93 | * | ||
94 | * on Enable (default) | ||
95 | * off Disable | ||
96 | */ | ||
97 | static int __init nonx_setup(char *str) | ||
98 | { | ||
99 | if (!str) | ||
100 | return -EINVAL; | ||
101 | if (!strncmp(str, "on", 2)) { | ||
102 | __supported_pte_mask |= _PAGE_NX; | ||
103 | disable_nx = 0; | ||
104 | } else if (!strncmp(str, "off", 3)) { | ||
105 | disable_nx = 1; | ||
106 | __supported_pte_mask &= ~_PAGE_NX; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | early_param("noexec", nonx_setup); | ||
111 | |||
112 | void __cpuinit check_efer(void) | ||
113 | { | ||
114 | unsigned long efer; | ||
115 | |||
116 | rdmsrl(MSR_EFER, efer); | ||
117 | if (!(efer & EFER_NX) || disable_nx) | ||
118 | __supported_pte_mask &= ~_PAGE_NX; | ||
119 | } | ||
120 | |||
121 | int force_personality32; | 78 | int force_personality32; |
122 | 79 | ||
123 | /* | 80 | /* |
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
628 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 585 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); |
629 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 586 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
630 | } | 587 | } |
588 | #endif | ||
631 | 589 | ||
632 | void __init paging_init(void) | 590 | void __init paging_init(void) |
633 | { | 591 | { |
@@ -638,11 +596,10 @@ void __init paging_init(void) | |||
638 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 596 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
639 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 597 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
640 | 598 | ||
641 | memory_present(0, 0, max_pfn); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
642 | sparse_init(); | 600 | sparse_init(); |
643 | free_area_init_nodes(max_zone_pfns); | 601 | free_area_init_nodes(max_zone_pfns); |
644 | } | 602 | } |
645 | #endif | ||
646 | 603 | ||
647 | /* | 604 | /* |
648 | * Memory hotplug specific functions | 605 | * Memory hotplug specific functions |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2d05a12029dc..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* Initialize bootmem allocator for a node */ | 181 | /* Initialize bootmem allocator for a node */ |
182 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 182 | void __init |
183 | unsigned long end) | 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 184 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
186 | unsigned long bootmap_start, nodedata_phys; | 187 | unsigned long bootmap_start, nodedata_phys; |
187 | void *bootmap; | 188 | void *bootmap; |
188 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
189 | int nid; | 189 | int nid; |
190 | 190 | ||
191 | if (!end) | 191 | if (!end) |
192 | return; | 192 | return; |
193 | 193 | ||
194 | /* | ||
195 | * Don't confuse VM with a node that doesn't have the | ||
196 | * minimum amount of memory: | ||
197 | */ | ||
198 | if (end && (end - start) < NODE_MIN_SIZE) | ||
199 | return; | ||
200 | |||
194 | start = roundup(start, ZONE_ALIGN); | 201 | start = roundup(start, ZONE_ALIGN); |
195 | 202 | ||
196 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, |
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
272 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, |
273 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); |
274 | 281 | ||
275 | #ifdef CONFIG_ACPI_NUMA | ||
276 | srat_reserve_add_area(nodeid); | ||
277 | #endif | ||
278 | node_set_online(nodeid); | 282 | node_set_online(nodeid); |
279 | } | 283 | } |
280 | 284 | ||
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
578 | return pages; | 582 | return pages; |
579 | } | 583 | } |
580 | 584 | ||
581 | void __init paging_init(void) | ||
582 | { | ||
583 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
584 | |||
585 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
586 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
587 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
588 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
589 | |||
590 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
591 | sparse_init(); | ||
592 | |||
593 | free_area_init_nodes(max_zone_pfns); | ||
594 | } | ||
595 | |||
596 | static __init int numa_setup(char *opt) | 585 | static __init int numa_setup(char *opt) |
597 | { | 586 | { |
598 | if (!opt) | 587 | if (!opt) |
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt) | |||
606 | #ifdef CONFIG_ACPI_NUMA | 595 | #ifdef CONFIG_ACPI_NUMA |
607 | if (!strncmp(opt, "noacpi", 6)) | 596 | if (!strncmp(opt, "noacpi", 6)) |
608 | acpi_numa = -1; | 597 | acpi_numa = -1; |
609 | if (!strncmp(opt, "hotadd=", 7)) | ||
610 | hotadd_percent = simple_strtoul(opt+7, NULL, 10); | ||
611 | #endif | 598 | #endif |
612 | return 0; | 599 | return 0; |
613 | } | 600 | } |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 01765955baaf..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata; | |||
31 | static nodemask_t cpu_nodes_parsed __initdata; | 31 | static nodemask_t cpu_nodes_parsed __initdata; |
32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
33 | static struct bootnode nodes_add[MAX_NUMNODES]; | 33 | static struct bootnode nodes_add[MAX_NUMNODES]; |
34 | static int found_add_area __initdata; | ||
35 | int hotadd_percent __initdata = 0; | ||
36 | 34 | ||
37 | static int num_node_memblks __initdata; | 35 | static int num_node_memblks __initdata; |
38 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | 36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; |
39 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | 37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; |
40 | 38 | ||
41 | /* Too small nodes confuse the VM badly. Usually they result | ||
42 | from BIOS bugs. */ | ||
43 | #define NODE_MIN_SIZE (4*1024*1024) | ||
44 | |||
45 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
46 | { | 40 | { |
47 | return acpi_map_pxm_to_node(pxm); | 41 | return acpi_map_pxm_to_node(pxm); |
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
66 | { | 60 | { |
67 | struct bootnode *nd = &nodes[i]; | 61 | struct bootnode *nd = &nodes[i]; |
68 | 62 | ||
69 | if (found_add_area) | ||
70 | return; | ||
71 | |||
72 | if (nd->start < start) { | 63 | if (nd->start < start) { |
73 | nd->start = start; | 64 | nd->start = start; |
74 | if (nd->end < nd->start) | 65 | if (nd->end < nd->start) |
@@ -86,7 +77,6 @@ static __init void bad_srat(void) | |||
86 | int i; | 77 | int i; |
87 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 78 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
88 | acpi_numa = -1; | 79 | acpi_numa = -1; |
89 | found_add_area = 0; | ||
90 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 80 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
91 | apicid_to_node[i] = NUMA_NO_NODE; | 81 | apicid_to_node[i] = NUMA_NO_NODE; |
92 | for (i = 0; i < MAX_NUMNODES; i++) | 82 | for (i = 0; i < MAX_NUMNODES; i++) |
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
182 | pxm, apic_id, node); | 172 | pxm, apic_id, node); |
183 | } | 173 | } |
184 | 174 | ||
185 | static int update_end_of_memory(unsigned long end) {return -1;} | ||
186 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | ||
187 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 175 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
188 | static inline int save_add_info(void) {return 1;} | 176 | static inline int save_add_info(void) {return 1;} |
189 | #else | 177 | #else |
190 | static inline int save_add_info(void) {return 0;} | 178 | static inline int save_add_info(void) {return 0;} |
191 | #endif | 179 | #endif |
192 | /* | 180 | /* |
193 | * Update nodes_add and decide if to include add are in the zone. | 181 | * Update nodes_add[] |
194 | * Both SPARSE and RESERVE need nodes_add information. | 182 | * This code supports one contiguous hot add area per node |
195 | * This code supports one contiguous hot add area per node. | ||
196 | */ | 183 | */ |
197 | static int __init | 184 | static void __init |
198 | reserve_hotadd(int node, unsigned long start, unsigned long end) | 185 | update_nodes_add(int node, unsigned long start, unsigned long end) |
199 | { | 186 | { |
200 | unsigned long s_pfn = start >> PAGE_SHIFT; | 187 | unsigned long s_pfn = start >> PAGE_SHIFT; |
201 | unsigned long e_pfn = end >> PAGE_SHIFT; | 188 | unsigned long e_pfn = end >> PAGE_SHIFT; |
202 | int ret = 0, changed = 0; | 189 | int changed = 0; |
203 | struct bootnode *nd = &nodes_add[node]; | 190 | struct bootnode *nd = &nodes_add[node]; |
204 | 191 | ||
205 | /* I had some trouble with strange memory hotadd regions breaking | 192 | /* I had some trouble with strange memory hotadd regions breaking |
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
210 | mistakes */ | 197 | mistakes */ |
211 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | 198 | if ((signed long)(end - start) < NODE_MIN_SIZE) { |
212 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | 199 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); |
213 | return -1; | 200 | return; |
214 | } | 201 | } |
215 | 202 | ||
216 | /* This check might be a bit too strict, but I'm keeping it for now. */ | 203 | /* This check might be a bit too strict, but I'm keeping it for now. */ |
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
218 | printk(KERN_ERR | 205 | printk(KERN_ERR |
219 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | 206 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", |
220 | s_pfn, e_pfn); | 207 | s_pfn, e_pfn); |
221 | return -1; | 208 | return; |
222 | } | ||
223 | |||
224 | if (!hotadd_enough_memory(&nodes_add[node])) { | ||
225 | printk(KERN_ERR "SRAT: Hotplug area too large\n"); | ||
226 | return -1; | ||
227 | } | 209 | } |
228 | 210 | ||
229 | /* Looks good */ | 211 | /* Looks good */ |
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
245 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 227 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
246 | } | 228 | } |
247 | 229 | ||
248 | ret = update_end_of_memory(nd->end); | ||
249 | |||
250 | if (changed) | 230 | if (changed) |
251 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); | 231 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
252 | return ret; | 232 | nd->start, nd->end); |
253 | } | 233 | } |
254 | 234 | ||
255 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 235 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
310 | start, end); | 290 | start, end); |
311 | e820_register_active_regions(node, start >> PAGE_SHIFT, | 291 | e820_register_active_regions(node, start >> PAGE_SHIFT, |
312 | end >> PAGE_SHIFT); | 292 | end >> PAGE_SHIFT); |
313 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, | ||
314 | nd->end >> PAGE_SHIFT); | ||
315 | 293 | ||
316 | if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
317 | (reserve_hotadd(node, start, end) < 0)) { | 295 | update_nodes_add(node, start, end); |
318 | /* Ignore hotadd region. Undo damage */ | 296 | /* restore nodes[node] */ |
319 | printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); | ||
320 | *nd = oldnode; | 297 | *nd = oldnode; |
321 | if ((nd->start | nd->end) == 0) | 298 | if ((nd->start | nd->end) == 0) |
322 | node_clear(node, nodes_parsed); | 299 | node_clear(node, nodes_parsed); |
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
345 | pxmram = 0; | 322 | pxmram = 0; |
346 | } | 323 | } |
347 | 324 | ||
348 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); | 325 | e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); |
349 | /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ | 326 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
350 | if ((long)(e820ram - pxmram) >= 1*1024*1024) { | 327 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { |
351 | printk(KERN_ERR | 328 | printk(KERN_ERR |
352 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | 329 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", |
353 | (pxmram << PAGE_SHIFT) >> 20, | 330 | (pxmram << PAGE_SHIFT) >> 20, |
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
357 | return 1; | 334 | return 1; |
358 | } | 335 | } |
359 | 336 | ||
360 | static void __init unparse_node(int node) | ||
361 | { | ||
362 | int i; | ||
363 | node_clear(node, nodes_parsed); | ||
364 | node_clear(node, cpu_nodes_parsed); | ||
365 | for (i = 0; i < MAX_LOCAL_APIC; i++) { | ||
366 | if (apicid_to_node[i] == node) | ||
367 | apicid_to_node[i] = NUMA_NO_NODE; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
372 | 338 | ||
373 | /* Use the information discovered above to actually set up the nodes. */ | 339 | /* Use the information discovered above to actually set up the nodes. */ |
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
379 | return -1; | 345 | return -1; |
380 | 346 | ||
381 | /* First clean up the node list */ | 347 | /* First clean up the node list */ |
382 | for (i = 0; i < MAX_NUMNODES; i++) { | 348 | for (i = 0; i < MAX_NUMNODES; i++) |
383 | cutoff_node(i, start, end); | 349 | cutoff_node(i, start, end); |
384 | /* | ||
385 | * don't confuse VM with a node that doesn't have the | ||
386 | * minimum memory. | ||
387 | */ | ||
388 | if (nodes[i].end && | ||
389 | (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { | ||
390 | unparse_node(i); | ||
391 | node_set_offline(i); | ||
392 | } | ||
393 | } | ||
394 | 350 | ||
395 | if (!nodes_cover_memory(nodes)) { | 351 | if (!nodes_cover_memory(nodes)) { |
396 | bad_srat(); | 352 | bad_srat(); |
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
423 | 379 | ||
424 | if (node == NUMA_NO_NODE) | 380 | if (node == NUMA_NO_NODE) |
425 | continue; | 381 | continue; |
426 | if (!node_isset(node, node_possible_map)) | 382 | if (!node_online(node)) |
427 | numa_clear_node(i); | 383 | numa_clear_node(i); |
428 | } | 384 | } |
429 | numa_init_array(); | 385 | numa_init_array(); |
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b) | |||
510 | } | 466 | } |
511 | #endif /* CONFIG_NUMA_EMU */ | 467 | #endif /* CONFIG_NUMA_EMU */ |
512 | 468 | ||
513 | void __init srat_reserve_add_area(int nodeid) | ||
514 | { | ||
515 | if (found_add_area && nodes_add[nodeid].end) { | ||
516 | u64 total_mb; | ||
517 | |||
518 | printk(KERN_INFO "SRAT: Reserving hot-add memory space " | ||
519 | "for node %d at %Lx-%Lx\n", | ||
520 | nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); | ||
521 | total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) | ||
522 | >> PAGE_SHIFT; | ||
523 | total_mb *= sizeof(struct page); | ||
524 | total_mb >>= 20; | ||
525 | printk(KERN_INFO "SRAT: This will cost you %Lu MB of " | ||
526 | "pre-allocated memory.\n", (unsigned long long)total_mb); | ||
527 | reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, | ||
528 | nodes_add[nodeid].end - nodes_add[nodeid].start, | ||
529 | BOOTMEM_DEFAULT); | ||
530 | } | ||
531 | } | ||
532 | |||
533 | int __node_distance(int a, int b) | 469 | int __node_distance(int a, int b) |
534 | { | 470 | { |
535 | int index; | 471 | int index; |