aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-06-10 19:13:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-10 19:13:20 -0400
commitbb7762961d3ce745688e9050e914c1d3f980268d (patch)
treee841f58cd6188cc44583cd055798b4475a4d68f0 /arch/x86/mm
parent48c72d1ab4ec86789a23aed0b0b5f31ac083c0c6 (diff)
parent35d5a9a61490bf39d2e48d7f499c8c801a39ebe9 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (22 commits) x86: fix system without memory on node0 x86, mm: Fix node_possible_map logic mm, x86: remove MEMORY_HOTPLUG_RESERVE related code x86: make sparse mem work in non-NUMA mode x86: process.c, remove useless headers x86: merge process.c a bit x86: use sparse_memory_present_with_active_regions() on UMA x86: unify 64-bit UMA and NUMA paging_init() x86: Allow 1MB of slack between the e820 map and SRAT, not 4GB x86: Sanity check the e820 against the SRAT table using e820 map only x86: clean up and and print out initial max_pfn_mapped x86/pci: remove rounding quirk from e820_setup_gap() x86, e820, pci: reserve extra free space near end of RAM x86: fix typo in address space documentation x86: 46 bit physical address support on 64 bits x86, mm: fault.c, use printk_once() in is_errata93() x86: move per-cpu mmu_gathers to mm/init.c x86: move max_pfn_mapped and max_low_pfn_mapped to setup.c x86: unify noexec handling x86: remove (null) in /sys kernel_page_tables ...
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/dump_pagetables.c7
-rw-r--r--arch/x86/mm/fault.c51
-rw-r--r--arch/x86/mm/init.c77
-rw-r--r--arch/x86/mm/init_32.c61
-rw-r--r--arch/x86/mm/init_64.c47
-rw-r--r--arch/x86/mm/numa_64.c33
-rw-r--r--arch/x86/mm/srat_64.c98
7 files changed, 116 insertions, 258 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e7277cbcfb40..a725b7f760ae 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
161 st->current_address >= st->marker[1].start_address) { 161 st->current_address >= st->marker[1].start_address) {
162 const char *unit = units; 162 const char *unit = units;
163 unsigned long delta; 163 unsigned long delta;
164 int width = sizeof(unsigned long) * 2;
164 165
165 /* 166 /*
166 * Now print the actual finished series 167 * Now print the actual finished series
167 */ 168 */
168 seq_printf(m, "0x%p-0x%p ", 169 seq_printf(m, "0x%0*lx-0x%0*lx ",
169 (void *)st->start_address, 170 width, st->start_address,
170 (void *)st->current_address); 171 width, st->current_address);
171 172
172 delta = (st->current_address - st->start_address) >> 10; 173 delta = (st->current_address - st->start_address) >> 10;
173 while (!(delta & 1023) && unit[1]) { 174 while (!(delta & 1023) && unit[1]) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..b9ca6d767dbb 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,40 +3,16 @@
3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. 3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar 4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
5 */ 5 */
6#include <linux/interrupt.h> 6#include <linux/magic.h> /* STACK_END_MAGIC */
7#include <linux/mmiotrace.h> 7#include <linux/sched.h> /* test_thread_flag(), ... */
8#include <linux/bootmem.h> 8#include <linux/kdebug.h> /* oops_begin/end, ... */
9#include <linux/compiler.h> 9#include <linux/module.h> /* search_exception_table */
10#include <linux/highmem.h> 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> 11#include <linux/kprobes.h> /* __kprobes, ... */
12#include <linux/uaccess.h> 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/vmalloc.h> 13
14#include <linux/vt_kern.h> 14#include <asm/traps.h> /* dotraplinkage, ... */
15#include <linux/signal.h> 15#include <asm/pgalloc.h> /* pgd_*(), ... */
16#include <linux/kernel.h>
17#include <linux/ptrace.h>
18#include <linux/string.h>
19#include <linux/module.h>
20#include <linux/kdebug.h>
21#include <linux/errno.h>
22#include <linux/magic.h>
23#include <linux/sched.h>
24#include <linux/types.h>
25#include <linux/init.h>
26#include <linux/mman.h>
27#include <linux/tty.h>
28#include <linux/smp.h>
29#include <linux/mm.h>
30
31#include <asm-generic/sections.h>
32
33#include <asm/tlbflush.h>
34#include <asm/pgalloc.h>
35#include <asm/segment.h>
36#include <asm/system.h>
37#include <asm/proto.h>
38#include <asm/traps.h>
39#include <asm/desc.h>
40 16
41/* 17/*
42 * Page fault error code bits: 18 * Page fault error code bits:
@@ -538,8 +514,6 @@ bad:
538static int is_errata93(struct pt_regs *regs, unsigned long address) 514static int is_errata93(struct pt_regs *regs, unsigned long address)
539{ 515{
540#ifdef CONFIG_X86_64 516#ifdef CONFIG_X86_64
541 static int once;
542
543 if (address != regs->ip) 517 if (address != regs->ip)
544 return 0; 518 return 0;
545 519
@@ -549,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
549 address |= 0xffffffffUL << 32; 523 address |= 0xffffffffUL << 32;
550 if ((address >= (u64)_stext && address <= (u64)_etext) || 524 if ((address >= (u64)_stext && address <= (u64)_etext) ||
551 (address >= MODULES_VADDR && address <= MODULES_END)) { 525 (address >= MODULES_VADDR && address <= MODULES_END)) {
552 if (!once) { 526 printk_once(errata93_warning);
553 printk(errata93_warning);
554 once = 1;
555 }
556 regs->ip = address; 527 regs->ip = address;
557 return 1; 528 return 1;
558 } 529 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4b98df0973b9..34c1bfb64f1c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -11,6 +11,9 @@
11#include <asm/setup.h> 11#include <asm/setup.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/tlbflush.h> 13#include <asm/tlbflush.h>
14#include <asm/tlb.h>
15
16DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
14 17
15unsigned long __initdata e820_table_start; 18unsigned long __initdata e820_table_start;
16unsigned long __meminitdata e820_table_end; 19unsigned long __meminitdata e820_table_end;
@@ -24,6 +27,69 @@ int direct_gbpages
24#endif 27#endif
25; 28;
26 29
30int nx_enabled;
31
32#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
33static int disable_nx __cpuinitdata;
34
35/*
36 * noexec = on|off
37 *
38 * Control non-executable mappings for processes.
39 *
40 * on Enable
41 * off Disable
42 */
43static int __init noexec_setup(char *str)
44{
45 if (!str)
46 return -EINVAL;
47 if (!strncmp(str, "on", 2)) {
48 __supported_pte_mask |= _PAGE_NX;
49 disable_nx = 0;
50 } else if (!strncmp(str, "off", 3)) {
51 disable_nx = 1;
52 __supported_pte_mask &= ~_PAGE_NX;
53 }
54 return 0;
55}
56early_param("noexec", noexec_setup);
57#endif
58
59#ifdef CONFIG_X86_PAE
60static void __init set_nx(void)
61{
62 unsigned int v[4], l, h;
63
64 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
65 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
66
67 if ((v[3] & (1 << 20)) && !disable_nx) {
68 rdmsr(MSR_EFER, l, h);
69 l |= EFER_NX;
70 wrmsr(MSR_EFER, l, h);
71 nx_enabled = 1;
72 __supported_pte_mask |= _PAGE_NX;
73 }
74 }
75}
76#else
77static inline void set_nx(void)
78{
79}
80#endif
81
82#ifdef CONFIG_X86_64
83void __cpuinit check_efer(void)
84{
85 unsigned long efer;
86
87 rdmsrl(MSR_EFER, efer);
88 if (!(efer & EFER_NX) || disable_nx)
89 __supported_pte_mask &= ~_PAGE_NX;
90}
91#endif
92
27static void __init find_early_table_space(unsigned long end, int use_pse, 93static void __init find_early_table_space(unsigned long end, int use_pse,
28 int use_gbpages) 94 int use_gbpages)
29{ 95{
@@ -67,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
67 */ 133 */
68#ifdef CONFIG_X86_32 134#ifdef CONFIG_X86_32
69 start = 0x7000; 135 start = 0x7000;
70 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, 136#else
71 tables, PAGE_SIZE);
72#else /* CONFIG_X86_64 */
73 start = 0x8000; 137 start = 0x8000;
74 e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
75#endif 138#endif
139 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
140 tables, PAGE_SIZE);
76 if (e820_table_start == -1UL) 141 if (e820_table_start == -1UL)
77 panic("Cannot find space for the kernel page tables"); 142 panic("Cannot find space for the kernel page tables");
78 143
@@ -160,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
160 use_gbpages = direct_gbpages; 225 use_gbpages = direct_gbpages;
161#endif 226#endif
162 227
163#ifdef CONFIG_X86_32
164#ifdef CONFIG_X86_PAE
165 set_nx(); 228 set_nx();
166 if (nx_enabled) 229 if (nx_enabled)
167 printk(KERN_INFO "NX (Execute Disable) protection: active\n"); 230 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
168#endif
169 231
170 /* Enable PSE if available */ 232 /* Enable PSE if available */
171 if (cpu_has_pse) 233 if (cpu_has_pse)
@@ -176,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
176 set_in_cr4(X86_CR4_PGE); 238 set_in_cr4(X86_CR4_PGE);
177 __supported_pte_mask |= _PAGE_GLOBAL; 239 __supported_pte_mask |= _PAGE_GLOBAL;
178 } 240 }
179#endif
180 241
181 if (use_gbpages) 242 if (use_gbpages)
182 page_size_mask |= 1 << PG_LEVEL_1G; 243 page_size_mask |= 1 << PG_LEVEL_1G;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 749559ed80f5..949708d7a481 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,12 +49,9 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/page_types.h>
52#include <asm/init.h> 53#include <asm/init.h>
53 54
54unsigned long max_low_pfn_mapped;
55unsigned long max_pfn_mapped;
56
57DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
58unsigned long highstart_pfn, highend_pfn; 55unsigned long highstart_pfn, highend_pfn;
59 56
60static noinline int do_test_wp_bit(void); 57static noinline int do_test_wp_bit(void);
@@ -587,61 +584,9 @@ void zap_low_mappings(void)
587 flush_tlb_all(); 584 flush_tlb_all();
588} 585}
589 586
590int nx_enabled;
591
592pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); 587pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
593EXPORT_SYMBOL_GPL(__supported_pte_mask); 588EXPORT_SYMBOL_GPL(__supported_pte_mask);
594 589
595#ifdef CONFIG_X86_PAE
596
597static int disable_nx __initdata;
598
599/*
600 * noexec = on|off
601 *
602 * Control non executable mappings.
603 *
604 * on Enable
605 * off Disable
606 */
607static int __init noexec_setup(char *str)
608{
609 if (!str || !strcmp(str, "on")) {
610 if (cpu_has_nx) {
611 __supported_pte_mask |= _PAGE_NX;
612 disable_nx = 0;
613 }
614 } else {
615 if (!strcmp(str, "off")) {
616 disable_nx = 1;
617 __supported_pte_mask &= ~_PAGE_NX;
618 } else {
619 return -EINVAL;
620 }
621 }
622
623 return 0;
624}
625early_param("noexec", noexec_setup);
626
627void __init set_nx(void)
628{
629 unsigned int v[4], l, h;
630
631 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
632 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
633
634 if ((v[3] & (1 << 20)) && !disable_nx) {
635 rdmsr(MSR_EFER, l, h);
636 l |= EFER_NX;
637 wrmsr(MSR_EFER, l, h);
638 nx_enabled = 1;
639 __supported_pte_mask |= _PAGE_NX;
640 }
641 }
642}
643#endif
644
645/* user-defined highmem size */ 590/* user-defined highmem size */
646static unsigned int highmem_pages = -1; 591static unsigned int highmem_pages = -1;
647 592
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn,
761 highstart_pfn = highend_pfn = max_pfn; 706 highstart_pfn = highend_pfn = max_pfn;
762 if (max_pfn > max_low_pfn) 707 if (max_pfn > max_low_pfn)
763 highstart_pfn = max_low_pfn; 708 highstart_pfn = max_low_pfn;
764 memory_present(0, 0, highend_pfn);
765 e820_register_active_regions(0, 0, highend_pfn); 709 e820_register_active_regions(0, 0, highend_pfn);
710 sparse_memory_present_with_active_regions(0);
766 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 711 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
767 pages_to_mb(highend_pfn - highstart_pfn)); 712 pages_to_mb(highend_pfn - highstart_pfn));
768 num_physpages = highend_pfn; 713 num_physpages = highend_pfn;
769 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 714 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
770#else 715#else
771 memory_present(0, 0, max_low_pfn);
772 e820_register_active_regions(0, 0, max_low_pfn); 716 e820_register_active_regions(0, 0, max_low_pfn);
717 sparse_memory_present_with_active_regions(0);
773 num_physpages = max_low_pfn; 718 num_physpages = max_low_pfn;
774 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 719 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
775#endif 720#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1753e8020df6..52bb9519bb86 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,18 +50,8 @@
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h> 51#include <asm/init.h>
52 52
53/*
54 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
55 * The direct mapping extends to max_pfn_mapped, so that we can directly access
56 * apertures, ACPI and other tables without having to play with fixmaps.
57 */
58unsigned long max_low_pfn_mapped;
59unsigned long max_pfn_mapped;
60
61static unsigned long dma_reserve __initdata; 53static unsigned long dma_reserve __initdata;
62 54
63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
64
65static int __init parse_direct_gbpages_off(char *arg) 55static int __init parse_direct_gbpages_off(char *arg)
66{ 56{
67 direct_gbpages = 0; 57 direct_gbpages = 0;
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on);
85pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; 75pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
86EXPORT_SYMBOL_GPL(__supported_pte_mask); 76EXPORT_SYMBOL_GPL(__supported_pte_mask);
87 77
88static int disable_nx __cpuinitdata;
89
90/*
91 * noexec=on|off
92 * Control non-executable mappings for 64-bit processes.
93 *
94 * on Enable (default)
95 * off Disable
96 */
97static int __init nonx_setup(char *str)
98{
99 if (!str)
100 return -EINVAL;
101 if (!strncmp(str, "on", 2)) {
102 __supported_pte_mask |= _PAGE_NX;
103 disable_nx = 0;
104 } else if (!strncmp(str, "off", 3)) {
105 disable_nx = 1;
106 __supported_pte_mask &= ~_PAGE_NX;
107 }
108 return 0;
109}
110early_param("noexec", nonx_setup);
111
112void __cpuinit check_efer(void)
113{
114 unsigned long efer;
115
116 rdmsrl(MSR_EFER, efer);
117 if (!(efer & EFER_NX) || disable_nx)
118 __supported_pte_mask &= ~_PAGE_NX;
119}
120
121int force_personality32; 78int force_personality32;
122 79
123/* 80/*
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
628 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); 585 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
629 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 586 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
630} 587}
588#endif
631 589
632void __init paging_init(void) 590void __init paging_init(void)
633{ 591{
@@ -638,11 +596,10 @@ void __init paging_init(void)
638 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 596 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
639 max_zone_pfns[ZONE_NORMAL] = max_pfn; 597 max_zone_pfns[ZONE_NORMAL] = max_pfn;
640 598
641 memory_present(0, 0, max_pfn); 599 sparse_memory_present_with_active_regions(MAX_NUMNODES);
642 sparse_init(); 600 sparse_init();
643 free_area_init_nodes(max_zone_pfns); 601 free_area_init_nodes(max_zone_pfns);
644} 602}
645#endif
646 603
647/* 604/*
648 * Memory hotplug specific functions 605 * Memory hotplug specific functions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d05a12029dc..459913beac71 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
179} 179}
180 180
181/* Initialize bootmem allocator for a node */ 181/* Initialize bootmem allocator for a node */
182void __init setup_node_bootmem(int nodeid, unsigned long start, 182void __init
183 unsigned long end) 183setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
184{ 184{
185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
186 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
186 unsigned long bootmap_start, nodedata_phys; 187 unsigned long bootmap_start, nodedata_phys;
187 void *bootmap; 188 void *bootmap;
188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
189 int nid; 189 int nid;
190 190
191 if (!end) 191 if (!end)
192 return; 192 return;
193 193
194 /*
195 * Don't confuse VM with a node that doesn't have the
196 * minimum amount of memory:
197 */
198 if (end && (end - start) < NODE_MIN_SIZE)
199 return;
200
194 start = roundup(start, ZONE_ALIGN); 201 start = roundup(start, ZONE_ALIGN);
195 202
196 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 203 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
272 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
273 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
274 281
275#ifdef CONFIG_ACPI_NUMA
276 srat_reserve_add_area(nodeid);
277#endif
278 node_set_online(nodeid); 282 node_set_online(nodeid);
279} 283}
280 284
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void)
578 return pages; 582 return pages;
579} 583}
580 584
581void __init paging_init(void)
582{
583 unsigned long max_zone_pfns[MAX_NR_ZONES];
584
585 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
586 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
587 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
588 max_zone_pfns[ZONE_NORMAL] = max_pfn;
589
590 sparse_memory_present_with_active_regions(MAX_NUMNODES);
591 sparse_init();
592
593 free_area_init_nodes(max_zone_pfns);
594}
595
596static __init int numa_setup(char *opt) 585static __init int numa_setup(char *opt)
597{ 586{
598 if (!opt) 587 if (!opt)
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt)
606#ifdef CONFIG_ACPI_NUMA 595#ifdef CONFIG_ACPI_NUMA
607 if (!strncmp(opt, "noacpi", 6)) 596 if (!strncmp(opt, "noacpi", 6))
608 acpi_numa = -1; 597 acpi_numa = -1;
609 if (!strncmp(opt, "hotadd=", 7))
610 hotadd_percent = simple_strtoul(opt+7, NULL, 10);
611#endif 598#endif
612 return 0; 599 return 0;
613} 600}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 01765955baaf..2dfcbf9df2ae 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata;
31static nodemask_t cpu_nodes_parsed __initdata; 31static nodemask_t cpu_nodes_parsed __initdata;
32static struct bootnode nodes[MAX_NUMNODES] __initdata; 32static struct bootnode nodes[MAX_NUMNODES] __initdata;
33static struct bootnode nodes_add[MAX_NUMNODES]; 33static struct bootnode nodes_add[MAX_NUMNODES];
34static int found_add_area __initdata;
35int hotadd_percent __initdata = 0;
36 34
37static int num_node_memblks __initdata; 35static int num_node_memblks __initdata;
38static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; 36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
39static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; 37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
40 38
41/* Too small nodes confuse the VM badly. Usually they result
42 from BIOS bugs. */
43#define NODE_MIN_SIZE (4*1024*1024)
44
45static __init int setup_node(int pxm) 39static __init int setup_node(int pxm)
46{ 40{
47 return acpi_map_pxm_to_node(pxm); 41 return acpi_map_pxm_to_node(pxm);
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
66{ 60{
67 struct bootnode *nd = &nodes[i]; 61 struct bootnode *nd = &nodes[i];
68 62
69 if (found_add_area)
70 return;
71
72 if (nd->start < start) { 63 if (nd->start < start) {
73 nd->start = start; 64 nd->start = start;
74 if (nd->end < nd->start) 65 if (nd->end < nd->start)
@@ -86,7 +77,6 @@ static __init void bad_srat(void)
86 int i; 77 int i;
87 printk(KERN_ERR "SRAT: SRAT not used.\n"); 78 printk(KERN_ERR "SRAT: SRAT not used.\n");
88 acpi_numa = -1; 79 acpi_numa = -1;
89 found_add_area = 0;
90 for (i = 0; i < MAX_LOCAL_APIC; i++) 80 for (i = 0; i < MAX_LOCAL_APIC; i++)
91 apicid_to_node[i] = NUMA_NO_NODE; 81 apicid_to_node[i] = NUMA_NO_NODE;
92 for (i = 0; i < MAX_NUMNODES; i++) 82 for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
182 pxm, apic_id, node); 172 pxm, apic_id, node);
183} 173}
184 174
185static int update_end_of_memory(unsigned long end) {return -1;}
186static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
187#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 175#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
188static inline int save_add_info(void) {return 1;} 176static inline int save_add_info(void) {return 1;}
189#else 177#else
190static inline int save_add_info(void) {return 0;} 178static inline int save_add_info(void) {return 0;}
191#endif 179#endif
192/* 180/*
193 * Update nodes_add and decide if to include add are in the zone. 181 * Update nodes_add[]
194 * Both SPARSE and RESERVE need nodes_add information. 182 * This code supports one contiguous hot add area per node
195 * This code supports one contiguous hot add area per node.
196 */ 183 */
197static int __init 184static void __init
198reserve_hotadd(int node, unsigned long start, unsigned long end) 185update_nodes_add(int node, unsigned long start, unsigned long end)
199{ 186{
200 unsigned long s_pfn = start >> PAGE_SHIFT; 187 unsigned long s_pfn = start >> PAGE_SHIFT;
201 unsigned long e_pfn = end >> PAGE_SHIFT; 188 unsigned long e_pfn = end >> PAGE_SHIFT;
202 int ret = 0, changed = 0; 189 int changed = 0;
203 struct bootnode *nd = &nodes_add[node]; 190 struct bootnode *nd = &nodes_add[node];
204 191
205 /* I had some trouble with strange memory hotadd regions breaking 192 /* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
210 mistakes */ 197 mistakes */
211 if ((signed long)(end - start) < NODE_MIN_SIZE) { 198 if ((signed long)(end - start) < NODE_MIN_SIZE) {
212 printk(KERN_ERR "SRAT: Hotplug area too small\n"); 199 printk(KERN_ERR "SRAT: Hotplug area too small\n");
213 return -1; 200 return;
214 } 201 }
215 202
216 /* This check might be a bit too strict, but I'm keeping it for now. */ 203 /* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
218 printk(KERN_ERR 205 printk(KERN_ERR
219 "SRAT: Hotplug area %lu -> %lu has existing memory\n", 206 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
220 s_pfn, e_pfn); 207 s_pfn, e_pfn);
221 return -1; 208 return;
222 }
223
224 if (!hotadd_enough_memory(&nodes_add[node])) {
225 printk(KERN_ERR "SRAT: Hotplug area too large\n");
226 return -1;
227 } 209 }
228 210
229 /* Looks good */ 211 /* Looks good */
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
245 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); 227 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
246 } 228 }
247 229
248 ret = update_end_of_memory(nd->end);
249
250 if (changed) 230 if (changed)
251 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); 231 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
252 return ret; 232 nd->start, nd->end);
253} 233}
254 234
255/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 235/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
310 start, end); 290 start, end);
311 e820_register_active_regions(node, start >> PAGE_SHIFT, 291 e820_register_active_regions(node, start >> PAGE_SHIFT,
312 end >> PAGE_SHIFT); 292 end >> PAGE_SHIFT);
313 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
314 nd->end >> PAGE_SHIFT);
315 293
316 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && 294 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
317 (reserve_hotadd(node, start, end) < 0)) { 295 update_nodes_add(node, start, end);
318 /* Ignore hotadd region. Undo damage */ 296 /* restore nodes[node] */
319 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
320 *nd = oldnode; 297 *nd = oldnode;
321 if ((nd->start | nd->end) == 0) 298 if ((nd->start | nd->end) == 0)
322 node_clear(node, nodes_parsed); 299 node_clear(node, nodes_parsed);
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
345 pxmram = 0; 322 pxmram = 0;
346 } 323 }
347 324
348 e820ram = max_pfn - absent_pages_in_range(0, max_pfn); 325 e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
349 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ 326 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
350 if ((long)(e820ram - pxmram) >= 1*1024*1024) { 327 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
351 printk(KERN_ERR 328 printk(KERN_ERR
352 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", 329 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
353 (pxmram << PAGE_SHIFT) >> 20, 330 (pxmram << PAGE_SHIFT) >> 20,
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
357 return 1; 334 return 1;
358} 335}
359 336
360static void __init unparse_node(int node)
361{
362 int i;
363 node_clear(node, nodes_parsed);
364 node_clear(node, cpu_nodes_parsed);
365 for (i = 0; i < MAX_LOCAL_APIC; i++) {
366 if (apicid_to_node[i] == node)
367 apicid_to_node[i] = NUMA_NO_NODE;
368 }
369}
370
371void __init acpi_numa_arch_fixup(void) {} 337void __init acpi_numa_arch_fixup(void) {}
372 338
373/* Use the information discovered above to actually set up the nodes. */ 339/* Use the information discovered above to actually set up the nodes. */
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
379 return -1; 345 return -1;
380 346
381 /* First clean up the node list */ 347 /* First clean up the node list */
382 for (i = 0; i < MAX_NUMNODES; i++) { 348 for (i = 0; i < MAX_NUMNODES; i++)
383 cutoff_node(i, start, end); 349 cutoff_node(i, start, end);
384 /*
385 * don't confuse VM with a node that doesn't have the
386 * minimum memory.
387 */
388 if (nodes[i].end &&
389 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
390 unparse_node(i);
391 node_set_offline(i);
392 }
393 }
394 350
395 if (!nodes_cover_memory(nodes)) { 351 if (!nodes_cover_memory(nodes)) {
396 bad_srat(); 352 bad_srat();
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
423 379
424 if (node == NUMA_NO_NODE) 380 if (node == NUMA_NO_NODE)
425 continue; 381 continue;
426 if (!node_isset(node, node_possible_map)) 382 if (!node_online(node))
427 numa_clear_node(i); 383 numa_clear_node(i);
428 } 384 }
429 numa_init_array(); 385 numa_init_array();
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b)
510} 466}
511#endif /* CONFIG_NUMA_EMU */ 467#endif /* CONFIG_NUMA_EMU */
512 468
513void __init srat_reserve_add_area(int nodeid)
514{
515 if (found_add_area && nodes_add[nodeid].end) {
516 u64 total_mb;
517
518 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
519 "for node %d at %Lx-%Lx\n",
520 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
521 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
522 >> PAGE_SHIFT;
523 total_mb *= sizeof(struct page);
524 total_mb >>= 20;
525 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
526 "pre-allocated memory.\n", (unsigned long long)total_mb);
527 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
528 nodes_add[nodeid].end - nodes_add[nodeid].start,
529 BOOTMEM_DEFAULT);
530 }
531}
532
533int __node_distance(int a, int b) 469int __node_distance(int a, int b)
534{ 470{
535 int index; 471 int index;