-rw-r--r--   Documentation/x86/x86_64/boot-options.txt    5
-rw-r--r--   arch/x86/include/asm/numa_64.h                3
-rw-r--r--   arch/x86/mm/numa_64.c                         5
-rw-r--r--   arch/x86/mm/srat_64.c                        63
-rw-r--r--   include/linux/mm.h                            2
-rw-r--r--   mm/page_alloc.c                              69
6 files changed, 12 insertions, 135 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c13040a71..2db5893d6c9 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6df4cb..7feff0648d7 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index fb61d81a656..a6a93c39523 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 			     bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -591,8 +588,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 87b45bff250..b0dbbd48e58 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node])) {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+			 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 					 end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-					 nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-			"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c..511b0986709 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 				unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 				unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-				unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 				unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ecf2aa..474c7e9dd51 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */