author	Mel Gorman <mel@csn.ul.ie>	2006-09-27 04:49:51 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-09-27 11:26:11 -0400
commit	4cfee88ad30acc47f02b8b7ba3db8556262dce1e (patch)
tree	a336e6774143c869ec4e945f176368792355478b /arch
parent	c67c3cb4c99fb2ee63c8733943c353d745f45b84 (diff)
[PATCH] Have x86 use add_active_range() and free_area_init_nodes
Size zones and holes in an architecture independent manner for x86.

[akpm@osdl.org: build fix]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
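In outline, the new model inverts the old one: instead of each architecture computing zones_size[] and zholes_size[] per node and calling free_area_init_node(), it registers every range of present memory with add_active_range(), supplies only the maximum PFN of each zone, and lets free_area_init_nodes() derive zone sizes and holes for every node. A minimal sketch of that pattern follows; example_paging_init() and the pfn constants are illustrative only and not part of this patch, while the interfaces named are the ones this series introduces. It is a sketch of kernel boot code, not a standalone program.

/*
 * Sketch only: how an arch boot path might use the registration API
 * when CONFIG_ARCH_POPULATES_NODE_MAP is set. Real code derives the
 * pfn ranges from e820/EFI/SRAT parsing rather than constants.
 */
#include <linux/mm.h>		/* add_active_range(), free_area_init_nodes() */
#include <linux/bootmem.h>	/* max_low_pfn */
#include <asm/io.h>		/* virt_to_phys() */

static void __init example_paging_init(void)	/* hypothetical name */
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

	/* Register each range of present memory against its node;
	 * end pfns are exclusive, and gaps between ranges are holes. */
	add_active_range(0, 0, 0x38000);	/* node 0: pfns 0-0x38000 */
	add_active_range(1, 0x40000, 0x78000);	/* node 1, after a hole */

	/* Describe only where each zone ends... */
	max_zone_pfns[ZONE_DMA] =
		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
#endif

	/* ...and let the core mm size zones and holes for every node. */
	free_area_init_nodes(max_zone_pfns);
}

Because the registered active ranges already carry the hole information, the per-arch chunk_to_zones()/get_zholes_size() bookkeeping removed below becomes unnecessary.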
Diffstat (limited to 'arch')
-rw-r--r--	arch/i386/Kconfig	|  8
-rw-r--r--	arch/i386/kernel/setup.c	| 26
-rw-r--r--	arch/i386/kernel/srat.c	| 97
-rw-r--r--	arch/i386/mm/discontig.c	| 69
4 files changed, 34 insertions(+), 166 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 758044f5e718..3fd9f1e8b093 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -598,12 +598,10 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
 
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	bool
-	default y
-	depends on NUMA
+source "mm/Kconfig"
 
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 76a524b4c90f..814cdebf7377 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1089,22 +1089,20 @@ static unsigned long __init setup_memory(void)
 
 void __init zone_sizes_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-	unsigned int max_dma, low;
-
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = max_low_pfn;
-
-	if (low < max_dma)
-		zones_size[ZONE_DMA] = low;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = low - max_dma;
 #ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+		max_low_pfn,
+		highend_pfn};
+	add_active_range(0, 0, highend_pfn);
+#else
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+		max_low_pfn};
+	add_active_range(0, 0, max_low_pfn);
 #endif
-	}
-	free_area_init(zones_size);
+
+	free_area_init_nodes(max_zone_pfns);
 }
 #else
 extern unsigned long __init setup_memory(void);
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index 83db411b3aa7..32413122c4c2 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -54,8 +54,6 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 
 static int num_memory_chunks;		/* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
 extern void * boot_ioremap(unsigned long, unsigned long);
 
@@ -135,47 +133,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
135 "enabled and removable" : "enabled" ) ); 133 "enabled and removable" : "enabled" ) );
136} 134}
137 135
138/* Take a chunk of pages from page frame cstart to cend and count the number
139 * of pages in each zone, returned via zones[].
140 */
141static __init void chunk_to_zones(unsigned long cstart, unsigned long cend,
142 unsigned long *zones)
143{
144 unsigned long max_dma;
145 extern unsigned long max_low_pfn;
146
147 int z;
148 unsigned long rend;
149
150 /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
151 * similarly scoped information and should be handled in a consistant
152 * manner.
153 */
154 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
155
156 /* Split the hole into the zones in which it falls. Repeatedly
157 * take the segment in which the remaining hole starts, round it
158 * to the end of that zone.
159 */
160 memset(zones, 0, MAX_NR_ZONES * sizeof(long));
161 while (cstart < cend) {
162 if (cstart < max_dma) {
163 z = ZONE_DMA;
164 rend = (cend < max_dma)? cend : max_dma;
165
166 } else if (cstart < max_low_pfn) {
167 z = ZONE_NORMAL;
168 rend = (cend < max_low_pfn)? cend : max_low_pfn;
169
170 } else {
171 z = ZONE_HIGHMEM;
172 rend = cend;
173 }
174 zones[z] += rend - cstart;
175 cstart = rend;
176 }
177}
178
179/* 136/*
180 * The SRAT table always lists ascending addresses, so can always 137 * The SRAT table always lists ascending addresses, so can always
181 * assume that the first "start" address that you see is the real 138 * assume that the first "start" address that you see is the real
@@ -220,7 +177,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-	memset(zholes_size, 0, sizeof(zholes_size));
 
 	num_memory_chunks = 0;
 	while (p < end) {
@@ -284,6 +240,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
+		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
 	}
 
 	for_each_online_node(nid) {
@@ -392,57 +349,7 @@ int __init get_memcfg_from_srat(void)
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
 out_err:
+	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 }
-
-/* For each node run the memory list to determine whether there are
- * any memory holes.  For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- *
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
-	int nid;
-	int c;
-	int first;
-	unsigned long end = 0;
-
-	for_each_online_node(nid) {
-		first = 1;
-		for (c = 0; c < num_memory_chunks; c++){
-			if (node_memory_chunk[c].nid == nid) {
-				if (first) {
-					end = node_memory_chunk[c].end_pfn;
-					first = 0;
-
-				} else {
-					/* Record any gap between this chunk
-					 * and the previous chunk on this node
-					 * against the zones it spans.
-					 */
-					chunk_to_zones(end,
-						node_memory_chunk[c].start_pfn,
-						&zholes_size[nid * MAX_NR_ZONES]);
-				}
-			}
-		}
-	}
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
-	if (!zholes_size_init) {
-		zholes_size_init++;
-		get_zholes_init();
-	}
-	if (nid >= MAX_NUMNODES || !node_online(nid))
-		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
-		       __FUNCTION__, nid, num_online_nodes());
-	return &zholes_size[nid * MAX_NR_ZONES];
-}
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 941d1a5ebabb..51e3739dd227 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int nid)
 		BUG();
 }
 
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	for_each_node(nid) {
-		if (node_end_pfn[nid] == 0)
-			break;
-		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-			return nid;
-	}
-
-	return 0;
-}
-
 /*
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void)
 	unsigned long pfn;
 
 	for_each_online_node(nid) {
+		unsigned old_end_pfn = node_end_pfn[nid];
+
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void)
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
+		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
@@ -369,45 +357,22 @@ void __init numa_kva_reserve(void)
 void __init zone_sizes_init(void)
 {
 	int nid;
-
-
-	for_each_online_node(nid) {
-		unsigned long zones_size[MAX_NR_ZONES] = {0, };
-		unsigned long *zholes_size;
-		unsigned int max_dma;
-
-		unsigned long low = max_low_pfn;
-		unsigned long start = node_start_pfn[nid];
-		unsigned long high = node_end_pfn[nid];
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (node_has_online_mem(nid)){
-			if (start > low) {
-#ifdef CONFIG_HIGHMEM
-				BUG_ON(start > high);
-				zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-			} else {
-				if (low < max_dma)
-					zones_size[ZONE_DMA] = low;
-				else {
-					BUG_ON(max_dma > low);
-					BUG_ON(low > high);
-					zones_size[ZONE_DMA] = max_dma;
-					zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-					zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-				}
-			}
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+		max_low_pfn,
+		highend_pfn
+	};
+
+	/* If SRAT has not registered memory, register it now */
+	if (find_max_pfn_with_active_regions() == 0) {
+		for_each_online_node(nid) {
+			if (node_has_online_mem(nid))
+				add_active_range(nid, node_start_pfn[nid],
+					node_end_pfn[nid]);
 		}
-
-		zholes_size = get_zholes_size(nid);
-
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-				zholes_size);
 	}
+
+	free_area_init_nodes(max_zone_pfns);
 	return;
 }
 