diff options
author | Mel Gorman <mel@csn.ul.ie> | 2006-09-27 04:49:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-27 11:26:11 -0400 |
commit | 4cfee88ad30acc47f02b8b7ba3db8556262dce1e (patch) | |
tree | a336e6774143c869ec4e945f176368792355478b /arch | |
parent | c67c3cb4c99fb2ee63c8733943c353d745f45b84 (diff) |
[PATCH] Have x86 use add_active_range() and free_area_init_nodes
Size zones and holes in an architecture-independent manner for x86.
[akpm@osdl.org: build fix]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/i386/Kconfig | 8 | ||||
-rw-r--r-- | arch/i386/kernel/setup.c | 26 | ||||
-rw-r--r-- | arch/i386/kernel/srat.c | 97 | ||||
-rw-r--r-- | arch/i386/mm/discontig.c | 69 |
4 files changed, 34 insertions, 166 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 758044f5e718..3fd9f1e8b093 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -598,12 +598,10 @@ config ARCH_SELECT_MEMORY_MODEL | |||
598 | def_bool y | 598 | def_bool y |
599 | depends on ARCH_SPARSEMEM_ENABLE | 599 | depends on ARCH_SPARSEMEM_ENABLE |
600 | 600 | ||
601 | source "mm/Kconfig" | 601 | config ARCH_POPULATES_NODE_MAP |
602 | def_bool y | ||
602 | 603 | ||
603 | config HAVE_ARCH_EARLY_PFN_TO_NID | 604 | source "mm/Kconfig" |
604 | bool | ||
605 | default y | ||
606 | depends on NUMA | ||
607 | 605 | ||
608 | config HIGHPTE | 606 | config HIGHPTE |
609 | bool "Allocate 3rd-level pagetables from highmem" | 607 | bool "Allocate 3rd-level pagetables from highmem" |
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 76a524b4c90f..814cdebf7377 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -1089,22 +1089,20 @@ static unsigned long __init setup_memory(void) | |||
1089 | 1089 | ||
1090 | void __init zone_sizes_init(void) | 1090 | void __init zone_sizes_init(void) |
1091 | { | 1091 | { |
1092 | unsigned long zones_size[MAX_NR_ZONES] = { 0, }; | ||
1093 | unsigned int max_dma, low; | ||
1094 | |||
1095 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
1096 | low = max_low_pfn; | ||
1097 | |||
1098 | if (low < max_dma) | ||
1099 | zones_size[ZONE_DMA] = low; | ||
1100 | else { | ||
1101 | zones_size[ZONE_DMA] = max_dma; | ||
1102 | zones_size[ZONE_NORMAL] = low - max_dma; | ||
1103 | #ifdef CONFIG_HIGHMEM | 1092 | #ifdef CONFIG_HIGHMEM |
1104 | zones_size[ZONE_HIGHMEM] = highend_pfn - low; | 1093 | unsigned long max_zone_pfns[MAX_NR_ZONES] = { |
1094 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, | ||
1095 | max_low_pfn, | ||
1096 | highend_pfn}; | ||
1097 | add_active_range(0, 0, highend_pfn); | ||
1098 | #else | ||
1099 | unsigned long max_zone_pfns[MAX_NR_ZONES] = { | ||
1100 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, | ||
1101 | max_low_pfn}; | ||
1102 | add_active_range(0, 0, max_low_pfn); | ||
1105 | #endif | 1103 | #endif |
1106 | } | 1104 | |
1107 | free_area_init(zones_size); | 1105 | free_area_init_nodes(max_zone_pfns); |
1108 | } | 1106 | } |
1109 | #else | 1107 | #else |
1110 | extern unsigned long __init setup_memory(void); | 1108 | extern unsigned long __init setup_memory(void); |
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 83db411b3aa7..32413122c4c2 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c | |||
@@ -54,8 +54,6 @@ struct node_memory_chunk_s { | |||
54 | static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; | 54 | static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; |
55 | 55 | ||
56 | static int num_memory_chunks; /* total number of memory chunks */ | 56 | static int num_memory_chunks; /* total number of memory chunks */ |
57 | static int zholes_size_init; | ||
58 | static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; | ||
59 | 57 | ||
60 | extern void * boot_ioremap(unsigned long, unsigned long); | 58 | extern void * boot_ioremap(unsigned long, unsigned long); |
61 | 59 | ||
@@ -135,47 +133,6 @@ static void __init parse_memory_affinity_structure (char *sratp) | |||
135 | "enabled and removable" : "enabled" ) ); | 133 | "enabled and removable" : "enabled" ) ); |
136 | } | 134 | } |
137 | 135 | ||
138 | /* Take a chunk of pages from page frame cstart to cend and count the number | ||
139 | * of pages in each zone, returned via zones[]. | ||
140 | */ | ||
141 | static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, | ||
142 | unsigned long *zones) | ||
143 | { | ||
144 | unsigned long max_dma; | ||
145 | extern unsigned long max_low_pfn; | ||
146 | |||
147 | int z; | ||
148 | unsigned long rend; | ||
149 | |||
150 | /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide | ||
151 | * similarly scoped information and should be handled in a consistent | |
152 | * manner. | ||
153 | */ | ||
154 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
155 | |||
156 | /* Split the hole into the zones in which it falls. Repeatedly | ||
157 | * take the segment in which the remaining hole starts, round it | ||
158 | * to the end of that zone. | ||
159 | */ | ||
160 | memset(zones, 0, MAX_NR_ZONES * sizeof(long)); | ||
161 | while (cstart < cend) { | ||
162 | if (cstart < max_dma) { | ||
163 | z = ZONE_DMA; | ||
164 | rend = (cend < max_dma)? cend : max_dma; | ||
165 | |||
166 | } else if (cstart < max_low_pfn) { | ||
167 | z = ZONE_NORMAL; | ||
168 | rend = (cend < max_low_pfn)? cend : max_low_pfn; | ||
169 | |||
170 | } else { | ||
171 | z = ZONE_HIGHMEM; | ||
172 | rend = cend; | ||
173 | } | ||
174 | zones[z] += rend - cstart; | ||
175 | cstart = rend; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | /* | 136 | /* |
180 | * The SRAT table always lists ascending addresses, so can always | 137 | * The SRAT table always lists ascending addresses, so can always |
181 | * assume that the first "start" address that you see is the real | 138 | * assume that the first "start" address that you see is the real |
@@ -220,7 +177,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | |||
220 | 177 | ||
221 | memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ | 178 | memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ |
222 | memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); | 179 | memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); |
223 | memset(zholes_size, 0, sizeof(zholes_size)); | ||
224 | 180 | ||
225 | num_memory_chunks = 0; | 181 | num_memory_chunks = 0; |
226 | while (p < end) { | 182 | while (p < end) { |
@@ -284,6 +240,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | |||
284 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | 240 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", |
285 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | 241 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); |
286 | node_read_chunk(chunk->nid, chunk); | 242 | node_read_chunk(chunk->nid, chunk); |
243 | add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); | ||
287 | } | 244 | } |
288 | 245 | ||
289 | for_each_online_node(nid) { | 246 | for_each_online_node(nid) { |
@@ -392,57 +349,7 @@ int __init get_memcfg_from_srat(void) | |||
392 | return acpi20_parse_srat((struct acpi_table_srat *)header); | 349 | return acpi20_parse_srat((struct acpi_table_srat *)header); |
393 | } | 350 | } |
394 | out_err: | 351 | out_err: |
352 | remove_all_active_ranges(); | ||
395 | printk("failed to get NUMA memory information from SRAT table\n"); | 353 | printk("failed to get NUMA memory information from SRAT table\n"); |
396 | return 0; | 354 | return 0; |
397 | } | 355 | } |
398 | |||
399 | /* For each node run the memory list to determine whether there are | ||
400 | * any memory holes. For each hole determine which ZONE they fall | ||
401 | * into. | ||
402 | * | ||
403 | * NOTE#1: this requires knowledge of the zone boundaries and so | |
404 | * _cannot_ be performed before those are calculated in setup_memory. | ||
405 | * | ||
406 | * NOTE#2: we rely on the fact that the memory chunks are ordered by | ||
407 | * start pfn number during setup. | ||
408 | */ | ||
409 | static void __init get_zholes_init(void) | ||
410 | { | ||
411 | int nid; | ||
412 | int c; | ||
413 | int first; | ||
414 | unsigned long end = 0; | ||
415 | |||
416 | for_each_online_node(nid) { | ||
417 | first = 1; | ||
418 | for (c = 0; c < num_memory_chunks; c++){ | ||
419 | if (node_memory_chunk[c].nid == nid) { | ||
420 | if (first) { | ||
421 | end = node_memory_chunk[c].end_pfn; | ||
422 | first = 0; | ||
423 | |||
424 | } else { | ||
425 | /* Record any gap between this chunk | ||
426 | * and the previous chunk on this node | ||
427 | * against the zones it spans. | ||
428 | */ | ||
429 | chunk_to_zones(end, | ||
430 | node_memory_chunk[c].start_pfn, | ||
431 | &zholes_size[nid * MAX_NR_ZONES]); | ||
432 | } | ||
433 | } | ||
434 | } | ||
435 | } | ||
436 | } | ||
437 | |||
438 | unsigned long * __init get_zholes_size(int nid) | ||
439 | { | ||
440 | if (!zholes_size_init) { | ||
441 | zholes_size_init++; | ||
442 | get_zholes_init(); | ||
443 | } | ||
444 | if (nid >= MAX_NUMNODES || !node_online(nid)) | ||
445 | printk("%s: nid = %d is invalid/offline. num_online_nodes = %d", | ||
446 | __FUNCTION__, nid, num_online_nodes()); | ||
447 | return &zholes_size[nid * MAX_NR_ZONES]; | ||
448 | } | ||
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 941d1a5ebabb..51e3739dd227 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c | |||
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int nid) | |||
157 | BUG(); | 157 | BUG(); |
158 | } | 158 | } |
159 | 159 | ||
160 | /* Find the owning node for a pfn. */ | ||
161 | int early_pfn_to_nid(unsigned long pfn) | ||
162 | { | ||
163 | int nid; | ||
164 | |||
165 | for_each_node(nid) { | ||
166 | if (node_end_pfn[nid] == 0) | ||
167 | break; | ||
168 | if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) | ||
169 | return nid; | ||
170 | } | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | /* | 160 | /* |
176 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem | 161 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem |
177 | * method. For node zero take this from the bottom of memory, for | 162 | * method. For node zero take this from the bottom of memory, for |
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void) | |||
227 | unsigned long pfn; | 212 | unsigned long pfn; |
228 | 213 | ||
229 | for_each_online_node(nid) { | 214 | for_each_online_node(nid) { |
215 | unsigned old_end_pfn = node_end_pfn[nid]; | ||
216 | |||
230 | /* | 217 | /* |
231 | * The acpi/srat node info can show hot-add memory zones | 219 | * The acpi/srat node info can show hot-add memory zones |
232 | * where memory could be added but not currently present. | 219 | * where memory could be added but not currently present. |
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void) | |||
276 | 263 | ||
277 | node_end_pfn[nid] -= size; | 264 | node_end_pfn[nid] -= size; |
278 | node_remap_start_pfn[nid] = node_end_pfn[nid]; | 265 | node_remap_start_pfn[nid] = node_end_pfn[nid]; |
266 | shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); | ||
279 | } | 267 | } |
280 | printk("Reserving total of %ld pages for numa KVA remap\n", | 268 | printk("Reserving total of %ld pages for numa KVA remap\n", |
281 | reserve_pages); | 269 | reserve_pages); |
@@ -369,45 +357,22 @@ void __init numa_kva_reserve(void) | |||
369 | void __init zone_sizes_init(void) | 357 | void __init zone_sizes_init(void) |
370 | { | 358 | { |
371 | int nid; | 359 | int nid; |
372 | 360 | unsigned long max_zone_pfns[MAX_NR_ZONES] = { | |
373 | 361 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, | |
374 | for_each_online_node(nid) { | 362 | max_low_pfn, |
375 | unsigned long zones_size[MAX_NR_ZONES] = {0, }; | 363 | highend_pfn |
376 | unsigned long *zholes_size; | 364 | }; |
377 | unsigned int max_dma; | 365 | |
378 | 366 | /* If SRAT has not registered memory, register it now */ | |
379 | unsigned long low = max_low_pfn; | 367 | if (find_max_pfn_with_active_regions() == 0) { |
380 | unsigned long start = node_start_pfn[nid]; | 368 | for_each_online_node(nid) { |
381 | unsigned long high = node_end_pfn[nid]; | 369 | if (node_has_online_mem(nid)) |
382 | 370 | add_active_range(nid, node_start_pfn[nid], | |
383 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 371 | node_end_pfn[nid]); |
384 | |||
385 | if (node_has_online_mem(nid)){ | ||
386 | if (start > low) { | ||
387 | #ifdef CONFIG_HIGHMEM | ||
388 | BUG_ON(start > high); | ||
389 | zones_size[ZONE_HIGHMEM] = high - start; | ||
390 | #endif | ||
391 | } else { | ||
392 | if (low < max_dma) | ||
393 | zones_size[ZONE_DMA] = low; | ||
394 | else { | ||
395 | BUG_ON(max_dma > low); | ||
396 | BUG_ON(low > high); | ||
397 | zones_size[ZONE_DMA] = max_dma; | ||
398 | zones_size[ZONE_NORMAL] = low - max_dma; | ||
399 | #ifdef CONFIG_HIGHMEM | ||
400 | zones_size[ZONE_HIGHMEM] = high - low; | ||
401 | #endif | ||
402 | } | ||
403 | } | ||
404 | } | 372 | } |
405 | |||
406 | zholes_size = get_zholes_size(nid); | ||
407 | |||
408 | free_area_init_node(nid, NODE_DATA(nid), zones_size, start, | ||
409 | zholes_size); | ||
410 | } | 373 | } |
374 | |||
375 | free_area_init_nodes(max_zone_pfns); | ||
411 | return; | 376 | return; |
412 | } | 377 | } |
413 | 378 | ||