diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-06-13 22:08:52 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-08 04:36:29 -0400 |
commit | cc1050bafebfb1d7935331282e948b5016318192 (patch) | |
tree | 7e9e6935ed4a18afb63dedbcd808657c683265f5 | |
parent | d2dbf343329dc777d77488743465f7be4245971d (diff) |
x86: replace shrink_active_range() with remove_active_range()
In case we have the KVA area before the ramdisk on a node, we still need to
be able to use those ranges.
v2: reserve_early() the KVA RAM area, in case there are holes in highmem, so
that those areas are not treated as free high pages.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/mm/discontig_32.c | 45 | ||||
-rw-r--r-- | include/linux/mm.h | 3 | ||||
-rw-r--r-- | mm/page_alloc.c | 29 |
3 files changed, 49 insertions, 28 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index accc7c6c57fc..c3f119e99e0d 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -230,8 +230,8 @@ static unsigned long calculate_numa_remap_pages(void) | |||
230 | unsigned long size, reserve_pages = 0; | 230 | unsigned long size, reserve_pages = 0; |
231 | 231 | ||
232 | for_each_online_node(nid) { | 232 | for_each_online_node(nid) { |
233 | u64 node_end_target; | 233 | u64 node_kva_target; |
234 | u64 node_end_final; | 234 | u64 node_kva_final; |
235 | 235 | ||
236 | /* | 236 | /* |
237 | * The acpi/srat node info can show hot-add memroy zones | 237 | * The acpi/srat node info can show hot-add memroy zones |
@@ -254,42 +254,45 @@ static unsigned long calculate_numa_remap_pages(void) | |||
254 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 254 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
255 | size = size * PTRS_PER_PTE; | 255 | size = size * PTRS_PER_PTE; |
256 | 256 | ||
257 | node_end_target = round_down(node_end_pfn[nid] - size, | 257 | node_kva_target = round_down(node_end_pfn[nid] - size, |
258 | PTRS_PER_PTE); | 258 | PTRS_PER_PTE); |
259 | node_end_target <<= PAGE_SHIFT; | 259 | node_kva_target <<= PAGE_SHIFT; |
260 | do { | 260 | do { |
261 | node_end_final = find_e820_area(node_end_target, | 261 | node_kva_final = find_e820_area(node_kva_target, |
262 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, | 262 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
263 | ((u64)size)<<PAGE_SHIFT, | 263 | ((u64)size)<<PAGE_SHIFT, |
264 | LARGE_PAGE_BYTES); | 264 | LARGE_PAGE_BYTES); |
265 | node_end_target -= LARGE_PAGE_BYTES; | 265 | node_kva_target -= LARGE_PAGE_BYTES; |
266 | } while (node_end_final == -1ULL && | 266 | } while (node_kva_final == -1ULL && |
267 | (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | 267 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); |
268 | 268 | ||
269 | if (node_end_final == -1ULL) | 269 | if (node_kva_final == -1ULL) |
270 | panic("Can not get kva ram\n"); | 270 | panic("Can not get kva ram\n"); |
271 | 271 | ||
272 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", | ||
273 | size, nid); | ||
274 | node_remap_size[nid] = size; | 272 | node_remap_size[nid] = size; |
275 | node_remap_offset[nid] = reserve_pages; | 273 | node_remap_offset[nid] = reserve_pages; |
276 | reserve_pages += size; | 274 | reserve_pages += size; |
277 | printk("Shrinking node %d from %ld pages to %lld pages\n", | 275 | printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", |
278 | nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT); | 276 | size, nid, node_kva_final>>PAGE_SHIFT); |
279 | 277 | ||
280 | /* | 278 | /* |
281 | * prevent kva address below max_low_pfn want it on system | 279 | * prevent kva address below max_low_pfn want it on system |
282 | * with less memory later. | 280 | * with less memory later. |
283 | * layout will be: KVA address , KVA RAM | 281 | * layout will be: KVA address , KVA RAM |
282 | * | ||
283 | * we are supposed to only record the one less then max_low_pfn | ||
284 | * but we could have some hole in high memory, and it will only | ||
285 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide | ||
286 | * to use it as free. | ||
287 | * So reserve_early here, hope we don't run out of that array | ||
284 | */ | 288 | */ |
285 | if ((node_end_final>>PAGE_SHIFT) < max_low_pfn) | 289 | reserve_early(node_kva_final, |
286 | reserve_early(node_end_final, | 290 | node_kva_final+(((u64)size)<<PAGE_SHIFT), |
287 | node_end_final+(((u64)size)<<PAGE_SHIFT), | 291 | "KVA RAM"); |
288 | "KVA RAM"); | 292 | |
289 | 293 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; | |
290 | node_end_pfn[nid] = node_end_final>>PAGE_SHIFT; | 294 | remove_active_range(nid, node_remap_start_pfn[nid], |
291 | node_remap_start_pfn[nid] = node_end_pfn[nid]; | 295 | node_remap_start_pfn[nid] + size); |
292 | shrink_active_range(nid, node_end_pfn[nid]); | ||
293 | } | 296 | } |
294 | printk("Reserving total of %ld pages for numa KVA remap\n", | 297 | printk("Reserving total of %ld pages for numa KVA remap\n", |
295 | reserve_pages); | 298 | reserve_pages); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index ce8e397a61f6..034a3156d2f0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, | |||
998 | extern void free_area_init_nodes(unsigned long *max_zone_pfn); | 998 | extern void free_area_init_nodes(unsigned long *max_zone_pfn); |
999 | extern void add_active_range(unsigned int nid, unsigned long start_pfn, | 999 | extern void add_active_range(unsigned int nid, unsigned long start_pfn, |
1000 | unsigned long end_pfn); | 1000 | unsigned long end_pfn); |
1001 | extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); | 1001 | extern void remove_active_range(unsigned int nid, unsigned long start_pfn, |
1002 | unsigned long end_pfn); | ||
1002 | extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, | 1003 | extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, |
1003 | unsigned long end_pfn); | 1004 | unsigned long end_pfn); |
1004 | extern void remove_all_active_ranges(void); | 1005 | extern void remove_all_active_ranges(void); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eee5ba7509c1..d80e1868e570 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -3552,30 +3552,47 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, | |||
3552 | } | 3552 | } |
3553 | 3553 | ||
3554 | /** | 3554 | /** |
3555 | * shrink_active_range - Shrink an existing registered range of PFNs | 3555 | * remove_active_range - Shrink an existing registered range of PFNs |
3556 | * @nid: The node id the range is on that should be shrunk | 3556 | * @nid: The node id the range is on that should be shrunk |
3557 | * @new_end_pfn: The new PFN of the range | 3557 | * @start_pfn: The new PFN of the range |
3558 | * @end_pfn: The new PFN of the range | ||
3558 | * | 3559 | * |
3559 | * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. | 3560 | * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. |
3560 | * The map is kept near the end physical page range that has already been | 3561 | * The map is kept near the end physical page range that has already been |
3561 | * registered. This function allows an arch to shrink an existing registered | 3562 | * registered. This function allows an arch to shrink an existing registered |
3562 | * range. | 3563 | * range. |
3563 | */ | 3564 | */ |
3564 | void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn) | 3565 | void __init remove_active_range(unsigned int nid, unsigned long start_pfn, |
3566 | unsigned long end_pfn) | ||
3565 | { | 3567 | { |
3566 | int i, j; | 3568 | int i, j; |
3567 | int removed = 0; | 3569 | int removed = 0; |
3568 | 3570 | ||
3571 | printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n", | ||
3572 | nid, start_pfn, end_pfn); | ||
3573 | |||
3569 | /* Find the old active region end and shrink */ | 3574 | /* Find the old active region end and shrink */ |
3570 | for_each_active_range_index_in_nid(i, nid) { | 3575 | for_each_active_range_index_in_nid(i, nid) { |
3571 | if (early_node_map[i].start_pfn >= new_end_pfn) { | 3576 | if (early_node_map[i].start_pfn >= start_pfn && |
3577 | early_node_map[i].end_pfn <= end_pfn) { | ||
3572 | /* clear it */ | 3578 | /* clear it */ |
3579 | early_node_map[i].start_pfn = 0; | ||
3573 | early_node_map[i].end_pfn = 0; | 3580 | early_node_map[i].end_pfn = 0; |
3574 | removed = 1; | 3581 | removed = 1; |
3575 | continue; | 3582 | continue; |
3576 | } | 3583 | } |
3577 | if (early_node_map[i].end_pfn > new_end_pfn) { | 3584 | if (early_node_map[i].start_pfn < start_pfn && |
3578 | early_node_map[i].end_pfn = new_end_pfn; | 3585 | early_node_map[i].end_pfn > start_pfn) { |
3586 | unsigned long temp_end_pfn = early_node_map[i].end_pfn; | ||
3587 | early_node_map[i].end_pfn = start_pfn; | ||
3588 | if (temp_end_pfn > end_pfn) | ||
3589 | add_active_range(nid, end_pfn, temp_end_pfn); | ||
3590 | continue; | ||
3591 | } | ||
3592 | if (early_node_map[i].start_pfn >= start_pfn && | ||
3593 | early_node_map[i].end_pfn > end_pfn && | ||
3594 | early_node_map[i].start_pfn < end_pfn) { | ||
3595 | early_node_map[i].start_pfn = end_pfn; | ||
3579 | continue; | 3596 | continue; |
3580 | } | 3597 | } |
3581 | } | 3598 | } |