diff options
author | Tejun Heo <tj@kernel.org> | 2011-04-04 18:23:55 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2011-04-06 20:57:27 -0400 |
commit | 0e9f93c1c04c8ab10cc564df54a7ad0f83c67796 (patch) | |
tree | 242d6e4d51866ef91b3c70bac4f3afdee31d0b21 /arch/x86/mm | |
parent | 82044c328d6f6b22882c2a936e487e6d2240817a (diff) |
x86-32, numa: Move lowmem address space reservation to init_alloc_remap()
Remap alloc init is done in the following stages.
1. init_alloc_remap() calculates how much memory is necessary for each
node and reserves node local memory.
2. initmem_init() collects how much each node needs and reserves a
single contiguous lowmem area which can contain all.
3. init_remap_allocator() initializes allocator parameters from the
determined lowmem address and per-node offsets.
4. Actual remap happens.
There is no reason for the lowmem remap areas to be reserved as a
single contiguous area in one go. The per-node areas don't interact with
each other and the memblock allocator will put them side-by-side anyway.
This patch breaks up the single lowmem address reservation, puts
per-node lowmem address reservation into init_alloc_remap() and
initializes allocator parameters directly in the function as all the
addresses are determined there. This merges steps 2 and 3 into 1.
While at it, remove now largely irrelevant comments in
init_alloc_remap().
This change causes the following behavior changes.
* Remap lowmem areas are allocated in smaller per-node chunks.
* Remap lowmem area reservation failure now makes future remap
allocations fail instead of panicking.
* Remap allocator initialization is less verbose.
Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-10-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/numa_32.c | 82 |
1 files changed, 25 insertions, 57 deletions
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index c127543372f5..12bb34c434ea 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES]; | |||
108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | 108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; |
109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
110 | 110 | ||
111 | static unsigned long kva_start_pfn; | ||
112 | static unsigned long kva_pages; | ||
113 | |||
114 | int __cpuinit numa_cpu_node(int cpu) | 111 | int __cpuinit numa_cpu_node(int cpu) |
115 | { | 112 | { |
116 | return apic->x86_32_numa_cpu_node(cpu); | 113 | return apic->x86_32_numa_cpu_node(cpu); |
@@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
266 | static __init unsigned long init_alloc_remap(int nid, unsigned long offset) | 263 | static __init unsigned long init_alloc_remap(int nid, unsigned long offset) |
267 | { | 264 | { |
268 | unsigned long size; | 265 | unsigned long size; |
269 | u64 node_pa; | 266 | u64 node_pa, remap_pa; |
267 | void *remap_va; | ||
270 | 268 | ||
271 | /* | 269 | /* |
272 | * The acpi/srat node info can show hot-add memroy zones where | 270 | * The acpi/srat node info can show hot-add memroy zones where |
@@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset) | |||
287 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); | 285 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); |
288 | size = ALIGN(size, LARGE_PAGE_BYTES); | 286 | size = ALIGN(size, LARGE_PAGE_BYTES); |
289 | 287 | ||
288 | /* allocate node memory and the lowmem remap area */ | ||
290 | node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT, | 289 | node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT, |
291 | (u64)node_end_pfn[nid] << PAGE_SHIFT, | 290 | (u64)node_end_pfn[nid] << PAGE_SHIFT, |
292 | size, LARGE_PAGE_BYTES); | 291 | size, LARGE_PAGE_BYTES); |
@@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset) | |||
295 | size, nid); | 294 | size, nid); |
296 | return 0; | 295 | return 0; |
297 | } | 296 | } |
297 | memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); | ||
298 | |||
299 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
300 | max_low_pfn << PAGE_SHIFT, | ||
301 | size, LARGE_PAGE_BYTES); | ||
302 | if (remap_pa == MEMBLOCK_ERROR) { | ||
303 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | ||
304 | size, nid); | ||
305 | memblock_x86_free_range(node_pa, node_pa + size); | ||
306 | return 0; | ||
307 | } | ||
308 | memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); | ||
309 | remap_va = phys_to_virt(remap_pa); | ||
298 | 310 | ||
311 | /* initialize remap allocator parameters */ | ||
312 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | ||
299 | node_remap_size[nid] = size >> PAGE_SHIFT; | 313 | node_remap_size[nid] = size >> PAGE_SHIFT; |
300 | node_remap_offset[nid] = offset; | 314 | node_remap_offset[nid] = offset; |
301 | printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", | ||
302 | size >> PAGE_SHIFT, nid, node_pa >> PAGE_SHIFT); | ||
303 | 315 | ||
304 | /* | 316 | node_remap_start_vaddr[nid] = remap_va; |
305 | * prevent kva address below max_low_pfn want it on system | 317 | node_remap_end_vaddr[nid] = remap_va + size; |
306 | * with less memory later. | 318 | node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE); |
307 | * layout will be: KVA address , KVA RAM | ||
308 | * | ||
309 | * we are supposed to only record the one less then | ||
310 | * max_low_pfn but we could have some hole in high memory, | ||
311 | * and it will only check page_is_ram(pfn) && | ||
312 | * !page_is_reserved_early(pfn) to decide to use it as free. | ||
313 | * So memblock_x86_reserve_range here, hope we don't run out | ||
314 | * of that array | ||
315 | */ | ||
316 | memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); | ||
317 | 319 | ||
318 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | 320 | printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", |
321 | nid, node_pa, node_pa + size, remap_va, remap_va + size); | ||
319 | 322 | ||
320 | return size >> PAGE_SHIFT; | 323 | return size >> PAGE_SHIFT; |
321 | } | 324 | } |
322 | 325 | ||
323 | static void init_remap_allocator(int nid) | ||
324 | { | ||
325 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | ||
326 | kva_start_pfn + node_remap_offset[nid]); | ||
327 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
328 | (node_remap_size[nid] * PAGE_SIZE); | ||
329 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
330 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
331 | |||
332 | printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, | ||
333 | (ulong) node_remap_start_vaddr[nid], | ||
334 | (ulong) node_remap_end_vaddr[nid]); | ||
335 | } | ||
336 | |||
337 | void __init initmem_init(void) | 326 | void __init initmem_init(void) |
338 | { | 327 | { |
339 | unsigned long reserve_pages = 0; | 328 | unsigned long reserve_pages = 0; |
@@ -352,25 +341,7 @@ void __init initmem_init(void) | |||
352 | 341 | ||
353 | for_each_online_node(nid) | 342 | for_each_online_node(nid) |
354 | reserve_pages += init_alloc_remap(nid, reserve_pages); | 343 | reserve_pages += init_alloc_remap(nid, reserve_pages); |
355 | kva_pages = roundup(reserve_pages, PTRS_PER_PTE); | 344 | |
356 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", | ||
357 | reserve_pages); | ||
358 | |||
359 | kva_start_pfn = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
360 | max_low_pfn << PAGE_SHIFT, | ||
361 | kva_pages << PAGE_SHIFT, | ||
362 | PTRS_PER_PTE << PAGE_SHIFT) >> PAGE_SHIFT; | ||
363 | if (kva_start_pfn == MEMBLOCK_ERROR) | ||
364 | panic("Can not get kva space\n"); | ||
365 | |||
366 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", | ||
367 | kva_start_pfn, max_low_pfn); | ||
368 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); | ||
369 | |||
370 | /* avoid clash with initrd */ | ||
371 | memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, | ||
372 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | ||
373 | "KVA PG"); | ||
374 | #ifdef CONFIG_HIGHMEM | 345 | #ifdef CONFIG_HIGHMEM |
375 | highstart_pfn = highend_pfn = max_pfn; | 346 | highstart_pfn = highend_pfn = max_pfn; |
376 | if (max_pfn > max_low_pfn) | 347 | if (max_pfn > max_low_pfn) |
@@ -390,11 +361,8 @@ void __init initmem_init(void) | |||
390 | 361 | ||
391 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", | 362 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", |
392 | (ulong) pfn_to_kaddr(max_low_pfn)); | 363 | (ulong) pfn_to_kaddr(max_low_pfn)); |
393 | for_each_online_node(nid) { | 364 | for_each_online_node(nid) |
394 | init_remap_allocator(nid); | ||
395 | |||
396 | allocate_pgdat(nid); | 365 | allocate_pgdat(nid); |
397 | } | ||
398 | remap_numa_kva(); | 366 | remap_numa_kva(); |
399 | 367 | ||
400 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", | 368 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", |