author		Benjamin Herrenschmidt <benh@kernel.crashing.org>	2010-07-06 18:39:16 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2010-08-04 22:56:23 -0400
commit		c196f76fd5ece716ee3b7fa5dda3576961c0cecc
tree		7687dbae04327ed56bec60b21667eea66a9e52b4
parent		fef501d49d31f997a3381b6c1efd5bca382b6b6f
memblock: NUMA allocate can now use early_pfn_map
We now provide a default (weak) implementation of memblock_nid_range() which uses the early_pfn_map[] if CONFIG_ARCH_POPULATES_NODE_MAP is set. Sparc still needs to use its own method due to the way the pages can be scattered between nodes.

This implementation is inefficient because our main algorithm and callback construct want to work on an ascending-address basis, while early_pfn_map[] would rather be walked by nid (it is unsorted at that stage). But it should work, and we can look into improving it subsequently, possibly using arch compile options to choose a different algorithm altogether.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
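Because the default is declared __weak, an architecture such as sparc keeps its own behaviour simply by linking in a strong definition of the same symbol, which then takes precedence. A minimal sketch of that override pattern (the two helpers are hypothetical placeholders, not sparc's actual lookup code):

/* Hypothetical arch override: a strong definition replaces the
 * weak default from mm/memblock.c at link time.
 */
phys_addr_t __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
{
	/* Map 'start' to its node (hypothetical helper)... */
	*nid = arch_phys_to_nid(start);

	/* ...and return the end of the contiguous same-node range
	 * containing 'start', capped at 'end' (hypothetical helper).
	 */
	return min(end, arch_node_range_end(start));
}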
-rw-r--r--	include/linux/memblock.h	3
-rw-r--r--	mm/memblock.c			28
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e5e8f9db3a84..82b030244aa7 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -47,6 +47,9 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size);
 extern long __init memblock_free(phys_addr_t base, phys_addr_t size);
 extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size);
 
+/* The numa aware allocator is only available if
+ * CONFIG_ARCH_POPULATES_NODE_MAP is set
+ */
 extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 468ff43a72b4..af7e4d9cf400 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/poison.h>
+#include <linux/pfn.h>
 #include <linux/memblock.h>
 
 struct memblock memblock;
@@ -451,11 +452,36 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
 /*
  * Additional node-local allocators. Search for node memory is bottom up
  * and walks memblock regions within that node bottom-up as well, but allocation
- * within an memblock region is top-down.
+ * within an memblock region is top-down. XXX I plan to fix that at some stage
+ *
+ * WARNING: Only available after early_node_map[] has been populated,
+ * on some architectures, that is after all the calls to add_active_range()
+ * have been done to populate it.
  */
 
 phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
 {
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+	/*
+	 * This code originates from sparc which really wants us to walk by addresses
+	 * and returns the nid. This is not very convenient for early_pfn_map[] users
+	 * as the map isn't sorted yet, and it really wants to be walked by nid.
+	 *
+	 * For now, I implement the inefficient method below which walks the early
+	 * map multiple times. Eventually we may want to use an ARCH config option
+	 * to implement a completely different method for both cases.
+	 */
+	unsigned long start_pfn, end_pfn;
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		get_pfn_range_for_nid(i, &start_pfn, &end_pfn);
+		if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn))
+			continue;
+		*nid = i;
+		return min(end, PFN_PHYS(end_pfn));
+	}
+#endif
 	*nid = 0;
 
 	return end;
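For context, the ascending-address "callback construct" the commit message refers to is the consumer of this function: the node-aware allocator walks a candidate region one same-node chunk at a time. A simplified sketch of that walk (not the literal mm/memblock.c code; try_alloc_in() is a hypothetical stand-in for the per-range allocator):

static phys_addr_t __init alloc_nid_within(phys_addr_t start, phys_addr_t end,
					   phys_addr_t size, phys_addr_t align,
					   int nid)
{
	while (start < end) {
		int this_nid;
		/* End of the contiguous chunk of this_nid's memory at 'start' */
		phys_addr_t this_end = memblock_nid_range(start, end, &this_nid);

		if (this_nid == nid) {
			/* Hypothetical helper: top-down allocation within
			 * [start, this_end); returns 0 on failure.
			 */
			phys_addr_t found = try_alloc_in(start, this_end, size, align);
			if (found)
				return found;
		}
		start = this_end;	/* skip to the next node's chunk */
	}
	return 0;
}

With the weak default above, each memblock_nid_range() call scans early_pfn_map[] via get_pfn_range_for_nid() for every node, which is why the commit message calls the combination inefficient: the map is keyed by nid, but the walk is keyed by address.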