 arch/x86/mm/numa.c |   15 +++++++++++++++
 include/linux/mm.h |    1 +
 mm/page_alloc.c    |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f5510d889a22..fbeaaf416610 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -496,6 +496,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
+	unsigned long uninitialized_var(pfn_align);
 	int i, nid;
 
 	/* Account for nodes with cpus and no memory */
@@ -511,6 +512,20 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	/* for out of order entries */
 	sort_node_map();
+
+	/*
+	 * If sections array is gonna be used for pfn -> nid mapping, check
+	 * whether its granularity is fine enough.
+	 */
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+	pfn_align = node_map_pfn_alignment();
+	if (pfn_align && pfn_align < PAGES_PER_SECTION) {
+		printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
+		       PFN_PHYS(pfn_align) >> 20,
+		       PFN_PHYS(PAGES_PER_SECTION) >> 20);
+		return -EINVAL;
+	}
+#endif
 	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
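The new check exists because, when NODE_NOT_IN_PAGE_FLAGS is defined, page->flags has no spare bits for the node id and sparsemem resolves page_to_nid() through a per-section lookup table instead, so node ownership can only be recorded at section granularity; an internode boundary finer than PAGES_PER_SECTION would misattribute pages near that boundary. Below is a minimal userspace sketch of the rejection arithmetic, not kernel code: the 4KiB page size and 128MiB section size mirror the x86_64 sparsemem defaults, and the 64MiB alignment returned by the stubbed node_map_pfn_alignment() is a made-up value chosen to trip the check.

#include <stdio.h>

#define PAGE_SHIFT		12	/* 4KiB pages, the x86_64 default */
#define SECTION_SIZE_BITS	27	/* 128MiB sections, as on x86_64 */
#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
#define PFN_PHYS(x)		((unsigned long long)(x) << PAGE_SHIFT)

/* stub: pretend the populated node map came out aligned to only 64MiB */
static unsigned long node_map_pfn_alignment(void)
{
	return (64UL << 20) >> PAGE_SHIFT;	/* 64MiB in pages */
}

int main(void)
{
	unsigned long pfn_align = node_map_pfn_alignment();

	/* same comparison and message as the hunk above */
	if (pfn_align && pfn_align < PAGES_PER_SECTION) {
		printf("Node alignment %lluMB < min %lluMB, rejecting NUMA config\n",
		       PFN_PHYS(pfn_align) >> 20,
		       PFN_PHYS(PAGES_PER_SECTION) >> 20);
		return 1;
	}
	printf("node map alignment is fine for section-based pfn -> nid lookup\n");
	return 0;
}

Compiled and run, this prints "Node alignment 64MB < min 128MB, rejecting NUMA config", the same warning the kernel would log before failing numa_register_memblks() with -EINVAL.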
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9670f71d7be9..c70a326b8f26 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1313,6 +1313,7 @@ extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 void sort_node_map(void);
+unsigned long node_map_pfn_alignment(void);
 unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e8985acdab8..9119faae6e6a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4585,6 +4585,60 @@ void __init sort_node_map(void)
 			cmp_node_active_region, NULL);
 }
 
+/**
+ * node_map_pfn_alignment - determine the maximum internode alignment
+ *
+ * This function should be called after node map is populated and sorted.
+ * It calculates the maximum power of two alignment which can distinguish
+ * all the nodes.
+ *
+ * For example, if all nodes are 1GiB and aligned to 1GiB, the return value
+ * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)).  If the
+ * nodes are shifted by 256MiB, 256MiB.  Note that if only the last node is
+ * shifted, 1GiB is enough and this function will indicate so.
+ *
+ * This is used to test whether pfn -> nid mapping of the chosen memory
+ * model has fine enough granularity to avoid incorrect mapping for the
+ * populated node map.
+ *
+ * Returns the determined alignment in pfn's.  0 if there is no alignment
+ * requirement (single node).
+ */
+unsigned long __init node_map_pfn_alignment(void)
+{
+	unsigned long accl_mask = 0, last_end = 0;
+	int last_nid = -1;
+	int i;
+
+	for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
+		int nid = early_node_map[i].nid;
+		unsigned long start = early_node_map[i].start_pfn;
+		unsigned long end = early_node_map[i].end_pfn;
+		unsigned long mask;
+
+		if (!start || last_nid < 0 || last_nid == nid) {
+			last_nid = nid;
+			last_end = end;
+			continue;
+		}
+
+		/*
+		 * Start with a mask granular enough to pin-point to the
+		 * start pfn and tick off bits one-by-one until it becomes
+		 * too coarse to separate the current node from the last.
+		 */
+		mask = ~((1 << __ffs(start)) - 1);
+		while (mask && last_end <= (start & (mask << 1)))
+			mask <<= 1;
+
+		/* accumulate all internode masks */
+		accl_mask |= mask;
+	}
+
+	/* convert mask to number of pages */
+	return ~accl_mask + 1;
+}
+
 /* Find the lowest pfn for a node */
 static unsigned long __init find_min_pfn_for_node(int nid)
 {
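To see the mask accumulation in node_map_pfn_alignment() in action, here is a standalone userspace port run over a made-up two-node layout matching the 256MiB-shift example from the comment block: two 1GiB nodes, both shifted up by 256MiB, so the boundary at 1280MiB needs 256MiB granularity. The kernel-only pieces are swapped for plain C: __ffs() becomes GCC's __builtin_ctzl(), for_each_active_range_index_in_nid() becomes a plain array walk, early_node_map is a hand-rolled fixture, and the shift uses 1UL to stay well-defined on 64-bit; otherwise the loop body follows the hunk above.

#include <stdio.h>

#define PAGE_SHIFT	12	/* assume 4KiB pages */
#define MB(x)		(((unsigned long)(x) << 20) >> PAGE_SHIFT)	/* MiB -> pages */

struct node_active_region {
	int nid;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

/* made-up fixture: two 1GiB nodes, both shifted up by 256MiB */
static struct node_active_region early_node_map[] = {
	{ .nid = 0, .start_pfn = MB(256),  .end_pfn = MB(1280) },
	{ .nid = 1, .start_pfn = MB(1280), .end_pfn = MB(2304) },
};

static unsigned long node_map_pfn_alignment(void)
{
	unsigned long accl_mask = 0, last_end = 0;
	int last_nid = -1;
	unsigned int i;

	for (i = 0; i < sizeof(early_node_map) / sizeof(early_node_map[0]); i++) {
		int nid = early_node_map[i].nid;
		unsigned long start = early_node_map[i].start_pfn;
		unsigned long end = early_node_map[i].end_pfn;
		unsigned long mask;

		if (!start || last_nid < 0 || last_nid == nid) {
			last_nid = nid;
			last_end = end;
			continue;
		}

		/*
		 * Finest mask that singles out 'start', coarsened until it
		 * would lump the previous node's last pfn in with this one.
		 */
		mask = ~((1UL << __builtin_ctzl(start)) - 1);
		while (mask && last_end <= (start & (mask << 1)))
			mask <<= 1;

		/* accumulate all internode masks */
		accl_mask |= mask;
	}

	/* convert mask to number of pages */
	return ~accl_mask + 1;
}

int main(void)
{
	unsigned long pfn_align = node_map_pfn_alignment();

	printf("alignment: %lu pages (%lu MiB)\n",
	       pfn_align, (pfn_align << PAGE_SHIFT) >> 20);
	return 0;
}

Running it prints an alignment of 65536 pages (256 MiB), matching the comment's example; since 256MiB is above the 128MiB section size assumed in the earlier sketch, this layout would pass the numa_register_memblks() check.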