diff options
-rw-r--r-- | arch/x86/include/asm/mmzone_32.h | 6 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 15 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 6 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 54 |
5 files changed, 76 insertions, 6 deletions
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ffa037f28d39..55728e121473 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -34,15 +34,15 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} | |||
34 | * 64Gb / 4096bytes/page = 16777216 pages | 34 | * 64Gb / 4096bytes/page = 16777216 pages |
35 | */ | 35 | */ |
36 | #define MAX_NR_PAGES 16777216 | 36 | #define MAX_NR_PAGES 16777216 |
37 | #define MAX_ELEMENTS 1024 | 37 | #define MAX_SECTIONS 1024 |
38 | #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) | 38 | #define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) |
39 | 39 | ||
40 | extern s8 physnode_map[]; | 40 | extern s8 physnode_map[]; |
41 | 41 | ||
42 | static inline int pfn_to_nid(unsigned long pfn) | 42 | static inline int pfn_to_nid(unsigned long pfn) |
43 | { | 43 | { |
44 | #ifdef CONFIG_NUMA | 44 | #ifdef CONFIG_NUMA |
45 | return((int) physnode_map[(pfn) / PAGES_PER_ELEMENT]); | 45 | return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); |
46 | #else | 46 | #else |
47 | return 0; | 47 | return 0; |
48 | #endif | 48 | #endif |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index f5510d889a22..fbeaaf416610 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -496,6 +496,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | |||
496 | 496 | ||
497 | static int __init numa_register_memblks(struct numa_meminfo *mi) | 497 | static int __init numa_register_memblks(struct numa_meminfo *mi) |
498 | { | 498 | { |
499 | unsigned long uninitialized_var(pfn_align); | ||
499 | int i, nid; | 500 | int i, nid; |
500 | 501 | ||
501 | /* Account for nodes with cpus and no memory */ | 502 | /* Account for nodes with cpus and no memory */ |
@@ -511,6 +512,20 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
511 | 512 | ||
512 | /* for out of order entries */ | 513 | /* for out of order entries */ |
513 | sort_node_map(); | 514 | sort_node_map(); |
515 | |||
516 | /* | ||
517 | * If sections array is gonna be used for pfn -> nid mapping, check | ||
518 | * whether its granularity is fine enough. | ||
519 | */ | ||
520 | #ifdef NODE_NOT_IN_PAGE_FLAGS | ||
521 | pfn_align = node_map_pfn_alignment(); | ||
522 | if (pfn_align && pfn_align < PAGES_PER_SECTION) { | ||
523 | printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", | ||
524 | PFN_PHYS(pfn_align) >> 20, | ||
525 | PFN_PHYS(PAGES_PER_SECTION) >> 20); | ||
526 | return -EINVAL; | ||
527 | } | ||
528 | #endif | ||
514 | if (!numa_meminfo_cover_memory(mi)) | 529 | if (!numa_meminfo_cover_memory(mi)) |
515 | return -EINVAL; | 530 | return -EINVAL; |
516 | 531 | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 849a975d3fa0..3adebe7e536a 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -41,7 +41,7 @@ | |||
41 | * physnode_map[16-31] = 1; | 41 | * physnode_map[16-31] = 1; |
42 | * physnode_map[32- ] = -1; | 42 | * physnode_map[32- ] = -1; |
43 | */ | 43 | */ |
44 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 44 | s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1}; |
45 | EXPORT_SYMBOL(physnode_map); | 45 | EXPORT_SYMBOL(physnode_map); |
46 | 46 | ||
47 | void memory_present(int nid, unsigned long start, unsigned long end) | 47 | void memory_present(int nid, unsigned long start, unsigned long end) |
@@ -52,8 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
52 | nid, start, end); | 52 | nid, start, end); |
53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); | 53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); |
54 | printk(KERN_DEBUG " "); | 54 | printk(KERN_DEBUG " "); |
55 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { | 55 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
56 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; | 56 | physnode_map[pfn / PAGES_PER_SECTION] = nid; |
57 | printk(KERN_CONT "%lx ", pfn); | 57 | printk(KERN_CONT "%lx ", pfn); |
58 | } | 58 | } |
59 | printk(KERN_CONT "\n"); | 59 | printk(KERN_CONT "\n"); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 9670f71d7be9..c70a326b8f26 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1313,6 +1313,7 @@ extern void remove_active_range(unsigned int nid, unsigned long start_pfn, | |||
1313 | unsigned long end_pfn); | 1313 | unsigned long end_pfn); |
1314 | extern void remove_all_active_ranges(void); | 1314 | extern void remove_all_active_ranges(void); |
1315 | void sort_node_map(void); | 1315 | void sort_node_map(void); |
1316 | unsigned long node_map_pfn_alignment(void); | ||
1316 | unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, | 1317 | unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, |
1317 | unsigned long end_pfn); | 1318 | unsigned long end_pfn); |
1318 | extern unsigned long absent_pages_in_range(unsigned long start_pfn, | 1319 | extern unsigned long absent_pages_in_range(unsigned long start_pfn, |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e8985acdab8..9119faae6e6a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -4585,6 +4585,60 @@ void __init sort_node_map(void) | |||
4585 | cmp_node_active_region, NULL); | 4585 | cmp_node_active_region, NULL); |
4586 | } | 4586 | } |
4587 | 4587 | ||
4588 | /** | ||
4589 | * node_map_pfn_alignment - determine the maximum internode alignment | ||
4590 | * | ||
4591 | * This function should be called after node map is populated and sorted. | ||
4592 | * It calculates the maximum power of two alignment which can distinguish | ||
4593 | * all the nodes. | ||
4594 | * | ||
4595 | * For example, if all nodes are 1GiB and aligned to 1GiB, the return value | ||
4596 | * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the | ||
4597 | * nodes are shifted by 256MiB, 256MiB. Note that if only the last node is | ||
4598 | * shifted, 1GiB is enough and this function will indicate so. | ||
4599 | * | ||
4600 | * This is used to test whether pfn -> nid mapping of the chosen memory | ||
4601 | * model has fine enough granularity to avoid incorrect mapping for the | ||
4602 | * populated node map. | ||
4603 | * | ||
4604 | * Returns the determined alignment in pfn's. 0 if there is no alignment | ||
4605 | * requirement (single node). | ||
4606 | */ | ||
4607 | unsigned long __init node_map_pfn_alignment(void) | ||
4608 | { | ||
4609 | unsigned long accl_mask = 0, last_end = 0; | ||
4610 | int last_nid = -1; | ||
4611 | int i; | ||
4612 | |||
4613 | for_each_active_range_index_in_nid(i, MAX_NUMNODES) { | ||
4614 | int nid = early_node_map[i].nid; | ||
4615 | unsigned long start = early_node_map[i].start_pfn; | ||
4616 | unsigned long end = early_node_map[i].end_pfn; | ||
4617 | unsigned long mask; | ||
4618 | |||
4619 | if (!start || last_nid < 0 || last_nid == nid) { | ||
4620 | last_nid = nid; | ||
4621 | last_end = end; | ||
4622 | continue; | ||
4623 | } | ||
4624 | |||
4625 | /* | ||
4626 | * Start with a mask granular enough to pin-point to the | ||
4627 | * start pfn and tick off bits one-by-one until it becomes | ||
4628 | * too coarse to separate the current node from the last. | ||
4629 | */ | ||
4630 | mask = ~((1 << __ffs(start)) - 1); | ||
4631 | while (mask && last_end <= (start & (mask << 1))) | ||
4632 | mask <<= 1; | ||
4633 | |||
4634 | /* accumulate all internode masks */ | ||
4635 | accl_mask |= mask; | ||
4636 | } | ||
4637 | |||
4638 | /* convert mask to number of pages */ | ||
4639 | return ~accl_mask + 1; | ||
4640 | } | ||
4641 | |||
4588 | /* Find the lowest pfn for a node */ | 4642 | /* Find the lowest pfn for a node */ |
4589 | static unsigned long __init find_min_pfn_for_node(int nid) | 4643 | static unsigned long __init find_min_pfn_for_node(int nid) |
4590 | { | 4644 | { |