aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/mmzone_32.h6
-rw-r--r--arch/x86/mm/numa.c15
-rw-r--r--arch/x86/mm/numa_32.c6
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/page_alloc.c54
5 files changed, 76 insertions, 6 deletions
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ffa037f28d39..55728e121473 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,15 +34,15 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {}
34 * 64Gb / 4096bytes/page = 16777216 pages 34 * 64Gb / 4096bytes/page = 16777216 pages
35 */ 35 */
36#define MAX_NR_PAGES 16777216 36#define MAX_NR_PAGES 16777216
37#define MAX_ELEMENTS 1024 37#define MAX_SECTIONS 1024
38#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) 38#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS)
39 39
40extern s8 physnode_map[]; 40extern s8 physnode_map[];
41 41
42static inline int pfn_to_nid(unsigned long pfn) 42static inline int pfn_to_nid(unsigned long pfn)
43{ 43{
44#ifdef CONFIG_NUMA 44#ifdef CONFIG_NUMA
45 return((int) physnode_map[(pfn) / PAGES_PER_ELEMENT]); 45 return((int) physnode_map[(pfn) / PAGES_PER_SECTION]);
46#else 46#else
47 return 0; 47 return 0;
48#endif 48#endif
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f5510d889a22..fbeaaf416610 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -496,6 +496,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
496 496
497static int __init numa_register_memblks(struct numa_meminfo *mi) 497static int __init numa_register_memblks(struct numa_meminfo *mi)
498{ 498{
499 unsigned long uninitialized_var(pfn_align);
499 int i, nid; 500 int i, nid;
500 501
501 /* Account for nodes with cpus and no memory */ 502 /* Account for nodes with cpus and no memory */
@@ -511,6 +512,20 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
511 512
512 /* for out of order entries */ 513 /* for out of order entries */
513 sort_node_map(); 514 sort_node_map();
515
516 /*
517 * If sections array is gonna be used for pfn -> nid mapping, check
518 * whether its granularity is fine enough.
519 */
520#ifdef NODE_NOT_IN_PAGE_FLAGS
521 pfn_align = node_map_pfn_alignment();
522 if (pfn_align && pfn_align < PAGES_PER_SECTION) {
523 printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
524 PFN_PHYS(pfn_align) >> 20,
525 PFN_PHYS(PAGES_PER_SECTION) >> 20);
526 return -EINVAL;
527 }
528#endif
514 if (!numa_meminfo_cover_memory(mi)) 529 if (!numa_meminfo_cover_memory(mi))
515 return -EINVAL; 530 return -EINVAL;
516 531
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 849a975d3fa0..3adebe7e536a 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -41,7 +41,7 @@
41 * physnode_map[16-31] = 1; 41 * physnode_map[16-31] = 1;
42 * physnode_map[32- ] = -1; 42 * physnode_map[32- ] = -1;
43 */ 43 */
44s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; 44s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1};
45EXPORT_SYMBOL(physnode_map); 45EXPORT_SYMBOL(physnode_map);
46 46
47void memory_present(int nid, unsigned long start, unsigned long end) 47void memory_present(int nid, unsigned long start, unsigned long end)
@@ -52,8 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end)
52 nid, start, end); 52 nid, start, end);
53 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); 53 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
54 printk(KERN_DEBUG " "); 54 printk(KERN_DEBUG " ");
55 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { 55 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
56 physnode_map[pfn / PAGES_PER_ELEMENT] = nid; 56 physnode_map[pfn / PAGES_PER_SECTION] = nid;
57 printk(KERN_CONT "%lx ", pfn); 57 printk(KERN_CONT "%lx ", pfn);
58 } 58 }
59 printk(KERN_CONT "\n"); 59 printk(KERN_CONT "\n");
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9670f71d7be9..c70a326b8f26 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1313,6 +1313,7 @@ extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
1313 unsigned long end_pfn); 1313 unsigned long end_pfn);
1314extern void remove_all_active_ranges(void); 1314extern void remove_all_active_ranges(void);
1315void sort_node_map(void); 1315void sort_node_map(void);
1316unsigned long node_map_pfn_alignment(void);
1316unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, 1317unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
1317 unsigned long end_pfn); 1318 unsigned long end_pfn);
1318extern unsigned long absent_pages_in_range(unsigned long start_pfn, 1319extern unsigned long absent_pages_in_range(unsigned long start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e8985acdab8..9119faae6e6a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4585,6 +4585,60 @@ void __init sort_node_map(void)
4585 cmp_node_active_region, NULL); 4585 cmp_node_active_region, NULL);
4586} 4586}
4587 4587
4588/**
4589 * node_map_pfn_alignment - determine the maximum internode alignment
4590 *
4591 * This function should be called after node map is populated and sorted.
4592 * It calculates the maximum power of two alignment which can distinguish
4593 * all the nodes.
4594 *
4595 * For example, if all nodes are 1GiB and aligned to 1GiB, the return value
4596 * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the
4597 * nodes are shifted by 256MiB, 256MiB. Note that if only the last node is
4598 * shifted, 1GiB is enough and this function will indicate so.
4599 *
4600 * This is used to test whether pfn -> nid mapping of the chosen memory
4601 * model has fine enough granularity to avoid incorrect mapping for the
4602 * populated node map.
4603 *
4604 * Returns the determined alignment in pfn's. 0 if there is no alignment
4605 * requirement (single node).
4606 */
4607unsigned long __init node_map_pfn_alignment(void)
4608{
4609 unsigned long accl_mask = 0, last_end = 0;
4610 int last_nid = -1;
4611 int i;
4612
4613 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4614 int nid = early_node_map[i].nid;
4615 unsigned long start = early_node_map[i].start_pfn;
4616 unsigned long end = early_node_map[i].end_pfn;
4617 unsigned long mask;
4618
4619 if (!start || last_nid < 0 || last_nid == nid) {
4620 last_nid = nid;
4621 last_end = end;
4622 continue;
4623 }
4624
4625 /*
4626 * Start with a mask granular enough to pin-point to the
4627 * start pfn and tick off bits one-by-one until it becomes
4628 * too coarse to separate the current node from the last.
4629 */
4630 mask = ~((1 << __ffs(start)) - 1);
4631 while (mask && last_end <= (start & (mask << 1)))
4632 mask <<= 1;
4633
4634 /* accumulate all internode masks */
4635 accl_mask |= mask;
4636 }
4637
4638 /* convert mask to number of pages */
4639 return ~accl_mask + 1;
4640}
4641
4588/* Find the lowest pfn for a node */ 4642/* Find the lowest pfn for a node */
4589static unsigned long __init find_min_pfn_for_node(int nid) 4643static unsigned long __init find_min_pfn_for_node(int nid)
4590{ 4644{