aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2009-02-18 17:48:32 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-02-18 18:37:55 -0500
commitf2dbcfa738368c8a40d4a5f0b65dc9879577cb21 (patch)
treebdea32c637fa572a9c356cddd202a57530b2a45c
parentada723dcd681e2dffd7d73345cc8fda0eb0df9bd (diff)
mm: clean up for early_pfn_to_nid()
What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declarations of early_pfn_to_nid() are scattered over per-arch include files, and it is hard to tell which declaration is in effect for a given configuration. This makes the memmap-init fix harder than it needs to be.
This patch moves all declarations to include/linux/mm.h. After this, if !CONFIG_ARCH_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> the static definition in include/linux/mm.h is used; else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> the generic definition in mm/page_alloc.c is used; else -> the per-arch back-end function is called. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reported-by: David Miller <davem@davemloft.net> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/ia64/include/asm/mmzone.h4
-rw-r--r--arch/ia64/mm/numa.c2
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mmzone_64.h2
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--include/linux/mm.h19
-rw-r--r--mm/page_alloc.c8
7 files changed, 25 insertions, 14 deletions
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h
index 34efe88eb849..f2ca32069b3f 100644
--- a/arch/ia64/include/asm/mmzone.h
+++ b/arch/ia64/include/asm/mmzone.h
@@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn)
31#endif 31#endif
32} 32}
33 33
34#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
35extern int early_pfn_to_nid(unsigned long pfn);
36#endif
37
38#ifdef CONFIG_IA64_DIG /* DIG systems are small */ 34#ifdef CONFIG_IA64_DIG /* DIG systems are small */
39# define MAX_PHYSNODE_ID 8 35# define MAX_PHYSNODE_ID 8
40# define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) 36# define NR_NODE_MEMBLKS (MAX_NUMNODES * 8)
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index b73bf1838e57..5061c3fb6796 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr)
58 * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where 58 * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
59 * the section resides. 59 * the section resides.
60 */ 60 */
61int early_pfn_to_nid(unsigned long pfn) 61int __meminit __early_pfn_to_nid(unsigned long pfn)
62{ 62{
63 int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; 63 int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
64 64
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca5..105fb90a0635 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
32 get_memcfg_numa_flat(); 32 get_memcfg_numa_flat();
33} 33}
34 34
35extern int early_pfn_to_nid(unsigned long pfn);
36
37extern void resume_map_numa_kva(pgd_t *pgd); 35extern void resume_map_numa_kva(pgd_t *pgd);
38 36
39#else /* !CONFIG_NUMA */ 37#else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9e..a29f48c2a322 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42 42
43extern int early_pfn_to_nid(unsigned long pfn);
44
45#ifdef CONFIG_NUMA_EMU 43#ifdef CONFIG_NUMA_EMU
46#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) 44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
47#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
145 return shift; 145 return shift;
146} 146}
147 147
148int early_pfn_to_nid(unsigned long pfn) 148int __meminit __early_pfn_to_nid(unsigned long pfn)
149{ 149{
150 return phys_to_nid(pfn << PAGE_SHIFT); 150 return phys_to_nid(pfn << PAGE_SHIFT);
151} 151}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 10074212a35b..065cdf8c09fb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid,
1041typedef int (*work_fn_t)(unsigned long, unsigned long, void *); 1041typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1042extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); 1042extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1043extern void sparse_memory_present_with_active_regions(int nid); 1043extern void sparse_memory_present_with_active_regions(int nid);
1044#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
1045extern int early_pfn_to_nid(unsigned long pfn);
1046#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
1047#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 1044#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
1045
1046#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
1047 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
1048static inline int __early_pfn_to_nid(unsigned long pfn)
1049{
1050 return 0;
1051}
1052#else
1053/* please see mm/page_alloc.c */
1054extern int __meminit early_pfn_to_nid(unsigned long pfn);
1055#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
1056/* there is a per-arch backend function. */
1057extern int __meminit __early_pfn_to_nid(unsigned long pfn);
1058#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
1059#endif
1060
1048extern void set_dma_reserve(unsigned long new_dma_reserve); 1061extern void set_dma_reserve(unsigned long new_dma_reserve);
1049extern void memmap_init_zone(unsigned long, int, unsigned long, 1062extern void memmap_init_zone(unsigned long, int, unsigned long,
1050 unsigned long, enum memmap_context); 1063 unsigned long, enum memmap_context);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5675b3073854..c5dd74602efc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
2989 * was used and there are no special requirements, this is a convenient 2989 * was used and there are no special requirements, this is a convenient
2990 * alternative 2990 * alternative
2991 */ 2991 */
2992int __meminit early_pfn_to_nid(unsigned long pfn) 2992int __meminit __early_pfn_to_nid(unsigned long pfn)
2993{ 2993{
2994 int i; 2994 int i;
2995 2995
@@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
3005} 3005}
3006#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ 3006#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
3007 3007
3008int __meminit early_pfn_to_nid(unsigned long pfn)
3009{
3010 return __early_pfn_to_nid(pfn);
3011}
3012
3013
3008/* Basic iterator support to walk early_node_map[] */ 3014/* Basic iterator support to walk early_node_map[] */
3009#define for_each_active_range_index_in_nid(i, nid) \ 3015#define for_each_active_range_index_in_nid(i, nid) \
3010 for (i = first_active_region_index_in_nid(nid); i != -1; \ 3016 for (i = first_active_region_index_in_nid(nid); i != -1; \