diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-02-18 17:48:32 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-02-18 18:37:55 -0500 |
commit | f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 (patch) | |
tree | bdea32c637fa572a9c356cddd202a57530b2a45c | |
parent | ada723dcd681e2dffd7d73345cc8fda0eb0df9bd (diff) |
mm: clean up for early_pfn_to_nid()
What's happening is that the assertion in mm/page_alloc.c:move_freepages()
is triggering:
BUG_ON(page_zone(start_page) != page_zone(end_page));
Once I knew this is what was happening, I added some annotations:
if (unlikely(page_zone(start_page) != page_zone(end_page))) {
printk(KERN_ERR "move_freepages: Bogus zones: "
"start_page[%p] end_page[%p] zone[%p]\n",
start_page, end_page, zone);
printk(KERN_ERR "move_freepages: "
"start_zone[%p] end_zone[%p]\n",
page_zone(start_page), page_zone(end_page));
printk(KERN_ERR "move_freepages: "
"start_pfn[0x%lx] end_pfn[0x%lx]\n",
page_to_pfn(start_page), page_to_pfn(end_page));
printk(KERN_ERR "move_freepages: "
"start_nid[%d] end_nid[%d]\n",
page_to_nid(start_page), page_to_nid(end_page));
...
And here's what I got:
move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00]
move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00]
move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff]
move_freepages: start_nid[1] end_nid[0]
My memory layout on this box is:
[ 0.000000] Zone PFN ranges:
[ 0.000000] Normal 0x00000000 -> 0x0081ff5d
[ 0.000000] Movable zone start PFN for each node
[ 0.000000] early_node_map[8] active PFN ranges
[ 0.000000] 0: 0x00000000 -> 0x00020000
[ 0.000000] 1: 0x00800000 -> 0x0081f7ff
[ 0.000000] 1: 0x0081f800 -> 0x0081fe50
[ 0.000000] 1: 0x0081fed1 -> 0x0081fed8
[ 0.000000] 1: 0x0081feda -> 0x0081fedb
[ 0.000000] 1: 0x0081fedd -> 0x0081fee5
[ 0.000000] 1: 0x0081fee7 -> 0x0081ff51
[ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d
So it's a block move in that 0x81f600-->0x81f7ff region which triggers
the problem.
This patch:
Declaration of early_pfn_to_nid() is scattered over per-arch include
files, and it seems it's complicated to know when the declaration is used.
I think it makes fix-for-memmap-init not easy.
This patch moves all declaration to include/linux/mm.h
After this,
if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-> Use static definition in include/linux/mm.h
else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-> Use generic definition in mm/page_alloc.c
else
-> per-arch back end function will be called.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reported-by: David Miller <davem@davemlloft.net>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/ia64/include/asm/mmzone.h | 4 | ||||
-rw-r--r-- | arch/ia64/mm/numa.c | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mmzone_32.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mmzone_64.h | 2 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 19 | ||||
-rw-r--r-- | mm/page_alloc.c | 8 |
7 files changed, 25 insertions, 14 deletions
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h index 34efe88eb849..f2ca32069b3f 100644 --- a/arch/ia64/include/asm/mmzone.h +++ b/arch/ia64/include/asm/mmzone.h | |||
@@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn) | |||
31 | #endif | 31 | #endif |
32 | } | 32 | } |
33 | 33 | ||
34 | #ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | ||
35 | extern int early_pfn_to_nid(unsigned long pfn); | ||
36 | #endif | ||
37 | |||
38 | #ifdef CONFIG_IA64_DIG /* DIG systems are small */ | 34 | #ifdef CONFIG_IA64_DIG /* DIG systems are small */ |
39 | # define MAX_PHYSNODE_ID 8 | 35 | # define MAX_PHYSNODE_ID 8 |
40 | # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) | 36 | # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) |
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index b73bf1838e57..5061c3fb6796 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c | |||
@@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr) | |||
58 | * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where | 58 | * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where |
59 | * the section resides. | 59 | * the section resides. |
60 | */ | 60 | */ |
61 | int early_pfn_to_nid(unsigned long pfn) | 61 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
62 | { | 62 | { |
63 | int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; | 63 | int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; |
64 | 64 | ||
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca5..105fb90a0635 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) | |||
32 | get_memcfg_numa_flat(); | 32 | get_memcfg_numa_flat(); |
33 | } | 33 | } |
34 | 34 | ||
35 | extern int early_pfn_to_nid(unsigned long pfn); | ||
36 | |||
37 | extern void resume_map_numa_kva(pgd_t *pgd); | 35 | extern void resume_map_numa_kva(pgd_t *pgd); |
38 | 36 | ||
39 | #else /* !CONFIG_NUMA */ | 37 | #else /* !CONFIG_NUMA */ |
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9e..a29f48c2a322 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h | |||
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | |||
40 | #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ | 40 | #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ |
41 | NODE_DATA(nid)->node_spanned_pages) | 41 | NODE_DATA(nid)->node_spanned_pages) |
42 | 42 | ||
43 | extern int early_pfn_to_nid(unsigned long pfn); | ||
44 | |||
45 | #ifdef CONFIG_NUMA_EMU | 43 | #ifdef CONFIG_NUMA_EMU |
46 | #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) | 44 | #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) |
47 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | 45 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89e..f3516da035d1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, | |||
145 | return shift; | 145 | return shift; |
146 | } | 146 | } |
147 | 147 | ||
148 | int early_pfn_to_nid(unsigned long pfn) | 148 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
149 | { | 149 | { |
150 | return phys_to_nid(pfn << PAGE_SHIFT); | 150 | return phys_to_nid(pfn << PAGE_SHIFT); |
151 | } | 151 | } |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 10074212a35b..065cdf8c09fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid, | |||
1041 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); | 1041 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); |
1042 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); | 1042 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); |
1043 | extern void sparse_memory_present_with_active_regions(int nid); | 1043 | extern void sparse_memory_present_with_active_regions(int nid); |
1044 | #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | ||
1045 | extern int early_pfn_to_nid(unsigned long pfn); | ||
1046 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | ||
1047 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | 1044 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ |
1045 | |||
1046 | #if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ | ||
1047 | !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) | ||
1048 | static inline int __early_pfn_to_nid(unsigned long pfn) | ||
1049 | { | ||
1050 | return 0; | ||
1051 | } | ||
1052 | #else | ||
1053 | /* please see mm/page_alloc.c */ | ||
1054 | extern int __meminit early_pfn_to_nid(unsigned long pfn); | ||
1055 | #ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | ||
1056 | /* there is a per-arch backend function. */ | ||
1057 | extern int __meminit __early_pfn_to_nid(unsigned long pfn); | ||
1058 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | ||
1059 | #endif | ||
1060 | |||
1048 | extern void set_dma_reserve(unsigned long new_dma_reserve); | 1061 | extern void set_dma_reserve(unsigned long new_dma_reserve); |
1049 | extern void memmap_init_zone(unsigned long, int, unsigned long, | 1062 | extern void memmap_init_zone(unsigned long, int, unsigned long, |
1050 | unsigned long, enum memmap_context); | 1063 | unsigned long, enum memmap_context); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5675b3073854..c5dd74602efc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) | |||
2989 | * was used and there are no special requirements, this is a convenient | 2989 | * was used and there are no special requirements, this is a convenient |
2990 | * alternative | 2990 | * alternative |
2991 | */ | 2991 | */ |
2992 | int __meminit early_pfn_to_nid(unsigned long pfn) | 2992 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
2993 | { | 2993 | { |
2994 | int i; | 2994 | int i; |
2995 | 2995 | ||
@@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn) | |||
3005 | } | 3005 | } |
3006 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | 3006 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ |
3007 | 3007 | ||
3008 | int __meminit early_pfn_to_nid(unsigned long pfn) | ||
3009 | { | ||
3010 | return __early_pfn_to_nid(pfn); | ||
3011 | } | ||
3012 | |||
3013 | |||
3008 | /* Basic iterator support to walk early_node_map[] */ | 3014 | /* Basic iterator support to walk early_node_map[] */ |
3009 | #define for_each_active_range_index_in_nid(i, nid) \ | 3015 | #define for_each_active_range_index_in_nid(i, nid) \ |
3010 | for (i = first_active_region_index_in_nid(nid); i != -1; \ | 3016 | for (i = first_active_region_index_in_nid(nid); i != -1; \ |