From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Wed, 18 Feb 2009 14:48:32 -0800
Subject: mm: clean up for early_pfn_to_nid()

What's happening is that the assertion in mm/page_alloc.c:move_freepages()
is triggering:

	BUG_ON(page_zone(start_page) != page_zone(end_page));

Once I knew this is what was happening, I added some annotations:

	if (unlikely(page_zone(start_page) != page_zone(end_page))) {
		printk(KERN_ERR "move_freepages: Bogus zones: "
		       "start_page[%p] end_page[%p] zone[%p]\n",
		       start_page, end_page, zone);
		printk(KERN_ERR "move_freepages: "
		       "start_zone[%p] end_zone[%p]\n",
		       page_zone(start_page), page_zone(end_page));
		printk(KERN_ERR "move_freepages: "
		       "start_pfn[0x%lx] end_pfn[0x%lx]\n",
		       page_to_pfn(start_page), page_to_pfn(end_page));
		printk(KERN_ERR "move_freepages: "
		       "start_nid[%d] end_nid[%d]\n",
		       page_to_nid(start_page), page_to_nid(end_page));
	...

And here's what I got:

	move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00]
	move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00]
	move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff]
	move_freepages: start_nid[1] end_nid[0]

My memory layout on this box is:

	[    0.000000] Zone PFN ranges:
	[    0.000000]   Normal   0x00000000 -> 0x0081ff5d
	[    0.000000] Movable zone start PFN for each node
	[    0.000000] early_node_map[8] active PFN ranges
	[    0.000000]     0: 0x00000000 -> 0x00020000
	[    0.000000]     1: 0x00800000 -> 0x0081f7ff
	[    0.000000]     1: 0x0081f800 -> 0x0081fe50
	[    0.000000]     1: 0x0081fed1 -> 0x0081fed8
	[    0.000000]     1: 0x0081feda -> 0x0081fedb
	[    0.000000]     1: 0x0081fedd -> 0x0081fee5
	[    0.000000]     1: 0x0081fee7 -> 0x0081ff51
	[    0.000000]     1: 0x0081ff59 -> 0x0081ff5d

So it's a block move in that 0x81f600-->0x81f7ff region which triggers
the problem.

This patch:

The declaration of early_pfn_to_nid() is scattered across per-arch
include files, which makes it hard to tell which definition is actually
in use, and in turn makes a fix for memmap initialization awkward.

This patch moves all declarations to include/linux/mm.h.

After this:
  - if !CONFIG_ARCH_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
	-> use the static definition in include/linux/mm.h
  - else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
	-> use the generic definition in mm/page_alloc.c
  - else
	-> the per-arch back-end function is called.
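As a rough sketch of the resulting dispatch (illustrative only; the
exact preprocessor conditions belong to include/linux/mm.h and may
differ in detail):

	#if defined(CONFIG_ARCH_POPULATES_NODE_MAP) || \
	    defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
	/* generic (mm/page_alloc.c) or per-arch back end */
	extern int early_pfn_to_nid(unsigned long pfn);
	#else
	/* no early_node_map[] and no arch hook: everything is node 0 */
	static inline int early_pfn_to_nid(unsigned long pfn)
	{
		return 0;
	}
	#endif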
Signed-off-by: KAMEZAWA Hiroyuki
Tested-by: KOSAKI Motohiro
Reported-by: David Miller
Cc: Mel Gorman
Cc: Heiko Carstens
Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5675b3073854..c5dd74602efc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
  * was used and there are no special requirements, this is a convenient
  * alternative
  */
-int __meminit early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
 	int i;
 
@@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+	return __early_pfn_to_nid(pfn);
+}
+
+
 /* Basic iterator support to walk early_node_map[] */
 #define for_each_active_range_index_in_nid(i, nid) \
 	for (i = first_active_region_index_in_nid(nid); i != -1; \
-- 
cgit v1.2.2

From cc2559bccc72767cb446f79b071d96c30c26439b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Wed, 18 Feb 2009 14:48:33 -0800
Subject: mm: fix memmap init for handling memory hole

Currently, early_pfn_in_nid(PFN, NID) may return false if the PFN lies
in a memory hole, in which case memmap initialization is skipped for
it.  This broke boot on sparc.

To fix this, such a PFN should still be initialized and marked as
PG_reserved.  This patch changes early_pfn_in_nid() to return true if
the PFN falls in a hole.

Signed-off-by: KAMEZAWA Hiroyuki
Reported-by: David Miller
Tested-by: KOSAKI Motohiro
Cc: Mel Gorman
Cc: Heiko Carstens
Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c5dd74602efc..5c44ed49ca93 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3000,16 +3000,33 @@ int __meminit __early_pfn_to_nid(unsigned long pfn)
 		if (start_pfn <= pfn && pfn < end_pfn)
 			return early_node_map[i].nid;
 	}
-
-	return 0;
+	/* This is a memory hole */
+	return -1;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
-	return __early_pfn_to_nid(pfn);
+	int nid;
+
+	nid = __early_pfn_to_nid(pfn);
+	if (nid >= 0)
+		return nid;
+	/* just returns 0 */
+	return 0;
 }
 
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	int nid;
+
+	nid = __early_pfn_to_nid(pfn);
+	if (nid >= 0 && nid != node)
+		return false;
+	return true;
+}
+#endif
 
 /* Basic iterator support to walk early_node_map[] */
 #define for_each_active_range_index_in_nid(i, nid) \
-- 
cgit v1.2.2

From a70f730282019f487aa33a84e5ac9a5e89c5abd0 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Fri, 13 Mar 2009 14:49:46 +1030
Subject: cpumask: replace node_to_cpumask with cpumask_of_node.

Impact: cleanup

node_to_cpumask (and the blecherous node_to_cpumask_ptr, which hid a
variable declaration inside a macro) are replaced now that everyone
implements cpumask_of_node.
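A minimal sketch of the conversion pattern; node_is_headless() is a
hypothetical helper for illustration, not part of this patch:

	#include <linux/cpumask.h>
	#include <linux/topology.h>

	/* Old style: node_to_cpumask_ptr(tmp, node) declared 'tmp'
	 * behind the caller's back and could copy a bulky mask. */
	static bool node_is_headless(int node)
	{
		/* New style: an ordinary pointer to a shared, read-only
		 * mask; re-pointing it per node copies nothing. */
		const struct cpumask *tmp = cpumask_of_node(node);

		return cpumask_empty(tmp);
	}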
Signed-off-by: Rusty Russell
---
 mm/page_alloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c44ed49ca93..a92b0975b9a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2134,7 +2134,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 	int n, val;
 	int min_val = INT_MAX;
 	int best_node = -1;
-	node_to_cpumask_ptr(tmp, 0);
+	const struct cpumask *tmp = cpumask_of_node(0);
 
 	/* Use the local node if we haven't already */
 	if (!node_isset(node, *used_node_mask)) {
@@ -2155,8 +2155,8 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		val += (n < node);
 
 		/* Give preference to headless and unused nodes */
-		node_to_cpumask_ptr_next(tmp, n);
-		if (!cpus_empty(*tmp))
+		tmp = cpumask_of_node(n);
+		if (!cpumask_empty(tmp))
 			val += PENALTY_FOR_NODE_WITH_CPUS;
 
 		/* Slight preference for less loaded node */
-- 
cgit v1.2.2

From e713a21d8251a4c91772f592af46407dfb0b2e4f Mon Sep 17 00:00:00 2001
From: Alexey Zaytsev
Date: Sat, 10 Jan 2009 02:47:57 +0300
Subject: trivial: Fix dubious bitwise 'or' usage spotted by sparse.

It doesn't change the semantics, but it looks like the logical 'or'
was meant to be used here.

Signed-off-by: Alexey Zaytsev
Signed-off-by: Jiri Kosina
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c44ed49ca93..daa36f103e77 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -331,7 +331,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (unlikely(!PageTail(p) | (p->first_page != page))) {
+		if (unlikely(!PageTail(p) || (p->first_page != page))) {
 			bad_page(page);
 			bad++;
 		}
-- 
cgit v1.2.2

From ee99c71c59f897436ec65debb99372b3146f9985 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Tue, 31 Mar 2009 15:19:31 -0700
Subject: mm: introduce for_each_populated_zone() macro

Impact: cleanup

In almost all cases, for_each_zone() is used together with
populated_zone(), because almost no function needs information about
memoryless nodes.  A for_each_populated_zone() macro therefore helps
simplify the code.

This patch has no functional change.
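A sketch of the macro itself; the real definition is added to
include/linux/mmzone.h, which is outside this page_alloc.c-limited
view, so details may differ:

	/* Walk every zone but skip those without present pages; the
	 * empty if-branch keeps the caller's loop body dangling-else
	 * safe. */
	#define for_each_populated_zone(zone)		\
		for_each_zone(zone)			\
			if (!populated_zone(zone))	\
				; /* skip */		\
			else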
[akpm@linux-foundation.org: small cleanup]
Signed-off-by: KOSAKI Motohiro
Cc: KAMEZAWA Hiroyuki
Cc: Mel Gorman
Reviewed-by: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a3803ea8c27d..cbd532161f68 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -922,13 +922,10 @@ static void drain_pages(unsigned int cpu)
 	unsigned long flags;
 	struct zone *zone;
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		if (!populated_zone(zone))
-			continue;
-
 		pset = zone_pcp(zone, cpu);
 		pcp = &pset->pcp;
 
@@ -1879,10 +1876,7 @@ void show_free_areas(void)
 	int cpu;
 	struct zone *zone;
 
-	for_each_zone(zone) {
-		if (!populated_zone(zone))
-			continue;
-
+	for_each_populated_zone(zone) {
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
 
@@ -1922,12 +1916,9 @@ void show_free_areas(void)
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		int i;
 
-		if (!populated_zone(zone))
-			continue;
-
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -1967,12 +1958,9 @@ void show_free_areas(void)
 		printk("\n");
 	}
 
-	for_each_zone(zone) {
+	for_each_populated_zone(zone) {
 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-		if (!populated_zone(zone))
-			continue;
-
 		show_node(zone);
 		printk("%s: ", zone->name);
 
@@ -2784,11 +2772,7 @@ static int __cpuinit process_zones(int cpu)
 
 	node_set_state(node, N_CPU);	/* this node has a cpu */
 
-	for_each_zone(zone) {
-
-		if (!populated_zone(zone))
-			continue;
-
+	for_each_populated_zone(zone) {
 		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
 					GFP_KERNEL, node);
 		if (!zone_pcp(zone, cpu))
-- 
cgit v1.2.2

From 327c0e968645f2601a43f5ea7c19c7b3a5fa0a34 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Tue, 31 Mar 2009 15:23:31 -0700
Subject: vmscan: fix it to take care of nodemask

try_to_free_pages() is used for the direct reclaim of up to
SWAP_CLUSTER_MAX pages when watermarks are low.  The caller of
alloc_pages_nodemask() can specify a nodemask of nodes that are allowed
to be used, but this is not passed on to try_to_free_pages().  This can
lead to unnecessary reclaim of pages that are unusable by the caller,
and in the worst case to allocation failure, because progress is not
made where it is needed.

This patch passes the nodemask used for alloc_pages_nodemask() through
to try_to_free_pages().

Reviewed-by: KOSAKI Motohiro
Acked-by: Mel Gorman
Signed-off-by: KAMEZAWA Hiroyuki
Cc: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cbd532161f68..0284e528748d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1582,7 +1582,8 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist, order,
+						gfp_mask, nodemask);
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-- 
cgit v1.2.2
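For context, a sketch of the reclaim-side counterpart (abridged; the
real change to try_to_free_pages() lives in mm/vmscan.c, outside this
page_alloc.c-limited view, and its scan_control sets more fields):

	unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
					gfp_t gfp_mask, nodemask_t *nodemask)
	{
		struct scan_control sc = {
			.gfp_mask	= gfp_mask,
			.order		= order,
			/* NULL means "all nodes", preserving old behaviour */
			.nodemask	= nodemask,
		};

		return do_try_to_free_pages(zonelist, &sc);
	}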