From 6292d9aaf3047f1abd970bc64ab6d952eda258ac Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 1 Feb 2006 03:04:44 -0800 Subject: [PATCH] __cpuinit functions wrongly marked __meminit __meminit has overzelously been modified and crept its way into marking cpuup callbacks as __meminit. Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df54e2fc8ee0..44b4eb4202d9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1799,7 +1799,7 @@ void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, memmap_init_zone((size), (nid), (zone), (start_pfn)) #endif -static int __meminit zone_batchsize(struct zone *zone) +static int __cpuinit zone_batchsize(struct zone *zone) { int batch; @@ -1893,7 +1893,7 @@ static struct per_cpu_pageset * Dynamically allocate memory for the * per cpu pageset array in struct zone. */ -static int __meminit process_zones(int cpu) +static int __cpuinit process_zones(int cpu) { struct zone *zone, *dzone; @@ -1934,7 +1934,7 @@ static inline void free_zone_pagesets(int cpu) } } -static int __meminit pageset_cpuup_callback(struct notifier_block *nfb, +static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { -- cgit v1.2.2 From 88a2a4ac6b671a4b0dd5d2d762418904c05f4104 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2006 23:27:36 -0800 Subject: [PATCH] percpu data: only iterate over possible CPUs percpu_data blindly allocates bootmem memory to store NR_CPUS instances of cpudata, instead of allocating memory only for possible cpus. As a preparation for changing that, we need to convert various 0 -> NR_CPUS loops to use for_each_cpu(). (The above only applies to users of asm-generic/percpu.h. powerpc has gone it alone and is presently only allocating memory for present CPUs, so it's currently corrupting memory). Signed-off-by: Eric Dumazet Cc: "David S. Miller" Cc: James Bottomley Acked-by: Ingo Molnar Cc: Jens Axboe Cc: Anton Blanchard Acked-by: William Irwin Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 44b4eb4202d9..dde04ff4be31 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1213,18 +1213,21 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) { int cpu = 0; - memset(ret, 0, sizeof(*ret)); + memset(ret, 0, nr * sizeof(unsigned long)); cpus_and(*cpumask, *cpumask, cpu_online_map); cpu = first_cpu(*cpumask); while (cpu < NR_CPUS) { unsigned long *in, *out, off; + if (!cpu_isset(cpu, *cpumask)) + continue; + in = (unsigned long *)&per_cpu(page_states, cpu); cpu = next_cpu(cpu, *cpumask); - if (cpu < NR_CPUS) + if (likely(cpu < NR_CPUS)) prefetch(&per_cpu(page_states, cpu)); out = (unsigned long *)ret; @@ -1886,8 +1889,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, * not check if the processor is online before following the pageset pointer. * Other parts of the kernel may not check if the zone is available. */ -static struct per_cpu_pageset - boot_pageset[NR_CPUS]; +static struct per_cpu_pageset boot_pageset[NR_CPUS]; /* * Dynamically allocate memory for the -- cgit v1.2.2 From 41d78ba55037468e6c86c53e3076d1a74841de39 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 14 Feb 2006 13:52:58 -0800 Subject: [PATCH] compound page: use page[1].lru If a compound page has its own put_page_testzero destructor (the only current example is free_huge_page), that is noted in page[1].mapping of the compound page. But that's rather a poor place to keep it: functions which call set_page_dirty_lock after get_user_pages (e.g. Infiniband's __ib_umem_release) ought to be checking first, otherwise set_page_dirty is liable to crash on what's not the address of a struct address_space. And now I'm about to make that worse: it turns out that every compound page needs a destructor, so we can no longer rely on hugetlb pages going their own special way, to avoid further problems of page->mapping reuse. For example, not many people know that: on 50% of i386 -Os builds, the first tail page of a compound page purports to be PageAnon (when its destructor has an odd address), which surprises page_add_file_rmap. Keep the compound page destructor in page[1].lru.next instead. And to free up the common pairing of mapping and index, also move compound page order from index to lru.prev. Slab reuses page->lru too: but if we ever need slab to use compound pages, it can easily stack its use above this. (akpm: decoded version of the above: the tail pages of a compound page now have ->mapping==NULL, so there's no need for the set_page_dirty[_lock]() caller to check that they're not compund pages before doing the dirty). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dde04ff4be31..eec89ab39bb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -169,20 +169,17 @@ static void bad_page(struct page *page) * All pages have PG_compound set. All pages have their ->private pointing at * the head page (even the head page has this). * - * The first tail page's ->mapping, if non-zero, holds the address of the - * compound page's put_page() function. - * - * The order of the allocation is stored in the first tail page's ->index - * This is only for debug at present. This usage means that zero-order pages - * may not be compound. + * The first tail page's ->lru.next holds the address of the compound page's + * put_page() function. Its ->lru.prev holds the order of allocation. + * This usage means that zero-order pages may not be compound. */ static void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - page[1].mapping = NULL; - page[1].index = order; + page[1].lru.next = NULL; /* set dtor */ + page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; @@ -196,7 +193,7 @@ static void destroy_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - if (unlikely(page[1].index != order)) + if (unlikely((unsigned long)page[1].lru.prev != order)) bad_page(page); for (i = 0; i < nr_pages; i++) { -- cgit v1.2.2 From d98c7a09843621f1b145ca5ae8ed03ff04085edb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 14 Feb 2006 13:52:59 -0800 Subject: [PATCH] compound page: default destructor Somehow I imagined that calling a NULL destructor would free a compound page rather than oopsing. No, we must supply a default destructor, __free_pages_ok using the order noted by prep_compound_page. hugetlb can still replace this as before with its own free_huge_page pointer. The case that needs this is not common: rarely does put_compound_page's put_page_testzero bring the count down to 0. But if get_user_pages is applied to some part of a compound page, without immediate release (e.g. AIO or Infiniband), then it's possible for its put_page to come after the containing vma has been unmapped and the driver done its free_pages. That's just the kind of case compound pages are supposed to be guarding against (but Nick points out, nor did PageReserved handle this right). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eec89ab39bb6..62c122528587 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -56,6 +56,7 @@ long nr_swap_pages; int percpu_pagelist_fraction; static void fastcall free_hot_cold_page(struct page *page, int cold); +static void __free_pages_ok(struct page *page, unsigned int order); /* * results with 256, 32 in the lowmem_reserve sysctl: @@ -173,12 +174,18 @@ static void bad_page(struct page *page) * put_page() function. Its ->lru.prev holds the order of allocation. * This usage means that zero-order pages may not be compound. */ + +static void free_compound_page(struct page *page) +{ + __free_pages_ok(page, (unsigned long)page[1].lru.prev); +} + static void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - page[1].lru.next = NULL; /* set dtor */ + page[1].lru.next = (void *)free_compound_page; /* set dtor */ page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; -- cgit v1.2.2 From 4cf808eb443ead42777a0230b73aec0cee7fb298 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 17 Feb 2006 20:38:21 +0100 Subject: [PATCH] Handle holes in node mask in node fallback list setup Change the find_next_best_node algorithm to correctly skip over holes in the node online mask. Previously it would not handle missing nodes correctly and cause crashes at boot. [Written by Linus, tested by AK] Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 62c122528587..208812b25597 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1541,29 +1541,29 @@ static int __initdata node_load[MAX_NUMNODES]; */ static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { - int i, n, val; + int n, val; int min_val = INT_MAX; int best_node = -1; - for_each_online_node(i) { - cpumask_t tmp; + /* Use the local node if we haven't already */ + if (!node_isset(node, *used_node_mask)) { + node_set(node, *used_node_mask); + return node; + } - /* Start from local node */ - n = (node+i) % num_online_nodes(); + for_each_online_node(n) { + cpumask_t tmp; /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; - /* Use the local node if we haven't already */ - if (!node_isset(node, *used_node_mask)) { - best_node = node; - break; - } - /* Use the distance array to find the distance */ val = node_distance(node, n); + /* Penalize nodes under us ("prefer the next node") */ + val += (n < node); + /* Give preference to headless and unused nodes */ tmp = node_to_cpumask(n); if (!cpus_empty(tmp)) -- cgit v1.2.2 From 9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 20 Feb 2006 18:27:52 -0800 Subject: [PATCH] Terminate process that fails on a constrained allocation Some allocations are restricted to a limited set of nodes (due to memory policies or cpuset constraints). If the page allocator is not able to find enough memory then that does not mean that overall system memory is low. In particular going postal and more or less randomly shooting at processes is not likely going to help the situation but may just lead to suicide (the whole system coming down). It is better to signal to the process that no memory exists given the constraints that the process (or the configuration of the process) has placed on the allocation behavior. The process may be killed but then the sysadmin or developer can investigate the situation. The solution is similar to what we do when running out of hugepages. This patch adds a check before we kill processes. At that point performance considerations do not matter much so we just scan the zonelist and reconstruct a list of nodes. If the list of nodes does not contain all online nodes then this is a constrained allocation and we should kill the current process. Signed-off-by: Christoph Lameter Cc: Nick Piggin Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 208812b25597..791690d7d3fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1015,7 +1015,7 @@ rebalance: if (page) goto got_pg; - out_of_memory(gfp_mask, order); + out_of_memory(zonelist, gfp_mask, order); goto restart; } -- cgit v1.2.2