aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2010-11-24 15:57:09 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-24 16:50:45 -0500
commite9959f0f37160e1f5351af828cc981712b5066c1 (patch)
treec218bce04d8f4dfc97a91b6807be5f2853a9024d /mm
parenta42c390cfa0c2612459d7226ba11612847ca3a64 (diff)
mm/page_alloc.c: fix build_all_zonelist() where percpu_alloc() is wrongly called under stop_machine_run()
During memory hotplug, build_allzonelists() may be called under stop_machine_run(). In this function, setup_zone_pageset() is called. But it's bug because it will do page allocation under stop_machine_run(). Here is a report from Alok Kataria. BUG: sleeping function called from invalid context at kernel/mutex.c:94 in_atomic(): 0, irqs_disabled(): 1, pid: 4, name: migration/0 Pid: 4, comm: migration/0 Not tainted 2.6.35.6-45.fc14.x86_64 #1 Call Trace: [<ffffffff8103d12b>] __might_sleep+0xeb/0xf0 [<ffffffff81468245>] mutex_lock+0x24/0x50 [<ffffffff8110eaa6>] pcpu_alloc+0x6d/0x7ee [<ffffffff81048888>] ? load_balance+0xbe/0x60e [<ffffffff8103a1b3>] ? rt_se_boosted+0x21/0x2f [<ffffffff8103e1cf>] ? dequeue_rt_stack+0x18b/0x1ed [<ffffffff8110f237>] __alloc_percpu+0x10/0x12 [<ffffffff81465e22>] setup_zone_pageset+0x38/0xbe [<ffffffff810d6d81>] ? build_zonelists_node.clone.58+0x79/0x8c [<ffffffff81452539>] __build_all_zonelists+0x419/0x46c [<ffffffff8108ef01>] ? cpu_stopper_thread+0xb2/0x198 [<ffffffff8108f075>] stop_machine_cpu_stop+0x8e/0xc5 [<ffffffff8108efe7>] ? stop_machine_cpu_stop+0x0/0xc5 [<ffffffff8108ef57>] cpu_stopper_thread+0x108/0x198 [<ffffffff81467a37>] ? schedule+0x5b2/0x5cc [<ffffffff8108ee4f>] ? cpu_stopper_thread+0x0/0x198 [<ffffffff81065f29>] kthread+0x7f/0x87 [<ffffffff8100aae4>] kernel_thread_helper+0x4/0x10 [<ffffffff81065eaa>] ? kthread+0x0/0x87 [<ffffffff8100aae0>] ? kernel_thread_helper+0x0/0x10 Built 5 zonelists in Node order, mobility grouping on. Total pages: 289456 Policy zone: Normal This patch tries to fix the issue by moving setup_zone_pageset() out from stop_machine_run(). It's obviously not necessary to be called under stop_machine_run(). [akpm@linux-foundation.org: remove unneeded local] Reported-by: Alok Kataria <akataria@vmware.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Tejun Heo <tj@kernel.org> Cc: Petr Vandrovec <petr@vmware.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/page_alloc.c14
1 files changed, 5 insertions, 9 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07a654486f75..e4092704c1a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3008,14 +3008,6 @@ static __init_refok int __build_all_zonelists(void *data)
3008 build_zonelist_cache(pgdat); 3008 build_zonelist_cache(pgdat);
3009 } 3009 }
3010 3010
3011#ifdef CONFIG_MEMORY_HOTPLUG
3012 /* Setup real pagesets for the new zone */
3013 if (data) {
3014 struct zone *zone = data;
3015 setup_zone_pageset(zone);
3016 }
3017#endif
3018
3019 /* 3011 /*
3020 * Initialize the boot_pagesets that are going to be used 3012 * Initialize the boot_pagesets that are going to be used
3021 * for bootstrapping processors. The real pagesets for 3013 * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3056,11 @@ void build_all_zonelists(void *data)
3064 } else { 3056 } else {
3065 /* we have to stop all cpus to guarantee there is no user 3057 /* we have to stop all cpus to guarantee there is no user
3066 of zonelist */ 3058 of zonelist */
3067 stop_machine(__build_all_zonelists, data, NULL); 3059#ifdef CONFIG_MEMORY_HOTPLUG
3060 if (data)
3061 setup_zone_pageset((struct zone *)data);
3062#endif
3063 stop_machine(__build_all_zonelists, NULL, NULL);
3068 /* cpuset refresh routine should be here */ 3064 /* cpuset refresh routine should be here */
3069 } 3065 }
3070 vm_total_pages = nr_free_pagecache_pages(); 3066 vm_total_pages = nr_free_pagecache_pages();