aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Jiang Liu <jiang.liu@huawei.com> 2012-07-31 19:43:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:42:44 -0400
commit 9adb62a5df9c0fbef7b4665919329f73a34651ed (patch)
tree 8372c9c1202adac889714ea99319346279107f33 /mm
parent da92c47d069890106484cb6605df701a54d24499 (diff)
mm/hotplug: correctly setup fallback zonelists when creating new pgdat
When hotadd_new_pgdat() is called to create a new pgdat for a new node, a fallback zonelist should be created for the new node. There is code in hotadd_new_pgdat() that tries to achieve that:

	/*
	 * The node we allocated has no zone fallback lists. For avoiding
	 * to access not-initialized zonelist, build here.
	 */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(NULL);
	mutex_unlock(&zonelists_mutex);

But it doesn't work as expected. When hotadd_new_pgdat() is called, the new node is still in the offline state because node_set_online(nid) hasn't been called yet, and build_all_zonelists() only builds zonelists for online nodes:

	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid);

		build_zonelists(pgdat);
		build_zonelist_cache(pgdat);
	}

So although we intend to create a zonelist for the new pgdat, none is actually built. Fix this by adding a new parameter "pgdat" to build_all_zonelists() so that zonelists are built for the new (still offline) pgdat too.

Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Keping Chen <chenkeping@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/memory_hotplug.c 4
-rw-r--r-- mm/page_alloc.c 17
2 files changed, 14 insertions, 7 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 427bb291dd0f..b8731040b9f9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -513,7 +513,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
513 zone->present_pages += onlined_pages; 513 zone->present_pages += onlined_pages;
514 zone->zone_pgdat->node_present_pages += onlined_pages; 514 zone->zone_pgdat->node_present_pages += onlined_pages;
515 if (need_zonelists_rebuild) 515 if (need_zonelists_rebuild)
516 build_all_zonelists(zone); 516 build_all_zonelists(NULL, zone);
517 else 517 else
518 zone_pcp_update(zone); 518 zone_pcp_update(zone);
519 519
@@ -562,7 +562,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
562 * to access not-initialized zonelist, build here. 562 * to access not-initialized zonelist, build here.
563 */ 563 */
564 mutex_lock(&zonelists_mutex); 564 mutex_lock(&zonelists_mutex);
565 build_all_zonelists(NULL); 565 build_all_zonelists(pgdat, NULL);
566 mutex_unlock(&zonelists_mutex); 566 mutex_unlock(&zonelists_mutex);
567 567
568 return pgdat; 568 return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6c7e3bd93a85..9ad6866ac49c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3032,7 +3032,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
3032 user_zonelist_order = oldval; 3032 user_zonelist_order = oldval;
3033 } else if (oldval != user_zonelist_order) { 3033 } else if (oldval != user_zonelist_order) {
3034 mutex_lock(&zonelists_mutex); 3034 mutex_lock(&zonelists_mutex);
3035 build_all_zonelists(NULL); 3035 build_all_zonelists(NULL, NULL);
3036 mutex_unlock(&zonelists_mutex); 3036 mutex_unlock(&zonelists_mutex);
3037 } 3037 }
3038 } 3038 }
@@ -3415,10 +3415,17 @@ static __init_refok int __build_all_zonelists(void *data)
3415{ 3415{
3416 int nid; 3416 int nid;
3417 int cpu; 3417 int cpu;
3418 pg_data_t *self = data;
3418 3419
3419#ifdef CONFIG_NUMA 3420#ifdef CONFIG_NUMA
3420 memset(node_load, 0, sizeof(node_load)); 3421 memset(node_load, 0, sizeof(node_load));
3421#endif 3422#endif
3423
3424 if (self && !node_online(self->node_id)) {
3425 build_zonelists(self);
3426 build_zonelist_cache(self);
3427 }
3428
3422 for_each_online_node(nid) { 3429 for_each_online_node(nid) {
3423 pg_data_t *pgdat = NODE_DATA(nid); 3430 pg_data_t *pgdat = NODE_DATA(nid);
3424 3431
@@ -3463,7 +3470,7 @@ static __init_refok int __build_all_zonelists(void *data)
3463 * Called with zonelists_mutex held always 3470 * Called with zonelists_mutex held always
3464 * unless system_state == SYSTEM_BOOTING. 3471 * unless system_state == SYSTEM_BOOTING.
3465 */ 3472 */
3466void __ref build_all_zonelists(void *data) 3473void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
3467{ 3474{
3468 set_zonelist_order(); 3475 set_zonelist_order();
3469 3476
@@ -3475,10 +3482,10 @@ void __ref build_all_zonelists(void *data)
3475 /* we have to stop all cpus to guarantee there is no user 3482 /* we have to stop all cpus to guarantee there is no user
3476 of zonelist */ 3483 of zonelist */
3477#ifdef CONFIG_MEMORY_HOTPLUG 3484#ifdef CONFIG_MEMORY_HOTPLUG
3478 if (data) 3485 if (zone)
3479 setup_zone_pageset((struct zone *)data); 3486 setup_zone_pageset(zone);
3480#endif 3487#endif
3481 stop_machine(__build_all_zonelists, NULL, NULL); 3488 stop_machine(__build_all_zonelists, pgdat, NULL);
3482 /* cpuset refresh routine should be here */ 3489 /* cpuset refresh routine should be here */
3483 } 3490 }
3484 vm_total_pages = nr_free_pagecache_pages(); 3491 vm_total_pages = nr_free_pagecache_pages();