author		Jiang Liu <jiang.liu@huawei.com>	2012-07-31 19:43:28 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 21:42:44 -0400
commit		9adb62a5df9c0fbef7b4665919329f73a34651ed (patch)
tree		8372c9c1202adac889714ea99319346279107f33 /mm
parent		da92c47d069890106484cb6605df701a54d24499 (diff)
mm/hotplug: correctly setup fallback zonelists when creating new pgdat
When hotadd_new_pgdat() is called to create the pgdat for a new node, a
fallback zonelist should be created for that node. hotadd_new_pgdat()
already contains code that tries to do this:
	/*
	 * The node we allocated has no zone fallback lists. For avoiding
	 * to access not-initialized zonelist, build here.
	 */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(pgdat, NULL);
	mutex_unlock(&zonelists_mutex);
But it doesn't work as expected: when hotadd_new_pgdat() is called, the
new node is still offline because node_set_online(nid) hasn't been called
yet, and build_all_zonelists() only builds zonelists for online nodes:
	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid);

		build_zonelists(pgdat);
		build_zonelist_cache(pgdat);
	}
So although we intend to build a zonelist for the new pgdat, it never
happens. Fix this by adding a "pgdat" parameter to build_all_zonelists()
so that zonelists are also built for the new, still-offline pgdat.
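In condensed form, the reworked path looks roughly like this (a sketch
distilled from the patch below; the CONFIG_NUMA node_load reset and the
per-cpu pageset handling are omitted):

	/* New interface: pass the pgdat of a node being hot-added (or NULL)
	 * and the zone being onlined (or NULL). */
	void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone);

	/* __build_all_zonelists() receives that pgdat through stop_machine()
	 * and builds its zonelists even though the node is not yet online. */
	static int __build_all_zonelists(void *data)
	{
		pg_data_t *self = data;
		int nid;

		if (self && !node_online(self->node_id)) {
			build_zonelists(self);
			build_zonelist_cache(self);
		}

		for_each_online_node(nid) {
			pg_data_t *pgdat = NODE_DATA(nid);

			build_zonelists(pgdat);
			build_zonelist_cache(pgdat);
		}
		return 0;
	}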
Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Keping Chen <chenkeping@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/memory_hotplug.c |  4
-rw-r--r--	mm/page_alloc.c     | 17
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 427bb291dd0f..b8731040b9f9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -513,7 +513,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 	if (need_zonelists_rebuild)
-		build_all_zonelists(zone);
+		build_all_zonelists(NULL, zone);
 	else
 		zone_pcp_update(zone);
 
@@ -562,7 +562,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	 * to access not-initialized zonelist, build here.
 	 */
 	mutex_lock(&zonelists_mutex);
-	build_all_zonelists(NULL);
+	build_all_zonelists(pgdat, NULL);
 	mutex_unlock(&zonelists_mutex);
 
 	return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6c7e3bd93a85..9ad6866ac49c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3032,7 +3032,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order) {
 			mutex_lock(&zonelists_mutex);
-			build_all_zonelists(NULL);
+			build_all_zonelists(NULL, NULL);
 			mutex_unlock(&zonelists_mutex);
 		}
 	}
@@ -3415,10 +3415,17 @@ static __init_refok int __build_all_zonelists(void *data)
 {
 	int nid;
 	int cpu;
+	pg_data_t *self = data;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
 #endif
+
+	if (self && !node_online(self->node_id)) {
+		build_zonelists(self);
+		build_zonelist_cache(self);
+	}
+
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 
@@ -3463,7 +3470,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void __ref build_all_zonelists(void *data)
+void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 {
 	set_zonelist_order();
 
@@ -3475,10 +3482,10 @@ void __ref build_all_zonelists(void *data)
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
 #ifdef CONFIG_MEMORY_HOTPLUG
-		if (data)
-			setup_zone_pageset((struct zone *)data);
+		if (zone)
+			setup_zone_pageset(zone);
 #endif
-		stop_machine(__build_all_zonelists, NULL, NULL);
+		stop_machine(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
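For quick reference, the two memory hot-plug call sites after this patch,
condensed from the hunks above:

	/* hotadd_new_pgdat(): the new node is still offline at this point,
	 * so its pgdat is passed explicitly and a fallback zonelist is built
	 * for it. */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(pgdat, NULL);
	mutex_unlock(&zonelists_mutex);

	/* online_pages(): no new pgdat here, but the zone being onlined may
	 * need a zonelist rebuild or a per-cpu pageset update. */
	if (need_zonelists_rebuild)
		build_all_zonelists(NULL, zone);
	else
		zone_pcp_update(zone);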