diff options
author | Jiang Liu <jiang.liu@huawei.com> | 2012-07-31 19:43:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 21:42:44 -0400 |
commit | 9adb62a5df9c0fbef7b4665919329f73a34651ed (patch) | |
tree | 8372c9c1202adac889714ea99319346279107f33 | |
parent | da92c47d069890106484cb6605df701a54d24499 (diff) |
mm/hotplug: correctly setup fallback zonelists when creating new pgdat
When hotadd_new_pgdat() is called to create new pgdat for a new node, a
fallback zonelist should be created for the new node. There's code to try
to achieve that in hotadd_new_pgdat() as below:
/*
* The node we allocated has no zone fallback lists. For avoiding
* to access not-initialized zonelist, build here.
*/
mutex_lock(&zonelists_mutex);
build_all_zonelists(NULL);
mutex_unlock(&zonelists_mutex);
But it doesn't work as expected. When hotadd_new_pgdat() is called, the
new node is still in offline state because node_set_online(nid) hasn't
been called yet. And build_all_zonelists() only builds zonelists for
online nodes as:
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
build_zonelists(pgdat);
build_zonelist_cache(pgdat);
}
Although we intend to create a zonelist for the new pgdat, none is built
because the node is not yet online. So add a new parameter "pgdat" to
build_all_zonelists() so that zonelists are built for the new pgdat too.
Signed-off-by: Jiang Liu <liuj97@gmail.com>
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Keping Chen <chenkeping@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/mmzone.h | 2 | ||||
-rw-r--r-- | init/main.c | 2 | ||||
-rw-r--r-- | kernel/cpu.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 4 | ||||
-rw-r--r-- | mm/page_alloc.c | 17 |
5 files changed, 17 insertions, 10 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f64afa5929fe..98f079bcf399 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -721,7 +721,7 @@ typedef struct pglist_data { | |||
721 | #include <linux/memory_hotplug.h> | 721 | #include <linux/memory_hotplug.h> |
722 | 722 | ||
723 | extern struct mutex zonelists_mutex; | 723 | extern struct mutex zonelists_mutex; |
724 | void build_all_zonelists(void *data); | 724 | void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); |
725 | void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); | 725 | void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); |
726 | bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, | 726 | bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, |
727 | int classzone_idx, int alloc_flags); | 727 | int classzone_idx, int alloc_flags); |
diff --git a/init/main.c b/init/main.c index 95316a1b4a76..e60679de61c3 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -506,7 +506,7 @@ asmlinkage void __init start_kernel(void) | |||
506 | setup_per_cpu_areas(); | 506 | setup_per_cpu_areas(); |
507 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | 507 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ |
508 | 508 | ||
509 | build_all_zonelists(NULL); | 509 | build_all_zonelists(NULL, NULL); |
510 | page_alloc_init(); | 510 | page_alloc_init(); |
511 | 511 | ||
512 | printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); | 512 | printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); |
diff --git a/kernel/cpu.c b/kernel/cpu.c index a4eb5227a19e..14d32588cccd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -416,7 +416,7 @@ int __cpuinit cpu_up(unsigned int cpu) | |||
416 | 416 | ||
417 | if (pgdat->node_zonelists->_zonerefs->zone == NULL) { | 417 | if (pgdat->node_zonelists->_zonerefs->zone == NULL) { |
418 | mutex_lock(&zonelists_mutex); | 418 | mutex_lock(&zonelists_mutex); |
419 | build_all_zonelists(NULL); | 419 | build_all_zonelists(NULL, NULL); |
420 | mutex_unlock(&zonelists_mutex); | 420 | mutex_unlock(&zonelists_mutex); |
421 | } | 421 | } |
422 | #endif | 422 | #endif |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 427bb291dd0f..b8731040b9f9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -513,7 +513,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages) | |||
513 | zone->present_pages += onlined_pages; | 513 | zone->present_pages += onlined_pages; |
514 | zone->zone_pgdat->node_present_pages += onlined_pages; | 514 | zone->zone_pgdat->node_present_pages += onlined_pages; |
515 | if (need_zonelists_rebuild) | 515 | if (need_zonelists_rebuild) |
516 | build_all_zonelists(zone); | 516 | build_all_zonelists(NULL, zone); |
517 | else | 517 | else |
518 | zone_pcp_update(zone); | 518 | zone_pcp_update(zone); |
519 | 519 | ||
@@ -562,7 +562,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
562 | * to access not-initialized zonelist, build here. | 562 | * to access not-initialized zonelist, build here. |
563 | */ | 563 | */ |
564 | mutex_lock(&zonelists_mutex); | 564 | mutex_lock(&zonelists_mutex); |
565 | build_all_zonelists(NULL); | 565 | build_all_zonelists(pgdat, NULL); |
566 | mutex_unlock(&zonelists_mutex); | 566 | mutex_unlock(&zonelists_mutex); |
567 | 567 | ||
568 | return pgdat; | 568 | return pgdat; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6c7e3bd93a85..9ad6866ac49c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -3032,7 +3032,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, | |||
3032 | user_zonelist_order = oldval; | 3032 | user_zonelist_order = oldval; |
3033 | } else if (oldval != user_zonelist_order) { | 3033 | } else if (oldval != user_zonelist_order) { |
3034 | mutex_lock(&zonelists_mutex); | 3034 | mutex_lock(&zonelists_mutex); |
3035 | build_all_zonelists(NULL); | 3035 | build_all_zonelists(NULL, NULL); |
3036 | mutex_unlock(&zonelists_mutex); | 3036 | mutex_unlock(&zonelists_mutex); |
3037 | } | 3037 | } |
3038 | } | 3038 | } |
@@ -3415,10 +3415,17 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3415 | { | 3415 | { |
3416 | int nid; | 3416 | int nid; |
3417 | int cpu; | 3417 | int cpu; |
3418 | pg_data_t *self = data; | ||
3418 | 3419 | ||
3419 | #ifdef CONFIG_NUMA | 3420 | #ifdef CONFIG_NUMA |
3420 | memset(node_load, 0, sizeof(node_load)); | 3421 | memset(node_load, 0, sizeof(node_load)); |
3421 | #endif | 3422 | #endif |
3423 | |||
3424 | if (self && !node_online(self->node_id)) { | ||
3425 | build_zonelists(self); | ||
3426 | build_zonelist_cache(self); | ||
3427 | } | ||
3428 | |||
3422 | for_each_online_node(nid) { | 3429 | for_each_online_node(nid) { |
3423 | pg_data_t *pgdat = NODE_DATA(nid); | 3430 | pg_data_t *pgdat = NODE_DATA(nid); |
3424 | 3431 | ||
@@ -3463,7 +3470,7 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3463 | * Called with zonelists_mutex held always | 3470 | * Called with zonelists_mutex held always |
3464 | * unless system_state == SYSTEM_BOOTING. | 3471 | * unless system_state == SYSTEM_BOOTING. |
3465 | */ | 3472 | */ |
3466 | void __ref build_all_zonelists(void *data) | 3473 | void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) |
3467 | { | 3474 | { |
3468 | set_zonelist_order(); | 3475 | set_zonelist_order(); |
3469 | 3476 | ||
@@ -3475,10 +3482,10 @@ void __ref build_all_zonelists(void *data) | |||
3475 | /* we have to stop all cpus to guarantee there is no user | 3482 | /* we have to stop all cpus to guarantee there is no user |
3476 | of zonelist */ | 3483 | of zonelist */ |
3477 | #ifdef CONFIG_MEMORY_HOTPLUG | 3484 | #ifdef CONFIG_MEMORY_HOTPLUG |
3478 | if (data) | 3485 | if (zone) |
3479 | setup_zone_pageset((struct zone *)data); | 3486 | setup_zone_pageset(zone); |
3480 | #endif | 3487 | #endif |
3481 | stop_machine(__build_all_zonelists, NULL, NULL); | 3488 | stop_machine(__build_all_zonelists, pgdat, NULL); |
3482 | /* cpuset refresh routine should be here */ | 3489 | /* cpuset refresh routine should be here */ |
3483 | } | 3490 | } |
3484 | vm_total_pages = nr_free_pagecache_pages(); | 3491 | vm_total_pages = nr_free_pagecache_pages(); |