about summary refs log tree commit diff stats
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- mm/memory_hotplug.c 65
1 files changed, 53 insertions, 12 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ed85fe3870e2..489f235502db 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
31#include <linux/firmware-map.h> 31#include <linux/firmware-map.h>
32#include <linux/stop_machine.h> 32#include <linux/stop_machine.h>
33#include <linux/hugetlb.h> 33#include <linux/hugetlb.h>
34#include <linux/memblock.h>
34 35
35#include <asm/tlbflush.h> 36#include <asm/tlbflush.h>
36 37
@@ -365,8 +366,7 @@ out_fail:
365static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, 366static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
366 unsigned long end_pfn) 367 unsigned long end_pfn)
367{ 368{
368 unsigned long old_pgdat_end_pfn = 369 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
369 pgdat->node_start_pfn + pgdat->node_spanned_pages;
370 370
371 if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn) 371 if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
372 pgdat->node_start_pfn = start_pfn; 372 pgdat->node_start_pfn = start_pfn;
@@ -402,13 +402,12 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
402static int __meminit __add_section(int nid, struct zone *zone, 402static int __meminit __add_section(int nid, struct zone *zone,
403 unsigned long phys_start_pfn) 403 unsigned long phys_start_pfn)
404{ 404{
405 int nr_pages = PAGES_PER_SECTION;
406 int ret; 405 int ret;
407 406
408 if (pfn_valid(phys_start_pfn)) 407 if (pfn_valid(phys_start_pfn))
409 return -EEXIST; 408 return -EEXIST;
410 409
411 ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); 410 ret = sparse_add_one_section(zone, phys_start_pfn);
412 411
413 if (ret < 0) 412 if (ret < 0)
414 return ret; 413 return ret;
@@ -579,9 +578,9 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
579static void shrink_pgdat_span(struct pglist_data *pgdat, 578static void shrink_pgdat_span(struct pglist_data *pgdat,
580 unsigned long start_pfn, unsigned long end_pfn) 579 unsigned long start_pfn, unsigned long end_pfn)
581{ 580{
582 unsigned long pgdat_start_pfn = pgdat->node_start_pfn; 581 unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
583 unsigned long pgdat_end_pfn = 582 unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
584 pgdat->node_start_pfn + pgdat->node_spanned_pages; 583 unsigned long pgdat_end_pfn = p;
585 unsigned long pfn; 584 unsigned long pfn;
586 struct mem_section *ms; 585 struct mem_section *ms;
587 int nid = pgdat->node_id; 586 int nid = pgdat->node_id;
@@ -935,7 +934,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
935 arg.nr_pages = nr_pages; 934 arg.nr_pages = nr_pages;
936 node_states_check_changes_online(nr_pages, zone, &arg); 935 node_states_check_changes_online(nr_pages, zone, &arg);
937 936
938 nid = page_to_nid(pfn_to_page(pfn)); 937 nid = pfn_to_nid(pfn);
939 938
940 ret = memory_notify(MEM_GOING_ONLINE, &arg); 939 ret = memory_notify(MEM_GOING_ONLINE, &arg);
941 ret = notifier_to_errno(ret); 940 ret = notifier_to_errno(ret);
@@ -1044,17 +1043,23 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
1044} 1043}
1045 1044
1046 1045
1047/* 1046/**
1047 * try_online_node - online a node if offlined
1048 *
1048 * called by cpu_up() to online a node without onlined memory. 1049 * called by cpu_up() to online a node without onlined memory.
1049 */ 1050 */
1050int mem_online_node(int nid) 1051int try_online_node(int nid)
1051{ 1052{
1052 pg_data_t *pgdat; 1053 pg_data_t *pgdat;
1053 int ret; 1054 int ret;
1054 1055
1056 if (node_online(nid))
1057 return 0;
1058
1055 lock_memory_hotplug(); 1059 lock_memory_hotplug();
1056 pgdat = hotadd_new_pgdat(nid, 0); 1060 pgdat = hotadd_new_pgdat(nid, 0);
1057 if (!pgdat) { 1061 if (!pgdat) {
1062 pr_err("Cannot online node %d due to NULL pgdat\n", nid);
1058 ret = -ENOMEM; 1063 ret = -ENOMEM;
1059 goto out; 1064 goto out;
1060 } 1065 }
@@ -1062,6 +1067,12 @@ int mem_online_node(int nid)
1062 ret = register_one_node(nid); 1067 ret = register_one_node(nid);
1063 BUG_ON(ret); 1068 BUG_ON(ret);
1064 1069
1070 if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
1071 mutex_lock(&zonelists_mutex);
1072 build_all_zonelists(NULL, NULL);
1073 mutex_unlock(&zonelists_mutex);
1074 }
1075
1065out: 1076out:
1066 unlock_memory_hotplug(); 1077 unlock_memory_hotplug();
1067 return ret; 1078 return ret;
@@ -1412,6 +1423,36 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
1412} 1423}
1413#endif /* CONFIG_MOVABLE_NODE */ 1424#endif /* CONFIG_MOVABLE_NODE */
1414 1425
1426static int __init cmdline_parse_movable_node(char *p)
1427{
1428#ifdef CONFIG_MOVABLE_NODE
1429 /*
1430 * Memory used by the kernel cannot be hot-removed because Linux
1431 * cannot migrate the kernel pages. When memory hotplug is
1432 * enabled, we should prevent memblock from allocating memory
1433 * for the kernel.
1434 *
1435 * ACPI SRAT records all hotpluggable memory ranges. But before
1436 * SRAT is parsed, we don't know about it.
1437 *
1438 * The kernel image is loaded into memory at very early time. We
1439 * cannot prevent this anyway. So on NUMA system, we set any
1440 * node the kernel resides in as un-hotpluggable.
1441 *
1442 * Since on modern servers, one node could have double-digit
1443 * gigabytes memory, we can assume the memory around the kernel
1444 * image is also un-hotpluggable. So before SRAT is parsed, just
1445 * allocate memory near the kernel image to try the best to keep
1446 * the kernel away from hotpluggable memory.
1447 */
1448 memblock_set_bottom_up(true);
1449#else
1450 pr_warn("movable_node option not supported\n");
1451#endif
1452 return 0;
1453}
1454early_param("movable_node", cmdline_parse_movable_node);
1455
1415/* check which state of node_states will be changed when offline memory */ 1456/* check which state of node_states will be changed when offline memory */
1416static void node_states_check_changes_offline(unsigned long nr_pages, 1457static void node_states_check_changes_offline(unsigned long nr_pages,
1417 struct zone *zone, struct memory_notify *arg) 1458 struct zone *zone, struct memory_notify *arg)
@@ -1702,7 +1743,7 @@ int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
1702} 1743}
1703 1744
1704#ifdef CONFIG_MEMORY_HOTREMOVE 1745#ifdef CONFIG_MEMORY_HOTREMOVE
1705static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) 1746static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
1706{ 1747{
1707 int ret = !is_memblock_offlined(mem); 1748 int ret = !is_memblock_offlined(mem);
1708 1749
@@ -1854,7 +1895,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
1854 * if this is not the case. 1895 * if this is not the case.
1855 */ 1896 */
1856 ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, 1897 ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
1857 is_memblock_offlined_cb); 1898 check_memblock_offlined_cb);
1858 if (ret) { 1899 if (ret) {
1859 unlock_memory_hotplug(); 1900 unlock_memory_hotplug();
1860 BUG(); 1901 BUG();