Diffstat (limited to 'mm/memory_hotplug.c')
 mm/memory_hotplug.c | 112 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 24 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ca1dd3aa5eee..0eb1a1df649d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -30,6 +30,7 @@
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
 #include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -194,7 +195,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 
 	zone = &pgdat->node_zones[0];
 	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-		if (zone->wait_table) {
+		if (zone_is_initialized(zone)) {
 			nr_pages = zone->wait_table_hash_nr_entries
 				* sizeof(wait_queue_head_t);
 			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
@@ -229,8 +230,8 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 
 	zone_span_writelock(zone);
 
-	old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
-	if (!zone->spanned_pages || start_pfn < zone->zone_start_pfn)
+	old_zone_end_pfn = zone_end_pfn(zone);
+	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
 		zone->zone_start_pfn = start_pfn;
 
 	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
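Aside on the hunk above: grow_zone_span() extends a zone's [start, end) pfn span to cover a hot-added range. A minimal userspace sketch of that arithmetic, using an invented zone_model struct and illustrative pfn values in place of real kernel state:

#include <stdio.h>

struct zone_model {
	unsigned long start_pfn;
	unsigned long spanned_pages;
};

/* Mirrors grow_zone_span(): remember the old end, pull start_pfn down
 * if the new range begins earlier (or the zone was empty), then respan
 * to whichever end lies further out.
 */
static void grow_span(struct zone_model *z, unsigned long start_pfn,
		      unsigned long end_pfn)
{
	unsigned long old_end_pfn = z->start_pfn + z->spanned_pages;

	if (!z->spanned_pages || start_pfn < z->start_pfn)
		z->start_pfn = start_pfn;

	z->spanned_pages = (old_end_pfn > end_pfn ? old_end_pfn : end_pfn) -
			   z->start_pfn;
}

int main(void)
{
	struct zone_model z = { .start_pfn = 0x100, .spanned_pages = 0x100 };

	grow_span(&z, 0x80, 0x100);	/* hot-add [0x80, 0x100) below the zone */
	printf("zone now spans [0x%lx, 0x%lx)\n",
	       z.start_pfn, z.start_pfn + z.spanned_pages);	/* [0x80, 0x200) */
	return 0;
}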
@@ -305,7 +306,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
 		goto out_fail;
 
 	/* use start_pfn for z1's start_pfn if z1 is empty */
-	if (z1->spanned_pages)
+	if (!zone_is_empty(z1))
 		z1_start_pfn = z1->zone_start_pfn;
 	else
 		z1_start_pfn = start_pfn;
@@ -347,7 +348,7 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
 		goto out_fail;
 
 	/* use end_pfn for z2's end_pfn if z2 is empty */
-	if (z2->spanned_pages)
+	if (!zone_is_empty(z2))
 		z2_end_pfn = zone_end_pfn(z2);
 	else
 		z2_end_pfn = end_pfn;
@@ -514,8 +515,9 @@ static int find_biggest_section_pfn(int nid, struct zone *zone,
 static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 			     unsigned long end_pfn)
 {
 	unsigned long zone_start_pfn = zone->zone_start_pfn;
-	unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
+	unsigned long zone_end_pfn = z;
 	unsigned long pfn;
 	struct mem_section *ms;
 	int nid = zone_to_nid(zone);
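The "namespace clash" comment deserves a word: a local variable named zone_end_pfn shadows the zone_end_pfn() helper, and in C the new name is already in scope within its own initializer, so "unsigned long zone_end_pfn = zone_end_pfn(zone);" would try to call a variable. Hence the temporary z. A userspace sketch of the pitfall (helper() is an invented stand-in):

#include <stdio.h>

static unsigned long helper(void)
{
	return 42;
}

int main(void)
{
	/* OK: the variable's name differs from the function's. */
	unsigned long value = helper();

	/*
	 * Not OK: "unsigned long helper = helper();" - the declarator
	 * brings 'helper' into scope before the initializer runs, so the
	 * call would resolve to the variable and fail to compile. That is
	 * the clash the patch sidesteps with the temporary 'z'.
	 */
	printf("%lu\n", value);
	return 0;
}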
@@ -1069,6 +1071,23 @@ out:
 	return ret;
 }
 
+static int check_hotplug_memory_range(u64 start, u64 size)
+{
+	u64 start_pfn = start >> PAGE_SHIFT;
+	u64 nr_pages = size >> PAGE_SHIFT;
+
+	/* Memory range must be aligned with section */
+	if ((start_pfn & ~PAGE_SECTION_MASK) ||
+	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
+		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
+		       (unsigned long long)start,
+		       (unsigned long long)size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
@@ -1078,6 +1097,10 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
+	ret = check_hotplug_memory_range(start, size);
+	if (ret)
+		return ret;
+
 	lock_memory_hotplug();
 
 	res = register_memory_resource(start, size);
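To make the new alignment rule concrete: assuming the x86_64 defaults of 4KB pages and 128MB sections (SECTION_SIZE_BITS == 27; other architectures differ), a hotplug range passes only if it starts on a 128MB boundary and covers a whole number of sections. A standalone sketch of check_hotplug_memory_range()'s test:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT        12
#define SECTION_SIZE_BITS 27	/* 128MB sections, x86_64 default */
#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)
#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION - 1))

/* same test as check_hotplug_memory_range(), minus the pr_err() */
static int range_is_section_aligned(uint64_t start, uint64_t size)
{
	uint64_t start_pfn = start >> PAGE_SHIFT;
	uint64_t nr_pages = size >> PAGE_SHIFT;

	return !(start_pfn & ~PAGE_SECTION_MASK) &&
	       nr_pages && !(nr_pages % PAGES_PER_SECTION);
}

int main(void)
{
	/* 128MB-aligned start, 256MB size: accepted (prints 1) */
	printf("%d\n", range_is_section_aligned(0x8000000ULL, 0x10000000ULL));
	/* 64MB start is not section-aligned: rejected (prints 0) */
	printf("%d\n", range_is_section_aligned(0x4000000ULL, 0x10000000ULL));
	return 0;
}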
@@ -1208,10 +1231,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan pfn because it's much easier than scanning over
+ * linked list. This function returns the pfn of the first found movable
+ * page if it's found, otherwise 0.
  */
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 	struct page *page;
@@ -1220,6 +1245,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
 			page = pfn_to_page(pfn);
 			if (PageLRU(page))
 				return pfn;
+			if (PageHuge(page)) {
+				if (is_hugepage_active(page))
+					return pfn;
+				else
+					pfn = round_up(pfn + 1,
+						1 << compound_order(page)) - 1;
+			}
 		}
 	}
 	return 0;
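The round_up() dance above skips the rest of an inactive hugepage in one step: it advances pfn to the hugepage's last tail pfn so the loop's pfn++ lands on the first page after it. A userspace sketch with a macro equivalent to the kernel's round_up() for power-of-two alignments, and an assumed 2MB hugepage (compound_order == 9, i.e. 512 base pages):

#include <stdio.h>

/* equivalent to the kernel's round_up() when 'y' is a power of two */
#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	unsigned long pfn = 0x1000;	/* head pfn of a 512-page hugepage */
	unsigned long order = 9;	/* 2MB hugepage on 4KB base pages */

	/* mirrors: pfn = round_up(pfn + 1, 1 << compound_order(page)) - 1 */
	pfn = round_up(pfn + 1, 1UL << order) - 1;

	/* 0x11ff = last tail pfn; the loop's pfn++ then yields 0x1200 */
	printf("pfn = 0x%lx\n", pfn);
	return 0;
}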
@@ -1240,6 +1272,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
+
+		if (PageHuge(page)) {
+			struct page *head = compound_head(page);
+			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+			if (compound_order(head) > PFN_SECTION_SHIFT) {
+				ret = -EBUSY;
+				break;
+			}
+			if (isolate_huge_page(page, &source))
+				move_pages -= 1 << compound_order(head);
+			continue;
+		}
+
 		if (!get_page_unless_zero(page))
 			continue;
 		/*
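The new PageHuge() branch above refuses hugepages larger than a memory section, since offlining works on section-aligned blocks. With the x86_64 defaults assumed earlier (PFN_SECTION_SHIFT == 15, i.e. 128MB sections), 2MB hugepages (order 9) are migratable while 1GB gigantic pages (order 18) make the offline fail with -EBUSY. A sketch of just that comparison:

#include <stdio.h>

#define PFN_SECTION_SHIFT 15	/* 128MB sections with 4KB pages, assumed */

static const char *hugepage_verdict(unsigned int order)
{
	/* mirrors: if (compound_order(head) > PFN_SECTION_SHIFT) -EBUSY */
	return order > PFN_SECTION_SHIFT ? "-EBUSY (larger than a section)"
					 : "isolate and migrate";
}

int main(void)
{
	printf("2MB page (order 9):  %s\n", hugepage_verdict(9));
	printf("1GB page (order 18): %s\n", hugepage_verdict(18));
	return 0;
}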
@@ -1272,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 	}
 	if (!list_empty(&source)) {
 		if (not_managed) {
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 			goto out;
 		}
 
@@ -1283,7 +1328,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 		if (ret)
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 	}
 out:
 	return ret;
@@ -1472,7 +1517,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	struct zone *zone;
 	struct memory_notify arg;
 
-	BUG_ON(start_pfn >= end_pfn);
 	/* at least, alignment against pageblock is necessary */
 	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
 		return -EINVAL;
@@ -1527,8 +1571,8 @@ repeat:
 		drain_all_pages();
 	}
 
-	pfn = scan_lru_pages(start_pfn, end_pfn);
-	if (pfn) { /* We have page on LRU */
+	pfn = scan_movable_pages(start_pfn, end_pfn);
+	if (pfn) { /* We have movable pages */
 		ret = do_migrate_range(pfn, end_pfn);
 		if (!ret) {
 			drain = 1;
@@ -1547,6 +1591,11 @@ repeat:
 	yield();
 	/* drain pcp pages, this is synchronous. */
 	drain_all_pages();
+	/*
+	 * dissolve free hugepages in the memory block before doing offlining
+	 * actually in order to make hugetlbfs's object counting consistent.
+	 */
+	dissolve_free_huge_pages(start_pfn, end_pfn);
 	/* check again */
 	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
 	if (offlined_pages < 0) {
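On the dissolve_free_huge_pages() call above: a free hugepage sitting in hugetlb's pool would otherwise still be counted in nr_huge_pages/free_huge_pages after its backing memory went offline. Dissolving removes it from the pool (its base pages return to the buddy allocator) so the counters stay consistent. A toy userspace model of that bookkeeping, with an invented hstate_model standing in for struct hstate:

#include <stdio.h>

struct hstate_model {
	unsigned long nr_huge_pages;	/* total pages in the pool */
	unsigned long free_huge_pages;	/* unused pages in the pool */
};

/* Dissolving a free hugepage drops it from the pool entirely; handing
 * its base pages back to the buddy allocator is not modeled here.
 */
static void dissolve_one(struct hstate_model *h)
{
	h->nr_huge_pages--;
	h->free_huge_pages--;
}

int main(void)
{
	struct hstate_model h = { .nr_huge_pages = 4, .free_huge_pages = 1 };

	dissolve_one(&h);	/* the one free page in the offlined block */
	printf("total=%lu free=%lu\n", h.nr_huge_pages, h.free_huge_pages);
	return 0;
}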
@@ -1674,9 +1723,8 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return ret;
 }
 
-static int check_cpu_on_node(void *data)
+static int check_cpu_on_node(pg_data_t *pgdat)
 {
-	struct pglist_data *pgdat = data;
 	int cpu;
 
 	for_each_present_cpu(cpu) {
@@ -1691,10 +1739,9 @@ static int check_cpu_on_node(void *data)
 	return 0;
 }
 
-static void unmap_cpu_on_node(void *data)
+static void unmap_cpu_on_node(pg_data_t *pgdat)
 {
 #ifdef CONFIG_ACPI_NUMA
-	struct pglist_data *pgdat = data;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
@@ -1703,10 +1750,11 @@ static void unmap_cpu_on_node(void *data)
 #endif
 }
 
-static int check_and_unmap_cpu_on_node(void *data)
+static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
 {
-	int ret = check_cpu_on_node(data);
+	int ret;
 
+	ret = check_cpu_on_node(pgdat);
 	if (ret)
 		return ret;
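Why did these helpers ever take void *? They were written as stop_machine() callbacks, and stop_machine() only accepts an int (*)(void *). Once a later hunk calls check_and_unmap_cpu_on_node() directly, the typed pg_data_t * parameter becomes possible. A userspace analogy of the constraint (all names below are invented):

#include <stdio.h>

struct node {
	int id;
};

/* old style: a generic runner forces the cast back from void* */
static int check_via_void(void *data)
{
	struct node *n = data;
	return n->id;
}

/* new style: a direct call allows a typed parameter */
static int check_typed(struct node *n)
{
	return n->id;
}

/* stands in for stop_machine()'s int (*fn)(void *) contract */
static int run_generic(int (*fn)(void *), void *data)
{
	return fn(data);
}

int main(void)
{
	struct node n = { .id = 7 };

	printf("%d %d\n", run_generic(check_via_void, &n), check_typed(&n));
	return 0;
}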
@@ -1715,11 +1763,18 @@ static int check_and_unmap_cpu_on_node(void *data)
 	 * the cpu_to_node() now.
 	 */
 
-	unmap_cpu_on_node(data);
+	unmap_cpu_on_node(pgdat);
 	return 0;
 }
 
-/* offline the node if all memory sections of this node are removed */
+/**
+ * try_offline_node
+ *
+ * Offline a node if all memory sections and cpus of the node are removed.
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call.
+ */
 void try_offline_node(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
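A hedged usage sketch of the locking contract the new kernel-doc states: lock_device_hotplug()/unlock_device_hotplug() are real kernel APIs, but the caller below is invented for illustration and is not part of this patch.

#include <linux/device.h>
#include <linux/memory_hotplug.h>

/* invented example caller, not from this patch */
static void example_remove_node_memory(int nid, u64 start, u64 size)
{
	lock_device_hotplug();	/* serialize against online/offline ops */
	remove_memory(nid, start, size);	/* may end in try_offline_node() */
	unlock_device_hotplug();
}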
@@ -1745,7 +1800,7 @@ void try_offline_node(int nid)
 		return;
 	}
 
-	if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL))
+	if (check_and_unmap_cpu_on_node(pgdat))
 		return;
 
 	/*
@@ -1782,10 +1837,19 @@ void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
+/**
+ * remove_memory
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
 void __ref remove_memory(int nid, u64 start, u64 size)
 {
 	int ret;
 
+	BUG_ON(check_hotplug_memory_range(start, size));
+
 	lock_memory_hotplug();
 
 	/*