about | summary | refs | log | tree | commit | diff | stats
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- mm/memory_hotplug.c 172
1 files changed, 85 insertions, 87 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2b2b3ccbbfb5..b9a667d36c55 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,7 @@
34#include <linux/hugetlb.h> 34#include <linux/hugetlb.h>
35#include <linux/memblock.h> 35#include <linux/memblock.h>
36#include <linux/compaction.h> 36#include <linux/compaction.h>
37#include <linux/rmap.h>
37 38
38#include <asm/tlbflush.h> 39#include <asm/tlbflush.h>
39 40
@@ -253,7 +254,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
253 if (pfn_valid(phys_start_pfn)) 254 if (pfn_valid(phys_start_pfn))
254 return -EEXIST; 255 return -EEXIST;
255 256
256 ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); 257 ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
257 if (ret < 0) 258 if (ret < 0)
258 return ret; 259 return ret;
259 260
@@ -743,14 +744,13 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
743 int nid = pgdat->node_id; 744 int nid = pgdat->node_id;
744 unsigned long flags; 745 unsigned long flags;
745 746
746 if (zone_is_empty(zone))
747 init_currently_empty_zone(zone, start_pfn, nr_pages);
748
749 clear_zone_contiguous(zone); 747 clear_zone_contiguous(zone);
750 748
751 /* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */ 749 /* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
752 pgdat_resize_lock(pgdat, &flags); 750 pgdat_resize_lock(pgdat, &flags);
753 zone_span_writelock(zone); 751 zone_span_writelock(zone);
752 if (zone_is_empty(zone))
753 init_currently_empty_zone(zone, start_pfn, nr_pages);
754 resize_zone_range(zone, start_pfn, nr_pages); 754 resize_zone_range(zone, start_pfn, nr_pages);
755 zone_span_writeunlock(zone); 755 zone_span_writeunlock(zone);
756 resize_pgdat_range(pgdat, start_pfn, nr_pages); 756 resize_pgdat_range(pgdat, start_pfn, nr_pages);
@@ -1078,7 +1078,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
1078 * 1078 *
1079 * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG 1079 * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
1080 */ 1080 */
1081int __ref add_memory_resource(int nid, struct resource *res, bool online) 1081int __ref add_memory_resource(int nid, struct resource *res)
1082{ 1082{
1083 u64 start, size; 1083 u64 start, size;
1084 bool new_node = false; 1084 bool new_node = false;
@@ -1133,7 +1133,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
1133 mem_hotplug_done(); 1133 mem_hotplug_done();
1134 1134
1135 /* online pages if requested */ 1135 /* online pages if requested */
1136 if (online) 1136 if (memhp_auto_online)
1137 walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), 1137 walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
1138 NULL, online_memory_block); 1138 NULL, online_memory_block);
1139 1139
@@ -1157,7 +1157,7 @@ int __ref __add_memory(int nid, u64 start, u64 size)
1157 if (IS_ERR(res)) 1157 if (IS_ERR(res))
1158 return PTR_ERR(res); 1158 return PTR_ERR(res);
1159 1159
1160 ret = add_memory_resource(nid, res, memhp_auto_online); 1160 ret = add_memory_resource(nid, res);
1161 if (ret < 0) 1161 if (ret < 0)
1162 release_memory_resource(res); 1162 release_memory_resource(res);
1163 return ret; 1163 return ret;
@@ -1226,7 +1226,7 @@ static bool is_pageblock_removable_nolock(struct page *page)
1226 if (!zone_spans_pfn(zone, pfn)) 1226 if (!zone_spans_pfn(zone, pfn))
1227 return false; 1227 return false;
1228 1228
1229 return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); 1229 return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, SKIP_HWPOISON);
1230} 1230}
1231 1231
1232/* Checks if this range of memory is likely to be hot-removable. */ 1232/* Checks if this range of memory is likely to be hot-removable. */
@@ -1339,18 +1339,16 @@ static struct page *new_node_page(struct page *page, unsigned long private)
1339 return new_page_nodemask(page, nid, &nmask); 1339 return new_page_nodemask(page, nid, &nmask);
1340} 1340}
1341 1341
1342#define NR_OFFLINE_AT_ONCE_PAGES (256)
1343static int 1342static int
1344do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) 1343do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
1345{ 1344{
1346 unsigned long pfn; 1345 unsigned long pfn;
1347 struct page *page; 1346 struct page *page;
1348 int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
1349 int not_managed = 0; 1347 int not_managed = 0;
1350 int ret = 0; 1348 int ret = 0;
1351 LIST_HEAD(source); 1349 LIST_HEAD(source);
1352 1350
1353 for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) { 1351 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1354 if (!pfn_valid(pfn)) 1352 if (!pfn_valid(pfn))
1355 continue; 1353 continue;
1356 page = pfn_to_page(pfn); 1354 page = pfn_to_page(pfn);
@@ -1362,13 +1360,27 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
1362 ret = -EBUSY; 1360 ret = -EBUSY;
1363 break; 1361 break;
1364 } 1362 }
1365 if (isolate_huge_page(page, &source)) 1363 isolate_huge_page(page, &source);
1366 move_pages -= 1 << compound_order(head);
1367 continue; 1364 continue;
1368 } else if (PageTransHuge(page)) 1365 } else if (PageTransHuge(page))
1369 pfn = page_to_pfn(compound_head(page)) 1366 pfn = page_to_pfn(compound_head(page))
1370 + hpage_nr_pages(page) - 1; 1367 + hpage_nr_pages(page) - 1;
1371 1368
1369 /*
1370 * HWPoison pages have elevated reference counts so the migration would
1371 * fail on them. It also doesn't make any sense to migrate them in the
1372 * first place. Still try to unmap such a page in case it is still mapped
1373 * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
1374 * the unmap as the catch all safety net).
1375 */
1376 if (PageHWPoison(page)) {
1377 if (WARN_ON(PageLRU(page)))
1378 isolate_lru_page(page);
1379 if (page_mapped(page))
1380 try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
1381 continue;
1382 }
1383
1372 if (!get_page_unless_zero(page)) 1384 if (!get_page_unless_zero(page))
1373 continue; 1385 continue;
1374 /* 1386 /*
@@ -1382,16 +1394,13 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
1382 if (!ret) { /* Success */ 1394 if (!ret) { /* Success */
1383 put_page(page); 1395 put_page(page);
1384 list_add_tail(&page->lru, &source); 1396 list_add_tail(&page->lru, &source);
1385 move_pages--;
1386 if (!__PageMovable(page)) 1397 if (!__PageMovable(page))
1387 inc_node_page_state(page, NR_ISOLATED_ANON + 1398 inc_node_page_state(page, NR_ISOLATED_ANON +
1388 page_is_file_cache(page)); 1399 page_is_file_cache(page));
1389 1400
1390 } else { 1401 } else {
1391#ifdef CONFIG_DEBUG_VM 1402 pr_warn("failed to isolate pfn %lx\n", pfn);
1392 pr_alert("failed to isolate pfn %lx\n", pfn);
1393 dump_page(page, "isolation failed"); 1403 dump_page(page, "isolation failed");
1394#endif
1395 put_page(page); 1404 put_page(page);
1396 /* Because we don't have big zone->lock. we should 1405 /* Because we don't have big zone->lock. we should
1397 check this again here. */ 1406 check this again here. */
@@ -1411,8 +1420,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
1411 /* Allocate a new page from the nearest neighbor node */ 1420 /* Allocate a new page from the nearest neighbor node */
1412 ret = migrate_pages(&source, new_node_page, NULL, 0, 1421 ret = migrate_pages(&source, new_node_page, NULL, 0,
1413 MIGRATE_SYNC, MR_MEMORY_HOTPLUG); 1422 MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
1414 if (ret) 1423 if (ret) {
1424 list_for_each_entry(page, &source, lru) {
1425 pr_warn("migrating pfn %lx failed ret:%d ",
1426 page_to_pfn(page), ret);
1427 dump_page(page, "migration failure");
1428 }
1415 putback_movable_pages(&source); 1429 putback_movable_pages(&source);
1430 }
1416 } 1431 }
1417out: 1432out:
1418 return ret; 1433 return ret;
@@ -1553,12 +1568,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
1553 unsigned long valid_start, valid_end; 1568 unsigned long valid_start, valid_end;
1554 struct zone *zone; 1569 struct zone *zone;
1555 struct memory_notify arg; 1570 struct memory_notify arg;
1556 1571 char *reason;
1557 /* at least, alignment against pageblock is necessary */
1558 if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
1559 return -EINVAL;
1560 if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
1561 return -EINVAL;
1562 1572
1563 mem_hotplug_begin(); 1573 mem_hotplug_begin();
1564 1574
@@ -1567,7 +1577,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
1567 if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, 1577 if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
1568 &valid_end)) { 1578 &valid_end)) {
1569 mem_hotplug_done(); 1579 mem_hotplug_done();
1570 return -EINVAL; 1580 ret = -EINVAL;
1581 reason = "multizone range";
1582 goto failed_removal;
1571 } 1583 }
1572 1584
1573 zone = page_zone(pfn_to_page(valid_start)); 1585 zone = page_zone(pfn_to_page(valid_start));
@@ -1576,10 +1588,12 @@ static int __ref __offline_pages(unsigned long start_pfn,
1576 1588
1577 /* set above range as isolated */ 1589 /* set above range as isolated */
1578 ret = start_isolate_page_range(start_pfn, end_pfn, 1590 ret = start_isolate_page_range(start_pfn, end_pfn,
1579 MIGRATE_MOVABLE, true); 1591 MIGRATE_MOVABLE,
1592 SKIP_HWPOISON | REPORT_FAILURE);
1580 if (ret) { 1593 if (ret) {
1581 mem_hotplug_done(); 1594 mem_hotplug_done();
1582 return ret; 1595 reason = "failure to isolate range";
1596 goto failed_removal;
1583 } 1597 }
1584 1598
1585 arg.start_pfn = start_pfn; 1599 arg.start_pfn = start_pfn;
@@ -1588,37 +1602,47 @@ static int __ref __offline_pages(unsigned long start_pfn,
1588 1602
1589 ret = memory_notify(MEM_GOING_OFFLINE, &arg); 1603 ret = memory_notify(MEM_GOING_OFFLINE, &arg);
1590 ret = notifier_to_errno(ret); 1604 ret = notifier_to_errno(ret);
1591 if (ret) 1605 if (ret) {
1592 goto failed_removal; 1606 reason = "notifier failure";
1607 goto failed_removal_isolated;
1608 }
1593 1609
1594 pfn = start_pfn; 1610 do {
1595repeat: 1611 for (pfn = start_pfn; pfn;) {
1596 /* start memory hot removal */ 1612 if (signal_pending(current)) {
1597 ret = -EINTR; 1613 ret = -EINTR;
1598 if (signal_pending(current)) 1614 reason = "signal backoff";
1599 goto failed_removal; 1615 goto failed_removal_isolated;
1616 }
1600 1617
1601 cond_resched(); 1618 cond_resched();
1602 lru_add_drain_all(); 1619 lru_add_drain_all();
1603 drain_all_pages(zone); 1620 drain_all_pages(zone);
1621
1622 pfn = scan_movable_pages(pfn, end_pfn);
1623 if (pfn) {
1624 /*
1625 * TODO: fatal migration failures should bail
1626 * out
1627 */
1628 do_migrate_range(pfn, end_pfn);
1629 }
1630 }
1604 1631
1605 pfn = scan_movable_pages(start_pfn, end_pfn); 1632 /*
1606 if (pfn) { /* We have movable pages */ 1633 * Dissolve free hugepages in the memory block before doing
1607 ret = do_migrate_range(pfn, end_pfn); 1634 * offlining actually in order to make hugetlbfs's object
1608 goto repeat; 1635 * counting consistent.
1609 } 1636 */
1637 ret = dissolve_free_huge_pages(start_pfn, end_pfn);
1638 if (ret) {
1639 reason = "failure to dissolve huge pages";
1640 goto failed_removal_isolated;
1641 }
1642 /* check again */
1643 offlined_pages = check_pages_isolated(start_pfn, end_pfn);
1644 } while (offlined_pages < 0);
1610 1645
1611 /*
1612 * dissolve free hugepages in the memory block before doing offlining
1613 * actually in order to make hugetlbfs's object counting consistent.
1614 */
1615 ret = dissolve_free_huge_pages(start_pfn, end_pfn);
1616 if (ret)
1617 goto failed_removal;
1618 /* check again */
1619 offlined_pages = check_pages_isolated(start_pfn, end_pfn);
1620 if (offlined_pages < 0)
1621 goto repeat;
1622 pr_info("Offlined Pages %ld\n", offlined_pages); 1646 pr_info("Offlined Pages %ld\n", offlined_pages);
1623 /* Ok, all of our target is isolated. 1647 /* Ok, all of our target is isolated.
1624 We cannot do rollback at this point. */ 1648 We cannot do rollback at this point. */
@@ -1654,13 +1678,15 @@ repeat:
1654 mem_hotplug_done(); 1678 mem_hotplug_done();
1655 return 0; 1679 return 0;
1656 1680
1681failed_removal_isolated:
1682 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
1657failed_removal: 1683failed_removal:
1658 pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n", 1684 pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
1659 (unsigned long long) start_pfn << PAGE_SHIFT, 1685 (unsigned long long) start_pfn << PAGE_SHIFT,
1660 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 1686 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
1687 reason);
1661 memory_notify(MEM_CANCEL_OFFLINE, &arg); 1688 memory_notify(MEM_CANCEL_OFFLINE, &arg);
1662 /* pushback to free area */ 1689 /* pushback to free area */
1663 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
1664 mem_hotplug_done(); 1690 mem_hotplug_done();
1665 return ret; 1691 return ret;
1666} 1692}
@@ -1753,34 +1779,6 @@ static int check_cpu_on_node(pg_data_t *pgdat)
1753 return 0; 1779 return 0;
1754} 1780}
1755 1781
1756static void unmap_cpu_on_node(pg_data_t *pgdat)
1757{
1758#ifdef CONFIG_ACPI_NUMA
1759 int cpu;
1760
1761 for_each_possible_cpu(cpu)
1762 if (cpu_to_node(cpu) == pgdat->node_id)
1763 numa_clear_node(cpu);
1764#endif
1765}
1766
1767static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
1768{
1769 int ret;
1770
1771 ret = check_cpu_on_node(pgdat);
1772 if (ret)
1773 return ret;
1774
1775 /*
1776 * the node will be offlined when we come here, so we can clear
1777 * the cpu_to_node() now.
1778 */
1779
1780 unmap_cpu_on_node(pgdat);
1781 return 0;
1782}
1783
1784/** 1782/**
1785 * try_offline_node 1783 * try_offline_node
1786 * @nid: the node ID 1784 * @nid: the node ID
@@ -1813,7 +1811,7 @@ void try_offline_node(int nid)
1813 return; 1811 return;
1814 } 1812 }
1815 1813
1816 if (check_and_unmap_cpu_on_node(pgdat)) 1814 if (check_cpu_on_node(pgdat))
1817 return; 1815 return;
1818 1816
1819 /* 1817 /*
@@ -1858,7 +1856,7 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
1858 memblock_free(start, size); 1856 memblock_free(start, size);
1859 memblock_remove(start, size); 1857 memblock_remove(start, size);
1860 1858
1861 arch_remove_memory(start, size, NULL); 1859 arch_remove_memory(nid, start, size, NULL);
1862 1860
1863 try_offline_node(nid); 1861 try_offline_node(nid);
1864 1862