diff options
Diffstat (limited to 'mm/memory_hotplug.c')
| -rw-r--r-- | mm/memory_hotplug.c | 172 |
1 files changed, 85 insertions, 87 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2b2b3ccbbfb5..b9a667d36c55 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <linux/hugetlb.h> | 34 | #include <linux/hugetlb.h> |
| 35 | #include <linux/memblock.h> | 35 | #include <linux/memblock.h> |
| 36 | #include <linux/compaction.h> | 36 | #include <linux/compaction.h> |
| 37 | #include <linux/rmap.h> | ||
| 37 | 38 | ||
| 38 | #include <asm/tlbflush.h> | 39 | #include <asm/tlbflush.h> |
| 39 | 40 | ||
| @@ -253,7 +254,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, | |||
| 253 | if (pfn_valid(phys_start_pfn)) | 254 | if (pfn_valid(phys_start_pfn)) |
| 254 | return -EEXIST; | 255 | return -EEXIST; |
| 255 | 256 | ||
| 256 | ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); | 257 | ret = sparse_add_one_section(nid, phys_start_pfn, altmap); |
| 257 | if (ret < 0) | 258 | if (ret < 0) |
| 258 | return ret; | 259 | return ret; |
| 259 | 260 | ||
| @@ -743,14 +744,13 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, | |||
| 743 | int nid = pgdat->node_id; | 744 | int nid = pgdat->node_id; |
| 744 | unsigned long flags; | 745 | unsigned long flags; |
| 745 | 746 | ||
| 746 | if (zone_is_empty(zone)) | ||
| 747 | init_currently_empty_zone(zone, start_pfn, nr_pages); | ||
| 748 | |||
| 749 | clear_zone_contiguous(zone); | 747 | clear_zone_contiguous(zone); |
| 750 | 748 | ||
| 751 | /* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */ | 749 | /* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */ |
| 752 | pgdat_resize_lock(pgdat, &flags); | 750 | pgdat_resize_lock(pgdat, &flags); |
| 753 | zone_span_writelock(zone); | 751 | zone_span_writelock(zone); |
| 752 | if (zone_is_empty(zone)) | ||
| 753 | init_currently_empty_zone(zone, start_pfn, nr_pages); | ||
| 754 | resize_zone_range(zone, start_pfn, nr_pages); | 754 | resize_zone_range(zone, start_pfn, nr_pages); |
| 755 | zone_span_writeunlock(zone); | 755 | zone_span_writeunlock(zone); |
| 756 | resize_pgdat_range(pgdat, start_pfn, nr_pages); | 756 | resize_pgdat_range(pgdat, start_pfn, nr_pages); |
| @@ -1078,7 +1078,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) | |||
| 1078 | * | 1078 | * |
| 1079 | * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG | 1079 | * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG |
| 1080 | */ | 1080 | */ |
| 1081 | int __ref add_memory_resource(int nid, struct resource *res, bool online) | 1081 | int __ref add_memory_resource(int nid, struct resource *res) |
| 1082 | { | 1082 | { |
| 1083 | u64 start, size; | 1083 | u64 start, size; |
| 1084 | bool new_node = false; | 1084 | bool new_node = false; |
| @@ -1133,7 +1133,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) | |||
| 1133 | mem_hotplug_done(); | 1133 | mem_hotplug_done(); |
| 1134 | 1134 | ||
| 1135 | /* online pages if requested */ | 1135 | /* online pages if requested */ |
| 1136 | if (online) | 1136 | if (memhp_auto_online) |
| 1137 | walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), | 1137 | walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), |
| 1138 | NULL, online_memory_block); | 1138 | NULL, online_memory_block); |
| 1139 | 1139 | ||
| @@ -1157,7 +1157,7 @@ int __ref __add_memory(int nid, u64 start, u64 size) | |||
| 1157 | if (IS_ERR(res)) | 1157 | if (IS_ERR(res)) |
| 1158 | return PTR_ERR(res); | 1158 | return PTR_ERR(res); |
| 1159 | 1159 | ||
| 1160 | ret = add_memory_resource(nid, res, memhp_auto_online); | 1160 | ret = add_memory_resource(nid, res); |
| 1161 | if (ret < 0) | 1161 | if (ret < 0) |
| 1162 | release_memory_resource(res); | 1162 | release_memory_resource(res); |
| 1163 | return ret; | 1163 | return ret; |
| @@ -1226,7 +1226,7 @@ static bool is_pageblock_removable_nolock(struct page *page) | |||
| 1226 | if (!zone_spans_pfn(zone, pfn)) | 1226 | if (!zone_spans_pfn(zone, pfn)) |
| 1227 | return false; | 1227 | return false; |
| 1228 | 1228 | ||
| 1229 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); | 1229 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, SKIP_HWPOISON); |
| 1230 | } | 1230 | } |
| 1231 | 1231 | ||
| 1232 | /* Checks if this range of memory is likely to be hot-removable. */ | 1232 | /* Checks if this range of memory is likely to be hot-removable. */ |
| @@ -1339,18 +1339,16 @@ static struct page *new_node_page(struct page *page, unsigned long private) | |||
| 1339 | return new_page_nodemask(page, nid, &nmask); | 1339 | return new_page_nodemask(page, nid, &nmask); |
| 1340 | } | 1340 | } |
| 1341 | 1341 | ||
| 1342 | #define NR_OFFLINE_AT_ONCE_PAGES (256) | ||
| 1343 | static int | 1342 | static int |
| 1344 | do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | 1343 | do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) |
| 1345 | { | 1344 | { |
| 1346 | unsigned long pfn; | 1345 | unsigned long pfn; |
| 1347 | struct page *page; | 1346 | struct page *page; |
| 1348 | int move_pages = NR_OFFLINE_AT_ONCE_PAGES; | ||
| 1349 | int not_managed = 0; | 1347 | int not_managed = 0; |
| 1350 | int ret = 0; | 1348 | int ret = 0; |
| 1351 | LIST_HEAD(source); | 1349 | LIST_HEAD(source); |
| 1352 | 1350 | ||
| 1353 | for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) { | 1351 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
| 1354 | if (!pfn_valid(pfn)) | 1352 | if (!pfn_valid(pfn)) |
| 1355 | continue; | 1353 | continue; |
| 1356 | page = pfn_to_page(pfn); | 1354 | page = pfn_to_page(pfn); |
| @@ -1362,13 +1360,27 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
| 1362 | ret = -EBUSY; | 1360 | ret = -EBUSY; |
| 1363 | break; | 1361 | break; |
| 1364 | } | 1362 | } |
| 1365 | if (isolate_huge_page(page, &source)) | 1363 | isolate_huge_page(page, &source); |
| 1366 | move_pages -= 1 << compound_order(head); | ||
| 1367 | continue; | 1364 | continue; |
| 1368 | } else if (PageTransHuge(page)) | 1365 | } else if (PageTransHuge(page)) |
| 1369 | pfn = page_to_pfn(compound_head(page)) | 1366 | pfn = page_to_pfn(compound_head(page)) |
| 1370 | + hpage_nr_pages(page) - 1; | 1367 | + hpage_nr_pages(page) - 1; |
| 1371 | 1368 | ||
| 1369 | /* | ||
| 1370 | * HWPoison pages have elevated reference counts so the migration would | ||
| 1371 | * fail on them. It also doesn't make any sense to migrate them in the | ||
| 1372 | * first place. Still try to unmap such a page in case it is still mapped | ||
| 1373 | * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep | ||
| 1374 | * the unmap as the catch all safety net). | ||
| 1375 | */ | ||
| 1376 | if (PageHWPoison(page)) { | ||
| 1377 | if (WARN_ON(PageLRU(page))) | ||
| 1378 | isolate_lru_page(page); | ||
| 1379 | if (page_mapped(page)) | ||
| 1380 | try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS); | ||
| 1381 | continue; | ||
| 1382 | } | ||
| 1383 | |||
| 1372 | if (!get_page_unless_zero(page)) | 1384 | if (!get_page_unless_zero(page)) |
| 1373 | continue; | 1385 | continue; |
| 1374 | /* | 1386 | /* |
| @@ -1382,16 +1394,13 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
| 1382 | if (!ret) { /* Success */ | 1394 | if (!ret) { /* Success */ |
| 1383 | put_page(page); | 1395 | put_page(page); |
| 1384 | list_add_tail(&page->lru, &source); | 1396 | list_add_tail(&page->lru, &source); |
| 1385 | move_pages--; | ||
| 1386 | if (!__PageMovable(page)) | 1397 | if (!__PageMovable(page)) |
| 1387 | inc_node_page_state(page, NR_ISOLATED_ANON + | 1398 | inc_node_page_state(page, NR_ISOLATED_ANON + |
| 1388 | page_is_file_cache(page)); | 1399 | page_is_file_cache(page)); |
| 1389 | 1400 | ||
| 1390 | } else { | 1401 | } else { |
| 1391 | #ifdef CONFIG_DEBUG_VM | 1402 | pr_warn("failed to isolate pfn %lx\n", pfn); |
| 1392 | pr_alert("failed to isolate pfn %lx\n", pfn); | ||
| 1393 | dump_page(page, "isolation failed"); | 1403 | dump_page(page, "isolation failed"); |
| 1394 | #endif | ||
| 1395 | put_page(page); | 1404 | put_page(page); |
| 1396 | /* Because we don't have big zone->lock. we should | 1405 | /* Because we don't have big zone->lock. we should |
| 1397 | check this again here. */ | 1406 | check this again here. */ |
| @@ -1411,8 +1420,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
| 1411 | /* Allocate a new page from the nearest neighbor node */ | 1420 | /* Allocate a new page from the nearest neighbor node */ |
| 1412 | ret = migrate_pages(&source, new_node_page, NULL, 0, | 1421 | ret = migrate_pages(&source, new_node_page, NULL, 0, |
| 1413 | MIGRATE_SYNC, MR_MEMORY_HOTPLUG); | 1422 | MIGRATE_SYNC, MR_MEMORY_HOTPLUG); |
| 1414 | if (ret) | 1423 | if (ret) { |
| 1424 | list_for_each_entry(page, &source, lru) { | ||
| 1425 | pr_warn("migrating pfn %lx failed ret:%d ", | ||
| 1426 | page_to_pfn(page), ret); | ||
| 1427 | dump_page(page, "migration failure"); | ||
| 1428 | } | ||
| 1415 | putback_movable_pages(&source); | 1429 | putback_movable_pages(&source); |
| 1430 | } | ||
| 1416 | } | 1431 | } |
| 1417 | out: | 1432 | out: |
| 1418 | return ret; | 1433 | return ret; |
| @@ -1553,12 +1568,7 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
| 1553 | unsigned long valid_start, valid_end; | 1568 | unsigned long valid_start, valid_end; |
| 1554 | struct zone *zone; | 1569 | struct zone *zone; |
| 1555 | struct memory_notify arg; | 1570 | struct memory_notify arg; |
| 1556 | 1571 | char *reason; | |
| 1557 | /* at least, alignment against pageblock is necessary */ | ||
| 1558 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) | ||
| 1559 | return -EINVAL; | ||
| 1560 | if (!IS_ALIGNED(end_pfn, pageblock_nr_pages)) | ||
| 1561 | return -EINVAL; | ||
| 1562 | 1572 | ||
| 1563 | mem_hotplug_begin(); | 1573 | mem_hotplug_begin(); |
| 1564 | 1574 | ||
| @@ -1567,7 +1577,9 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
| 1567 | if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, | 1577 | if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, |
| 1568 | &valid_end)) { | 1578 | &valid_end)) { |
| 1569 | mem_hotplug_done(); | 1579 | mem_hotplug_done(); |
| 1570 | return -EINVAL; | 1580 | ret = -EINVAL; |
| 1581 | reason = "multizone range"; | ||
| 1582 | goto failed_removal; | ||
| 1571 | } | 1583 | } |
| 1572 | 1584 | ||
| 1573 | zone = page_zone(pfn_to_page(valid_start)); | 1585 | zone = page_zone(pfn_to_page(valid_start)); |
| @@ -1576,10 +1588,12 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
| 1576 | 1588 | ||
| 1577 | /* set above range as isolated */ | 1589 | /* set above range as isolated */ |
| 1578 | ret = start_isolate_page_range(start_pfn, end_pfn, | 1590 | ret = start_isolate_page_range(start_pfn, end_pfn, |
| 1579 | MIGRATE_MOVABLE, true); | 1591 | MIGRATE_MOVABLE, |
| 1592 | SKIP_HWPOISON | REPORT_FAILURE); | ||
| 1580 | if (ret) { | 1593 | if (ret) { |
| 1581 | mem_hotplug_done(); | 1594 | mem_hotplug_done(); |
| 1582 | return ret; | 1595 | reason = "failure to isolate range"; |
| 1596 | goto failed_removal; | ||
| 1583 | } | 1597 | } |
| 1584 | 1598 | ||
| 1585 | arg.start_pfn = start_pfn; | 1599 | arg.start_pfn = start_pfn; |
| @@ -1588,37 +1602,47 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
| 1588 | 1602 | ||
| 1589 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); | 1603 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); |
| 1590 | ret = notifier_to_errno(ret); | 1604 | ret = notifier_to_errno(ret); |
| 1591 | if (ret) | 1605 | if (ret) { |
| 1592 | goto failed_removal; | 1606 | reason = "notifier failure"; |
| 1607 | goto failed_removal_isolated; | ||
| 1608 | } | ||
| 1593 | 1609 | ||
| 1594 | pfn = start_pfn; | 1610 | do { |
| 1595 | repeat: | 1611 | for (pfn = start_pfn; pfn;) { |
| 1596 | /* start memory hot removal */ | 1612 | if (signal_pending(current)) { |
| 1597 | ret = -EINTR; | 1613 | ret = -EINTR; |
| 1598 | if (signal_pending(current)) | 1614 | reason = "signal backoff"; |
| 1599 | goto failed_removal; | 1615 | goto failed_removal_isolated; |
| 1616 | } | ||
| 1600 | 1617 | ||
| 1601 | cond_resched(); | 1618 | cond_resched(); |
| 1602 | lru_add_drain_all(); | 1619 | lru_add_drain_all(); |
| 1603 | drain_all_pages(zone); | 1620 | drain_all_pages(zone); |
| 1621 | |||
| 1622 | pfn = scan_movable_pages(pfn, end_pfn); | ||
| 1623 | if (pfn) { | ||
| 1624 | /* | ||
| 1625 | * TODO: fatal migration failures should bail | ||
| 1626 | * out | ||
| 1627 | */ | ||
| 1628 | do_migrate_range(pfn, end_pfn); | ||
| 1629 | } | ||
| 1630 | } | ||
| 1604 | 1631 | ||
| 1605 | pfn = scan_movable_pages(start_pfn, end_pfn); | 1632 | /* |
| 1606 | if (pfn) { /* We have movable pages */ | 1633 | * Dissolve free hugepages in the memory block before doing |
| 1607 | ret = do_migrate_range(pfn, end_pfn); | 1634 | * offlining actually in order to make hugetlbfs's object |
| 1608 | goto repeat; | 1635 | * counting consistent. |
| 1609 | } | 1636 | */ |
| 1637 | ret = dissolve_free_huge_pages(start_pfn, end_pfn); | ||
| 1638 | if (ret) { | ||
| 1639 | reason = "failure to dissolve huge pages"; | ||
| 1640 | goto failed_removal_isolated; | ||
| 1641 | } | ||
| 1642 | /* check again */ | ||
| 1643 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | ||
| 1644 | } while (offlined_pages < 0); | ||
| 1610 | 1645 | ||
| 1611 | /* | ||
| 1612 | * dissolve free hugepages in the memory block before doing offlining | ||
| 1613 | * actually in order to make hugetlbfs's object counting consistent. | ||
| 1614 | */ | ||
| 1615 | ret = dissolve_free_huge_pages(start_pfn, end_pfn); | ||
| 1616 | if (ret) | ||
| 1617 | goto failed_removal; | ||
| 1618 | /* check again */ | ||
| 1619 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | ||
| 1620 | if (offlined_pages < 0) | ||
| 1621 | goto repeat; | ||
| 1622 | pr_info("Offlined Pages %ld\n", offlined_pages); | 1646 | pr_info("Offlined Pages %ld\n", offlined_pages); |
| 1623 | /* Ok, all of our target is isolated. | 1647 | /* Ok, all of our target is isolated. |
| 1624 | We cannot do rollback at this point. */ | 1648 | We cannot do rollback at this point. */ |
| @@ -1654,13 +1678,15 @@ repeat: | |||
| 1654 | mem_hotplug_done(); | 1678 | mem_hotplug_done(); |
| 1655 | return 0; | 1679 | return 0; |
| 1656 | 1680 | ||
| 1681 | failed_removal_isolated: | ||
| 1682 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | ||
| 1657 | failed_removal: | 1683 | failed_removal: |
| 1658 | pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n", | 1684 | pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", |
| 1659 | (unsigned long long) start_pfn << PAGE_SHIFT, | 1685 | (unsigned long long) start_pfn << PAGE_SHIFT, |
| 1660 | ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); | 1686 | ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, |
| 1687 | reason); | ||
| 1661 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 1688 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
| 1662 | /* pushback to free area */ | 1689 | /* pushback to free area */ |
| 1663 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | ||
| 1664 | mem_hotplug_done(); | 1690 | mem_hotplug_done(); |
| 1665 | return ret; | 1691 | return ret; |
| 1666 | } | 1692 | } |
| @@ -1753,34 +1779,6 @@ static int check_cpu_on_node(pg_data_t *pgdat) | |||
| 1753 | return 0; | 1779 | return 0; |
| 1754 | } | 1780 | } |
| 1755 | 1781 | ||
| 1756 | static void unmap_cpu_on_node(pg_data_t *pgdat) | ||
| 1757 | { | ||
| 1758 | #ifdef CONFIG_ACPI_NUMA | ||
| 1759 | int cpu; | ||
| 1760 | |||
| 1761 | for_each_possible_cpu(cpu) | ||
| 1762 | if (cpu_to_node(cpu) == pgdat->node_id) | ||
| 1763 | numa_clear_node(cpu); | ||
| 1764 | #endif | ||
| 1765 | } | ||
| 1766 | |||
| 1767 | static int check_and_unmap_cpu_on_node(pg_data_t *pgdat) | ||
| 1768 | { | ||
| 1769 | int ret; | ||
| 1770 | |||
| 1771 | ret = check_cpu_on_node(pgdat); | ||
| 1772 | if (ret) | ||
| 1773 | return ret; | ||
| 1774 | |||
| 1775 | /* | ||
| 1776 | * the node will be offlined when we come here, so we can clear | ||
| 1777 | * the cpu_to_node() now. | ||
| 1778 | */ | ||
| 1779 | |||
| 1780 | unmap_cpu_on_node(pgdat); | ||
| 1781 | return 0; | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | /** | 1782 | /** |
| 1785 | * try_offline_node | 1783 | * try_offline_node |
| 1786 | * @nid: the node ID | 1784 | * @nid: the node ID |
| @@ -1813,7 +1811,7 @@ void try_offline_node(int nid) | |||
| 1813 | return; | 1811 | return; |
| 1814 | } | 1812 | } |
| 1815 | 1813 | ||
| 1816 | if (check_and_unmap_cpu_on_node(pgdat)) | 1814 | if (check_cpu_on_node(pgdat)) |
| 1817 | return; | 1815 | return; |
| 1818 | 1816 | ||
| 1819 | /* | 1817 | /* |
| @@ -1858,7 +1856,7 @@ void __ref __remove_memory(int nid, u64 start, u64 size) | |||
| 1858 | memblock_free(start, size); | 1856 | memblock_free(start, size); |
| 1859 | memblock_remove(start, size); | 1857 | memblock_remove(start, size); |
| 1860 | 1858 | ||
| 1861 | arch_remove_memory(start, size, NULL); | 1859 | arch_remove_memory(nid, start, size, NULL); |
| 1862 | 1860 | ||
| 1863 | try_offline_node(nid); | 1861 | try_offline_node(nid); |
| 1864 | 1862 | ||
