Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r--  mm/memory_hotplug.c  112
1 file changed, 88 insertions(+), 24 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ca1dd3aa5eee..0eb1a1df649d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -30,6 +30,7 @@
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
 #include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -194,7 +195,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 
 	zone = &pgdat->node_zones[0];
 	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-		if (zone->wait_table) {
+		if (zone_is_initialized(zone)) {
 			nr_pages = zone->wait_table_hash_nr_entries
 				* sizeof(wait_queue_head_t);
 			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
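The zone_is_initialized() test above, like the zone_is_empty() and zone_end_pfn() calls in the hunks that follow, replaces an open-coded check with a helper from include/linux/mmzone.h. For reference, those helpers are small static inlines, roughly as follows (a sketch from kernels of this vintage, not quoted verbatim):

	/* include/linux/mmzone.h (sketch) */
	static inline unsigned long zone_end_pfn(const struct zone *zone)
	{
		return zone->zone_start_pfn + zone->spanned_pages;
	}

	static inline bool zone_is_empty(struct zone *zone)
	{
		return zone->spanned_pages == 0;
	}

	static inline bool zone_is_initialized(struct zone *zone)
	{
		/* set up by init_currently_empty_zone() */
		return !!zone->wait_table;
	}

These conversions are readability cleanups; the conditions themselves are unchanged.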
@@ -229,8 +230,8 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 
 	zone_span_writelock(zone);
 
-	old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
-	if (!zone->spanned_pages || start_pfn < zone->zone_start_pfn)
+	old_zone_end_pfn = zone_end_pfn(zone);
+	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
 		zone->zone_start_pfn = start_pfn;
 
 	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
@@ -305,7 +306,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
 		goto out_fail;
 
 	/* use start_pfn for z1's start_pfn if z1 is empty */
-	if (z1->spanned_pages)
+	if (!zone_is_empty(z1))
 		z1_start_pfn = z1->zone_start_pfn;
 	else
 		z1_start_pfn = start_pfn;
@@ -347,7 +348,7 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
 		goto out_fail;
 
 	/* use end_pfn for z2's end_pfn if z2 is empty */
-	if (z2->spanned_pages)
+	if (!zone_is_empty(z2))
 		z2_end_pfn = zone_end_pfn(z2);
 	else
 		z2_end_pfn = end_pfn;
@@ -514,8 +515,9 @@ static int find_biggest_section_pfn(int nid, struct zone *zone,
 static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 			     unsigned long end_pfn)
 {
 	unsigned long zone_start_pfn = zone->zone_start_pfn;
-	unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
+	unsigned long zone_end_pfn = z;
 	unsigned long pfn;
 	struct mem_section *ms;
 	int nid = zone_to_nid(zone);
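The "namespace clash" comment above flags that the new local variable zone_end_pfn shadows the zone_end_pfn() helper inside shrink_zone_span(), so the helper has to be called through the temporary z before the shadowing name exists. Folding it into one line would not compile, because a C declarator brings its name into scope before the initializer is parsed (hypothetical one-liner, for illustration only):

	unsigned long zone_end_pfn = zone_end_pfn(zone);
	/* error: 'zone_end_pfn' here already names the new local
	 * variable, so the call targets a non-function object */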
@@ -1069,6 +1071,23 @@ out:
 	return ret;
 }
 
+static int check_hotplug_memory_range(u64 start, u64 size)
+{
+	u64 start_pfn = start >> PAGE_SHIFT;
+	u64 nr_pages = size >> PAGE_SHIFT;
+
+	/* Memory range must be aligned with section */
+	if ((start_pfn & ~PAGE_SECTION_MASK) ||
+	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
+		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
+		       (unsigned long long)start,
+		       (unsigned long long)size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
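check_hotplug_memory_range() enforces that hotplugged ranges are a whole number of memory sections. The same arithmetic can be exercised in userspace; the sketch below assumes x86_64 constants (4KB pages, 128MB sections), which are illustrative and not part of this patch:

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT         12	/* 4KB pages (x86_64) */
	#define SECTION_SIZE_BITS  27	/* 128MB sections (x86_64) */
	#define PAGES_PER_SECTION  (1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))
	#define PAGE_SECTION_MASK  (~(PAGES_PER_SECTION - 1))

	/* mirrors check_hotplug_memory_range(): start and size must both be
	 * section-aligned, and the size must be nonzero */
	static int range_ok(uint64_t start, uint64_t size)
	{
		uint64_t start_pfn = start >> PAGE_SHIFT;
		uint64_t nr_pages = size >> PAGE_SHIFT;

		return !((start_pfn & ~PAGE_SECTION_MASK) ||
			 (nr_pages % PAGES_PER_SECTION) || !nr_pages);
	}

	int main(void)
	{
		printf("%d\n", range_ok(0x100000000ULL, 0x8000000ULL)); /* 1: 4GB start, 128MB size */
		printf("%d\n", range_ok(0x100001000ULL, 0x8000000ULL)); /* 0: start off by one page */
		return 0;
	}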
@@ -1078,6 +1097,10 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
+	ret = check_hotplug_memory_range(start, size);
+	if (ret)
+		return ret;
+
 	lock_memory_hotplug();
 
 	res = register_memory_resource(start, size);
@@ -1208,10 +1231,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan pfn because it's much easier than scanning over
+ * linked list. This function returns the pfn of the first found movable
+ * page if it's found, otherwise 0.
  */
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 	struct page *page;
@@ -1220,6 +1245,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
 			page = pfn_to_page(pfn);
 			if (PageLRU(page))
 				return pfn;
+			if (PageHuge(page)) {
+				if (is_hugepage_active(page))
+					return pfn;
+				else
+					pfn = round_up(pfn + 1,
+						1 << compound_order(page)) - 1;
+			}
 		}
 	}
 	return 0;
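When scan_movable_pages() hits a free (inactive) hugepage it cannot return it for migration, so it skips the whole compound page: round_up(pfn + 1, 1 << compound_order(page)) - 1 parks pfn on the hugepage's last tail pfn, and the loop's pfn++ resumes just past it. A standalone sketch of the arithmetic, using the kernel's power-of-two round_up():

	#include <stdio.h>

	/* the kernel's round_up() for a power-of-two step */
	#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

	int main(void)
	{
		unsigned long pfn = 0x200;	/* head pfn of an order-9 (2MB) hugepage */
		unsigned long order = 9;	/* 512 base pages */

		pfn = round_up(pfn + 1, 1UL << order) - 1;
		printf("0x%lx\n", pfn);		/* 0x3ff: last tail pfn; pfn++ steps past it */
		return 0;
	}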
@@ -1240,6 +1272,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
+
+		if (PageHuge(page)) {
+			struct page *head = compound_head(page);
+			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+			if (compound_order(head) > PFN_SECTION_SHIFT) {
+				ret = -EBUSY;
+				break;
+			}
+			if (isolate_huge_page(page, &source))
+				move_pages -= 1 << compound_order(head);
+			continue;
+		}
+
 		if (!get_page_unless_zero(page))
 			continue;
 		/*
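Two details in the hugepage branch above are worth spelling out. First, pfn is advanced to the last tail pfn of the hugepage, so one loop iteration covers the whole compound page. Second, the compound_order(head) > PFN_SECTION_SHIFT test returns -EBUSY for hugepages larger than a memory section, which could straddle the boundary of the range being offlined. With typical x86_64 values (illustrative: 4KB pages, 128MB sections), PFN_SECTION_SHIFT = 27 - 12 = 15; a 2MB hugepage is order 9 and can be migrated, while a 1GB gigantic page is order 18 > 15 and is rejected. Note also that a successfully isolated hugepage decrements move_pages by all 2^order of its base pages, since that counter budgets pfns, not list entries.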
@@ -1272,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 	}
 	if (!list_empty(&source)) {
 		if (not_managed) {
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 			goto out;
 		}
 
@@ -1283,7 +1328,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 		if (ret)
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 	}
 out:
 	return ret;
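putback_lru_pages() only knows how to return LRU pages, but the source list can now carry hugepages queued by isolate_huge_page() above; putback_movable_pages() dispatches on the page type. Paraphrasing mm/migrate.c of this era (a sketch, not verbatim):

	/* mm/migrate.c (sketch) */
	void putback_movable_pages(struct list_head *l)
	{
		struct page *page, *page2;

		list_for_each_entry_safe(page, page2, l, lru) {
			if (unlikely(PageHuge(page))) {
				/* hugepages were never on an LRU list */
				putback_active_hugepage(page);
				continue;
			}
			list_del(&page->lru);
			dec_zone_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));
			putback_lru_page(page);
		}
	}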
@@ -1472,7 +1517,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	struct zone *zone;
 	struct memory_notify arg;
 
-	BUG_ON(start_pfn >= end_pfn);
 	/* at least, alignment against pageblock is necessary */
 	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
 		return -EINVAL;
@@ -1527,8 +1571,8 @@ repeat:
 		drain_all_pages();
 	}
 
-	pfn = scan_lru_pages(start_pfn, end_pfn);
-	if (pfn) { /* We have page on LRU */
+	pfn = scan_movable_pages(start_pfn, end_pfn);
+	if (pfn) { /* We have movable pages */
 		ret = do_migrate_range(pfn, end_pfn);
 		if (!ret) {
 			drain = 1;
@@ -1547,6 +1591,11 @@ repeat:
 	yield();
 	/* drain pcp pages, this is synchronous. */
 	drain_all_pages();
+	/*
+	 * dissolve free hugepages in the memory block before doing offlining
+	 * actually in order to make hugetlbfs's object counting consistent.
+	 */
+	dissolve_free_huge_pages(start_pfn, end_pfn);
 	/* check again */
 	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
 	if (offlined_pages < 0) {
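dissolve_free_huge_pages() releases any free hugepages in the range back to the buddy allocator before the final isolation check, so hugetlbfs's free/total hugepage counters stay consistent with the memory actually going away. Its shape is roughly the following (a sketch, not the exact implementation):

	/* mm/hugetlb.c (sketch) */
	void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
	{
		unsigned int order = 8 * sizeof(void *);	/* impossibly large start value */
		unsigned long pfn;
		struct hstate *h;

		/* step by the smallest configured hugepage size */
		for_each_hstate(h)
			if (order > huge_page_order(h))
				order = huge_page_order(h);

		for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
			/* a no-op unless pfn is the head of a free hugepage */
			dissolve_free_huge_page(pfn_to_page(pfn));
	}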
@@ -1674,9 +1723,8 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return ret;
 }
 
-static int check_cpu_on_node(void *data)
+static int check_cpu_on_node(pg_data_t *pgdat)
 {
-	struct pglist_data *pgdat = data;
 	int cpu;
 
 	for_each_present_cpu(cpu) {
@@ -1691,10 +1739,9 @@ static int check_cpu_on_node(void *data)
 	return 0;
 }
 
-static void unmap_cpu_on_node(void *data)
+static void unmap_cpu_on_node(pg_data_t *pgdat)
 {
 #ifdef CONFIG_ACPI_NUMA
-	struct pglist_data *pgdat = data;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
@@ -1703,10 +1750,11 @@ static void unmap_cpu_on_node(void *data)
 #endif
 }
 
-static int check_and_unmap_cpu_on_node(void *data)
+static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
 {
-	int ret = check_cpu_on_node(data);
+	int ret;
 
+	ret = check_cpu_on_node(pgdat);
 	if (ret)
 		return ret;
 
@@ -1715,11 +1763,18 @@ static int check_and_unmap_cpu_on_node(void *data)
 	 * the cpu_to_node() now.
 	 */
 
-	unmap_cpu_on_node(data);
+	unmap_cpu_on_node(pgdat);
 	return 0;
 }
 
-/* offline the node if all memory sections of this node are removed */
+/**
+ * try_offline_node
+ *
+ * Offline a node if all memory sections and cpus of the node are removed.
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call.
+ */
 void try_offline_node(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
@@ -1745,7 +1800,7 @@ void try_offline_node(int nid)
 		return;
 	}
 
-	if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL))
+	if (check_and_unmap_cpu_on_node(pgdat))
 		return;
 
 	/*
@@ -1782,10 +1837,19 @@ void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
+/**
+ * remove_memory
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
 void __ref remove_memory(int nid, u64 start, u64 size)
 {
 	int ret;
 
+	BUG_ON(check_hotplug_memory_range(start, size));
+
 	lock_memory_hotplug();
 
 	/*
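Taken together, the new kernel-doc comments make the locking contract explicit: callers are expected to wrap both the offline and remove steps in the device-hotplug lock. A hedged sketch of the calling convention (the surrounding calls are assumptions about the ACPI driver path, not part of this diff):

	/* caller side (sketch) */
	lock_device_hotplug();
	/* ... offline the memory block(s) first ... */
	remove_memory(nid, start, size);	/* BUG_ON()s on a section-unaligned range */
	unlock_device_hotplug();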