Diffstat (limited to 'mm')

 mm/hugetlb.c        | 71
 mm/memory_hotplug.c | 42
 mm/page_alloc.c     | 11
 mm/page_isolation.c | 14
 4 files changed, 129 insertions(+), 9 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d37b3b95c439..fb4293b93fd0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -21,6 +21,7 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/page-isolation.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -522,9 +523,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
-	if (list_empty(&h->hugepage_freelists[nid]))
+	list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+		if (!is_migrate_isolate_page(page))
+			break;
+	/*
+	 * If no non-isolated free hugepage is found on the list,
+	 * the allocation fails.
+	 */
+	if (&h->hugepage_freelists[nid] == &page->lru)
 		return NULL;
-	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
 	list_move(&page->lru, &h->hugepage_activelist);
 	set_page_refcounted(page);
 	h->free_huge_pages--;
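The "not found" test added above relies on a list_for_each_entry property that is easy to miss: when the walk matches nothing, the iterator's lru link ends up pointing back at the list head. A minimal userspace sketch of the same idiom; fake_page and the open-coded list helpers are illustrative stand-ins, not kernel API:

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
struct fake_page { int isolated; struct list_head lru; };

/* container_of in disguise, as in the kernel's list_entry() */
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head freelist = { &freelist, &freelist };
	struct fake_page a = { .isolated = 1 }, b = { .isolated = 1 };
	struct fake_page *page;

	list_add_tail(&a.lru, &freelist);
	list_add_tail(&b.lru, &freelist);

	/* open-coded list_for_each_entry(page, &freelist, lru) */
	for (page = list_entry(freelist.next, struct fake_page, lru);
	     &page->lru != &freelist;
	     page = list_entry(page->lru.next, struct fake_page, lru))
		if (!page->isolated)
			break;

	/* the walk fell off the end: page->lru points back at the head */
	if (&freelist == &page->lru)
		printf("no non-isolated free page, allocation fails\n");
	else
		printf("dequeued a non-isolated page\n");
	return 0;
}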
@@ -878,6 +885,44 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 	return ret;
 }
 
+/*
+ * Dissolve a given free hugepage into free buddy pages. This function does
+ * nothing for in-use (including surplus) hugepages.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+	spin_lock(&hugetlb_lock);
+	if (PageHuge(page) && !page_count(page)) {
+		struct hstate *h = page_hstate(page);
+		int nid = page_to_nid(page);
+		list_del(&page->lru);
+		h->free_huge_pages--;
+		h->free_huge_pages_node[nid]--;
+		update_and_free_page(h, page);
+	}
+	spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+ * Note that start_pfn should be aligned with (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned int order = 8 * sizeof(void *);
+	unsigned long pfn;
+	struct hstate *h;
+
+	/* Set scan step to minimum hugepage size */
+	for_each_hstate(h)
+		if (order > huge_page_order(h))
+			order = huge_page_order(h);
+	VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
+	for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+		dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
 	struct page *page;
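A note on the scan step above: order starts at 8 * sizeof(void *) (64 on a 64-bit build), which no real hstate can reach, and is then lowered to the smallest configured hugepage order, so the loop probes every pfn that could be a hugepage head. A standalone sketch of that arithmetic, with illustrative x86_64 orders (9 for 2MB pages, 18 for 1GB pages; both assumed here):

#include <stdio.h>

int main(void)
{
	/* illustrative hstates: 1GB (order 18) and 2MB (order 9) */
	unsigned int hstate_orders[] = { 18, 9 };
	unsigned int order = 8 * sizeof(void *);   /* 64: above any real order */
	unsigned long start_pfn = 0x10000;
	unsigned long end_pfn = start_pfn + 4 * (1UL << 9);
	unsigned long pfn;

	for (unsigned int i = 0; i < 2; i++)
		if (order > hstate_orders[i])
			order = hstate_orders[i];

	/* scan step becomes the minimum hugepage size: 512 pfns (2MB) */
	for (pfn = start_pfn; pfn < end_pfn; pfn += 1UL << order)
		printf("probe possible hugepage head at pfn %#lx\n", pfn);
	return 0;
}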
@@ -3457,3 +3502,25 @@ void putback_active_hugepage(struct page *page)
 	spin_unlock(&hugetlb_lock);
 	put_page(page);
 }
+
+bool is_hugepage_active(struct page *page)
+{
+	VM_BUG_ON(!PageHuge(page));
+	/*
+	 * This function can be called for a tail page because the caller,
+	 * scan_movable_pages, scans through a given pfn-range which typically
+	 * covers one memory block. In systems using gigantic hugepages (1GB
+	 * for x86_64), a hugepage is larger than a memory block, and we don't
+	 * support migrating such large hugepages for now, so return false
+	 * when called for tail pages.
+	 */
+	if (PageTail(page))
+		return false;
+	/*
+	 * The refcount of a hwpoisoned hugepage is 1, but such pages are not
+	 * active, so we should return false for them.
+	 */
+	if (unlikely(PageHWPoison(page)))
+		return false;
+	return page_count(page) > 0;
+}
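For context on the PageTail() bailout: the range handed to scan_movable_pages typically covers one memory block, and a gigantic hugepage spans several such blocks, so most blocks inside it begin at a tail page. A quick sketch with common x86_64 sizes (128MB memory blocks, 1GB gigantic pages; both are assumptions about the configuration):

#include <stdio.h>

int main(void)
{
	/* assumed x86_64 defaults, not derived from this patch */
	unsigned long memory_block = 128UL << 20;	/* 128MB offline unit */
	unsigned long gigantic_page = 1UL << 30;	/* 1GB hugepage */

	printf("memory blocks per gigantic page: %lu\n",
	       gigantic_page / memory_block);
	/*
	 * 8 blocks: only one of them starts at the hugepage head, the other
	 * seven start at tail pages, hence the PageTail() early return.
	 */
	return 0;
}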
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index d595606728f9..0eb1a1df649d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -30,6 +30,7 @@
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
 #include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -1230,10 +1231,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan pfn because it is much easier than scanning over
+ * a linked list. This function returns the pfn of the first movable page
+ * found, or 0 if none is found.
  */
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 	struct page *page;
@@ -1242,6 +1245,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
 		page = pfn_to_page(pfn);
 		if (PageLRU(page))
 			return pfn;
+		if (PageHuge(page)) {
+			if (is_hugepage_active(page))
+				return pfn;
+			else
+				pfn = round_up(pfn + 1,
+					1 << compound_order(page)) - 1;
+		}
 		}
 	}
 	return 0;
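The pfn adjustment above is worth unpacking: round_up(pfn + 1, 1 << compound_order(page)) is the first pfn past the current compound page, and the trailing - 1 compensates for the pfn++ performed by the enclosing loop. A minimal sketch of the arithmetic, using a round_up that mirrors the kernel macro for power-of-two alignment:

#include <stdio.h>

/* power-of-two round_up, mirroring the kernel macro */
#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	unsigned long pfn = 0x10003;	/* a tail pfn inside a 2MB hugepage */
	unsigned int order = 9;		/* compound_order: 512 base pages */

	pfn = round_up(pfn + 1, 1UL << order) - 1;
	/* the loop's pfn++ then lands on the next compound page boundary */
	printf("loop resumes at pfn %#lx\n", pfn + 1);	/* 0x10200 */
	return 0;
}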
@@ -1262,6 +1272,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
+
+		if (PageHuge(page)) {
+			struct page *head = compound_head(page);
+			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+			if (compound_order(head) > PFN_SECTION_SHIFT) {
+				ret = -EBUSY;
+				break;
+			}
+			if (isolate_huge_page(page, &source))
+				move_pages -= 1 << compound_order(head);
+			continue;
+		}
+
 		if (!get_page_unless_zero(page))
 			continue;
 		/*
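The -EBUSY branch encodes a size limit: a hugepage is only migrated if its order does not exceed PFN_SECTION_SHIFT, i.e. if it fits within one memory section. A sketch with common x86_64 values (SECTION_SIZE_BITS 27, PAGE_SHIFT 12; both are assumptions here), under which 2MB pages pass and 1GB gigantic pages abort the offline:

#include <stdio.h>

/* assumed x86_64 values: SECTION_SIZE_BITS 27, PAGE_SHIFT 12 */
#define PFN_SECTION_SHIFT (27 - 12)

static const char *verdict(unsigned int order)
{
	return order > PFN_SECTION_SHIFT ? "-EBUSY, offline fails"
					 : "isolated for migration";
}

int main(void)
{
	printf("2MB hugepage (order 9): %s\n", verdict(9));
	printf("1GB hugepage (order 18): %s\n", verdict(18));
	return 0;
}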
@@ -1294,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 	}
 	if (!list_empty(&source)) {
 		if (not_managed) {
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 			goto out;
 		}
 
@@ -1305,7 +1328,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 		if (ret)
-			putback_lru_pages(&source);
+			putback_movable_pages(&source);
 	}
 out:
 	return ret;
@@ -1548,8 +1571,8 @@ repeat:
 		drain_all_pages();
 	}
 
-	pfn = scan_lru_pages(start_pfn, end_pfn);
-	if (pfn) { /* We have page on LRU */
+	pfn = scan_movable_pages(start_pfn, end_pfn);
+	if (pfn) { /* We have movable pages */
 		ret = do_migrate_range(pfn, end_pfn);
 		if (!ret) {
 			drain = 1;
@@ -1568,6 +1591,11 @@ repeat:
 	yield();
 	/* drain pcp pages, this is synchronous. */
 	drain_all_pages();
+	/*
+	 * Dissolve free hugepages in the memory block before actually
+	 * offlining, in order to keep hugetlbfs's object counting consistent.
+	 */
+	dissolve_free_huge_pages(start_pfn, end_pfn);
 	/* check again */
 	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
 	if (offlined_pages < 0) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7c3f8d7e2d8e..f7cc08dad26a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6008,6 +6008,17 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 			continue;
 
 		page = pfn_to_page(check);
+
+		/*
+		 * Hugepages are not in LRU lists, but they're movable.
+		 * We need not scan over tail pages because we don't
+		 * handle each tail page individually in migration.
+		 */
+		if (PageHuge(page)) {
+			iter = round_up(iter + 1, 1<<compound_order(page)) - 1;
+			continue;
+		}
+
 		/*
 		 * We can't use page_count without pin a page
 		 * because another CPU can free compound page.
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 0cee10ffb98d..d1473b2e9481 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -6,6 +6,7 @@
 #include <linux/page-isolation.h>
 #include <linux/pageblock-flags.h>
 #include <linux/memory.h>
+#include <linux/hugetlb.h>
 #include "internal.h"
 
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
@@ -252,6 +253,19 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
 {
 	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 
+	/*
+	 * TODO: allocate a destination hugepage from a nearest neighbor node,
+	 * in accordance with the memory policy of the user process if
+	 * possible. For now, as a simple work-around, we use the next node
+	 * for the destination.
+	 */
+	if (PageHuge(page)) {
+		nodemask_t src = nodemask_of_node(page_to_nid(page));
+		nodemask_t dst;
+		nodes_complement(dst, src);
+		return alloc_huge_page_node(page_hstate(compound_head(page)),
+					    next_node(page_to_nid(page), dst));
+	}
+
 	if (PageHighMem(page))
 		gfp_mask |= __GFP_HIGHMEM;
 
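The destination choice above complements the source node's nodemask and then takes next_node(), i.e. the first node after the source whose bit is set. A toy userspace model of that selection on an 8-bit mask; the real nodemask_t API operates on MAX_NUMNODES-wide bitmaps and is only mimicked here, and the complement is restricted to online nodes to keep the toy sensible:

#include <stdio.h>

#define MAX_NODES 8	/* toy width; the kernel uses MAX_NUMNODES */

/* smallest node id greater than n whose bit is set, or MAX_NODES */
static int next_node(int n, unsigned int mask)
{
	for (int i = n + 1; i < MAX_NODES; i++)
		if (mask & (1u << i))
			return i;
	return MAX_NODES;
}

int main(void)
{
	int src_nid = 1;
	unsigned int online = 0x0f;	/* nodes 0-3, an assumed topology */
	/* nodes_complement() clears only the source node's bit */
	unsigned int dst = online & ~(1u << src_nid);

	printf("hugepage from node %d goes to node %d\n",
	       src_nid, next_node(src_nid, dst));	/* -> node 2 */
	return 0;
}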