Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c         71
-rw-r--r--  mm/memory_hotplug.c  42
-rw-r--r--  mm/page_alloc.c      11
-rw-r--r--  mm/page_isolation.c  14
4 files changed, 129 insertions(+), 9 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d37b3b95c439..fb4293b93fd0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -21,6 +21,7 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/page-isolation.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -522,9 +523,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 {
         struct page *page;
 
-        if (list_empty(&h->hugepage_freelists[nid]))
+        list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+                if (!is_migrate_isolate_page(page))
+                        break;
+        /*
+         * If no non-isolated free hugepage is found on the list,
+         * the allocation fails.
+         */
+        if (&h->hugepage_freelists[nid] == &page->lru)
                 return NULL;
-        page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
         list_move(&page->lru, &h->hugepage_activelist);
         set_page_refcounted(page);
         h->free_huge_pages--;
@@ -878,6 +885,44 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
         return ret;
 }
 
+/*
+ * Dissolve a given free hugepage into free buddy pages. This function does
+ * nothing for in-use (including surplus) hugepages.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+        spin_lock(&hugetlb_lock);
+        if (PageHuge(page) && !page_count(page)) {
+                struct hstate *h = page_hstate(page);
+                int nid = page_to_nid(page);
+                list_del(&page->lru);
+                h->free_huge_pages--;
+                h->free_huge_pages_node[nid]--;
+                update_and_free_page(h, page);
+        }
+        spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+ * Note that start_pfn should be aligned with the (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+        unsigned int order = 8 * sizeof(void *);
+        unsigned long pfn;
+        struct hstate *h;
+
+        /* Set scan step to minimum hugepage size */
+        for_each_hstate(h)
+                if (order > huge_page_order(h))
+                        order = huge_page_order(h);
+        VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
+        for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+                dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
         struct page *page;
@@ -3457,3 +3502,25 @@ void putback_active_hugepage(struct page *page)
         spin_unlock(&hugetlb_lock);
         put_page(page);
 }
+
+bool is_hugepage_active(struct page *page)
+{
+        VM_BUG_ON(!PageHuge(page));
+        /*
+         * This function can be called for a tail page because the caller,
+         * scan_movable_pages, scans through a given pfn range which typically
+         * covers one memory block. On systems using gigantic hugepages (1GB
+         * on x86_64), a hugepage is larger than a memory block, and we don't
+         * support migrating such large hugepages for now, so return false
+         * when called for tail pages.
+         */
+        if (PageTail(page))
+                return false;
+        /*
+         * The refcount of a hwpoisoned hugepage is 1, but it is not active,
+         * so we should return false for it.
+         */
+        if (unlikely(PageHWPoison(page)))
+                return false;
+        return page_count(page) > 0;
+}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index d595606728f9..0eb1a1df649d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -30,6 +30,7 @@
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
 #include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -1230,10 +1231,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan over pfns because it's much easier than scanning
+ * the linked lists. This function returns the pfn of the first movable
+ * page found, or 0 if none is found.
  */
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
         unsigned long pfn;
         struct page *page;
@@ -1242,6 +1245,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
                         page = pfn_to_page(pfn);
                         if (PageLRU(page))
                                 return pfn;
+                        if (PageHuge(page)) {
+                                if (is_hugepage_active(page))
+                                        return pfn;
+                                else
+                                        pfn = round_up(pfn + 1,
+                                                1 << compound_order(page)) - 1;
+                        }
                 }
         }
         return 0;
@@ -1262,6 +1272,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                 if (!pfn_valid(pfn))
                         continue;
                 page = pfn_to_page(pfn);
+
+                if (PageHuge(page)) {
+                        struct page *head = compound_head(page);
+                        pfn = page_to_pfn(head) + (1 << compound_order(head)) - 1;
+                        if (compound_order(head) > PFN_SECTION_SHIFT) {
+                                ret = -EBUSY;
+                                break;
+                        }
+                        if (isolate_huge_page(page, &source))
+                                move_pages -= 1 << compound_order(head);
+                        continue;
+                }
+
                 if (!get_page_unless_zero(page))
                         continue;
                 /*
@@ -1294,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
         }
         if (!list_empty(&source)) {
                 if (not_managed) {
-                        putback_lru_pages(&source);
+                        putback_movable_pages(&source);
                         goto out;
                 }
 
@@ -1305,7 +1328,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                 ret = migrate_pages(&source, alloc_migrate_target, 0,
                                         MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
                 if (ret)
-                        putback_lru_pages(&source);
+                        putback_movable_pages(&source);
         }
 out:
         return ret;
@@ -1548,8 +1571,8 @@ repeat:
                 drain_all_pages();
         }
 
-        pfn = scan_lru_pages(start_pfn, end_pfn);
-        if (pfn) { /* We have page on LRU */
+        pfn = scan_movable_pages(start_pfn, end_pfn);
+        if (pfn) { /* We have movable pages */
                 ret = do_migrate_range(pfn, end_pfn);
                 if (!ret) {
                         drain = 1;
@@ -1568,6 +1591,11 @@ repeat:
         yield();
         /* drain pcp pages, this is synchronous. */
         drain_all_pages();
+        /*
+         * Dissolve free hugepages in the memory block before actually
+         * offlining it, to keep hugetlbfs's object counting consistent.
+         */
+        dissolve_free_huge_pages(start_pfn, end_pfn);
         /* check again */
         offlined_pages = check_pages_isolated(start_pfn, end_pfn);
         if (offlined_pages < 0) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7c3f8d7e2d8e..f7cc08dad26a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6008,6 +6008,17 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                         continue;
 
                 page = pfn_to_page(check);
+
+                /*
+                 * Hugepages are not in LRU lists, but they're movable.
+                 * We need not scan over tail pages because we don't
+                 * handle each tail page individually in migration.
+                 */
+                if (PageHuge(page)) {
+                        iter = round_up(iter + 1, 1 << compound_order(page)) - 1;
+                        continue;
+                }
+
                 /*
                  * We can't use page_count without pin a page
                  * because another CPU can free compound page.
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 0cee10ffb98d..d1473b2e9481 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -6,6 +6,7 @@
 #include <linux/page-isolation.h>
 #include <linux/pageblock-flags.h>
 #include <linux/memory.h>
+#include <linux/hugetlb.h>
 #include "internal.h"
 
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
@@ -252,6 +253,19 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
 {
         gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 
+        /*
+         * TODO: allocate a destination hugepage from the nearest neighbor node
+         * in accordance with the memory policy of the user process if possible.
+         * For now, as a simple workaround, we use the next node as destination.
+         */
+        if (PageHuge(page)) {
+                nodemask_t src = nodemask_of_node(page_to_nid(page));
+                nodemask_t dst;
+                nodes_complement(dst, src);
+                return alloc_huge_page_node(page_hstate(compound_head(page)),
+                                        next_node(page_to_nid(page), dst));
+        }
+
         if (PageHighMem(page))
                 gfp_mask |= __GFP_HIGHMEM;
 