about summary refs log tree commit diff stats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- mm/huge_memory.c 85
1 files changed, 41 insertions, 44 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55478ab3c83b..f2d19e4fe854 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -632,37 +632,27 @@ release:
632static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr) 632static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
633{ 633{
634 const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); 634 const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
635 gfp_t this_node = 0; 635 const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
636
637#ifdef CONFIG_NUMA
638 struct mempolicy *pol;
639 /*
640 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
641 * specified, to express a general desire to stay on the current
642 * node for optimistic allocation attempts. If the defrag mode
643 * and/or madvise hint requires the direct reclaim then we prefer
644 * to fallback to other node rather than node reclaim because that
645 * can lead to excessive reclaim even though there is free memory
646 * on other nodes. We expect that NUMA preferences are specified
647 * by memory policies.
648 */
649 pol = get_vma_policy(vma, addr);
650 if (pol->mode != MPOL_BIND)
651 this_node = __GFP_THISNODE;
652 mpol_cond_put(pol);
653#endif
654 636
637 /* Always do synchronous compaction */
655 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) 638 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
656 return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); 639 return GFP_TRANSHUGE | __GFP_THISNODE |
640 (vma_madvised ? 0 : __GFP_NORETRY);
641
642 /* Kick kcompactd and fail quickly */
657 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) 643 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
658 return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node; 644 return gfp_mask | __GFP_KSWAPD_RECLAIM;
645
646 /* Synchronous compaction if madvised, otherwise kick kcompactd */
659 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) 647 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
660 return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : 648 return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
661 __GFP_KSWAPD_RECLAIM | this_node); 649 __GFP_KSWAPD_RECLAIM);
650
651 /* Only do synchronous compaction if madvised */
662 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) 652 if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
663 return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : 653 return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
664 this_node); 654
665 return GFP_TRANSHUGE_LIGHT | this_node; 655 return gfp_mask;
666} 656}
667 657
668/* Caller must hold page table lock. */ 658/* Caller must hold page table lock. */
@@ -2350,7 +2340,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
2350 } 2340 }
2351} 2341}
2352 2342
2353static void freeze_page(struct page *page) 2343static void unmap_page(struct page *page)
2354{ 2344{
2355 enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | 2345 enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
2356 TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; 2346 TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2365,7 +2355,7 @@ static void freeze_page(struct page *page)
2365 VM_BUG_ON_PAGE(!unmap_success, page); 2355 VM_BUG_ON_PAGE(!unmap_success, page);
2366} 2356}
2367 2357
2368static void unfreeze_page(struct page *page) 2358static void remap_page(struct page *page)
2369{ 2359{
2370 int i; 2360 int i;
2371 if (PageTransHuge(page)) { 2361 if (PageTransHuge(page)) {
@@ -2402,6 +2392,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
2402 (1L << PG_unevictable) | 2392 (1L << PG_unevictable) |
2403 (1L << PG_dirty))); 2393 (1L << PG_dirty)));
2404 2394
2395 /* ->mapping in first tail page is compound_mapcount */
2396 VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
2397 page_tail);
2398 page_tail->mapping = head->mapping;
2399 page_tail->index = head->index + tail;
2400
2405 /* Page flags must be visible before we make the page non-compound. */ 2401 /* Page flags must be visible before we make the page non-compound. */
2406 smp_wmb(); 2402 smp_wmb();
2407 2403
@@ -2422,12 +2418,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
2422 if (page_is_idle(head)) 2418 if (page_is_idle(head))
2423 set_page_idle(page_tail); 2419 set_page_idle(page_tail);
2424 2420
2425 /* ->mapping in first tail page is compound_mapcount */
2426 VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
2427 page_tail);
2428 page_tail->mapping = head->mapping;
2429
2430 page_tail->index = head->index + tail;
2431 page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); 2421 page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
2432 2422
2433 /* 2423 /*
@@ -2439,12 +2429,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
2439} 2429}
2440 2430
2441static void __split_huge_page(struct page *page, struct list_head *list, 2431static void __split_huge_page(struct page *page, struct list_head *list,
2442 unsigned long flags) 2432 pgoff_t end, unsigned long flags)
2443{ 2433{
2444 struct page *head = compound_head(page); 2434 struct page *head = compound_head(page);
2445 struct zone *zone = page_zone(head); 2435 struct zone *zone = page_zone(head);
2446 struct lruvec *lruvec; 2436 struct lruvec *lruvec;
2447 pgoff_t end = -1;
2448 int i; 2437 int i;
2449 2438
2450 lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); 2439 lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2441,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
2452 /* complete memcg works before add pages to LRU */ 2441 /* complete memcg works before add pages to LRU */
2453 mem_cgroup_split_huge_fixup(head); 2442 mem_cgroup_split_huge_fixup(head);
2454 2443
2455 if (!PageAnon(page))
2456 end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
2457
2458 for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { 2444 for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
2459 __split_huge_page_tail(head, i, lruvec, list); 2445 __split_huge_page_tail(head, i, lruvec, list);
2460 /* Some pages can be beyond i_size: drop them from page cache */ 2446 /* Some pages can be beyond i_size: drop them from page cache */
@@ -2483,7 +2469,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
2483 2469
2484 spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); 2470 spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
2485 2471
2486 unfreeze_page(head); 2472 remap_page(head);
2487 2473
2488 for (i = 0; i < HPAGE_PMD_NR; i++) { 2474 for (i = 0; i < HPAGE_PMD_NR; i++) {
2489 struct page *subpage = head + i; 2475 struct page *subpage = head + i;
@@ -2626,6 +2612,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2626 int count, mapcount, extra_pins, ret; 2612 int count, mapcount, extra_pins, ret;
2627 bool mlocked; 2613 bool mlocked;
2628 unsigned long flags; 2614 unsigned long flags;
2615 pgoff_t end;
2629 2616
2630 VM_BUG_ON_PAGE(is_huge_zero_page(page), page); 2617 VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
2631 VM_BUG_ON_PAGE(!PageLocked(page), page); 2618 VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2635,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2648 ret = -EBUSY; 2635 ret = -EBUSY;
2649 goto out; 2636 goto out;
2650 } 2637 }
2638 end = -1;
2651 mapping = NULL; 2639 mapping = NULL;
2652 anon_vma_lock_write(anon_vma); 2640 anon_vma_lock_write(anon_vma);
2653 } else { 2641 } else {
@@ -2661,10 +2649,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2661 2649
2662 anon_vma = NULL; 2650 anon_vma = NULL;
2663 i_mmap_lock_read(mapping); 2651 i_mmap_lock_read(mapping);
2652
2653 /*
2654 * __split_huge_page() may need to trim off pages beyond EOF:
2655 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
2656 * which cannot be nested inside the page tree lock. So note
2657 * end now: i_size itself may be changed at any moment, but
2658 * head page lock is good enough to serialize the trimming.
2659 */
2660 end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
2664 } 2661 }
2665 2662
2666 /* 2663 /*
2667 * Racy check if we can split the page, before freeze_page() will 2664 * Racy check if we can split the page, before unmap_page() will
2668 * split PMDs 2665 * split PMDs
2669 */ 2666 */
2670 if (!can_split_huge_page(head, &extra_pins)) { 2667 if (!can_split_huge_page(head, &extra_pins)) {
@@ -2673,7 +2670,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2673 } 2670 }
2674 2671
2675 mlocked = PageMlocked(page); 2672 mlocked = PageMlocked(page);
2676 freeze_page(head); 2673 unmap_page(head);
2677 VM_BUG_ON_PAGE(compound_mapcount(head), head); 2674 VM_BUG_ON_PAGE(compound_mapcount(head), head);
2678 2675
2679 /* Make sure the page is not on per-CPU pagevec as it takes pin */ 2676 /* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2707,7 +2704,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2707 if (mapping) 2704 if (mapping)
2708 __dec_node_page_state(page, NR_SHMEM_THPS); 2705 __dec_node_page_state(page, NR_SHMEM_THPS);
2709 spin_unlock(&pgdata->split_queue_lock); 2706 spin_unlock(&pgdata->split_queue_lock);
2710 __split_huge_page(page, list, flags); 2707 __split_huge_page(page, list, end, flags);
2711 if (PageSwapCache(head)) { 2708 if (PageSwapCache(head)) {
2712 swp_entry_t entry = { .val = page_private(head) }; 2709 swp_entry_t entry = { .val = page_private(head) };
2713 2710
@@ -2727,7 +2724,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2727fail: if (mapping) 2724fail: if (mapping)
2728 xa_unlock(&mapping->i_pages); 2725 xa_unlock(&mapping->i_pages);
2729 spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); 2726 spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
2730 unfreeze_page(head); 2727 remap_page(head);
2731 ret = -EBUSY; 2728 ret = -EBUSY;
2732 } 2729 }
2733 2730