Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	85
1 file changed, 41 insertions(+), 44 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55478ab3c83b..f2d19e4fe854 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -632,37 +632,27 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
-	struct mempolicy *pol;
-	/*
-	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-	 * specified, to express a general desire to stay on the current
-	 * node for optimistic allocation attempts. If the defrag mode
-	 * and/or madvise hint requires the direct reclaim then we prefer
-	 * to fallback to other node rather than node reclaim because that
-	 * can lead to excessive reclaim even though there is free memory
-	 * on other nodes. We expect that NUMA preferences are specified
-	 * by memory policies.
-	 */
-	pol = get_vma_policy(vma, addr);
-	if (pol->mode != MPOL_BIND)
-		this_node = __GFP_THISNODE;
-	mpol_cond_put(pol);
-#endif
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
+	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
+
+	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+
+	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM | this_node);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+							     __GFP_KSWAPD_RECLAIM);
+
+	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     this_node);
-	return GFP_TRANSHUGE_LIGHT | this_node;
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
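Aside, not part of the patch: the vma_madvised branches above fire when userspace has called madvise(MADV_HUGEPAGE) on the mapping, which sets VM_HUGEPAGE on the VMA. A minimal user-space sketch of such a caller (assuming x86-64, where a PMD-sized huge page is 2 MiB):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2UL * 1024 * 1024;		/* one PMD-sized huge page on x86-64 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	if (madvise(p, len, MADV_HUGEPAGE))	/* sets VM_HUGEPAGE on this VMA */
		perror("madvise");

	((volatile char *)p)[0] = 1;		/* first touch may now fault in a THP */
	return 0;
}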
@@ -2350,7 +2340,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	}
 }
 
-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
 {
 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
 		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2365,7 +2355,7 @@ static void freeze_page(struct page *page)
 	VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
 {
 	int i;
 	if (PageTransHuge(page)) {
@@ -2402,6 +2392,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_unevictable) |
 			 (1L << PG_dirty)));
 
+	/* ->mapping in first tail page is compound_mapcount */
+	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+			page_tail);
+	page_tail->mapping = head->mapping;
+	page_tail->index = head->index + tail;
+
 	/* Page flags must be visible before we make the page non-compound. */
 	smp_wmb();
 
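The effect of the hunk above is that the tail page's mapping and index are written before the smp_wmb() that precedes making the page non-compound, i.e. initialize-then-publish ordering. A rough user-space analogue using C11 fences (illustrative only: the struct and values are made up, and the fences merely stand in for the kernel's smp_wmb()/smp_rmb()):

#include <stdatomic.h>
#include <stdio.h>

struct obj {
	long mapping;
	long index;
};

static struct obj o;
static atomic_int published;

static void writer(void)
{
	o.mapping = 42;		/* like page_tail->mapping = head->mapping */
	o.index = 7;		/* like page_tail->index = head->index + tail */
	atomic_thread_fence(memory_order_release);	/* ~ smp_wmb() */
	atomic_store_explicit(&published, 1, memory_order_relaxed);
}

static void reader(void)
{
	if (atomic_load_explicit(&published, memory_order_relaxed)) {
		atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
		/* guaranteed to see the field writes made before the fence */
		printf("mapping=%ld index=%ld\n", o.mapping, o.index);
	}
}

int main(void)
{
	writer();
	reader();
	return 0;
}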
@@ -2422,12 +2418,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	if (page_is_idle(head))
 		set_page_idle(page_tail);
 
-	/* ->mapping in first tail page is compound_mapcount */
-	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
-			page_tail);
-	page_tail->mapping = head->mapping;
-
-	page_tail->index = head->index + tail;
 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
 
 	/*
@@ -2439,12 +2429,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-		unsigned long flags)
+		pgoff_t end, unsigned long flags)
 {
 	struct page *head = compound_head(page);
 	struct zone *zone = page_zone(head);
 	struct lruvec *lruvec;
-	pgoff_t end = -1;
 	int i;
 
 	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2441,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
-	if (!PageAnon(page))
-		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2483,7 +2469,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
 
-	unfreeze_page(head);
+	remap_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		struct page *subpage = head + i;
@@ -2626,6 +2612,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	int count, mapcount, extra_pins, ret;
 	bool mlocked;
 	unsigned long flags;
+	pgoff_t end;
 
 	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2635,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ret = -EBUSY;
 			goto out;
 		}
+		end = -1;
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
@@ -2661,10 +2649,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
+
+		/*
+		 *__split_huge_page() may need to trim off pages beyond EOF:
+		 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+		 * which cannot be nested inside the page tree lock. So note
+		 * end now: i_size itself may be changed at any moment, but
+		 * head page lock is good enough to serialize the trimming.
+		 */
+		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
 	}
 
 	/*
-	 * Racy check if we can split the page, before freeze_page() will
+	 * Racy check if we can split the page, before unmap_page() will
 	 * split PMDs
 	 */
 	if (!can_split_huge_page(head, &extra_pins)) {
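The end value noted above is the file size rounded up to whole pages: subpages with index >= end lie beyond EOF and are dropped from the page cache during the split. A worked sketch of the arithmetic (illustrative only: PAGE_SIZE is hard-coded to 4096 and DIV_ROUND_UP is redefined locally):

#include <stdio.h>

#define PAGE_SIZE		4096ULL			/* assumed page size, illustration only */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long long i_size = 10000;	/* hypothetical file size in bytes */
	unsigned long long end = DIV_ROUND_UP(i_size, PAGE_SIZE);

	/* Here end == 3: page indices 0..2 are within the file, and any
	 * subpage with index >= 3 is beyond EOF and gets dropped. */
	printf("i_size=%llu bytes -> end index=%llu\n", i_size, end);
	return 0;
}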
@@ -2673,7 +2670,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	mlocked = PageMlocked(page);
-	freeze_page(head);
+	unmap_page(head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
 	/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2707,7 +2704,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping)
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 		spin_unlock(&pgdata->split_queue_lock);
-		__split_huge_page(page, list, flags);
+		__split_huge_page(page, list, end, flags);
 		if (PageSwapCache(head)) {
 			swp_entry_t entry = { .val = page_private(head) };
 
@@ -2727,7 +2724,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 fail:		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
-		unfreeze_page(head);
+		remap_page(head);
 		ret = -EBUSY;
 	}
 