author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2016-01-15 19:53:53 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-15 20:56:32 -0500
commit		eef1b3ba053aa68967d294c80a50c4a26db30f52 (patch)
tree		a00e8507f8734e1c94203c1eb69a762fc36a15b1 /mm
parent		e81c48024f43b4aabe1ec4709786fa1f96814717 (diff)
thp: implement split_huge_pmd()
The original split_huge_page() combined two operations: splitting a PMD into
a table of PTEs and splitting the underlying compound page. This patch
implements split_huge_pmd(), which splits the given PMD without splitting
other PMDs this page is mapped with or the underlying compound page.

Without tail page refcounting, the implementation of split_huge_pmd() is
pretty straightforward.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
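For illustration only (not part of this commit): a minimal sketch of how a caller might drive the new entry point. The __split_huge_pmd() signature is taken from the diff below; the page-table walk uses the standard pgd/pud/pmd accessors of this kernel generation, and the wrapper name example_split_one_pmd() is hypothetical.

/*
 * Hypothetical caller sketch: demote a single huge PMD mapping covering
 * 'address' to a PTE table, leaving the compound page and any other
 * mappings of it untouched.  __split_huge_pmd() (added by this patch)
 * takes the pmd lock and re-checks pmd_trans_huge() internally, so the
 * unlocked check here is only an optimization.
 */
static void example_split_one_pmd(struct vm_area_struct *vma,
		unsigned long address)
{
	pgd_t *pgd = pgd_offset(vma->vm_mm, address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none_or_clear_bad(pgd))
		return;
	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
		return;
	pmd = pmd_offset(pud, address);
	if (pmd_trans_huge(*pmd))
		__split_huge_pmd(vma, pmd, address);
}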
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	124
1 file changed, 124 insertions(+), 0 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1588f688b75d..22ab365cce52 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2666,6 +2666,130 @@ static int khugepaged(void *none)
 	return 0;
 }
 
+static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+		unsigned long haddr, pmd_t *pmd)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgtable_t pgtable;
+	pmd_t _pmd;
+	int i;
+
+	/* leave pmd empty until pte is filled */
+	pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+
+	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+	pmd_populate(mm, &_pmd, pgtable);
+
+	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		pte_t *pte, entry;
+		entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+		entry = pte_mkspecial(entry);
+		pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, haddr, pte, entry);
+		pte_unmap(pte);
+	}
+	smp_wmb(); /* make pte visible before pmd */
+	pmd_populate(mm, pmd, pgtable);
+	put_huge_zero_page();
+}
+
+static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long haddr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page *page;
+	pgtable_t pgtable;
+	pmd_t _pmd;
+	bool young, write;
+	int i;
+
+	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
+	VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
+	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
+	VM_BUG_ON(!pmd_trans_huge(*pmd));
+
+	count_vm_event(THP_SPLIT_PMD);
+
+	if (vma_is_dax(vma)) {
+		pmd_t _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+		if (is_huge_zero_pmd(_pmd))
+			put_huge_zero_page();
+		return;
+	} else if (is_huge_zero_pmd(*pmd)) {
+		return __split_huge_zero_page_pmd(vma, haddr, pmd);
+	}
+
+	page = pmd_page(*pmd);
+	VM_BUG_ON_PAGE(!page_count(page), page);
+	atomic_add(HPAGE_PMD_NR - 1, &page->_count);
+	write = pmd_write(*pmd);
+	young = pmd_young(*pmd);
+
+	/* leave pmd empty until pte is filled */
+	pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+
+	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+	pmd_populate(mm, &_pmd, pgtable);
+
+	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		pte_t entry, *pte;
+		/*
+		 * Note that NUMA hinting access restrictions are not
+		 * transferred to avoid any possibility of altering
+		 * permissions across VMAs.
+		 */
+		entry = mk_pte(page + i, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		if (!write)
+			entry = pte_wrprotect(entry);
+		if (!young)
+			entry = pte_mkold(entry);
+		pte = pte_offset_map(&_pmd, haddr);
+		BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, haddr, pte, entry);
+		atomic_inc(&page[i]._mapcount);
+		pte_unmap(pte);
+	}
+
+	/*
+	 * Set PG_double_map before dropping compound_mapcount to avoid
+	 * false-negative page_mapped().
+	 */
+	if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
+		for (i = 0; i < HPAGE_PMD_NR; i++)
+			atomic_inc(&page[i]._mapcount);
+	}
+
+	if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+		/* Last compound_mapcount is gone. */
+		__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+		if (TestClearPageDoubleMap(page)) {
+			/* No need in mapcount reference anymore */
+			for (i = 0; i < HPAGE_PMD_NR; i++)
+				atomic_dec(&page[i]._mapcount);
+		}
+	}
+
+	smp_wmb(); /* make pte visible before pmd */
+	pmd_populate(mm, pmd, pgtable);
+}
+
+void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address)
+{
+	spinlock_t *ptl;
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+
+	mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
+	ptl = pmd_lock(mm, pmd);
+	if (likely(pmd_trans_huge(*pmd)))
+		__split_huge_pmd_locked(vma, pmd, haddr);
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
+}
+
 static void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address)
 {