aboutsummaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
authorMatthew Wilcox <willy@linux.intel.com>2017-02-24 17:57:02 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-24 20:46:54 -0500
commita00cc7d9dd93d66a3fb83fc52aa57a4bec51c517 (patch)
tree54d78e89c63e519cb9e00fdab9efbf3189ef2f5e /mm/huge_memory.c
parenta2d581675d485eb7188f521f36efc114639a3096 (diff)
mm, x86: add support for PUD-sized transparent hugepages
The current transparent hugepage code only supports PMDs. This patch adds support for transparent use of PUDs with DAX. It does not include support for anonymous pages. x86 support code also added. Most of this patch simply parallels the work that was done for huge PMDs. The only major difference is how the new ->pud_entry method in mm_walk works. The ->pmd_entry method replaces the ->pte_entry method, whereas the ->pud_entry method works along with either ->pmd_entry or ->pte_entry. The pagewalk code takes care of locking the PUD before calling ->pud_walk, so handlers do not need to worry whether the PUD is stable. [dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()] Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com [dave.jiang@intel.com: native_pud_clear missing on i386 build] Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com> Tested-by: Alexander Kapshuk <alexander.kapshuk@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Jan Kara <jack@suse.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c249
1 file changed, 249 insertions(+), 0 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f9ecc2aeadfc..85742ac5b32e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -757,6 +757,60 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
757} 757}
758EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); 758EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
759 759
760#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
761static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
762{
763 if (likely(vma->vm_flags & VM_WRITE))
764 pud = pud_mkwrite(pud);
765 return pud;
766}
767
/*
 * Install a huge PUD entry mapping @pfn at @addr with protection @prot.
 *
 * Takes and releases the PUD page table lock around the update.  For a
 * write mapping the entry is pre-marked young and dirty, and made
 * writable only if the vma allows it (maybe_pud_mkwrite).
 */
static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
		pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
{
	struct mm_struct *mm = vma->vm_mm;
	pud_t entry;
	spinlock_t *ptl;

	ptl = pud_lock(mm, pud);
	entry = pud_mkhuge(pfn_t_pud(pfn, prot));
	if (pfn_t_devmap(pfn))
		/* tag device-memory mappings so lookups treat them as devmap */
		entry = pud_mkdevmap(entry);
	if (write) {
		entry = pud_mkyoung(pud_mkdirty(entry));
		entry = maybe_pud_mkwrite(entry, vma);
	}
	set_pud_at(mm, addr, pud, entry);
	update_mmu_cache_pud(vma, addr, pud);
	spin_unlock(ptl);
}
787
/**
 * vmf_insert_pfn_pud - insert a PUD-sized pfn mapping at fault time
 * @vma: vma covering the fault; must have exactly one of VM_PFNMAP or
 *	 VM_MIXEDMAP set, and must not be a COW PFN mapping
 * @addr: faulting virtual address
 * @pud: pud entry to populate
 * @pfn: pfn to map; must be a devmap pfn (only DAX is supported at
 *	 PUD level — no anonymous PUD pages)
 * @write: whether the mapping should be writable
 *
 * Return: VM_FAULT_NOPAGE on success, VM_FAULT_SIGBUS if @addr falls
 * outside @vma.
 */
int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
			pud_t *pud, pfn_t pfn, bool write)
{
	pgprot_t pgprot = vma->vm_page_prot;
	/*
	 * If we had pud_special, we could avoid all these restrictions,
	 * but we need to be consistent with PTEs and architectures that
	 * can't support a 'special' bit.
	 */
	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
						(VM_PFNMAP|VM_MIXEDMAP));
	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
	BUG_ON(!pfn_t_devmap(pfn));

	if (addr < vma->vm_start || addr >= vma->vm_end)
		return VM_FAULT_SIGBUS;

	/* arch hook: may adjust pgprot for this pfn range (e.g. PAT on x86) */
	track_pfn_insert(vma, &pgprot, pfn);

	insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
	return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
812#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
813
760static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, 814static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
761 pmd_t *pmd) 815 pmd_t *pmd)
762{ 816{
@@ -887,6 +941,123 @@ out:
887 return ret; 941 return ret;
888} 942}
889 943
944#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/*
 * Mark a huge PUD young (and, for now, also dirty) on a FOLL_TOUCH
 * access, updating the MMU cache only if the entry actually changed.
 */
static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
		pud_t *pud)
{
	pud_t _pud;

	/*
	 * We should set the dirty bit only for FOLL_WRITE but for now
	 * the dirty bit in the pud is meaningless. And if the dirty
	 * bit will become meaningful and we'll only set it with
	 * FOLL_WRITE, an atomic set_bit will be required on the pud to
	 * set the young bit, instead of the current set_pud_at.
	 */
	_pud = pud_mkyoung(pud_mkdirty(*pud));
	if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK,
				pud, _pud, 1))
		update_mmu_cache_pud(vma, addr, pud);
}
962
/*
 * Look up the page backing a huge devmap PUD for get_user_pages().
 *
 * Caller must hold the PUD page table lock (asserted below).  Returns
 * the struct page with an elevated refcount on success; NULL if the
 * entry is not a present devmap PUD or write access was requested on a
 * read-only entry; ERR_PTR(-EEXIST) if the caller did not pass FOLL_GET
 * (device pages are only returned when the caller manages the
 * refcount); ERR_PTR(-EFAULT) if no dev_pagemap covers the pfn.
 */
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
		pud_t *pud, int flags)
{
	unsigned long pfn = pud_pfn(*pud);
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap;
	struct page *page;

	assert_spin_locked(pud_lockptr(mm, pud));

	if (flags & FOLL_WRITE && !pud_write(*pud))
		return NULL;

	if (pud_present(*pud) && pud_devmap(*pud))
		/* pass */;
	else
		return NULL;

	if (flags & FOLL_TOUCH)
		touch_pud(vma, addr, pud);

	/*
	 * device mapped pages can only be returned if the
	 * caller will manage the page reference count.
	 */
	if (!(flags & FOLL_GET))
		return ERR_PTR(-EEXIST);

	/* offset of @addr within the huge page, in small pages */
	pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
	/* pin the pagemap while we take a reference on the page */
	pgmap = get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return ERR_PTR(-EFAULT);
	page = pfn_to_page(pfn);
	get_page(page);
	put_dev_pagemap(pgmap);

	return page;
}
1001
1002int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
1003 pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
1004 struct vm_area_struct *vma)
1005{
1006 spinlock_t *dst_ptl, *src_ptl;
1007 pud_t pud;
1008 int ret;
1009
1010 dst_ptl = pud_lock(dst_mm, dst_pud);
1011 src_ptl = pud_lockptr(src_mm, src_pud);
1012 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
1013
1014 ret = -EAGAIN;
1015 pud = *src_pud;
1016 if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud)))
1017 goto out_unlock;
1018
1019 /*
1020 * When page table lock is held, the huge zero pud should not be
1021 * under splitting since we don't split the page itself, only pud to
1022 * a page table.
1023 */
1024 if (is_huge_zero_pud(pud)) {
1025 /* No huge zero pud yet */
1026 }
1027
1028 pudp_set_wrprotect(src_mm, addr, src_pud);
1029 pud = pud_mkold(pud_wrprotect(pud));
1030 set_pud_at(dst_mm, addr, dst_pud, pud);
1031
1032 ret = 0;
1033out_unlock:
1034 spin_unlock(src_ptl);
1035 spin_unlock(dst_ptl);
1036 return ret;
1037}
1038
/*
 * Fault handler for a present huge PUD that lacks the accessed (and,
 * for write faults, dirty) bit: set the bits under the PUD lock after
 * rechecking that the entry did not change under us.
 */
void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
	pud_t entry;
	unsigned long haddr;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud);
	if (unlikely(!pud_same(*vmf->pud, orig_pud)))
		goto unlock;	/* raced with another fault or zap */

	entry = pud_mkyoung(orig_pud);
	if (write)
		entry = pud_mkdirty(entry);
	haddr = vmf->address & HPAGE_PUD_MASK;
	if (pudp_set_access_flags(vmf->vma, haddr, vmf->pud, entry, write))
		update_mmu_cache_pud(vmf->vma, vmf->address, vmf->pud);

unlock:
	spin_unlock(vmf->ptl);
}
1059#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
1060
890void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd) 1061void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
891{ 1062{
892 pmd_t entry; 1063 pmd_t entry;
@@ -1601,6 +1772,84 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
1601 return NULL; 1772 return NULL;
1602} 1773}
1603 1774
1775/*
1776 * Returns true if a given pud maps a thp, false otherwise.
1777 *
1778 * Note that if it returns true, this routine returns without unlocking page
1779 * table lock. So callers must unlock it.
1780 */
1781spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
1782{
1783 spinlock_t *ptl;
1784
1785 ptl = pud_lock(vma->vm_mm, pud);
1786 if (likely(pud_trans_huge(*pud) || pud_devmap(*pud)))
1787 return ptl;
1788 spin_unlock(ptl);
1789 return NULL;
1790}
1791
1792#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/*
 * Unmap a huge PUD during zap/munmap.  Returns 1 if a huge PUD was
 * cleared, or 0 if *pud was not a huge entry (the caller then descends
 * to the next page table level).  Only DAX mappings are supported;
 * hitting an anonymous huge PUD is a bug.
 */
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr)
{
	pud_t orig_pud;
	spinlock_t *ptl;

	ptl = __pud_trans_huge_lock(pud, vma);
	if (!ptl)
		return 0;
	/*
	 * For architectures like ppc64 we look at deposited pgtable
	 * when calling pudp_huge_get_and_clear. So do the
	 * pgtable_trans_huge_withdraw after finishing pudp related
	 * operations.
	 */
	orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud,
			tlb->fullmm);
	tlb_remove_pud_tlb_entry(tlb, pud, addr);
	if (vma_is_dax(vma)) {
		/* DAX: no struct page to release; just drop the lock */
		spin_unlock(ptl);
		/* No zero page support yet */
	} else {
		/* No support for anonymous PUD pages yet */
		BUG();
	}
	return 1;
}
1820
/*
 * Tear down a huge PUD mapping with the PUD lock held.  @haddr must be
 * HPAGE_PUD_SIZE-aligned and fully contained in @vma.  Since only DAX
 * PUD mappings exist (no anonymous support), "splitting" simply clears
 * and flushes the entry; smaller mappings are faulted back in later.
 */
static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
		unsigned long haddr)
{
	VM_BUG_ON(haddr & ~HPAGE_PUD_MASK);
	VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
	VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));

	/*
	 * NOTE(review): this accounts a PUD split under the THP_SPLIT_PMD
	 * vmstat counter, skewing the PMD-split statistic.  A dedicated
	 * THP_SPLIT_PUD event would be accurate, but requires a new
	 * vm_event_item enum entry outside this file — flagging rather
	 * than changing here.
	 */
	count_vm_event(THP_SPLIT_PMD);

	pudp_huge_clear_flush_notify(vma, haddr, pud);
}
1833
/*
 * Split the huge PUD mapping @address, if it is still huge once the
 * PUD lock is taken, with mmu-notifier invalidation bracketing the
 * whole operation.
 */
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
		unsigned long address)
{
	spinlock_t *ptl;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long haddr = address & HPAGE_PUD_MASK;

	mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PUD_SIZE);
	ptl = pud_lock(mm, pud);
	if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud)))
		goto out;	/* raced: already split or unmapped */
	__split_huge_pud_locked(vma, pud, haddr);

out:
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE);
}
1851#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
1852
1604static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, 1853static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
1605 unsigned long haddr, pmd_t *pmd) 1854 unsigned long haddr, pmd_t *pmd)
1606{ 1855{