author    Matthew Wilcox <willy@linux.intel.com>    2017-02-24 17:57:02 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-02-24 20:46:54 -0500
commit    a00cc7d9dd93d66a3fb83fc52aa57a4bec51c517 (patch)
tree      54d78e89c63e519cb9e00fdab9efbf3189ef2f5e /include/linux/mm.h
parent    a2d581675d485eb7188f521f36efc114639a3096 (diff)
mm, x86: add support for PUD-sized transparent hugepages
The current transparent hugepage code only supports PMDs.  This patch
adds support for transparent use of PUDs with DAX.  It does not include
support for anonymous pages.  x86 support code is also added.

Most of this patch simply parallels the work that was done for huge
PMDs.  The only major difference is how the new ->pud_entry method in
mm_walk works.  The ->pmd_entry method replaces the ->pte_entry method,
whereas the ->pud_entry method works along with either ->pmd_entry or
->pte_entry.  The pagewalk code takes care of locking the PUD before
calling ->pud_entry, so handlers do not need to worry about whether the
PUD is stable.

[dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()]
  Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com
[dave.jiang@intel.com: native_pud_clear missing on i386 build]
  Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com
Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Alexander Kapshuk <alexander.kapshuk@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
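To illustrate the ->pud_entry semantics described above, here is a hypothetical
walker that is not part of this patch: the names count_huge_puds and
count_range_huge_puds are invented, and it assumes the walk_page_range(start,
end, &walk) interface this kernel provides, with the caller holding mmap_sem
for read.  It acts only on pud_trans_huge()/pud_devmap() PUDs and leaves
regular PUDs to the ->pmd_entry/->pte_entry callbacks.

/* Hypothetical example, not part of this patch: count huge PUDs in a range. */
static int count_huge_puds(pud_t *pud, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	/*
	 * Per the changelog, the pagewalk code locks the PUD before calling
	 * this handler, so *pud can be inspected safely here.
	 */
	if (pud_trans_huge(*pud) || pud_devmap(*pud))
		(*count)++;

	/* Returning 0 continues the walk; regular PUDs still reach ->pmd_entry. */
	return 0;
}

static unsigned long count_range_huge_puds(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pud_entry	= count_huge_puds,
		.mm		= mm,
		.private	= &count,
	};

	/* Caller is assumed to hold mm->mmap_sem for read. */
	walk_page_range(start, end, &walk);
	return count;
}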
Diffstat (limited to 'include/linux/mm.h')
 include/linux/mm.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 035a688e5472..d8b75d7d6a9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd)
 {
 	return 0;
 }
+static inline int pud_devmap(pud_t pud)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ *	       this handler should only handle pud_trans_huge() puds.
+ *	       the pmd_entry or pte_entry callbacks will be used for
+ *	       regular PUDs.
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
@@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * (see the comment on walk_page_range() for more details)
  */
 struct mm_walk {
+	int (*pud_entry)(pud_t *pud, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to.  The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+	return &mm->page_table_lock;
+}
+
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+	spinlock_t *ptl = pud_lockptr(mm, pud);
+
+	spin_lock(ptl);
+	return ptl;
+}
 
+extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
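The pud_lockptr()/pud_lock() helpers added in the last hunk mirror
pmd_lockptr()/pmd_lock(), so callers are expected to follow the same pattern:
take the lock, examine or update the PUD, then unlock.  A minimal, hypothetical
sketch of that pattern follows; the function name is invented and TLB flushing
and mmu notifier handling are omitted for brevity.

/* Hypothetical caller pattern for the new PUD lock helpers. */
static void example_clear_huge_pud(struct mm_struct *mm, pud_t *pud)
{
	spinlock_t *ptl = pud_lock(mm, pud);	/* today: mm->page_table_lock */

	if (pud_trans_huge(*pud) || pud_devmap(*pud))
		pud_clear(pud);			/* a real caller would also flush the TLB */

	spin_unlock(ptl);
}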