Diffstat (limited to 'include/linux/mm.h')
 -rw-r--r--  include/linux/mm.h  150
 1 file changed, 110 insertions(+), 40 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd5ea3016fc4..47a93928b90f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
-#define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_ARCH_2	0x02000000
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
@@ -206,27 +205,26 @@ extern unsigned int kobjsize(const void *objp);
 extern pgprot_t protection_map[16];
 
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED	0x40	/* second try */
-#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE	0x02	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY	0x04	/* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT	0x08	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x10	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x20	/* Second try */
+#define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
  * of VM_FAULT_xxx flags that give details about how the fault was handled.
  *
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
 	void __user *virtual_address;	/* Faulting virtual address */
 
+	struct page *cow_page;		/* Handler may choose to COW */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
@@ -287,9 +285,13 @@ struct vm_operations_struct {
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
 #endif
-	/* called by sys_remap_file_pages() to populate non-linear mapping */
-	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
-			   unsigned long size, pgoff_t pgoff);
+	/*
+	 * Called by vm_normal_page() for special PTEs to find the
+	 * page for @addr. This is useful if the default behavior
+	 * (using pte_page()) would not find the correct page.
+	 */
+	struct page *(*find_special_page)(struct vm_area_struct *vma,
+					  unsigned long addr);
 };
 
 struct mmu_gather;
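
For context, a minimal sketch of how a driver could wire up the new callback. The driver name, the mydrv_mapping struct, and its pages[] array are invented for illustration and are not part of this diff; only the callback signature comes from the hunk above.

/* Hypothetical driver-side use of ->find_special_page (illustration only). */
struct mydrv_mapping {
	struct page **pages;		/* pages backing this vma, driver-owned */
};

static struct page *mydrv_find_special_page(struct vm_area_struct *vma,
					    unsigned long addr)
{
	struct mydrv_mapping *map = vma->vm_private_data;

	/* resolve the page that vm_normal_page() could not get via pte_page() */
	return map->pages[(addr - vma->vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct mydrv_vm_ops = {
	.find_special_page = mydrv_find_special_page,
};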
@@ -446,6 +448,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
 	return tail;
 }
 
+/*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -454,6 +462,18 @@ static inline struct page *compound_head(struct page *page)
 }
 
 /*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+static inline struct page *compound_head_fast(struct page *page)
+{
+	if (unlikely(PageTail(page)))
+		return page->first_page;
+	return page;
+}
+
+/*
  * The atomic page->_mapcount, starts from -1: so that transitions
  * both from it and to it can be tracked, using atomic_inc_and_test
  * and atomic_add_negative(-1).
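
A short illustrative contrast of the two helpers, assuming only the definitions added above; the two wrapper functions are invented for this sketch and do not appear in the diff.

/* A page we effectively pin (e.g. an object we allocated): no tail-flag
 * race is possible, so the _fast variant is sufficient. */
static struct page *head_of_owned_object(void *obj)
{
	return compound_head_fast(virt_to_page(obj));
}

/* An arbitrary pfn: the compound page may be dismantled under us (THP
 * split), so take the smp_rmb()-protected compound_head() path. */
static struct page *head_of_arbitrary_pfn(unsigned long pfn)
{
	return compound_head(pfn_to_page(pfn));
}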
@@ -465,7 +485,8 @@ static inline void page_mapcount_reset(struct page *page)
 
 static inline int page_mapcount(struct page *page)
 {
-	return atomic_read(&(page)->_mapcount) + 1;
+	VM_BUG_ON_PAGE(PageSlab(page), page);
+	return atomic_read(&page->_mapcount) + 1;
 }
 
 static inline int page_count(struct page *page)
@@ -531,7 +552,14 @@ static inline void get_page(struct page *page)
 static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
-	return compound_head(page);
+
+	/*
+	 * We don't need to worry about synchronization of tail flag
+	 * when we call virt_to_head_page() since it is only called for
+	 * already allocated page and this page won't be freed until
+	 * this virt_to_head_page() is finished. So use _fast variant.
+	 */
+	return compound_head_fast(page);
 }
 
 /*
@@ -601,29 +629,28 @@ int split_free_page(struct page *page);
  * prototype for that function and accessor functions.
  * These are _only_ valid on the head of a PG_compound page.
  */
-typedef void compound_page_dtor(struct page *);
 
 static inline void set_compound_page_dtor(struct page *page,
 						compound_page_dtor *dtor)
 {
-	page[1].lru.next = (void *)dtor;
+	page[1].compound_dtor = dtor;
 }
 
 static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 {
-	return (compound_page_dtor *)page[1].lru.next;
+	return page[1].compound_dtor;
 }
 
 static inline int compound_order(struct page *page)
 {
 	if (!PageHead(page))
 		return 0;
-	return (unsigned long)page[1].lru.prev;
+	return page[1].compound_order;
 }
 
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
-	page[1].lru.prev = (void *)order;
+	page[1].compound_order = order;
 }
 
 #ifdef CONFIG_MMU
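
As a usage sketch (not taken from this diff), a compound-page owner installs its destructor through the accessor rather than poking page[1] directly, in the same spirit as hugetlb's free_huge_page(). Both functions below are hypothetical, and the actual return-to-pool work is elided.

/* Hypothetical destructor: owner-specific cleanup when the last reference
 * to the compound page is dropped. */
static void my_compound_dtor(struct page *page)
{
	pr_debug("order-%d compound page released\n", compound_order(page));
	/* ... return the page to a private pool, update counters, etc. ... */
}

static void my_track_compound_page(struct page *page)
{
	/* overrides the default destructor set by prep_compound_page() */
	set_compound_page_dtor(page, my_compound_dtor);
}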
@@ -1121,7 +1148,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
-	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
@@ -1139,8 +1165,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
@@ -1148,16 +1172,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
 * @pte_hole: if set, called for each hole at all levels
 * @hugetlb_entry: if set, called for each hugetlb entry
- *		  *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- *		  is used.
+ * @test_walk: caller specific callback function to determine whether
+ *             we walk over the current vma or not. A positive returned
+ *             value means "do page table walk over the current vma,"
+ *             and a negative one means "abort current page table walk
+ *             right now." 0 means "skip the current vma."
+ * @mm:        mm_struct representing the target process of page table walk
+ * @vma:       vma currently walked (NULL if walking outside vmas)
+ * @private:   private data for callbacks' usage
 *
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
 */
 struct mm_walk {
-	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pud_entry)(pud_t *pud, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1167,12 +1193,16 @@ struct mm_walk {
 	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
+	int (*test_walk)(unsigned long addr, unsigned long next,
+			 struct mm_walk *walk);
 	struct mm_struct *mm;
+	struct vm_area_struct *vma;
 	void *private;
 };
 
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
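
A hedged sketch of how a walker might use the new ->test_walk callback and walk_page_vma() entry point. The skip_pfnmap/count_pte helpers, the counting policy, and the assumption that the caller already holds mmap_sem are all illustrative, not from this diff.

/* Skip VM_PFNMAP vmas entirely (0 = skip, >0 = walk, <0 = abort). */
static int skip_pfnmap(unsigned long addr, unsigned long next,
		       struct mm_walk *walk)
{
	return (walk->vma->vm_flags & VM_PFNMAP) ? 0 : 1;
}

/* Count present ptes; walk->private carries the accumulator. */
static int count_pte(pte_t *pte, unsigned long addr,
		     unsigned long next, struct mm_walk *walk)
{
	unsigned long *nr = walk->private;

	if (pte_present(*pte))
		(*nr)++;
	return 0;
}

static unsigned long count_present_ptes(struct vm_area_struct *vma)
{
	unsigned long nr = 0;
	struct mm_walk walk = {
		.pte_entry = count_pte,
		.test_walk = skip_pfnmap,
		.mm	   = vma->vm_mm,
		.private   = &nr,
	};

	walk_page_vma(vma, &walk);	/* caller assumed to hold mmap_sem */
	return nr;
}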
@@ -1236,6 +1266,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    unsigned long start, unsigned long nr_pages,
 		    int write, int force, struct page **pages,
 		    struct vm_area_struct **vmas);
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    int *locked);
+long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    unsigned int gup_flags);
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct kvec;
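
A sketch of the calling convention the new _locked variant implies, based only on the declarations above; pin_one_page() is an invented wrapper, not a kernel function. The caller takes mmap_sem, and the *locked output tells it whether the core dropped the semaphore while faulting pages in.

static long pin_one_page(unsigned long addr, struct page **page)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_locked(current, mm, addr, 1,
				    1 /* write */, 0 /* force */,
				    page, &locked);
	if (locked)
		up_read(&mm->mmap_sem);	/* only unlock if still held */
	return ret;
}

get_user_pages_unlocked() goes one step further and takes and drops mmap_sem itself, so callers with no other reason to hold the semaphore can drop that boilerplate entirely.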
@@ -1368,6 +1409,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
 		mm->hiwater_vm = mm->total_vm;
 }
 
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+	mm->hiwater_rss = get_mm_rss(mm);
+}
+
 static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 					 struct mm_struct *mm)
 {
@@ -1407,14 +1453,45 @@ static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
 #endif
 
-#ifdef __PAGETABLE_PMD_FOLDED
+#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
 static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 						unsigned long address)
 {
 	return 0;
 }
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
+
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_pmds, 0);
+}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_pmds);
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_pmds);
+}
+
+static inline void mm_dec_nr_pmds(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_pmds);
+}
 #endif
 
 int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
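
For context, a hedged sketch of how the new counter is meant to pair with pmd table allocation: charge the mm when a pmd page is actually installed, and skip the charge when the allocation loses a populate race. The function is illustrative only (it omits barriers and split-lock details) and is not the in-tree __pmd_alloc().

static int example_pmd_alloc(struct mm_struct *mm, pud_t *pud,
			     unsigned long address)
{
	pmd_t *new = pmd_alloc_one(mm, address);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!pud_present(*pud)) {
		mm_inc_nr_pmds(mm);	/* one more pmd table charged to mm */
		pud_populate(mm, pud, new);
	} else {
		pmd_free(mm, new);	/* lost the race; nothing to account */
	}
	spin_unlock(&mm->page_table_lock);
	return 0;
}

The matching mm_dec_nr_pmds() would sit next to pmd_free() on the page-table teardown path, so mm_nr_pmds() reflects the number of pmd pages currently charged to the process.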
@@ -1777,12 +1854,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
-					struct list_head *list)
-{
-	list_add_tail(&vma->shared.nonlinear, list);
-}
-
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
@@ -2110,9 +2181,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0