path: root/mm/mempolicy.c
author    Linus Walleij <linus.walleij@linaro.org>    2019-09-05 05:40:54 -0400
committer Linus Walleij <linus.walleij@linaro.org>    2019-09-05 05:40:54 -0400
commit    151a41014bff92f353263cadc051435dc9c3258e (patch)
tree      aa082a0745edd5b7051668f455dfc0ee1e4a9de0 /mm/mempolicy.c
parent    ae0755b56da9db4190288155ea884331993ed51b (diff)
parent    089cf7f6ecb266b6a4164919a2e69bd2f938374a (diff)
Merge tag 'v5.3-rc7' into devel
Linux 5.3-rc7
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c  134
1 file changed, 77 insertions, 57 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f48693f75b37..65e0874fce17 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	},
 };
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
 struct queue_pages {
@@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page,
 }
 
 /*
- * queue_pages_pmd() has three possible return values:
- * 1 - pages are placed on the right node or queued successfully.
- * 0 - THP was split.
- * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
- *        page was already on a node that does not follow the policy.
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ *        existing page was already on a node that does not follow the
+ *        policy.
  */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
@@ -451,23 +454,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	if (is_huge_zero_page(page)) {
 		spin_unlock(ptl);
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+		ret = 2;
 		goto out;
 	}
-	if (!queue_pages_required(page, qp)) {
-		ret = 1;
+	if (!queue_pages_required(page, qp))
 		goto unlock;
-	}
 
-	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-		if (!vma_migratable(walk->vma)) {
-			ret = -EIO;
+		if (!vma_migratable(walk->vma) ||
+		    migrate_page_add(page, qp->pagelist, flags)) {
+			ret = 1;
 			goto unlock;
 		}
-
-		migrate_page_add(page, qp->pagelist, flags);
 	} else
 		ret = -EIO;
 unlock:
@@ -479,6 +479,13 @@ out:
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ *        on a node that does not follow the policy.
  */
 static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
@@ -488,17 +495,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = qp->flags;
 	int ret;
+	bool has_unmovable = false;
 	pte_t *pte;
 	spinlock_t *ptl;
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret > 0)
-			return 0;
-		else if (ret < 0)
+		if (ret != 2)
 			return ret;
 	}
+	/* THP was split, fall through to pte walk */
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
@@ -519,14 +526,28 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!queue_pages_required(page, qp))
 			continue;
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			if (!vma_migratable(vma))
+			/* MPOL_MF_STRICT must be specified if we get here */
+			if (!vma_migratable(vma)) {
+				has_unmovable = true;
 				break;
-			migrate_page_add(page, qp->pagelist, flags);
+			}
+
+			/*
+			 * Do not abort immediately since there may be
+			 * temporary off LRU pages in the range.  Still
+			 * need migrate other LRU pages.
+			 */
+			if (migrate_page_add(page, qp->pagelist, flags))
+				has_unmovable = true;
 		} else
 			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+
+	if (has_unmovable)
+		return 1;
+
 	return addr != end ? -EIO : 0;
 }
 
@@ -639,7 +660,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
  *
  * If pages found in a given range are on a set of nodes (determined by
  * @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -940,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 /*
  * page migration, thp tail pages can be passed.
  */
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
 	struct page *head = compound_head(page);
@@ -953,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 			mod_node_page_state(page_pgdat(head),
 				NR_ISOLATED_ANON + page_is_file_cache(head),
 				hpage_nr_pages(head));
+		} else if (flags & MPOL_MF_STRICT) {
+			/*
+			 * Non-movable page may reach here.  And, there may be
+			 * temporary off LRU pages or non-LRU movable pages.
+			 * Treat them as unmovable pages since they can't be
+			 * isolated, so they can't be moved at the moment.  It
+			 * should return -EIO for this case too.
+			 */
+			return -EIO;
 		}
 	}
+
+	return 0;
 }
 
 /* page allocation callback for NUMA node migration */
@@ -1142,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				      address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1157,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start)
 }
 #else
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
+	return -EIO;
 }
 
 int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
@@ -1182,6 +1221,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mempolicy *new;
 	unsigned long end;
 	int err;
+	int ret;
 	LIST_HEAD(pagelist);
 
 	if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1243,10 +1283,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	err = queue_pages_range(mm, start, end, nmask,
+	ret = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
-	if (!err)
-		err = mbind_range(mm, start, end, new);
+
+	if (ret < 0) {
+		err = -EIO;
+		goto up_out;
+	}
+
+	err = mbind_range(mm, start, end, new);
 
 	if (!err) {
 		int nr_failed = 0;
@@ -1259,13 +1304,14 @@ static long do_mbind(unsigned long start, unsigned long len,
 				putback_movable_pages(&pagelist);
 		}
 
-		if (nr_failed && (flags & MPOL_MF_STRICT))
+		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
 			err = -EIO;
 	} else
 		putback_movable_pages(&pagelist);
 
+up_out:
 	up_write(&mm->mmap_sem);
- mpol_out:
+mpol_out:
 	mpol_put(new);
 	return err;
 }
@@ -1688,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task. It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2037,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma: Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2048,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2066,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);