path: root/mm/mempolicy.c
author    Linus Walleij <linus.walleij@linaro.org>    2019-09-05 05:40:54 -0400
committer Linus Walleij <linus.walleij@linaro.org>    2019-09-05 05:40:54 -0400
commit    151a41014bff92f353263cadc051435dc9c3258e (patch)
tree      aa082a0745edd5b7051668f455dfc0ee1e4a9de0 /mm/mempolicy.c
parent    ae0755b56da9db4190288155ea884331993ed51b (diff)
parent    089cf7f6ecb266b6a4164919a2e69bd2f938374a (diff)
Merge tag 'v5.3-rc7' into devel
Linux 5.3-rc7
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c  134
1 file changed, 77 insertions, 57 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f48693f75b37..65e0874fce17 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	},
 };
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
 struct queue_pages {
@@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page,
 }
 
 /*
- * queue_pages_pmd() has three possible return values:
- * 1 - pages are placed on the right node or queued successfully.
- * 0 - THP was split.
- * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
- *        page was already on a node that does not follow the policy.
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ *        existing page was already on a node that does not follow the
+ *        policy.
  */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
@@ -451,23 +454,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	if (is_huge_zero_page(page)) {
 		spin_unlock(ptl);
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+		ret = 2;
 		goto out;
 	}
-	if (!queue_pages_required(page, qp)) {
-		ret = 1;
+	if (!queue_pages_required(page, qp))
 		goto unlock;
-	}
 
-	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-		if (!vma_migratable(walk->vma)) {
-			ret = -EIO;
+		if (!vma_migratable(walk->vma) ||
+		    migrate_page_add(page, qp->pagelist, flags)) {
+			ret = 1;
 			goto unlock;
 		}
-
-		migrate_page_add(page, qp->pagelist, flags);
 	} else
 		ret = -EIO;
 unlock:
@@ -479,6 +479,13 @@ out:
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ *        on a node that does not follow the policy.
  */
 static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
@@ -488,17 +495,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = qp->flags;
 	int ret;
+	bool has_unmovable = false;
 	pte_t *pte;
 	spinlock_t *ptl;
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret > 0)
-			return 0;
-		else if (ret < 0)
+		if (ret != 2)
 			return ret;
 	}
+	/* THP was split, fall through to pte walk */
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
@@ -519,14 +526,28 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!queue_pages_required(page, qp))
 			continue;
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			if (!vma_migratable(vma))
+			/* MPOL_MF_STRICT must be specified if we get here */
+			if (!vma_migratable(vma)) {
+				has_unmovable = true;
 				break;
-			migrate_page_add(page, qp->pagelist, flags);
+			}
+
+			/*
+			 * Do not abort immediately since there may be
+			 * temporary off LRU pages in the range.  Still
+			 * need migrate other LRU pages.
+			 */
+			if (migrate_page_add(page, qp->pagelist, flags))
+				has_unmovable = true;
 		} else
 			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+
+	if (has_unmovable)
+		return 1;
+
 	return addr != end ? -EIO : 0;
 }
 
@@ -639,7 +660,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
  *
  * If pages found in a given range are on a set of nodes (determined by
  * @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -940,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 /*
  * page migration, thp tail pages can be passed.
  */
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
 	struct page *head = compound_head(page);
@@ -953,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 			mod_node_page_state(page_pgdat(head),
 				NR_ISOLATED_ANON + page_is_file_cache(head),
 				hpage_nr_pages(head));
+		} else if (flags & MPOL_MF_STRICT) {
+			/*
+			 * Non-movable page may reach here.  And, there may be
+			 * temporary off LRU pages or non-LRU movable pages.
+			 * Treat them as unmovable pages since they can't be
+			 * isolated, so they can't be moved at the moment.  It
+			 * should return -EIO for this case too.
+			 */
+			return -EIO;
 		}
 	}
+
+	return 0;
 }
 
 /* page allocation callback for NUMA node migration */
@@ -1142,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				      address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1157,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start)
 }
 #else
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
+	return -EIO;
 }
 
 int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
@@ -1182,6 +1221,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mempolicy *new;
 	unsigned long end;
 	int err;
+	int ret;
 	LIST_HEAD(pagelist);
 
 	if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1243,10 +1283,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	err = queue_pages_range(mm, start, end, nmask,
+	ret = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
-	if (!err)
-		err = mbind_range(mm, start, end, new);
+
+	if (ret < 0) {
+		err = -EIO;
+		goto up_out;
+	}
+
+	err = mbind_range(mm, start, end, new);
 
 	if (!err) {
 		int nr_failed = 0;
@@ -1259,13 +1304,14 @@ static long do_mbind(unsigned long start, unsigned long len,
 				putback_movable_pages(&pagelist);
 		}
 
-		if (nr_failed && (flags & MPOL_MF_STRICT))
+		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
 			err = -EIO;
 	} else
 		putback_movable_pages(&pagelist);
 
+up_out:
 	up_write(&mm->mmap_sem);
- mpol_out:
+mpol_out:
 	mpol_put(new);
 	return err;
 }
@@ -1688,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task. It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2037,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma: Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2048,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2066,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);