author     Linus Walleij <linus.walleij@linaro.org>   2019-09-05 05:40:54 -0400
committer  Linus Walleij <linus.walleij@linaro.org>   2019-09-05 05:40:54 -0400
commit     151a41014bff92f353263cadc051435dc9c3258e (patch)
tree       aa082a0745edd5b7051668f455dfc0ee1e4a9de0 /mm/mempolicy.c
parent     ae0755b56da9db4190288155ea884331993ed51b (diff)
parent     089cf7f6ecb266b6a4164919a2e69bd2f938374a (diff)
Merge tag 'v5.3-rc7' into devel
Linux 5.3-rc7
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--   mm/mempolicy.c   134
1 file changed, 77 insertions, 57 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f48693f75b37..65e0874fce17 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	},
 };
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
 struct queue_pages {
@@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page,
 }
 
 /*
- * queue_pages_pmd() has three possible return values:
- * 1 - pages are placed on the right node or queued successfully.
- * 0 - THP was split.
- * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
- *	page was already on a node that does not follow the policy.
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ *        existing page was already on a node that does not follow the
+ *        policy.
  */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
@@ -451,23 +454,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	if (is_huge_zero_page(page)) {
 		spin_unlock(ptl);
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+		ret = 2;
 		goto out;
 	}
-	if (!queue_pages_required(page, qp)) {
-		ret = 1;
+	if (!queue_pages_required(page, qp))
 		goto unlock;
-	}
 
-	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-		if (!vma_migratable(walk->vma)) {
-			ret = -EIO;
+		if (!vma_migratable(walk->vma) ||
+		    migrate_page_add(page, qp->pagelist, flags)) {
+			ret = 1;
 			goto unlock;
 		}
-
-		migrate_page_add(page, qp->pagelist, flags);
 	} else
 		ret = -EIO;
 unlock:
@@ -479,6 +479,13 @@ out:
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ *        on a node that does not follow the policy.
  */
 static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
@@ -488,17 +495,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = qp->flags;
 	int ret;
+	bool has_unmovable = false;
 	pte_t *pte;
 	spinlock_t *ptl;
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret > 0)
-			return 0;
-		else if (ret < 0)
+		if (ret != 2)
 			return ret;
 	}
+	/* THP was split, fall through to pte walk */
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
@@ -519,14 +526,28 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!queue_pages_required(page, qp))
 			continue;
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			if (!vma_migratable(vma))
+			/* MPOL_MF_STRICT must be specified if we get here */
+			if (!vma_migratable(vma)) {
+				has_unmovable = true;
 				break;
-			migrate_page_add(page, qp->pagelist, flags);
+			}
+
+			/*
+			 * Do not abort immediately since there may be
+			 * temporary off LRU pages in the range.  Still
+			 * need migrate other LRU pages.
+			 */
+			if (migrate_page_add(page, qp->pagelist, flags))
+				has_unmovable = true;
 		} else
 			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+
+	if (has_unmovable)
+		return 1;
+
 	return addr != end ? -EIO : 0;
 }
 
@@ -639,7 +660,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
  *
  * If pages found in a given range are on a set of nodes (determined by
  * @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
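The comment block added in the hunk above is the contract queue_pages_range() now presents to its caller. As a rough orientation, a caller is expected to collapse the three codes into a single errno along the lines of the sketch below; this mirrors what do_mbind() does in a later hunk of this diff, and the helper name is purely illustrative, not part of the patch.

	/* Illustrative only: fold queue_pages_range() return codes into an errno. */
	static int queue_result_to_errno(int ret)
	{
		if (ret < 0)
			return ret;	/* misplaced page and only MPOL_MF_STRICT was set */
		if (ret > 0)
			return -EIO;	/* unmovable page with MPOL_MF_MOVE* + MPOL_MF_STRICT */
		return 0;		/* all pages queued or already on an allowed node */
	}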
@@ -940,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 /*
  * page migration, thp tail pages can be passed.
  */
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
 	struct page *head = compound_head(page);
@@ -953,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 			mod_node_page_state(page_pgdat(head),
 				NR_ISOLATED_ANON + page_is_file_cache(head),
 				hpage_nr_pages(head));
+		} else if (flags & MPOL_MF_STRICT) {
+			/*
+			 * Non-movable page may reach here.  And, there may be
+			 * temporary off LRU pages or non-LRU movable pages.
+			 * Treat them as unmovable pages since they can't be
+			 * isolated, so they can't be moved at the moment.  It
+			 * should return -EIO for this case too.
+			 */
+			return -EIO;
 		}
 	}
+
+	return 0;
 }
 
 /* page allocation callback for NUMA node migration */
@@ -1142,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1157,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start)
 }
 #else
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
+	return -EIO;
 }
 
 int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
@@ -1182,6 +1221,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mempolicy *new;
 	unsigned long end;
 	int err;
+	int ret;
 	LIST_HEAD(pagelist);
 
 	if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1243,10 +1283,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	err = queue_pages_range(mm, start, end, nmask,
+	ret = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
-	if (!err)
-		err = mbind_range(mm, start, end, new);
+
+	if (ret < 0) {
+		err = -EIO;
+		goto up_out;
+	}
+
+	err = mbind_range(mm, start, end, new);
 
 	if (!err) {
 		int nr_failed = 0;
@@ -1259,13 +1304,14 @@ static long do_mbind(unsigned long start, unsigned long len,
 			putback_movable_pages(&pagelist);
 		}
 
-		if (nr_failed && (flags & MPOL_MF_STRICT))
+		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
 			err = -EIO;
 	} else
 		putback_movable_pages(&pagelist);
 
+up_out:
 	up_write(&mm->mmap_sem);
 mpol_out:
 	mpol_put(new);
 	return err;
 }
@@ -1688,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task. It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2037,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
- *	@hugepage: for hugepages try only the preferred node if possible
  *
  *	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2048,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2066,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
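
Taken together with the queue_pages_* changes above, the user-visible effect is that mbind(2) now fails consistently with EIO when MPOL_MF_MOVE or MPOL_MF_MOVE_ALL is combined with MPOL_MF_STRICT and the range contains a page that cannot be isolated and migrated. A minimal userspace sketch of that semantic, assuming libnuma's <numaif.h> wrapper and that node 0 exists (both assumptions, not implied by the patch):

	#include <errno.h>
	#include <stdio.h>
	#include <numaif.h>	/* mbind() wrapper from libnuma; link with -lnuma */

	/* Bind [addr, addr + len) to node 0 and move the pages already there. */
	static int bind_and_move(void *addr, unsigned long len)
	{
		unsigned long nodemask = 1UL << 0;	/* node 0, assumed present */

		if (mbind(addr, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
			  MPOL_MF_MOVE | MPOL_MF_STRICT) != 0) {
			/* With this change, an unmovable or misplaced page yields EIO. */
			if (errno == EIO)
				fprintf(stderr, "range contains pages that cannot be moved\n");
			return -1;
		}
		return 0;
	}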