Diffstat (limited to 'mm/mempolicy.c')
 mm/mempolicy.c | 116
 1 file changed, 86 insertions(+), 30 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4baf12e534d1..04729647f359 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -123,16 +123,19 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 static struct mempolicy *get_task_policy(struct task_struct *p)
 {
 	struct mempolicy *pol = p->mempolicy;
-	int node;
 
 	if (!pol) {
-		node = numa_node_id();
-		if (node != NUMA_NO_NODE)
-			pol = &preferred_node_policy[node];
+		int node = numa_node_id();
 
-		/* preferred_node_policy is not initialised early in boot */
-		if (!pol->mode)
-			pol = NULL;
+		if (node != NUMA_NO_NODE) {
+			pol = &preferred_node_policy[node];
+			/*
+			 * preferred_node_policy is not initialised early in
+			 * boot
+			 */
+			if (!pol->mode)
+				pol = NULL;
+		}
 	}
 
 	return pol;
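The hunk above narrows the fallback in get_task_policy(): the per-node preferred_node_policy entry is consulted only when numa_node_id() returns a real node, and is still discarded when that table has not been initialised yet (early boot). A minimal sketch of what a caller then does, assuming the file-scope default_policy already defined in mm/mempolicy.c; the caller shown is illustrative, not part of this patch:

	/* illustration only: falling back when no task policy applies */
	struct mempolicy *pol = get_task_policy(current);

	if (!pol)
		pol = &default_policy;	/* system-wide default policy */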
@@ -473,8 +476,11 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
-/* Scan through pages checking if pages follow certain conditions. */
-static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+/*
+ * Scan through pages checking if pages follow certain conditions,
+ * and move them to the pagelist if they do.
+ */
+static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -512,7 +518,31 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	return addr != end;
 }
 
-static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
+		pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
+		void *private)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	int nid;
+	struct page *page;
+
+	spin_lock(&vma->vm_mm->page_table_lock);
+	page = pte_page(huge_ptep_get((pte_t *)pmd));
+	nid = page_to_nid(page);
+	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+		goto unlock;
+	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
+	if (flags & (MPOL_MF_MOVE_ALL) ||
+	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
+		isolate_huge_page(page, private);
+unlock:
+	spin_unlock(&vma->vm_mm->page_table_lock);
+#else
+	BUG();
+#endif
+}
+
+static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
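The node check added in queue_pages_hugetlb_pmd_range() reuses the same trick as the normal-page path: comparing node_isset() against !!(flags & MPOL_MF_INVERT) skips the page whenever its node membership matches the requested sense. A minimal sketch of that predicate in isolation, assuming only the nodemask and mempolicy flag definitions; the helper name is hypothetical and exists only for illustration:

/* hypothetical helper, illustrating the MPOL_MF_INVERT test only */
static bool queue_this_page(int nid, const nodemask_t *nodes,
			    unsigned long flags)
{
	/*
	 * Without MPOL_MF_INVERT (as in migrate_to_node()): queue pages
	 * whose node IS in @nodes.  With MPOL_MF_INVERT (as in do_mbind()):
	 * queue pages whose node is NOT in the allowed set.
	 */
	return node_isset(nid, *nodes) != !!(flags & MPOL_MF_INVERT);
}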
@@ -523,17 +553,24 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
+		if (!pmd_present(*pmd))
+			continue;
+		if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
+			queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
+						flags, private);
+			continue;
+		}
 		split_huge_page_pmd(vma, addr, pmd);
 		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 			continue;
-		if (check_pte_range(vma, pmd, addr, next, nodes,
+		if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
 				    flags, private))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -544,16 +581,18 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
+		if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
+			continue;
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (check_pmd_range(vma, pud, addr, next, nodes,
+		if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
 				    flags, private))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pgd_range(struct vm_area_struct *vma,
+static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -566,7 +605,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (check_pud_range(vma, pgd, addr, next, nodes,
+		if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
 				    flags, private))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
@@ -604,12 +643,14 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
 
 /*
- * Check if all pages in a range are on a set of nodes.
- * If pagelist != NULL then isolate pages from the LRU and
- * put them on the pagelist.
+ * Walk through page tables and collect pages to be migrated.
+ *
+ * If pages found in a given range are on a set of nodes (determined by
+ * @nodes and @flags,) it's isolated and queued to the pagelist which is
+ * passed via @private.)
  */
 static struct vm_area_struct *
-check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags, void *private)
 {
 	int err;
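The rename keeps the old contract: queue_pages_range() returns the first VMA of the range (or an ERR_PTR on failure) and, when the move flags are set, fills the page list passed through @private. A minimal sketch of the calling pattern, modelled on the do_mbind() and migrate_to_node() call sites later in this diff; the variable names are illustrative only:

	LIST_HEAD(pagelist);
	struct vm_area_struct *vma;

	/* collect pages in [start, end) that sit on the "wrong" nodes */
	vma = queue_pages_range(mm, start, end, nmask,
				flags | MPOL_MF_INVERT, &pagelist);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	/* ... hand &pagelist to migrate_pages(), or put the pages back ... */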
@@ -635,9 +676,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 				return ERR_PTR(-EFAULT);
 		}
 
-		if (is_vm_hugetlb_page(vma))
-			goto next;
-
 		if (flags & MPOL_MF_LAZY) {
 			change_prot_numa(vma, start, endvma);
 			goto next;
@@ -647,7 +685,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
 		      vma_migratable(vma))) {
 
-			err = check_pgd_range(vma, start, endvma, nodes,
+			err = queue_pages_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
 				first = ERR_PTR(err);
@@ -990,7 +1028,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
 {
-	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
+	if (PageHuge(page))
+		return alloc_huge_page_node(page_hstate(compound_head(page)),
+					node);
+	else
+		return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
@@ -1013,14 +1055,14 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	 * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
 	 */
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
-	check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+	queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
-			putback_lru_pages(&pagelist);
+			putback_movable_pages(&pagelist);
 	}
 
 	return err;
@@ -1154,10 +1196,14 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 			break;
 		vma = vma->vm_next;
 	}
-
 	/*
-	 * if !vma, alloc_page_vma() will use task or system default policy
+	 * queue_pages_range() confirms that @page belongs to some vma,
+	 * so vma shouldn't be NULL.
 	 */
+	BUG_ON(!vma);
+
+	if (PageHuge(page))
+		return alloc_huge_page_noerr(vma, address, 1);
 	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 #else
@@ -1249,7 +1295,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	vma = check_range(mm, start, end, nmask,
+	vma = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
 	err = PTR_ERR(vma);	/* maybe ... */
@@ -1265,7 +1311,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 					(unsigned long)vma,
 					MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
 		if (nr_failed)
-			putback_lru_pages(&pagelist);
+			putback_movable_pages(&pagelist);
 	}
 
 	if (nr_failed && (flags & MPOL_MF_STRICT))
@@ -2065,6 +2111,16 @@ retry_cpuset:
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	struct mempolicy *pol = mpol_dup(vma_policy(src));
+
+	if (IS_ERR(pol))
+		return PTR_ERR(pol);
+	dst->vm_policy = pol;
+	return 0;
+}
+
 /*
  * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
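The new vma_dup_policy() wraps mpol_dup() and its error handling behind one helper, so code that copies a VMA no longer has to open-code the duplication and refcount details. A minimal usage sketch under the assumption of a caller that is building a new VMA from an existing one; the variable names and cleanup label are illustrative, not taken from this patch:

	/* illustration: duplicating the NUMA policy while copying a VMA */
	err = vma_dup_policy(old_vma, new_vma);
	if (err)
		goto out_free_vma;	/* hypothetical cleanup label */

Keeping the mempolicy-specific details inside mm/mempolicy.c is the point of the helper: callers only see an integer error code and a filled-in dst->vm_policy.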