Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--	mm/mempolicy.c	116
1 files changed, 86 insertions, 30 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4baf12e534d1..04729647f359 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -123,16 +123,19 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 static struct mempolicy *get_task_policy(struct task_struct *p)
 {
 	struct mempolicy *pol = p->mempolicy;
-	int node;
 
 	if (!pol) {
-		node = numa_node_id();
-		if (node != NUMA_NO_NODE)
-			pol = &preferred_node_policy[node];
+		int node = numa_node_id();
 
-		/* preferred_node_policy is not initialised early in boot */
-		if (!pol->mode)
-			pol = NULL;
+		if (node != NUMA_NO_NODE) {
+			pol = &preferred_node_policy[node];
+			/*
+			 * preferred_node_policy is not initialised early in
+			 * boot
+			 */
+			if (!pol->mode)
+				pol = NULL;
+		}
 	}
 
 	return pol;
@@ -473,8 +476,11 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
-/* Scan through pages checking if pages follow certain conditions. */
-static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+/*
+ * Scan through pages checking if pages follow certain conditions,
+ * and move them to the pagelist if they do.
+ */
+static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -512,7 +518,31 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	return addr != end;
 }
 
-static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
+		pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
+		void *private)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	int nid;
+	struct page *page;
+
+	spin_lock(&vma->vm_mm->page_table_lock);
+	page = pte_page(huge_ptep_get((pte_t *)pmd));
+	nid = page_to_nid(page);
+	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+		goto unlock;
+	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
+	if (flags & (MPOL_MF_MOVE_ALL) ||
+	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
+		isolate_huge_page(page, private);
+unlock:
+	spin_unlock(&vma->vm_mm->page_table_lock);
+#else
+	BUG();
+#endif
+}
+
+static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -523,17 +553,24 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
+		if (!pmd_present(*pmd))
+			continue;
+		if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
+			queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
+						flags, private);
+			continue;
+		}
 		split_huge_page_pmd(vma, addr, pmd);
 		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 			continue;
-		if (check_pte_range(vma, pmd, addr, next, nodes,
+		if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
 				    flags, private))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -544,16 +581,18 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
+		if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
+			continue;
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (check_pmd_range(vma, pud, addr, next, nodes,
+		if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
 				    flags, private))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pgd_range(struct vm_area_struct *vma,
+static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
 		void *private)
@@ -566,7 +605,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (check_pud_range(vma, pgd, addr, next, nodes,
+		if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
 				flags, private))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
@@ -604,12 +643,14 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
 
 /*
- * Check if all pages in a range are on a set of nodes.
- * If pagelist != NULL then isolate pages from the LRU and
- * put them on the pagelist.
+ * Walk through page tables and collect pages to be migrated.
+ *
+ * If pages found in a given range are on a set of nodes (determined by
+ * @nodes and @flags,) it's isolated and queued to the pagelist which is
+ * passed via @private.)
  */
 static struct vm_area_struct *
-check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags, void *private)
 {
 	int err;
@@ -635,9 +676,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 			return ERR_PTR(-EFAULT);
 		}
 
-		if (is_vm_hugetlb_page(vma))
-			goto next;
-
 		if (flags & MPOL_MF_LAZY) {
 			change_prot_numa(vma, start, endvma);
 			goto next;
@@ -647,7 +685,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
 		     vma_migratable(vma))) {
 
-			err = check_pgd_range(vma, start, endvma, nodes,
+			err = queue_pages_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
 				first = ERR_PTR(err);
@@ -990,7 +1028,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
 {
-	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
+	if (PageHuge(page))
+		return alloc_huge_page_node(page_hstate(compound_head(page)),
+					node);
+	else
+		return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
@@ -1013,14 +1055,14 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	 * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
 	 */
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
-	check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+	queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
-			putback_lru_pages(&pagelist);
+			putback_movable_pages(&pagelist);
 	}
 
 	return err;
@@ -1154,10 +1196,14 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
 			break;
 		vma = vma->vm_next;
 	}
-
 	/*
-	 * if !vma, alloc_page_vma() will use task or system default policy
+	 * queue_pages_range() confirms that @page belongs to some vma,
+	 * so vma shouldn't be NULL.
 	 */
+	BUG_ON(!vma);
+
+	if (PageHuge(page))
+		return alloc_huge_page_noerr(vma, address, 1);
 	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 #else
@@ -1249,7 +1295,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	vma = check_range(mm, start, end, nmask,
+	vma = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
 	err = PTR_ERR(vma);	/* maybe ... */
@@ -1265,7 +1311,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 					(unsigned long)vma,
 					MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
 		if (nr_failed)
-			putback_lru_pages(&pagelist);
+			putback_movable_pages(&pagelist);
 	}
 
 	if (nr_failed && (flags & MPOL_MF_STRICT))
@@ -2065,6 +2111,16 @@ retry_cpuset:
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	struct mempolicy *pol = mpol_dup(vma_policy(src));
+
+	if (IS_ERR(pol))
+		return PTR_ERR(pol);
+	dst->vm_policy = pol;
+	return 0;
+}
+
 /*
  * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
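
Usage note (not part of the diff above): the new vma_dup_policy() helper wraps the mpol_dup(vma_policy(src)) pattern that VMA copy/split paths previously open-coded. A minimal caller sketch follows; the function name and error handling are hypothetical, chosen only to illustrate the call pattern.

/* Hypothetical caller sketch, not taken from mm/mempolicy.c or this patch. */
static int example_copy_vma_policy(struct vm_area_struct *new,
				   struct vm_area_struct *old)
{
	int err;

	/* Duplicate old->vm_policy into new->vm_policy. */
	err = vma_dup_policy(old, new);
	if (err)
		return err;	/* propagated from mpol_dup(), typically -ENOMEM */

	return 0;
}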