diff options
Diffstat (limited to 'mm/mempolicy.c')
| -rw-r--r-- | mm/mempolicy.c | 145 |
1 files changed, 104 insertions, 41 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3eb4a6fdc043..e08e2c4da63a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
| @@ -182,13 +182,54 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) | |||
| 182 | return 0; | 182 | return 0; |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | /* Create a new policy */ | 185 | /* |
| 186 | * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if | ||
| 187 | * any, for the new policy. mpol_new() has already validated the nodes | ||
| 188 | * parameter with respect to the policy mode and flags. But, we need to | ||
| 189 | * handle an empty nodemask with MPOL_PREFERRED here. | ||
| 190 | * | ||
| 191 | * Must be called holding task's alloc_lock to protect task's mems_allowed | ||
| 192 | * and mempolicy. May also be called holding mm->mmap_sem for write. | |
| 193 | */ | ||
| 194 | static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | ||
| 195 | { | ||
| 196 | nodemask_t cpuset_context_nmask; | ||
| 197 | int ret; | ||
| 198 | |||
| 199 | /* If mode is MPOL_DEFAULT, pol is NULL and there is no nodemask to set up. */ | |
| 200 | if (pol == NULL) | ||
| 201 | return 0; | ||
| 202 | |||
| 203 | VM_BUG_ON(!nodes); | ||
| 204 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) | ||
| 205 | nodes = NULL; /* explicit local allocation */ | ||
| 206 | else { | ||
| 207 | if (pol->flags & MPOL_F_RELATIVE_NODES) | ||
| 208 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
| 209 | &cpuset_current_mems_allowed); | ||
| 210 | else | ||
| 211 | nodes_and(cpuset_context_nmask, *nodes, | ||
| 212 | cpuset_current_mems_allowed); | ||
| 213 | if (mpol_store_user_nodemask(pol)) | ||
| 214 | pol->w.user_nodemask = *nodes; | ||
| 215 | else | ||
| 216 | pol->w.cpuset_mems_allowed = | ||
| 217 | cpuset_current_mems_allowed; | ||
| 218 | } | ||
| 219 | |||
| 220 | ret = mpol_ops[pol->mode].create(pol, | ||
| 221 | nodes ? &cpuset_context_nmask : NULL); | ||
| 222 | return ret; | ||
| 223 | } | ||
| 224 | |||
| 225 | /* | ||
| 226 | * This function just creates a new policy, performs some checks and simple | |
| 227 | * initialization. You must invoke mpol_set_nodemask() afterwards to set the nodes. | |
| 228 | */ | ||
| 186 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | 229 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, |
| 187 | nodemask_t *nodes) | 230 | nodemask_t *nodes) |
| 188 | { | 231 | { |
| 189 | struct mempolicy *policy; | 232 | struct mempolicy *policy; |
| 190 | nodemask_t cpuset_context_nmask; | ||
| 191 | int ret; | ||
| 192 | 233 | ||
| 193 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", | 234 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", |
| 194 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); | 235 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); |
| @@ -210,7 +251,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
| 210 | if (((flags & MPOL_F_STATIC_NODES) || | 251 | if (((flags & MPOL_F_STATIC_NODES) || |
| 211 | (flags & MPOL_F_RELATIVE_NODES))) | 252 | (flags & MPOL_F_RELATIVE_NODES))) |
| 212 | return ERR_PTR(-EINVAL); | 253 | return ERR_PTR(-EINVAL); |
| 213 | nodes = NULL; /* flag local alloc */ | ||
| 214 | } | 254 | } |
| 215 | } else if (nodes_empty(*nodes)) | 255 | } else if (nodes_empty(*nodes)) |
| 216 | return ERR_PTR(-EINVAL); | 256 | return ERR_PTR(-EINVAL); |
| @@ -221,30 +261,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
| 221 | policy->mode = mode; | 261 | policy->mode = mode; |
| 222 | policy->flags = flags; | 262 | policy->flags = flags; |
| 223 | 263 | ||
| 224 | if (nodes) { | ||
| 225 | /* | ||
| 226 | * cpuset related setup doesn't apply to local allocation | ||
| 227 | */ | ||
| 228 | cpuset_update_task_memory_state(); | ||
| 229 | if (flags & MPOL_F_RELATIVE_NODES) | ||
| 230 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
| 231 | &cpuset_current_mems_allowed); | ||
| 232 | else | ||
| 233 | nodes_and(cpuset_context_nmask, *nodes, | ||
| 234 | cpuset_current_mems_allowed); | ||
| 235 | if (mpol_store_user_nodemask(policy)) | ||
| 236 | policy->w.user_nodemask = *nodes; | ||
| 237 | else | ||
| 238 | policy->w.cpuset_mems_allowed = | ||
| 239 | cpuset_mems_allowed(current); | ||
| 240 | } | ||
| 241 | |||
| 242 | ret = mpol_ops[mode].create(policy, | ||
| 243 | nodes ? &cpuset_context_nmask : NULL); | ||
| 244 | if (ret < 0) { | ||
| 245 | kmem_cache_free(policy_cache, policy); | ||
| 246 | return ERR_PTR(ret); | ||
| 247 | } | ||
| 248 | return policy; | 264 | return policy; |
| 249 | } | 265 | } |
| 250 | 266 | ||
| @@ -324,6 +340,8 @@ static void mpol_rebind_policy(struct mempolicy *pol, | |||
| 324 | /* | 340 | /* |
| 325 | * Wrapper for mpol_rebind_policy() that just requires task | 341 | * Wrapper for mpol_rebind_policy() that just requires task |
| 326 | * pointer, and updates task mempolicy. | 342 | * pointer, and updates task mempolicy. |
| 343 | * | ||
| 344 | * Called with task's alloc_lock held. | ||
| 327 | */ | 345 | */ |
| 328 | 346 | ||
| 329 | void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) | 347 | void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) |
| @@ -600,8 +618,9 @@ static void mpol_set_task_struct_flag(void) | |||
| 600 | static long do_set_mempolicy(unsigned short mode, unsigned short flags, | 618 | static long do_set_mempolicy(unsigned short mode, unsigned short flags, |
| 601 | nodemask_t *nodes) | 619 | nodemask_t *nodes) |
| 602 | { | 620 | { |
| 603 | struct mempolicy *new; | 621 | struct mempolicy *new, *old; |
| 604 | struct mm_struct *mm = current->mm; | 622 | struct mm_struct *mm = current->mm; |
| 623 | int ret; | ||
| 605 | 624 | ||
| 606 | new = mpol_new(mode, flags, nodes); | 625 | new = mpol_new(mode, flags, nodes); |
| 607 | if (IS_ERR(new)) | 626 | if (IS_ERR(new)) |
| @@ -615,20 +634,33 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
| 615 | */ | 634 | */ |
| 616 | if (mm) | 635 | if (mm) |
| 617 | down_write(&mm->mmap_sem); | 636 | down_write(&mm->mmap_sem); |
| 618 | mpol_put(current->mempolicy); | 637 | task_lock(current); |
| 638 | ret = mpol_set_nodemask(new, nodes); | ||
| 639 | if (ret) { | ||
| 640 | task_unlock(current); | ||
| 641 | if (mm) | ||
| 642 | up_write(&mm->mmap_sem); | ||
| 643 | mpol_put(new); | ||
| 644 | return ret; | ||
| 645 | } | ||
| 646 | old = current->mempolicy; | ||
| 619 | current->mempolicy = new; | 647 | current->mempolicy = new; |
| 620 | mpol_set_task_struct_flag(); | 648 | mpol_set_task_struct_flag(); |
| 621 | if (new && new->mode == MPOL_INTERLEAVE && | 649 | if (new && new->mode == MPOL_INTERLEAVE && |
| 622 | nodes_weight(new->v.nodes)) | 650 | nodes_weight(new->v.nodes)) |
| 623 | current->il_next = first_node(new->v.nodes); | 651 | current->il_next = first_node(new->v.nodes); |
| 652 | task_unlock(current); | ||
| 624 | if (mm) | 653 | if (mm) |
| 625 | up_write(&mm->mmap_sem); | 654 | up_write(&mm->mmap_sem); |
| 626 | 655 | ||
| 656 | mpol_put(old); | ||
| 627 | return 0; | 657 | return 0; |
| 628 | } | 658 | } |
| 629 | 659 | ||
| 630 | /* | 660 | /* |
| 631 | * Return nodemask for policy for get_mempolicy() query | 661 | * Return nodemask for policy for get_mempolicy() query |
| 662 | * | ||
| 663 | * Called with task's alloc_lock held | ||
| 632 | */ | 664 | */ |
| 633 | static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes) | 665 | static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes) |
| 634 | { | 666 | { |
| @@ -674,7 +706,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
| 674 | struct vm_area_struct *vma = NULL; | 706 | struct vm_area_struct *vma = NULL; |
| 675 | struct mempolicy *pol = current->mempolicy; | 707 | struct mempolicy *pol = current->mempolicy; |
| 676 | 708 | ||
| 677 | cpuset_update_task_memory_state(); | ||
| 678 | if (flags & | 709 | if (flags & |
| 679 | ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) | 710 | ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) |
| 680 | return -EINVAL; | 711 | return -EINVAL; |
| @@ -683,7 +714,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
| 683 | if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) | 714 | if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) |
| 684 | return -EINVAL; | 715 | return -EINVAL; |
| 685 | *policy = 0; /* just so it's initialized */ | 716 | *policy = 0; /* just so it's initialized */ |
| 717 | task_lock(current); | ||
| 686 | *nmask = cpuset_current_mems_allowed; | 718 | *nmask = cpuset_current_mems_allowed; |
| 719 | task_unlock(current); | ||
| 687 | return 0; | 720 | return 0; |
| 688 | } | 721 | } |
| 689 | 722 | ||
| @@ -738,8 +771,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
| 738 | } | 771 | } |
| 739 | 772 | ||
| 740 | err = 0; | 773 | err = 0; |
| 741 | if (nmask) | 774 | if (nmask) { |
| 775 | task_lock(current); | ||
| 742 | get_policy_nodemask(pol, nmask); | 776 | get_policy_nodemask(pol, nmask); |
| 777 | task_unlock(current); | ||
| 778 | } | ||
| 743 | 779 | ||
| 744 | out: | 780 | out: |
| 745 | mpol_cond_put(pol); | 781 | mpol_cond_put(pol); |
| @@ -767,7 +803,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, | |||
| 767 | 803 | ||
| 768 | static struct page *new_node_page(struct page *page, unsigned long node, int **x) | 804 | static struct page *new_node_page(struct page *page, unsigned long node, int **x) |
| 769 | { | 805 | { |
| 770 | return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0); | 806 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); |
| 771 | } | 807 | } |
| 772 | 808 | ||
| 773 | /* | 809 | /* |
| @@ -979,6 +1015,14 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
| 979 | return err; | 1015 | return err; |
| 980 | } | 1016 | } |
| 981 | down_write(&mm->mmap_sem); | 1017 | down_write(&mm->mmap_sem); |
| 1018 | task_lock(current); | ||
| 1019 | err = mpol_set_nodemask(new, nmask); | ||
| 1020 | task_unlock(current); | ||
| 1021 | if (err) { | ||
| 1022 | up_write(&mm->mmap_sem); | ||
| 1023 | mpol_put(new); | ||
| 1024 | return err; | ||
| 1025 | } | ||
| 982 | vma = check_range(mm, start, end, nmask, | 1026 | vma = check_range(mm, start, end, nmask, |
| 983 | flags | MPOL_MF_INVERT, &pagelist); | 1027 | flags | MPOL_MF_INVERT, &pagelist); |
| 984 | 1028 | ||
| @@ -1545,8 +1589,6 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
| 1545 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1589 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
| 1546 | struct zonelist *zl; | 1590 | struct zonelist *zl; |
| 1547 | 1591 | ||
| 1548 | cpuset_update_task_memory_state(); | ||
| 1549 | |||
| 1550 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { | 1592 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { |
| 1551 | unsigned nid; | 1593 | unsigned nid; |
| 1552 | 1594 | ||
| @@ -1593,8 +1635,6 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) | |||
| 1593 | { | 1635 | { |
| 1594 | struct mempolicy *pol = current->mempolicy; | 1636 | struct mempolicy *pol = current->mempolicy; |
| 1595 | 1637 | ||
| 1596 | if ((gfp & __GFP_WAIT) && !in_interrupt()) | ||
| 1597 | cpuset_update_task_memory_state(); | ||
| 1598 | if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) | 1638 | if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) |
| 1599 | pol = &default_policy; | 1639 | pol = &default_policy; |
| 1600 | 1640 | ||
| @@ -1854,6 +1894,8 @@ restart: | |||
| 1854 | */ | 1894 | */ |
| 1855 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | 1895 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) |
| 1856 | { | 1896 | { |
| 1897 | int ret; | ||
| 1898 | |||
| 1857 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ | 1899 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ |
| 1858 | spin_lock_init(&sp->lock); | 1900 | spin_lock_init(&sp->lock); |
| 1859 | 1901 | ||
| @@ -1863,9 +1905,19 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
| 1863 | 1905 | ||
| 1864 | /* contextualize the tmpfs mount point mempolicy */ | 1906 | /* contextualize the tmpfs mount point mempolicy */ |
| 1865 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); | 1907 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); |
| 1866 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1908 | if (IS_ERR(new)) { |
| 1867 | if (IS_ERR(new)) | 1909 | mpol_put(mpol); /* drop our ref on sb mpol */ |
| 1868 | return; /* no valid nodemask intersection */ | 1910 | return; /* no valid nodemask intersection */ |
| 1911 | } | ||
| 1912 | |||
| 1913 | task_lock(current); | ||
| 1914 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); | ||
| 1915 | task_unlock(current); | ||
| 1916 | mpol_put(mpol); /* drop our ref on sb mpol */ | ||
| 1917 | if (ret) { | ||
| 1918 | mpol_put(new); | ||
| 1919 | return; | ||
| 1920 | } | ||
| 1869 | 1921 | ||
| 1870 | /* Create pseudo-vma that contains just the policy */ | 1922 | /* Create pseudo-vma that contains just the policy */ |
| 1871 | memset(&pvma, 0, sizeof(struct vm_area_struct)); | 1923 | memset(&pvma, 0, sizeof(struct vm_area_struct)); |
| @@ -2086,8 +2138,19 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
| 2086 | new = mpol_new(mode, mode_flags, &nodes); | 2138 | new = mpol_new(mode, mode_flags, &nodes); |
| 2087 | if (IS_ERR(new)) | 2139 | if (IS_ERR(new)) |
| 2088 | err = 1; | 2140 | err = 1; |
| 2089 | else if (no_context) | 2141 | else { |
| 2090 | new->w.user_nodemask = nodes; /* save for contextualization */ | 2142 | int ret; |
| 2143 | |||
| 2144 | task_lock(current); | ||
| 2145 | ret = mpol_set_nodemask(new, &nodes); | ||
| 2146 | task_unlock(current); | ||
| 2147 | if (ret) | ||
| 2148 | err = 1; | ||
| 2149 | else if (no_context) { | ||
| 2150 | /* save for contextualization */ | ||
| 2151 | new->w.user_nodemask = nodes; | ||
| 2152 | } | ||
| 2153 | } | ||
| 2091 | 2154 | ||
| 2092 | out: | 2155 | out: |
| 2093 | /* Restore string for error message */ | 2156 | /* Restore string for error message */ |
