diff options
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 143 |
1 files changed, 103 insertions, 40 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3eb4a6fdc043..46bdf9ddf2ba 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -182,13 +182,54 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) | |||
182 | return 0; | 182 | return 0; |
183 | } | 183 | } |
184 | 184 | ||
185 | /* Create a new policy */ | 185 | /* |
186 | * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if | ||
187 | * any, for the new policy. mpol_new() has already validated the nodes | ||
188 | * parameter with respect to the policy mode and flags. But, we need to | ||
189 | * handle an empty nodemask with MPOL_PREFERRED here. | ||
190 | * | ||
191 | * Must be called holding task's alloc_lock to protect task's mems_allowed | ||
192 | * and mempolicy. May also be called holding the mmap_sem for write. | ||
193 | */ | ||
194 | static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | ||
195 | { | ||
196 | nodemask_t cpuset_context_nmask; | ||
197 | int ret; | ||
198 | |||
199 | /* If mode is MPOL_DEFAULT, pol is NULL and there is nothing to set up. */ | ||
200 | if (pol == NULL) | ||
201 | return 0; | ||
202 | |||
203 | VM_BUG_ON(!nodes); | ||
204 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) | ||
205 | nodes = NULL; /* explicit local allocation */ | ||
206 | else { | ||
207 | if (pol->flags & MPOL_F_RELATIVE_NODES) | ||
208 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
209 | &cpuset_current_mems_allowed); | ||
210 | else | ||
211 | nodes_and(cpuset_context_nmask, *nodes, | ||
212 | cpuset_current_mems_allowed); | ||
213 | if (mpol_store_user_nodemask(pol)) | ||
214 | pol->w.user_nodemask = *nodes; | ||
215 | else | ||
216 | pol->w.cpuset_mems_allowed = | ||
217 | cpuset_current_mems_allowed; | ||
218 | } | ||
219 | |||
220 | ret = mpol_ops[pol->mode].create(pol, | ||
221 | nodes ? &cpuset_context_nmask : NULL); | ||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * This function just creates a new policy, does some checks and simple | ||
227 | * initialization. You must invoke mpol_set_nodemask() to set the nodes. | ||
228 | */ | ||
186 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | 229 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, |
187 | nodemask_t *nodes) | 230 | nodemask_t *nodes) |
188 | { | 231 | { |
189 | struct mempolicy *policy; | 232 | struct mempolicy *policy; |
190 | nodemask_t cpuset_context_nmask; | ||
191 | int ret; | ||
192 | 233 | ||
193 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", | 234 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", |
194 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); | 235 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); |
@@ -210,7 +251,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
210 | if (((flags & MPOL_F_STATIC_NODES) || | 251 | if (((flags & MPOL_F_STATIC_NODES) || |
211 | (flags & MPOL_F_RELATIVE_NODES))) | 252 | (flags & MPOL_F_RELATIVE_NODES))) |
212 | return ERR_PTR(-EINVAL); | 253 | return ERR_PTR(-EINVAL); |
213 | nodes = NULL; /* flag local alloc */ | ||
214 | } | 254 | } |
215 | } else if (nodes_empty(*nodes)) | 255 | } else if (nodes_empty(*nodes)) |
216 | return ERR_PTR(-EINVAL); | 256 | return ERR_PTR(-EINVAL); |
@@ -221,30 +261,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
221 | policy->mode = mode; | 261 | policy->mode = mode; |
222 | policy->flags = flags; | 262 | policy->flags = flags; |
223 | 263 | ||
224 | if (nodes) { | ||
225 | /* | ||
226 | * cpuset related setup doesn't apply to local allocation | ||
227 | */ | ||
228 | cpuset_update_task_memory_state(); | ||
229 | if (flags & MPOL_F_RELATIVE_NODES) | ||
230 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
231 | &cpuset_current_mems_allowed); | ||
232 | else | ||
233 | nodes_and(cpuset_context_nmask, *nodes, | ||
234 | cpuset_current_mems_allowed); | ||
235 | if (mpol_store_user_nodemask(policy)) | ||
236 | policy->w.user_nodemask = *nodes; | ||
237 | else | ||
238 | policy->w.cpuset_mems_allowed = | ||
239 | cpuset_mems_allowed(current); | ||
240 | } | ||
241 | |||
242 | ret = mpol_ops[mode].create(policy, | ||
243 | nodes ? &cpuset_context_nmask : NULL); | ||
244 | if (ret < 0) { | ||
245 | kmem_cache_free(policy_cache, policy); | ||
246 | return ERR_PTR(ret); | ||
247 | } | ||
248 | return policy; | 264 | return policy; |
249 | } | 265 | } |
250 | 266 | ||
@@ -324,6 +340,8 @@ static void mpol_rebind_policy(struct mempolicy *pol, | |||
324 | /* | 340 | /* |
325 | * Wrapper for mpol_rebind_policy() that just requires task | 341 | * Wrapper for mpol_rebind_policy() that just requires task |
326 | * pointer, and updates task mempolicy. | 342 | * pointer, and updates task mempolicy. |
343 | * | ||
344 | * Called with task's alloc_lock held. | ||
327 | */ | 345 | */ |
328 | 346 | ||
329 | void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) | 347 | void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) |
@@ -600,8 +618,9 @@ static void mpol_set_task_struct_flag(void) | |||
600 | static long do_set_mempolicy(unsigned short mode, unsigned short flags, | 618 | static long do_set_mempolicy(unsigned short mode, unsigned short flags, |
601 | nodemask_t *nodes) | 619 | nodemask_t *nodes) |
602 | { | 620 | { |
603 | struct mempolicy *new; | 621 | struct mempolicy *new, *old; |
604 | struct mm_struct *mm = current->mm; | 622 | struct mm_struct *mm = current->mm; |
623 | int ret; | ||
605 | 624 | ||
606 | new = mpol_new(mode, flags, nodes); | 625 | new = mpol_new(mode, flags, nodes); |
607 | if (IS_ERR(new)) | 626 | if (IS_ERR(new)) |
@@ -615,20 +634,33 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
615 | */ | 634 | */ |
616 | if (mm) | 635 | if (mm) |
617 | down_write(&mm->mmap_sem); | 636 | down_write(&mm->mmap_sem); |
618 | mpol_put(current->mempolicy); | 637 | task_lock(current); |
638 | ret = mpol_set_nodemask(new, nodes); | ||
639 | if (ret) { | ||
640 | task_unlock(current); | ||
641 | if (mm) | ||
642 | up_write(&mm->mmap_sem); | ||
643 | mpol_put(new); | ||
644 | return ret; | ||
645 | } | ||
646 | old = current->mempolicy; | ||
619 | current->mempolicy = new; | 647 | current->mempolicy = new; |
620 | mpol_set_task_struct_flag(); | 648 | mpol_set_task_struct_flag(); |
621 | if (new && new->mode == MPOL_INTERLEAVE && | 649 | if (new && new->mode == MPOL_INTERLEAVE && |
622 | nodes_weight(new->v.nodes)) | 650 | nodes_weight(new->v.nodes)) |
623 | current->il_next = first_node(new->v.nodes); | 651 | current->il_next = first_node(new->v.nodes); |
652 | task_unlock(current); | ||
624 | if (mm) | 653 | if (mm) |
625 | up_write(&mm->mmap_sem); | 654 | up_write(&mm->mmap_sem); |
626 | 655 | ||
656 | mpol_put(old); | ||
627 | return 0; | 657 | return 0; |
628 | } | 658 | } |
629 | 659 | ||
630 | /* | 660 | /* |
631 | * Return nodemask for policy for get_mempolicy() query | 661 | * Return nodemask for policy for get_mempolicy() query |
662 | * | ||
663 | * Called with task's alloc_lock held | ||
632 | */ | 664 | */ |
633 | static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes) | 665 | static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes) |
634 | { | 666 | { |
@@ -674,7 +706,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
674 | struct vm_area_struct *vma = NULL; | 706 | struct vm_area_struct *vma = NULL; |
675 | struct mempolicy *pol = current->mempolicy; | 707 | struct mempolicy *pol = current->mempolicy; |
676 | 708 | ||
677 | cpuset_update_task_memory_state(); | ||
678 | if (flags & | 709 | if (flags & |
679 | ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) | 710 | ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) |
680 | return -EINVAL; | 711 | return -EINVAL; |
@@ -683,7 +714,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
683 | if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) | 714 | if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) |
684 | return -EINVAL; | 715 | return -EINVAL; |
685 | *policy = 0; /* just so it's initialized */ | 716 | *policy = 0; /* just so it's initialized */ |
717 | task_lock(current); | ||
686 | *nmask = cpuset_current_mems_allowed; | 718 | *nmask = cpuset_current_mems_allowed; |
719 | task_unlock(current); | ||
687 | return 0; | 720 | return 0; |
688 | } | 721 | } |
689 | 722 | ||
@@ -738,8 +771,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
738 | } | 771 | } |
739 | 772 | ||
740 | err = 0; | 773 | err = 0; |
741 | if (nmask) | 774 | if (nmask) { |
775 | task_lock(current); | ||
742 | get_policy_nodemask(pol, nmask); | 776 | get_policy_nodemask(pol, nmask); |
777 | task_unlock(current); | ||
778 | } | ||
743 | 779 | ||
744 | out: | 780 | out: |
745 | mpol_cond_put(pol); | 781 | mpol_cond_put(pol); |
@@ -979,6 +1015,14 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
979 | return err; | 1015 | return err; |
980 | } | 1016 | } |
981 | down_write(&mm->mmap_sem); | 1017 | down_write(&mm->mmap_sem); |
1018 | task_lock(current); | ||
1019 | err = mpol_set_nodemask(new, nmask); | ||
1020 | task_unlock(current); | ||
1021 | if (err) { | ||
1022 | up_write(&mm->mmap_sem); | ||
1023 | mpol_put(new); | ||
1024 | return err; | ||
1025 | } | ||
982 | vma = check_range(mm, start, end, nmask, | 1026 | vma = check_range(mm, start, end, nmask, |
983 | flags | MPOL_MF_INVERT, &pagelist); | 1027 | flags | MPOL_MF_INVERT, &pagelist); |
984 | 1028 | ||
@@ -1545,8 +1589,6 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
1545 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1589 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
1546 | struct zonelist *zl; | 1590 | struct zonelist *zl; |
1547 | 1591 | ||
1548 | cpuset_update_task_memory_state(); | ||
1549 | |||
1550 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { | 1592 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { |
1551 | unsigned nid; | 1593 | unsigned nid; |
1552 | 1594 | ||
@@ -1593,8 +1635,6 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) | |||
1593 | { | 1635 | { |
1594 | struct mempolicy *pol = current->mempolicy; | 1636 | struct mempolicy *pol = current->mempolicy; |
1595 | 1637 | ||
1596 | if ((gfp & __GFP_WAIT) && !in_interrupt()) | ||
1597 | cpuset_update_task_memory_state(); | ||
1598 | if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) | 1638 | if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) |
1599 | pol = &default_policy; | 1639 | pol = &default_policy; |
1600 | 1640 | ||
@@ -1854,6 +1894,8 @@ restart: | |||
1854 | */ | 1894 | */ |
1855 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | 1895 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) |
1856 | { | 1896 | { |
1897 | int ret; | ||
1898 | |||
1857 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ | 1899 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ |
1858 | spin_lock_init(&sp->lock); | 1900 | spin_lock_init(&sp->lock); |
1859 | 1901 | ||
@@ -1863,9 +1905,19 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
1863 | 1905 | ||
1864 | /* contextualize the tmpfs mount point mempolicy */ | 1906 | /* contextualize the tmpfs mount point mempolicy */ |
1865 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); | 1907 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); |
1866 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1908 | if (IS_ERR(new)) { |
1867 | if (IS_ERR(new)) | 1909 | mpol_put(mpol); /* drop our ref on sb mpol */ |
1868 | return; /* no valid nodemask intersection */ | 1910 | return; /* no valid nodemask intersection */ |
1911 | } | ||
1912 | |||
1913 | task_lock(current); | ||
1914 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); | ||
1915 | task_unlock(current); | ||
1916 | mpol_put(mpol); /* drop our ref on sb mpol */ | ||
1917 | if (ret) { | ||
1918 | mpol_put(new); | ||
1919 | return; | ||
1920 | } | ||
1869 | 1921 | ||
1870 | /* Create pseudo-vma that contains just the policy */ | 1922 | /* Create pseudo-vma that contains just the policy */ |
1871 | memset(&pvma, 0, sizeof(struct vm_area_struct)); | 1923 | memset(&pvma, 0, sizeof(struct vm_area_struct)); |
@@ -2086,8 +2138,19 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
2086 | new = mpol_new(mode, mode_flags, &nodes); | 2138 | new = mpol_new(mode, mode_flags, &nodes); |
2087 | if (IS_ERR(new)) | 2139 | if (IS_ERR(new)) |
2088 | err = 1; | 2140 | err = 1; |
2089 | else if (no_context) | 2141 | else { |
2090 | new->w.user_nodemask = nodes; /* save for contextualization */ | 2142 | int ret; |
2143 | |||
2144 | task_lock(current); | ||
2145 | ret = mpol_set_nodemask(new, &nodes); | ||
2146 | task_unlock(current); | ||
2147 | if (ret) | ||
2148 | err = 1; | ||
2149 | else if (no_context) { | ||
2150 | /* save for contextualization */ | ||
2151 | new->w.user_nodemask = nodes; | ||
2152 | } | ||
2153 | } | ||
2091 | 2154 | ||
2092 | out: | 2155 | out: |
2093 | /* Restore string for error message */ | 2156 | /* Restore string for error message */ |