Diffstat (limited to 'mm/mempolicy.c')
 mm/mempolicy.c | 227 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 159 insertions(+), 68 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 08f40a2f3fe0..5d6fb339de03 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -119,7 +119,22 @@ struct mempolicy default_policy = {
 
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
-	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
+	/*
+	 * If a read-side task has no lock to protect task->mempolicy, the
+	 * write-side task rebinds task->mempolicy in two steps: the first
+	 * step sets all the new nodes, the second clears all the disallowed
+	 * nodes.  This way a reader can never find an empty nodemask to
+	 * allocate a page from.
+	 * If the read side holds a lock to protect task->mempolicy, we
+	 * rebind directly.
+	 *
+	 * step:
+	 *	MPOL_REBIND_ONCE  - do the rebind work in one pass
+	 *	MPOL_REBIND_STEP1 - set all the new nodes
+	 *	MPOL_REBIND_STEP2 - clear all the disallowed nodes
+	 */
+	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes,
+			enum mpol_rebind_step step);
 } mpol_ops[MPOL_MAX];
 
 /* Check that the nodemask contains at least one populated zone */
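
The new ops-table comment is best read from the write side. A minimal sketch of how a writer would drive the two steps (illustrative only: the wrapper function is hypothetical; mpol_rebind_task(), task_lock() and the MPOL_REBIND_* steps are the real interfaces this patch introduces):

	/* Hypothetical write-side driver for the two-step rebind. */
	static void example_cpuset_mems_update(struct task_struct *tsk,
					       const nodemask_t *newmems)
	{
		task_lock(tsk);		/* mpol_rebind_task() wants alloc_lock */
		mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); /* grow: old | new */
		tsk->mems_allowed = *newmems;
		mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); /* shrink: new only */
		task_unlock(tsk);
	}
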
@@ -127,9 +142,6 @@ static int is_valid_nodemask(const nodemask_t *nodemask)
 {
 	int nd, k;
 
-	/* Check that there is something useful in this mask */
-	k = policy_zone;
-
 	for_each_node_mask(nd, *nodemask) {
 		struct zone *z;
 
@@ -145,7 +157,7 @@ static int is_valid_nodemask(const nodemask_t *nodemask)
 
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
 {
-	return pol->flags & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES);
+	return pol->flags & MPOL_MODE_FLAGS;
 }
 
 static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
@@ -277,12 +289,19 @@ void __mpol_put(struct mempolicy *p)
 	kmem_cache_free(policy_cache, p);
 }
 
-static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes,
+				enum mpol_rebind_step step)
 {
 }
 
-static void mpol_rebind_nodemask(struct mempolicy *pol,
-				 const nodemask_t *nodes)
+/*
+ * step:
+ *	MPOL_REBIND_ONCE  - do the rebind work in one pass
+ *	MPOL_REBIND_STEP1 - set all the new nodes
+ *	MPOL_REBIND_STEP2 - clear all the disallowed nodes
+ */
+static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes,
+				 enum mpol_rebind_step step)
 {
 	nodemask_t tmp;
 
@@ -291,12 +310,31 @@ static void mpol_rebind_nodemask(struct mempolicy *pol,
 	else if (pol->flags & MPOL_F_RELATIVE_NODES)
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 	else {
-		nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
-			    *nodes);
-		pol->w.cpuset_mems_allowed = *nodes;
+		/*
+		 * On MPOL_REBIND_STEP1 we cache the remap result in
+		 * ->w.cpuset_mems_allowed for STEP2 to pick up.
+		 */
+		if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP1) {
+			nodes_remap(tmp, pol->v.nodes,
+					pol->w.cpuset_mems_allowed, *nodes);
+			pol->w.cpuset_mems_allowed = step ? tmp : *nodes;
+		} else if (step == MPOL_REBIND_STEP2) {
+			tmp = pol->w.cpuset_mems_allowed;
+			pol->w.cpuset_mems_allowed = *nodes;
+		} else
+			BUG();
 	}
 
-	pol->v.nodes = tmp;
+	if (nodes_empty(tmp))
+		tmp = *nodes;
+
+	if (step == MPOL_REBIND_STEP1)
+		nodes_or(pol->v.nodes, pol->v.nodes, tmp);
+	else if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP2)
+		pol->v.nodes = tmp;
+	else
+		BUG();
+
 	if (!node_isset(current->il_next, tmp)) {
 		current->il_next = next_node(current->il_next, tmp);
 		if (current->il_next >= MAX_NUMNODES)
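
A worked example of the hunk above: assume MPOL_BIND on nodes {0,1} and a cpuset move to {2,3}. The sketch below inlines the STEP1 and STEP2 branches of mpol_rebind_nodemask() to show why STEP1 must OR rather than assign (illustrative, not kernel code):

	/* Sketch: pol->v.nodes = {0,1}, cpuset_mems_allowed = {0,1}, *nodes = {2,3} */
	static void rebind_walkthrough(struct mempolicy *pol, const nodemask_t *nodes)
	{
		nodemask_t tmp;

		/* STEP1: remap {0,1} onto {2,3}, so tmp = {2,3}.  The OR makes
		 * pol->v.nodes = {0,1,2,3}; a lockless reader therefore never
		 * observes an empty mask mid-rebind. */
		nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed, *nodes);
		nodes_or(pol->v.nodes, pol->v.nodes, tmp);
		pol->w.cpuset_mems_allowed = tmp;	/* cache for STEP2 */

		/* STEP2: shrink to the cached remap result, {2,3}. */
		pol->v.nodes = pol->w.cpuset_mems_allowed;
		pol->w.cpuset_mems_allowed = *nodes;
	}
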
@@ -307,7 +345,8 @@ static void mpol_rebind_nodemask(struct mempolicy *pol,
 }
 
 static void mpol_rebind_preferred(struct mempolicy *pol,
-				  const nodemask_t *nodes)
+				  const nodemask_t *nodes,
+				  enum mpol_rebind_step step)
 {
 	nodemask_t tmp;
 
@@ -330,16 +369,45 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
 	}
 }
 
-/* Migrate a policy to a different set of nodes */
-static void mpol_rebind_policy(struct mempolicy *pol,
-			const nodemask_t *newmask)
+/*
+ * mpol_rebind_policy - migrate a policy to a different set of nodes
+ *
+ * If a read-side task has no lock to protect task->mempolicy, the
+ * write-side task rebinds task->mempolicy in two steps: the first step
+ * sets all the new nodes, the second clears all the disallowed nodes.
+ * This way a reader can never find an empty nodemask to allocate a
+ * page from.
+ * If the read side holds a lock to protect task->mempolicy, we rebind
+ * directly.
+ *
+ * step:
+ *	MPOL_REBIND_ONCE  - do the rebind work in one pass
+ *	MPOL_REBIND_STEP1 - set all the new nodes
+ *	MPOL_REBIND_STEP2 - clear all the disallowed nodes
+ */
+static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
+				enum mpol_rebind_step step)
 {
 	if (!pol)
 		return;
-	if (!mpol_store_user_nodemask(pol) &&
+	if (!mpol_store_user_nodemask(pol) && step == MPOL_REBIND_ONCE &&
 	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
-	mpol_ops[pol->mode].rebind(pol, newmask);
+
+	if (step == MPOL_REBIND_STEP1 && (pol->flags & MPOL_F_REBINDING))
+		return;
+
+	if (step == MPOL_REBIND_STEP2 && !(pol->flags & MPOL_F_REBINDING))
+		BUG();
+
+	if (step == MPOL_REBIND_STEP1)
+		pol->flags |= MPOL_F_REBINDING;
+	else if (step == MPOL_REBIND_STEP2)
+		pol->flags &= ~MPOL_F_REBINDING;
+	else if (step >= MPOL_REBIND_NSTEP)
+		BUG();
+
+	mpol_ops[pol->mode].rebind(pol, newmask, step);
 }
 
 /*
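
MPOL_F_REBINDING gives the protocol a checkable invariant: STEP1 sets the flag, STEP2 clears it, so a replayed STEP1 degenerates to a no-op and a STEP2 without a preceding STEP1 is a BUG(). The validity rule, restated as a predicate (a sketch, not part of the patch):

	static bool rebind_step_is_valid(const struct mempolicy *pol,
					 enum mpol_rebind_step step)
	{
		if (step == MPOL_REBIND_STEP2)
			return pol->flags & MPOL_F_REBINDING; /* STEP1 must have run */
		return step < MPOL_REBIND_NSTEP;
	}
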
@@ -349,9 +417,10 @@ static void mpol_rebind_policy(struct mempolicy *pol,
  * Called with task's alloc_lock held.
  */
 
-void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
+			enum mpol_rebind_step step)
 {
-	mpol_rebind_policy(tsk->mempolicy, new);
+	mpol_rebind_policy(tsk->mempolicy, new, step);
 }
 
 /*
@@ -366,7 +435,7 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 
 	down_write(&mm->mmap_sem);
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		mpol_rebind_policy(vma->vm_policy, new);
+		mpol_rebind_policy(vma->vm_policy, new, MPOL_REBIND_ONCE);
 	up_write(&mm->mmap_sem);
 }
 
@@ -859,7 +928,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	nodes_clear(nmask);
 	node_set(source, nmask);
 
-	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask,
+	check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist))
@@ -1444,15 +1513,13 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 		/*
 		 * Normally, MPOL_BIND allocations are node-local within the
 		 * allowed nodemask.  However, if __GFP_THISNODE is set and the
-		 * current node is part of the mask, we use the zonelist for
+		 * current node isn't part of the mask, we use the zonelist for
 		 * the first node in the mask instead.
 		 */
 		if (unlikely(gfp & __GFP_THISNODE) &&
 		    unlikely(!node_isset(nd, policy->v.nodes)))
 			nd = first_node(policy->v.nodes);
 		break;
-	case MPOL_INTERLEAVE: /* should not happen */
-		break;
 	default:
 		BUG();
 	}
@@ -1572,6 +1639,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
  * to the struct mempolicy for conditional unref after allocation.
  * If the effective policy is 'BIND, returns a pointer to the mempolicy's
  * @nodemask for filtering the zonelist.
+ *
+ * Must be protected by get_mems_allowed()
  */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 				gfp_t gfp_flags, struct mempolicy **mpol,
@@ -1617,6 +1686,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	if (!(mask && current->mempolicy))
 		return false;
 
+	task_lock(current);
 	mempolicy = current->mempolicy;
 	switch (mempolicy->mode) {
 	case MPOL_PREFERRED:
@@ -1636,6 +1706,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	default:
 		BUG();
 	}
+	task_unlock(current);
 
 	return true;
 }
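
The task_lock()/task_unlock() pair pins current->mempolicy against a concurrent do_set_mempolicy(). The same pattern works for any reader that dereferences task->mempolicy without taking a reference; a minimal sketch:

	/* Sketch: safely sample a field of current's mempolicy. */
	static unsigned short current_mempolicy_mode(void)
	{
		unsigned short mode = MPOL_DEFAULT;

		task_lock(current);	/* excludes do_set_mempolicy() */
		if (current->mempolicy)
			mode = current->mempolicy->mode;
		task_unlock(current);

		return mode;
	}
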
@@ -1683,13 +1754,17 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 	struct zonelist *zl;
+	struct page *page;
 
+	get_mems_allowed();
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		mpol_cond_put(pol);
-		return alloc_page_interleave(gfp, 0, nid);
+		page = alloc_page_interleave(gfp, 0, nid);
+		put_mems_allowed();
+		return page;
 	}
 	zl = policy_zonelist(gfp, pol);
 	if (unlikely(mpol_needs_cond_ref(pol))) {
@@ -1699,12 +1774,15 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 		struct page *page = __alloc_pages_nodemask(gfp, 0,
 						zl, policy_nodemask(gfp, pol));
 		__mpol_put(pol);
+		put_mems_allowed();
 		return page;
 	}
 	/*
 	 * fast path:  default or task policy
 	 */
-	return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+	page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+	put_mems_allowed();
+	return page;
 }
 
 /**
@@ -1729,18 +1807,23 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
+	struct page *page;
 
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
+	get_mems_allowed();
 	/*
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
 	 */
 	if (pol->mode == MPOL_INTERLEAVE)
-		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
-	return __alloc_pages_nodemask(gfp, order,
+		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+	else
+		page = __alloc_pages_nodemask(gfp, order,
 			policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+	put_mems_allowed();
+	return page;
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
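
All the allocation paths above now follow the same bracketing rule: take get_mems_allowed() before consulting the policy, drop put_mems_allowed() on every exit path, and carry the result out through a local page variable. Distilled into one helper (a sketch; in this kernel the get/put pair takes no arguments and returns nothing):

	static struct page *guarded_alloc(gfp_t gfp, unsigned order,
					  struct zonelist *zl, nodemask_t *nm)
	{
		struct page *page;

		get_mems_allowed();	/* stabilize mems_allowed vs. cpuset rebind */
		page = __alloc_pages_nodemask(gfp, order, zl, nm);
		put_mems_allowed();	/* every return path must drop the guard */

		return page;
	}
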
@@ -1750,6 +1833,9 @@ EXPORT_SYMBOL(alloc_pages_current);
  * with the mems_allowed returned by cpuset_mems_allowed().  This
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
+ *
+ * current's mempolicy may be rebound by another task (the one that changes
+ * the cpuset's mems), so we needn't do the rebind work for the current task.
  */
 
 /* Slow path of a mempolicy duplicate */
@@ -1759,13 +1845,24 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 
 	if (!new)
 		return ERR_PTR(-ENOMEM);
+
+	/* task's mempolicy is protected by alloc_lock */
+	if (old == current->mempolicy) {
+		task_lock(current);
+		*new = *old;
+		task_unlock(current);
+	} else
+		*new = *old;
+
 	rcu_read_lock();
 	if (current_cpuset_is_being_rebound()) {
 		nodemask_t mems = cpuset_mems_allowed(current);
-		mpol_rebind_policy(old, &mems);
+		if (new->flags & MPOL_F_REBINDING)
+			mpol_rebind_policy(new, &mems, MPOL_REBIND_STEP2);
+		else
+			mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE);
 	}
 	rcu_read_unlock();
-	*new = *old;
 	atomic_set(&new->refcnt, 1);
 	return new;
 }
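
Two fixes land here: the copy is taken under task_lock() when old is current's policy, so it cannot tear against a concurrent rebind, and the rebind is then applied to the private copy new instead of the shared old. The branch on MPOL_F_REBINDING, restated as a helper (a sketch of the logic above, not a new API):

	static void finish_dup_rebind(struct mempolicy *new, const nodemask_t *mems)
	{
		if (new->flags & MPOL_F_REBINDING)
			/* copy was taken between STEP1 and STEP2: finish the shrink */
			mpol_rebind_policy(new, mems, MPOL_REBIND_STEP2);
		else
			/* copy was stable: a one-pass rebind suffices */
			mpol_rebind_policy(new, mems, MPOL_REBIND_ONCE);
	}
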
@@ -1792,16 +1889,6 @@ struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
 	return tompol;
 }
 
-static int mpol_match_intent(const struct mempolicy *a,
-			     const struct mempolicy *b)
-{
-	if (a->flags != b->flags)
-		return 0;
-	if (!mpol_store_user_nodemask(a))
-		return 1;
-	return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
-}
-
 /* Slow path of a mempolicy comparison */
 int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
@@ -1809,8 +1896,12 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 0;
 	if (a->mode != b->mode)
 		return 0;
-	if (a->mode != MPOL_DEFAULT && !mpol_match_intent(a, b))
+	if (a->flags != b->flags)
 		return 0;
+	if (mpol_store_user_nodemask(a))
+		if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask))
+			return 0;
+
 	switch (a->mode) {
 	case MPOL_BIND:
 		/* Fall through */
@@ -2006,27 +2097,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 			return;
 		/* contextualize the tmpfs mount point mempolicy */
 		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
-		if (IS_ERR(new)) {
-			mpol_put(mpol);	/* drop our ref on sb mpol */
-			NODEMASK_SCRATCH_FREE(scratch);
-			return;		/* no valid nodemask intersection */
-		}
+		if (IS_ERR(new))
+			goto free_scratch; /* no valid nodemask intersection */
 
 		task_lock(current);
 		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
 		task_unlock(current);
 		mpol_put(mpol);	/* drop our ref on sb mpol */
-		if (ret) {
-			NODEMASK_SCRATCH_FREE(scratch);
-			mpol_put(new);
-			return;
-		}
+		if (ret)
+			goto put_free;
 
 		/* Create pseudo-vma that contains just the policy */
 		memset(&pvma, 0, sizeof(struct vm_area_struct));
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
+
+put_free:
 		mpol_put(new);			/* drop initial ref */
+free_scratch:
 		NODEMASK_SCRATCH_FREE(scratch);
 	}
 }
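
The restructuring is the usual kernel goto-unwind idiom, with one twist: both labels also lie on the success path, because new and scratch are temporaries that must be released either way. In miniature (a generic kernel-style sketch, not this function):

	static int demo_unwind(void)
	{
		int err = -ENOMEM;
		void *scratch, *tmp;

		scratch = kmalloc(64, GFP_KERNEL);
		if (!scratch)
			return err;
		tmp = kmalloc(64, GFP_KERNEL);
		if (!tmp)
			goto free_scratch;	/* undo only what succeeded */

		/* ... consume tmp and scratch ... */
		err = 0;

		kfree(tmp);		/* analogous to the put_free: label */
	free_scratch:
		kfree(scratch);
		return err;
	}
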
@@ -2132,9 +2220,15 @@ void numa_default_policy(void)
  * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
  * Used only for mpol_parse_str() and mpol_to_str()
  */
-#define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
-static const char * const policy_types[] =
-	{ "default", "prefer", "bind", "interleave", "local" };
+#define MPOL_LOCAL MPOL_MAX
+static const char * const policy_modes[] =
+{
+	[MPOL_DEFAULT]    = "default",
+	[MPOL_PREFERRED]  = "prefer",
+	[MPOL_BIND]       = "bind",
+	[MPOL_INTERLEAVE] = "interleave",
+	[MPOL_LOCAL]      = "local"
+};
 
 
 #ifdef CONFIG_TMPFS
@@ -2159,12 +2253,11 @@
 int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 {
 	struct mempolicy *new = NULL;
-	unsigned short uninitialized_var(mode);
+	unsigned short mode;
 	unsigned short uninitialized_var(mode_flags);
 	nodemask_t nodes;
 	char *nodelist = strchr(str, ':');
 	char *flags = strchr(str, '=');
-	int i;
 	int err = 1;
 
 	if (nodelist) {
@@ -2180,13 +2273,12 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 	if (flags)
 		*flags++ = '\0';	/* terminate mode string */
 
-	for (i = 0; i <= MPOL_LOCAL; i++) {
-		if (!strcmp(str, policy_types[i])) {
-			mode = i;
+	for (mode = 0; mode <= MPOL_LOCAL; mode++) {
+		if (!strcmp(str, policy_modes[mode])) {
 			break;
 		}
 	}
-	if (i > MPOL_LOCAL)
+	if (mode > MPOL_LOCAL)
 		goto out;
 
 	switch (mode) {
@@ -2250,7 +2342,10 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 	if (IS_ERR(new))
 		goto out;
 
-	{
+	if (no_context) {
+		/* save for contextualization */
+		new->w.user_nodemask = nodes;
+	} else {
 		int ret;
 		NODEMASK_SCRATCH(scratch);
 		if (scratch) {
@@ -2266,10 +2361,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 		}
 	}
 	err = 0;
-	if (no_context) {
-		/* save for contextualization */
-		new->w.user_nodemask = nodes;
-	}
 
 out:
 	/* Restore string for error message */
@@ -2338,11 +2429,11 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
 		BUG();
 	}
 
-	l = strlen(policy_types[mode]);
+	l = strlen(policy_modes[mode]);
 	if (buffer + maxlen < p + l + 1)
 		return -ENOSPC;
 
-	strcpy(p, policy_types[mode]);
+	strcpy(p, policy_modes[mode]);
 	p += l;
 
 	if (flags & MPOL_MODE_FLAGS) {