author		Miao Xie <miaox@cn.fujitsu.com>			2010-05-24 17:32:08 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-25 11:06:57 -0400
commit		c0ff7453bb5c7c98e0885fb94279f2571946f280 (patch)
tree		8bb2b169a5145f0496575dbd2f48bb4b1c83f819 /mm/mempolicy.c
parent		708c1bbc9d0c3e57f40501794d9b0eed29d10fce (diff)
cpuset,mm: fix no node to alloc memory when changing cpuset's mems
Before applying this patch, cpuset updates task->mems_allowed and the
mempolicy by setting all new bits in the nodemask first, and clearing all
old disallowed bits later.  But along the way, the allocator may find that
there is no node from which to allocate memory.  The reason is that when
cpuset rebinds the task's mempolicy, it temporarily clears the nodes the
allocator is allowed to allocate pages from, for example:

(mpol: mempolicy)
	task1				task1's mpol	task2
	alloc page			1
	  alloc on node0? NO		1
					1		change mems from 1 to 0
					1		rebind task1's mpol
					0-1		  set new bits
					0		  clear disallowed bits
	  alloc on node1? NO		0
	  ...
	can't alloc page
	  goto oom

This patch fixes the problem by expanding the node range first (setting the
newly allowed bits) and shrinking it lazily (clearing the newly disallowed
bits).  A variable tells the write-side task that a read-side task is
currently reading the nodemask, and the write-side task clears the newly
disallowed nodes only after the read-side task finishes its current memory
allocation.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
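For orientation, the read-side discipline the patch establishes can be
sketched as follows.  This is an illustrative sketch only, not code from the
patch: alloc_page_under_mems_allowed() is a hypothetical caller, and the
write-side counterpart (cpuset waiting before clearing newly disallowed
nodes) lives in the cpuset code and is not shown here.  It simply shows how
an allocation that consults the task's nodemask is bracketed by
get_mems_allowed()/put_mems_allowed(), as the hunks below do for
alloc_page_vma() and alloc_pages_current().

/*
 * Illustrative sketch (hypothetical caller, not part of this patch):
 * bracket a nodemask-dependent allocation with the read-side helpers so
 * the cpuset writer defers clearing newly disallowed nodes until we are
 * done allocating.
 */
#include <linux/cpuset.h>
#include <linux/gfp.h>

static struct page *alloc_page_under_mems_allowed(gfp_t gfp,
						  struct zonelist *zl,
						  nodemask_t *nodes)
{
	struct page *page;

	get_mems_allowed();	/* mark that mems_allowed/nodemask is being read */
	page = __alloc_pages_nodemask(gfp, 0, zl, nodes);
	put_mems_allowed();	/* writer may now clear newly disallowed nodes */

	return page;
}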
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--	mm/mempolicy.c	24
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8a993db88029..721b2b338032 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1639,6 +1639,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
  * to the struct mempolicy for conditional unref after allocation.
  * If the effective policy is 'BIND, returns a pointer to the mempolicy's
  * @nodemask for filtering the zonelist.
+ *
+ * Must be protected by get_mems_allowed()
  */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 				gfp_t gfp_flags, struct mempolicy **mpol,
@@ -1684,6 +1686,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	if (!(mask && current->mempolicy))
 		return false;
 
+	task_lock(current);
 	mempolicy = current->mempolicy;
 	switch (mempolicy->mode) {
 	case MPOL_PREFERRED:
@@ -1703,6 +1706,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	default:
 		BUG();
 	}
+	task_unlock(current);
 
 	return true;
 }
@@ -1750,13 +1754,17 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 	struct zonelist *zl;
+	struct page *page;
 
+	get_mems_allowed();
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		mpol_cond_put(pol);
-		return alloc_page_interleave(gfp, 0, nid);
+		page = alloc_page_interleave(gfp, 0, nid);
+		put_mems_allowed();
+		return page;
 	}
 	zl = policy_zonelist(gfp, pol);
 	if (unlikely(mpol_needs_cond_ref(pol))) {
@@ -1766,12 +1774,15 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 		struct page *page = __alloc_pages_nodemask(gfp, 0,
 						zl, policy_nodemask(gfp, pol));
 		__mpol_put(pol);
+		put_mems_allowed();
 		return page;
 	}
 	/*
 	 * fast path:  default or task policy
 	 */
-	return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+	page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+	put_mems_allowed();
+	return page;
 }
 
 /**
@@ -1796,18 +1807,23 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
+	struct page *page;
 
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
+	get_mems_allowed();
 	/*
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
 	 */
 	if (pol->mode == MPOL_INTERLEAVE)
-		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
-	return __alloc_pages_nodemask(gfp, order,
+		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+	else
+		page = __alloc_pages_nodemask(gfp, order,
 			policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+	put_mems_allowed();
+	return page;
 }
 EXPORT_SYMBOL(alloc_pages_current);
 