aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
authorKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>2012-10-08 19:29:16 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:21 -0400
commit869833f2c5c6e4dd09a5378cfc665ffb4615e5d2 (patch)
tree37ed82c920c9266fe8a470af1de7a4bd72e66cb6 /mm/mempolicy.c
parent8d34694c1abf29df1f3c7317936b7e3e2e308d9b (diff)
mempolicy: remove mempolicy sharing
Dave Jones' system call fuzz testing tool "trinity" triggered the following bug error with slab debugging enabled ============================================================================= BUG numa_policy (Not tainted): Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880146498250-0xffff880146498250. First byte 0x6a instead of 0x6b INFO: Allocated in mpol_new+0xa3/0x140 age=46310 cpu=6 pid=32154 __slab_alloc+0x3d3/0x445 kmem_cache_alloc+0x29d/0x2b0 mpol_new+0xa3/0x140 sys_mbind+0x142/0x620 system_call_fastpath+0x16/0x1b INFO: Freed in __mpol_put+0x27/0x30 age=46268 cpu=6 pid=32154 __slab_free+0x2e/0x1de kmem_cache_free+0x25a/0x260 __mpol_put+0x27/0x30 remove_vma+0x68/0x90 exit_mmap+0x118/0x140 mmput+0x73/0x110 exit_mm+0x108/0x130 do_exit+0x162/0xb90 do_group_exit+0x4f/0xc0 sys_exit_group+0x17/0x20 system_call_fastpath+0x16/0x1b INFO: Slab 0xffffea0005192600 objects=27 used=27 fp=0x (null) flags=0x20000000004080 INFO: Object 0xffff880146498250 @offset=592 fp=0xffff88014649b9d0 The problem is that the structure is being prematurely freed due to a reference count imbalance. In the following case mbind(addr, len) should replace the memory policies of both vma1 and vma2 and thus they will become to share the same mempolicy and the new mempolicy will have the MPOL_F_SHARED flag. +-------------------+-------------------+ | vma1 | vma2(shmem) | +-------------------+-------------------+ | | addr addr+len alloc_pages_vma() uses get_vma_policy() and mpol_cond_put() pair for maintaining the mempolicy reference count. The current rule is that get_vma_policy() only increments refcount for shmem VMA and mpol_conf_put() only decrements refcount if the policy has MPOL_F_SHARED. In above case, vma1 is not shmem vma and vma->policy has MPOL_F_SHARED! The reference count will be decreased even though was not increased whenever alloc_page_vma() is called. This has been broken since commit [52cd3b07: mempolicy: rework mempolicy Reference Counting] in 2008. There is another serious bug with the sharing of memory policies. Currently, mempolicy rebind logic (it is called from cpuset rebinding) ignores a refcount of mempolicy and override it forcibly. Thus, any mempolicy sharing may cause mempolicy corruption. The bug was introduced by commit [68860ec1: cpusets: automatic numa mempolicy rebinding]. Ideally, the shared policy handling would be rewritten to either properly handle COW of the policy structures or at least reference count MPOL_F_SHARED based exclusively on information within the policy. However, this patch takes the easier approach of disabling any policy sharing between VMAs. Each new range allocated with sp_alloc will allocate a new policy, set the reference count to 1 and drop the reference count of the old policy. This increases the memory footprint but is not expected to be a major problem as mbind() is unlikely to be used for fine-grained ranges. It is also inefficient because it means we allocate a new policy even in cases where mbind_range() could use the new_policy passed to it. However, it is more straight-forward and the change should be invisible to the user. [mgorman@suse.de: Edited changelog] Reported-by: Dave Jones <davej@redhat.com>, Cc: Christoph Lameter <cl@linux.com>, Reviewed-by: Christoph Lameter <cl@linux.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Josh Boyer <jwboyer@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c52
1 files changed, 38 insertions, 14 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 92daa267baf..f0728ae7467 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -607,24 +607,39 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
607 return first; 607 return first;
608} 608}
609 609
610/* Apply policy to a single VMA */ 610/*
611static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) 611 * Apply policy to a single VMA
612 * This must be called with the mmap_sem held for writing.
613 */
614static int vma_replace_policy(struct vm_area_struct *vma,
615 struct mempolicy *pol)
612{ 616{
613 int err = 0; 617 int err;
614 struct mempolicy *old = vma->vm_policy; 618 struct mempolicy *old;
619 struct mempolicy *new;
615 620
616 pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", 621 pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
617 vma->vm_start, vma->vm_end, vma->vm_pgoff, 622 vma->vm_start, vma->vm_end, vma->vm_pgoff,
618 vma->vm_ops, vma->vm_file, 623 vma->vm_ops, vma->vm_file,
619 vma->vm_ops ? vma->vm_ops->set_policy : NULL); 624 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
620 625
621 if (vma->vm_ops && vma->vm_ops->set_policy) 626 new = mpol_dup(pol);
627 if (IS_ERR(new))
628 return PTR_ERR(new);
629
630 if (vma->vm_ops && vma->vm_ops->set_policy) {
622 err = vma->vm_ops->set_policy(vma, new); 631 err = vma->vm_ops->set_policy(vma, new);
623 if (!err) { 632 if (err)
624 mpol_get(new); 633 goto err_out;
625 vma->vm_policy = new;
626 mpol_put(old);
627 } 634 }
635
636 old = vma->vm_policy;
637 vma->vm_policy = new; /* protected by mmap_sem */
638 mpol_put(old);
639
640 return 0;
641 err_out:
642 mpol_put(new);
628 return err; 643 return err;
629} 644}
630 645
@@ -676,7 +691,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
676 if (err) 691 if (err)
677 goto out; 692 goto out;
678 } 693 }
679 err = policy_vma(vma, new_pol); 694 err = vma_replace_policy(vma, new_pol);
680 if (err) 695 if (err)
681 goto out; 696 goto out;
682 } 697 }
@@ -2153,15 +2168,24 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
2153static struct sp_node *sp_alloc(unsigned long start, unsigned long end, 2168static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
2154 struct mempolicy *pol) 2169 struct mempolicy *pol)
2155{ 2170{
2156 struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL); 2171 struct sp_node *n;
2172 struct mempolicy *newpol;
2157 2173
2174 n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
2158 if (!n) 2175 if (!n)
2159 return NULL; 2176 return NULL;
2177
2178 newpol = mpol_dup(pol);
2179 if (IS_ERR(newpol)) {
2180 kmem_cache_free(sn_cache, n);
2181 return NULL;
2182 }
2183 newpol->flags |= MPOL_F_SHARED;
2184
2160 n->start = start; 2185 n->start = start;
2161 n->end = end; 2186 n->end = end;
2162 mpol_get(pol); 2187 n->policy = newpol;
2163 pol->flags |= MPOL_F_SHARED; /* for unref */ 2188
2164 n->policy = pol;
2165 return n; 2189 return n;
2166} 2190}
2167 2191