aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mmap.c
diff options
context:
space:
mode:
authorKonstantin Khlebnikov <khlebnikov@openvz.org>2012-10-08 19:28:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:18 -0400
commite9714acf8c439688884234dcac2bfc38bb607d38 (patch)
tree2e21c88f855a9f5168a143fa9948141140ff02a2 /mm/mmap.c
parent2dd8ad81e31d0d36a5d448329c646ab43eb17788 (diff)
mm: kill vma flag VM_EXECUTABLE and mm->num_exe_file_vmas
Currently the kernel sets mm->exe_file during sys_execve() and then tracks the number of vmas with the VM_EXECUTABLE flag in mm->num_exe_file_vmas; as soon as this counter drops to zero, the kernel resets mm->exe_file to NULL. It also resets mm->exe_file at the last mmput(), when mm->mm_users drops to zero. A VMA with the VM_EXECUTABLE flag appears after mapping a file with the MAP_EXECUTABLE flag; such vmas can appear only at sys_execve() or after vma splitting, because sys_mmap ignores this flag. Usually the binfmt module sets mm->exe_file and mmaps executable vmas with this file; they hold mm->exe_file while the task is running. Comment from v2.6.25-6245-g925d1c4 ("procfs task exe symlink"), where all this stuff was introduced: > The kernel implements readlink of /proc/pid/exe by getting the file from > the first executable VMA. Then the path to the file is reconstructed and > reported as the result. > > Because of the VMA walk the code is slightly different on nommu systems. > This patch avoids separate /proc/pid/exe code on nommu systems. Instead of > walking the VMAs to find the first executable file-backed VMA we store a > reference to the exec'd file in the mm_struct. > > That reference would prevent the filesystem holding the executable file > from being unmounted even after unmapping the VMAs. So we track the number > of VM_EXECUTABLE VMAs and drop the new reference when the last one is > unmapped. This avoids pinning the mounted filesystem. exe_file's vma accounting is hooked into every file mmap/munmap and vma split/merge just to fix a hypothetical case: an mm that has already unmapped all of its executable files but is still alive, pinning the filesystem and preventing it from being unmounted. It seems that currently nobody depends on this behaviour. We can try to remove this logic and keep mm->exe_file until the final mmput(). mm->exe_file is still protected with mm->mmap_sem, because we want to change it via the new sys_prctl(PR_SET_MM_EXE_FILE). Also, via this syscall a task can change its mm->exe_file and unpin the mountpoint explicitly. 
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Carsten Otte <cotte@de.ibm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Eric Paris <eparis@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Morris <james.l.morris@oracle.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Kentaro Takeda <takedakn@nttdata.co.jp> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Venkatesh Pallipadi <venki@google.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c25
1 files changed, 4 insertions, 21 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index d0686d355113..c1ad2e78ea58 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -231,11 +231,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
231 might_sleep(); 231 might_sleep();
232 if (vma->vm_ops && vma->vm_ops->close) 232 if (vma->vm_ops && vma->vm_ops->close)
233 vma->vm_ops->close(vma); 233 vma->vm_ops->close(vma);
234 if (vma->vm_file) { 234 if (vma->vm_file)
235 fput(vma->vm_file); 235 fput(vma->vm_file);
236 if (vma->vm_flags & VM_EXECUTABLE)
237 removed_exe_file_vma(vma->vm_mm);
238 }
239 mpol_put(vma_policy(vma)); 236 mpol_put(vma_policy(vma));
240 kmem_cache_free(vm_area_cachep, vma); 237 kmem_cache_free(vm_area_cachep, vma);
241 return next; 238 return next;
@@ -636,8 +633,6 @@ again: remove_next = 1 + (end > next->vm_end);
636 if (file) { 633 if (file) {
637 uprobe_munmap(next, next->vm_start, next->vm_end); 634 uprobe_munmap(next, next->vm_start, next->vm_end);
638 fput(file); 635 fput(file);
639 if (next->vm_flags & VM_EXECUTABLE)
640 removed_exe_file_vma(mm);
641 } 636 }
642 if (next->anon_vma) 637 if (next->anon_vma)
643 anon_vma_merge(vma, next); 638 anon_vma_merge(vma, next);
@@ -1304,8 +1299,6 @@ munmap_back:
1304 error = file->f_op->mmap(file, vma); 1299 error = file->f_op->mmap(file, vma);
1305 if (error) 1300 if (error)
1306 goto unmap_and_free_vma; 1301 goto unmap_and_free_vma;
1307 if (vm_flags & VM_EXECUTABLE)
1308 added_exe_file_vma(mm);
1309 1302
1310 /* Can addr have changed?? 1303 /* Can addr have changed??
1311 * 1304 *
@@ -1987,11 +1980,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1987 if (anon_vma_clone(new, vma)) 1980 if (anon_vma_clone(new, vma))
1988 goto out_free_mpol; 1981 goto out_free_mpol;
1989 1982
1990 if (new->vm_file) { 1983 if (new->vm_file)
1991 get_file(new->vm_file); 1984 get_file(new->vm_file);
1992 if (vma->vm_flags & VM_EXECUTABLE)
1993 added_exe_file_vma(mm);
1994 }
1995 1985
1996 if (new->vm_ops && new->vm_ops->open) 1986 if (new->vm_ops && new->vm_ops->open)
1997 new->vm_ops->open(new); 1987 new->vm_ops->open(new);
@@ -2009,11 +1999,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2009 /* Clean everything up if vma_adjust failed. */ 1999 /* Clean everything up if vma_adjust failed. */
2010 if (new->vm_ops && new->vm_ops->close) 2000 if (new->vm_ops && new->vm_ops->close)
2011 new->vm_ops->close(new); 2001 new->vm_ops->close(new);
2012 if (new->vm_file) { 2002 if (new->vm_file)
2013 if (vma->vm_flags & VM_EXECUTABLE)
2014 removed_exe_file_vma(mm);
2015 fput(new->vm_file); 2003 fput(new->vm_file);
2016 }
2017 unlink_anon_vmas(new); 2004 unlink_anon_vmas(new);
2018 out_free_mpol: 2005 out_free_mpol:
2019 mpol_put(pol); 2006 mpol_put(pol);
@@ -2408,12 +2395,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2408 new_vma->vm_start = addr; 2395 new_vma->vm_start = addr;
2409 new_vma->vm_end = addr + len; 2396 new_vma->vm_end = addr + len;
2410 new_vma->vm_pgoff = pgoff; 2397 new_vma->vm_pgoff = pgoff;
2411 if (new_vma->vm_file) { 2398 if (new_vma->vm_file)
2412 get_file(new_vma->vm_file); 2399 get_file(new_vma->vm_file);
2413
2414 if (vma->vm_flags & VM_EXECUTABLE)
2415 added_exe_file_vma(mm);
2416 }
2417 if (new_vma->vm_ops && new_vma->vm_ops->open) 2400 if (new_vma->vm_ops && new_vma->vm_ops->open)
2418 new_vma->vm_ops->open(new_vma); 2401 new_vma->vm_ops->open(new_vma);
2419 vma_link(mm, new_vma, prev, rb_link, rb_parent); 2402 vma_link(mm, new_vma, prev, rb_link, rb_parent);