aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@openvz.org>2012-05-31 19:26:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-31 20:49:32 -0400
commitb32dfe377102ce668775f8b6b1461f7ad428f8b6 (patch)
treef89be6bd34eb757c471f3ca506e0ce92224f9bc5 /kernel
parentfe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 (diff)
c/r: prctl: add ability to set new mm_struct::exe_file
When we do restore we would like to have a way to setup a former mm_struct::exe_file so that /proc/pid/exe would point to the original executable file a process had at checkpoint time. For this the PR_SET_MM_EXE_FILE code is introduced. This option takes a file descriptor which will be set as a source for new /proc/$pid/exe symlink. Note it allows to change /proc/$pid/exe if there are no VM_EXECUTABLE vmas present for current process, simply because this feature is a special to C/R and mm::num_exe_file_vmas become meaningless after that. To minimize the amount of transition the /proc/pid/exe symlink might have, this feature is implemented in one-shot manner. Thus once changed the symlink can't be changed again. This should help sysadmins to monitor the symlinks over all process running in a system. In particular one could make a snapshot of processes and ring alarm if there unexpected changes of /proc/pid/exe's in a system. Note -- this feature is available iif CONFIG_CHECKPOINT_RESTORE is set and the caller must have CAP_SYS_RESOURCE capability granted, otherwise the request to change symlink will be rejected. Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Kees Cook <keescook@chromium.org> Cc: Tejun Heo <tj@kernel.org> Cc: Matt Helsley <matthltc@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sys.c56
1 files changed, 56 insertions, 0 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index 8b544972e46e..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
36#include <linux/personality.h> 36#include <linux/personality.h>
37#include <linux/ptrace.h> 37#include <linux/ptrace.h>
38#include <linux/fs_struct.h> 38#include <linux/fs_struct.h>
39#include <linux/file.h>
40#include <linux/mount.h>
39#include <linux/gfp.h> 41#include <linux/gfp.h>
40#include <linux/syscore_ops.h> 42#include <linux/syscore_ops.h>
41#include <linux/version.h> 43#include <linux/version.h>
@@ -1792,6 +1794,57 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma,
1792 (vma->vm_flags & banned); 1794 (vma->vm_flags & banned);
1793} 1795}
1794 1796
1797static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1798{
1799 struct file *exe_file;
1800 struct dentry *dentry;
1801 int err;
1802
1803 /*
1804 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
1805 * remain. So perform a quick test first.
1806 */
1807 if (mm->num_exe_file_vmas)
1808 return -EBUSY;
1809
1810 exe_file = fget(fd);
1811 if (!exe_file)
1812 return -EBADF;
1813
1814 dentry = exe_file->f_path.dentry;
1815
1816 /*
1817 * Because the original mm->exe_file points to executable file, make
1818 * sure that this one is executable as well, to avoid breaking an
1819 * overall picture.
1820 */
1821 err = -EACCES;
1822 if (!S_ISREG(dentry->d_inode->i_mode) ||
1823 exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
1824 goto exit;
1825
1826 err = inode_permission(dentry->d_inode, MAY_EXEC);
1827 if (err)
1828 goto exit;
1829
1830 /*
1831 * The symlink can be changed only once, just to disallow arbitrary
1832 * transitions malicious software might bring in. This means one
1833 * could make a snapshot over all processes running and monitor
1834 * /proc/pid/exe changes to notice unusual activity if needed.
1835 */
1836 down_write(&mm->mmap_sem);
1837 if (likely(!mm->exe_file))
1838 set_mm_exe_file(mm, exe_file);
1839 else
1840 err = -EBUSY;
1841 up_write(&mm->mmap_sem);
1842
1843exit:
1844 fput(exe_file);
1845 return err;
1846}
1847
1795static int prctl_set_mm(int opt, unsigned long addr, 1848static int prctl_set_mm(int opt, unsigned long addr,
1796 unsigned long arg4, unsigned long arg5) 1849 unsigned long arg4, unsigned long arg5)
1797{ 1850{
@@ -1806,6 +1859,9 @@ static int prctl_set_mm(int opt, unsigned long addr,
1806 if (!capable(CAP_SYS_RESOURCE)) 1859 if (!capable(CAP_SYS_RESOURCE))
1807 return -EPERM; 1860 return -EPERM;
1808 1861
1862 if (opt == PR_SET_MM_EXE_FILE)
1863 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
1864
1809 if (addr >= TASK_SIZE) 1865 if (addr >= TASK_SIZE)
1810 return -EINVAL; 1866 return -EINVAL;
1811 1867