diff options
author | Cyrill Gorcunov <gorcunov@openvz.org> | 2012-05-31 19:26:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-31 20:49:32 -0400 |
commit | b32dfe377102ce668775f8b6b1461f7ad428f8b6 (patch) | |
tree | f89be6bd34eb757c471f3ca506e0ce92224f9bc5 /kernel | |
parent | fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 (diff) |
c/r: prctl: add ability to set new mm_struct::exe_file
When we do a restore, we would like to have a way to set up the former
mm_struct::exe_file so that /proc/pid/exe points to the original
executable file the process had at checkpoint time.
For this, the PR_SET_MM_EXE_FILE code is introduced. This option takes a
file descriptor which will be set as the source for the new /proc/$pid/exe
symlink.
Note that it allows changing /proc/$pid/exe only if there are no VM_EXECUTABLE
vmas present for the current process, simply because this feature is specific
to C/R and mm::num_exe_file_vmas becomes meaningless after that.
To minimize the number of transitions the /proc/pid/exe symlink might have,
this feature is implemented in a one-shot manner. Thus, once changed, the
symlink can't be changed again. This should help sysadmins monitor the
symlinks of all processes running in a system.
In particular, one could make a snapshot of processes and raise an alarm if
there are unexpected changes of /proc/pid/exe symlinks in a system.
Note -- this feature is available iff CONFIG_CHECKPOINT_RESTORE is set and
the caller must have the CAP_SYS_RESOURCE capability granted, otherwise the
request to change the symlink will be rejected.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sys.c | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index 8b544972e46e..9ff89cb9657a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/personality.h> | 36 | #include <linux/personality.h> |
37 | #include <linux/ptrace.h> | 37 | #include <linux/ptrace.h> |
38 | #include <linux/fs_struct.h> | 38 | #include <linux/fs_struct.h> |
39 | #include <linux/file.h> | ||
40 | #include <linux/mount.h> | ||
39 | #include <linux/gfp.h> | 41 | #include <linux/gfp.h> |
40 | #include <linux/syscore_ops.h> | 42 | #include <linux/syscore_ops.h> |
41 | #include <linux/version.h> | 43 | #include <linux/version.h> |
@@ -1792,6 +1794,57 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma, | |||
1792 | (vma->vm_flags & banned); | 1794 | (vma->vm_flags & banned); |
1793 | } | 1795 | } |
1794 | 1796 | ||
1797 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | ||
1798 | { | ||
1799 | struct file *exe_file; | ||
1800 | struct dentry *dentry; | ||
1801 | int err; | ||
1802 | |||
1803 | /* | ||
1804 | * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's | ||
1805 | * remain. So perform a quick test first. | ||
1806 | */ | ||
1807 | if (mm->num_exe_file_vmas) | ||
1808 | return -EBUSY; | ||
1809 | |||
1810 | exe_file = fget(fd); | ||
1811 | if (!exe_file) | ||
1812 | return -EBADF; | ||
1813 | |||
1814 | dentry = exe_file->f_path.dentry; | ||
1815 | |||
1816 | /* | ||
1817 | * Because the original mm->exe_file points to executable file, make | ||
1818 | * sure that this one is executable as well, to avoid breaking an | ||
1819 | * overall picture. | ||
1820 | */ | ||
1821 | err = -EACCES; | ||
1822 | if (!S_ISREG(dentry->d_inode->i_mode) || | ||
1823 | exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) | ||
1824 | goto exit; | ||
1825 | |||
1826 | err = inode_permission(dentry->d_inode, MAY_EXEC); | ||
1827 | if (err) | ||
1828 | goto exit; | ||
1829 | |||
1830 | /* | ||
1831 | * The symlink can be changed only once, just to disallow arbitrary | ||
1832 | * transitions malicious software might bring in. This means one | ||
1833 | * could make a snapshot over all processes running and monitor | ||
1834 | * /proc/pid/exe changes to notice unusual activity if needed. | ||
1835 | */ | ||
1836 | down_write(&mm->mmap_sem); | ||
1837 | if (likely(!mm->exe_file)) | ||
1838 | set_mm_exe_file(mm, exe_file); | ||
1839 | else | ||
1840 | err = -EBUSY; | ||
1841 | up_write(&mm->mmap_sem); | ||
1842 | |||
1843 | exit: | ||
1844 | fput(exe_file); | ||
1845 | return err; | ||
1846 | } | ||
1847 | |||
1795 | static int prctl_set_mm(int opt, unsigned long addr, | 1848 | static int prctl_set_mm(int opt, unsigned long addr, |
1796 | unsigned long arg4, unsigned long arg5) | 1849 | unsigned long arg4, unsigned long arg5) |
1797 | { | 1850 | { |
@@ -1806,6 +1859,9 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1806 | if (!capable(CAP_SYS_RESOURCE)) | 1859 | if (!capable(CAP_SYS_RESOURCE)) |
1807 | return -EPERM; | 1860 | return -EPERM; |
1808 | 1861 | ||
1862 | if (opt == PR_SET_MM_EXE_FILE) | ||
1863 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); | ||
1864 | |||
1809 | if (addr >= TASK_SIZE) | 1865 | if (addr >= TASK_SIZE) |
1810 | return -EINVAL; | 1866 | return -EINVAL; |
1811 | 1867 | ||