aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Cyrill Gorcunov <gorcunov@openvz.org> 2012-05-31 19:26:45 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-31 20:49:32 -0400
commit: fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 (patch)
tree: 3ddb0013465d7cd09ef6bc2fab18cb59055a309b
parent: 5b172087f99189416d5f47fd7ab5e6fb762a9ba3 (diff)
c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
During checkpoint we dump the whole process memory to a file, and the dump includes the process stack memory. But besides the stack data itself, the stack carries additional parameters such as command line arguments, environment data and the auxiliary vector.

So during the restore procedure, once we've restored the stack data itself, we need to set up mm_struct::arg_start/end and env_start/end so that the restored process is able to find the command line arguments and environment data it had at checkpoint time. The same applies to the auxiliary vector.

For this reason additional PR_SET_MM_(ARG_START | ARG_END | ENV_START | ENV_END | AUXV) codes are introduced.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/prctl.h 5
-rw-r--r-- kernel/sys.c 134
2 files changed, 88 insertions, 51 deletions
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 78b76e24cc7e..18d84c4b42d8 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -113,6 +113,11 @@
113# define PR_SET_MM_START_STACK 5 113# define PR_SET_MM_START_STACK 5
114# define PR_SET_MM_START_BRK 6 114# define PR_SET_MM_START_BRK 6
115# define PR_SET_MM_BRK 7 115# define PR_SET_MM_BRK 7
116# define PR_SET_MM_ARG_START 8
117# define PR_SET_MM_ARG_END 9
118# define PR_SET_MM_ENV_START 10
119# define PR_SET_MM_ENV_END 11
120# define PR_SET_MM_AUXV 12
116 121
117/* 122/*
118 * Set specific pid that is allowed to ptrace the current task. 123 * Set specific pid that is allowed to ptrace the current task.
diff --git a/kernel/sys.c b/kernel/sys.c
index 6e81aa7e4688..8b544972e46e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1784,17 +1784,23 @@ SYSCALL_DEFINE1(umask, int, mask)
1784} 1784}
1785 1785
1786#ifdef CONFIG_CHECKPOINT_RESTORE 1786#ifdef CONFIG_CHECKPOINT_RESTORE
1787static bool vma_flags_mismatch(struct vm_area_struct *vma,
1788 unsigned long required,
1789 unsigned long banned)
1790{
1791 return (vma->vm_flags & required) != required ||
1792 (vma->vm_flags & banned);
1793}
1794
1787static int prctl_set_mm(int opt, unsigned long addr, 1795static int prctl_set_mm(int opt, unsigned long addr,
1788 unsigned long arg4, unsigned long arg5) 1796 unsigned long arg4, unsigned long arg5)
1789{ 1797{
1790 unsigned long rlim = rlimit(RLIMIT_DATA); 1798 unsigned long rlim = rlimit(RLIMIT_DATA);
1791 unsigned long vm_req_flags;
1792 unsigned long vm_bad_flags;
1793 struct vm_area_struct *vma;
1794 int error = 0;
1795 struct mm_struct *mm = current->mm; 1799 struct mm_struct *mm = current->mm;
1800 struct vm_area_struct *vma;
1801 int error;
1796 1802
1797 if (arg4 | arg5) 1803 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
1798 return -EINVAL; 1804 return -EINVAL;
1799 1805
1800 if (!capable(CAP_SYS_RESOURCE)) 1806 if (!capable(CAP_SYS_RESOURCE))
@@ -1803,58 +1809,23 @@ static int prctl_set_mm(int opt, unsigned long addr,
1803 if (addr >= TASK_SIZE) 1809 if (addr >= TASK_SIZE)
1804 return -EINVAL; 1810 return -EINVAL;
1805 1811
1812 error = -EINVAL;
1813
1806 down_read(&mm->mmap_sem); 1814 down_read(&mm->mmap_sem);
1807 vma = find_vma(mm, addr); 1815 vma = find_vma(mm, addr);
1808 1816
1809 if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
1810 /* It must be existing VMA */
1811 if (!vma || vma->vm_start > addr)
1812 goto out;
1813 }
1814
1815 error = -EINVAL;
1816 switch (opt) { 1817 switch (opt) {
1817 case PR_SET_MM_START_CODE: 1818 case PR_SET_MM_START_CODE:
1819 mm->start_code = addr;
1820 break;
1818 case PR_SET_MM_END_CODE: 1821 case PR_SET_MM_END_CODE:
1819 vm_req_flags = VM_READ | VM_EXEC; 1822 mm->end_code = addr;
1820 vm_bad_flags = VM_WRITE | VM_MAYSHARE;
1821
1822 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1823 (vma->vm_flags & vm_bad_flags))
1824 goto out;
1825
1826 if (opt == PR_SET_MM_START_CODE)
1827 mm->start_code = addr;
1828 else
1829 mm->end_code = addr;
1830 break; 1823 break;
1831
1832 case PR_SET_MM_START_DATA: 1824 case PR_SET_MM_START_DATA:
1833 case PR_SET_MM_END_DATA: 1825 mm->start_data = addr;
1834 vm_req_flags = VM_READ | VM_WRITE;
1835 vm_bad_flags = VM_EXEC | VM_MAYSHARE;
1836
1837 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1838 (vma->vm_flags & vm_bad_flags))
1839 goto out;
1840
1841 if (opt == PR_SET_MM_START_DATA)
1842 mm->start_data = addr;
1843 else
1844 mm->end_data = addr;
1845 break; 1826 break;
1846 1827 case PR_SET_MM_END_DATA:
1847 case PR_SET_MM_START_STACK: 1828 mm->end_data = addr;
1848
1849#ifdef CONFIG_STACK_GROWSUP
1850 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
1851#else
1852 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
1853#endif
1854 if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
1855 goto out;
1856
1857 mm->start_stack = addr;
1858 break; 1829 break;
1859 1830
1860 case PR_SET_MM_START_BRK: 1831 case PR_SET_MM_START_BRK:
@@ -1881,16 +1852,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
1881 mm->brk = addr; 1852 mm->brk = addr;
1882 break; 1853 break;
1883 1854
1855 /*
1856 * If command line arguments and environment
1857 * are placed somewhere else on stack, we can
1858 * set them up here, ARG_START/END to setup
1859 * command line arguments and ENV_START/END
1860 * for environment.
1861 */
1862 case PR_SET_MM_START_STACK:
1863 case PR_SET_MM_ARG_START:
1864 case PR_SET_MM_ARG_END:
1865 case PR_SET_MM_ENV_START:
1866 case PR_SET_MM_ENV_END:
1867 if (!vma) {
1868 error = -EFAULT;
1869 goto out;
1870 }
1871#ifdef CONFIG_STACK_GROWSUP
1872 if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
1873#else
1874 if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
1875#endif
1876 goto out;
1877 if (opt == PR_SET_MM_START_STACK)
1878 mm->start_stack = addr;
1879 else if (opt == PR_SET_MM_ARG_START)
1880 mm->arg_start = addr;
1881 else if (opt == PR_SET_MM_ARG_END)
1882 mm->arg_end = addr;
1883 else if (opt == PR_SET_MM_ENV_START)
1884 mm->env_start = addr;
1885 else if (opt == PR_SET_MM_ENV_END)
1886 mm->env_end = addr;
1887 break;
1888
1889 /*
1890 * This doesn't move auxiliary vector itself
1891 * since it's pinned to mm_struct, but allow
1892 * to fill vector with new values. It's up
1893 * to a caller to provide sane values here
1894 * otherwise user space tools which use this
1895 * vector might be unhappy.
1896 */
1897 case PR_SET_MM_AUXV: {
1898 unsigned long user_auxv[AT_VECTOR_SIZE];
1899
1900 if (arg4 > sizeof(user_auxv))
1901 goto out;
1902 up_read(&mm->mmap_sem);
1903
1904 if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
1905 return -EFAULT;
1906
1907 /* Make sure the last entry is always AT_NULL */
1908 user_auxv[AT_VECTOR_SIZE - 2] = 0;
1909 user_auxv[AT_VECTOR_SIZE - 1] = 0;
1910
1911 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
1912
1913 task_lock(current);
1914 memcpy(mm->saved_auxv, user_auxv, arg4);
1915 task_unlock(current);
1916
1917 return 0;
1918 }
1884 default: 1919 default:
1885 error = -EINVAL;
1886 goto out; 1920 goto out;
1887 } 1921 }
1888 1922
1889 error = 0; 1923 error = 0;
1890
1891out: 1924out:
1892 up_read(&mm->mmap_sem); 1925 up_read(&mm->mmap_sem);
1893
1894 return error; 1926 return error;
1895} 1927}
1896#else /* CONFIG_CHECKPOINT_RESTORE */ 1928#else /* CONFIG_CHECKPOINT_RESTORE */