aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@openvz.org>2012-01-12 20:20:55 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 23:13:13 -0500
commit028ee4be34a09a6d48bdf30ab991ae933a7bc036 (patch)
tree455783226ceb65aee100f86afed529d4bbde1c8b /kernel/sys.c
parentb3f7f573a20081910e34e99cbc91831f4f02f1ff (diff)
c/r: prctl: add PR_SET_MM codes to set up mm_struct entries
When we restore a task we need to set up text, data and data heap sizes from userspace to the values a task had at checkpoint time. This patch adds auxilary prctl codes for that. While most of them have a statistical nature (their values are involved into calculation of /proc/<pid>/statm output) the start_brk and brk values are used to compute an allowed size of program data segment expansion. Which means an arbitrary changes of this values might be dangerous operation. So to restrict access the following requirements applied to prctl calls: - The process has to have CAP_SYS_ADMIN capability granted. - For all opcodes except start_brk/brk members an appropriate VMA area must exist and should fit certain VMA flags, such as: - code segment must be executable but not writable; - data segment must not be executable. start_brk/brk values must not intersect with data segment and must not exceed RLIMIT_DATA resource limit. Still the main guard is CAP_SYS_ADMIN capability check. Note the kernel should be compiled with CONFIG_CHECKPOINT_RESTORE support otherwise these prctl calls will return -EINVAL. [akpm@linux-foundation.org: cache current->mm in a local, saving 200 bytes text] Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Reviewed-by: Kees Cook <keescook@chromium.org> Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Vagin <avagin@openvz.org> Cc: Serge Hallyn <serge.hallyn@canonical.com> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Vasiliy Kulikov <segoon@openwall.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c121
1 files changed, 121 insertions, 0 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155bf3f8..40701538fbd1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask)
1692 return mask; 1692 return mask;
1693} 1693}
1694 1694
1695#ifdef CONFIG_CHECKPOINT_RESTORE
1696static int prctl_set_mm(int opt, unsigned long addr,
1697 unsigned long arg4, unsigned long arg5)
1698{
1699 unsigned long rlim = rlimit(RLIMIT_DATA);
1700 unsigned long vm_req_flags;
1701 unsigned long vm_bad_flags;
1702 struct vm_area_struct *vma;
1703 int error = 0;
1704 struct mm_struct *mm = current->mm;
1705
1706 if (arg4 | arg5)
1707 return -EINVAL;
1708
1709 if (!capable(CAP_SYS_ADMIN))
1710 return -EPERM;
1711
1712 if (addr >= TASK_SIZE)
1713 return -EINVAL;
1714
1715 down_read(&mm->mmap_sem);
1716 vma = find_vma(mm, addr);
1717
1718 if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
1719 /* It must be existing VMA */
1720 if (!vma || vma->vm_start > addr)
1721 goto out;
1722 }
1723
1724 error = -EINVAL;
1725 switch (opt) {
1726 case PR_SET_MM_START_CODE:
1727 case PR_SET_MM_END_CODE:
1728 vm_req_flags = VM_READ | VM_EXEC;
1729 vm_bad_flags = VM_WRITE | VM_MAYSHARE;
1730
1731 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1732 (vma->vm_flags & vm_bad_flags))
1733 goto out;
1734
1735 if (opt == PR_SET_MM_START_CODE)
1736 mm->start_code = addr;
1737 else
1738 mm->end_code = addr;
1739 break;
1740
1741 case PR_SET_MM_START_DATA:
1742 case PR_SET_MM_END_DATA:
1743 vm_req_flags = VM_READ | VM_WRITE;
1744 vm_bad_flags = VM_EXEC | VM_MAYSHARE;
1745
1746 if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
1747 (vma->vm_flags & vm_bad_flags))
1748 goto out;
1749
1750 if (opt == PR_SET_MM_START_DATA)
1751 mm->start_data = addr;
1752 else
1753 mm->end_data = addr;
1754 break;
1755
1756 case PR_SET_MM_START_STACK:
1757
1758#ifdef CONFIG_STACK_GROWSUP
1759 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
1760#else
1761 vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
1762#endif
1763 if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
1764 goto out;
1765
1766 mm->start_stack = addr;
1767 break;
1768
1769 case PR_SET_MM_START_BRK:
1770 if (addr <= mm->end_data)
1771 goto out;
1772
1773 if (rlim < RLIM_INFINITY &&
1774 (mm->brk - addr) +
1775 (mm->end_data - mm->start_data) > rlim)
1776 goto out;
1777
1778 mm->start_brk = addr;
1779 break;
1780
1781 case PR_SET_MM_BRK:
1782 if (addr <= mm->end_data)
1783 goto out;
1784
1785 if (rlim < RLIM_INFINITY &&
1786 (addr - mm->start_brk) +
1787 (mm->end_data - mm->start_data) > rlim)
1788 goto out;
1789
1790 mm->brk = addr;
1791 break;
1792
1793 default:
1794 error = -EINVAL;
1795 goto out;
1796 }
1797
1798 error = 0;
1799
1800out:
1801 up_read(&mm->mmap_sem);
1802
1803 return error;
1804}
1805#else /* CONFIG_CHECKPOINT_RESTORE */
1806static int prctl_set_mm(int opt, unsigned long addr,
1807 unsigned long arg4, unsigned long arg5)
1808{
1809 return -EINVAL;
1810}
1811#endif
1812
1695SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 1813SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1696 unsigned long, arg4, unsigned long, arg5) 1814 unsigned long, arg4, unsigned long, arg5)
1697{ 1815{
@@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1841 else 1959 else
1842 error = PR_MCE_KILL_DEFAULT; 1960 error = PR_MCE_KILL_DEFAULT;
1843 break; 1961 break;
1962 case PR_SET_MM:
1963 error = prctl_set_mm(arg2, arg3, arg4, arg5);
1964 break;
1844 default: 1965 default:
1845 error = -EINVAL; 1966 error = -EINVAL;
1846 break; 1967 break;