aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/uapi/linux/prctl.h27
-rw-r--r--kernel/sys.c190
2 files changed, 216 insertions, 1 deletions
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 58afc04c107e..513df75d0fc9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -1,6 +1,8 @@
1#ifndef _LINUX_PRCTL_H 1#ifndef _LINUX_PRCTL_H
2#define _LINUX_PRCTL_H 2#define _LINUX_PRCTL_H
3 3
4#include <linux/types.h>
5
4/* Values to pass as first argument to prctl() */ 6/* Values to pass as first argument to prctl() */
5 7
6#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ 8#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */
@@ -119,6 +121,31 @@
119# define PR_SET_MM_ENV_END 11 121# define PR_SET_MM_ENV_END 11
120# define PR_SET_MM_AUXV 12 122# define PR_SET_MM_AUXV 12
121# define PR_SET_MM_EXE_FILE 13 123# define PR_SET_MM_EXE_FILE 13
124# define PR_SET_MM_MAP 14
125# define PR_SET_MM_MAP_SIZE 15
126
127/*
128 * This structure provides a new memory descriptor
129 * map which mostly modifies /proc/pid/stat[m]
130 * output for a task. This is mostly done for the
131 * sake of checkpoint/restore functionality.
132 */
133struct prctl_mm_map {
134 __u64 start_code; /* code section bounds */
135 __u64 end_code;
136 __u64 start_data; /* data section bounds */
137 __u64 end_data;
138 __u64 start_brk; /* heap for brk() syscall */
139 __u64 brk;
140 __u64 start_stack; /* stack starts at */
141 __u64 arg_start; /* command line arguments bounds */
142 __u64 arg_end;
143 __u64 env_start; /* environment variables bounds */
144 __u64 env_end;
145 __u64 *auxv; /* user-space pointer to the auxiliary vector */
146 __u32 auxv_size; /* vector size in bytes; 0 means no auxv update */
147 __u32 exe_fd; /* fd for the /proc/$pid/exe link; (u32)-1 leaves it unchanged */
148};
122 149
123/* 150/*
124 * Set specific pid that is allowed to ptrace the current task. 151 * Set specific pid that is allowed to ptrace the current task.
diff --git a/kernel/sys.c b/kernel/sys.c
index 14222a1699c0..f7030b060018 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1687,6 +1687,187 @@ exit:
1687 return err; 1687 return err;
1688} 1688}
1689 1689
1690#ifdef CONFIG_CHECKPOINT_RESTORE
1691/*
1692 * WARNING: we don't require any capability here so be very careful
1693 * in what is allowed for modification from userspace.
1694 */
1695static int validate_prctl_map(struct prctl_mm_map *prctl_map)
1696{
1697 unsigned long mmap_max_addr = TASK_SIZE;
1698 struct mm_struct *mm = current->mm;
1699 int error = -EINVAL, i;
1700
1701 static const unsigned char offsets[] = {
1702 offsetof(struct prctl_mm_map, start_code),
1703 offsetof(struct prctl_mm_map, end_code),
1704 offsetof(struct prctl_mm_map, start_data),
1705 offsetof(struct prctl_mm_map, end_data),
1706 offsetof(struct prctl_mm_map, start_brk),
1707 offsetof(struct prctl_mm_map, brk),
1708 offsetof(struct prctl_mm_map, start_stack),
1709 offsetof(struct prctl_mm_map, arg_start),
1710 offsetof(struct prctl_mm_map, arg_end),
1711 offsetof(struct prctl_mm_map, env_start),
1712 offsetof(struct prctl_mm_map, env_end),
1713 };
1714
1715 /*
1716 * Make sure the members are not somewhere outside
1717 * of allowed address space.
1718 */
1719 for (i = 0; i < ARRAY_SIZE(offsets); i++) {
1720 u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
1721
1722 if ((unsigned long)val >= mmap_max_addr ||
1723 (unsigned long)val < mmap_min_addr)
1724 goto out;
1725 }
1726
1727 /*
1728 * Make sure the pairs are ordered.
1729 */
1730#define __prctl_check_order(__m1, __op, __m2) \
1731 ((unsigned long)prctl_map->__m1 __op \
1732 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
1733 error = __prctl_check_order(start_code, <, end_code);
1734 error |= __prctl_check_order(start_data, <, end_data);
1735 error |= __prctl_check_order(start_brk, <=, brk);
1736 error |= __prctl_check_order(arg_start, <=, arg_end);
1737 error |= __prctl_check_order(env_start, <=, env_end);
1738 if (error)
1739 goto out;
1740#undef __prctl_check_order
1741
1742 error = -EINVAL;
1743
1744 /*
1745 * @brk should be after @end_data in traditional maps.
1746 */
1747 if (prctl_map->start_brk <= prctl_map->end_data ||
1748 prctl_map->brk <= prctl_map->end_data)
1749 goto out;
1750
1751 /*
1752 * Neither we should allow to override limits if they set.
1753 */
1754 if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
1755 prctl_map->start_brk, prctl_map->end_data,
1756 prctl_map->start_data))
1757 goto out;
1758
1759 /*
1760 * Someone is trying to cheat the auxv vector.
1761 */
1762 if (prctl_map->auxv_size) {
1763 if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
1764 goto out;
1765 }
1766
1767 /*
1768 * Finally, make sure the caller has the rights to
1769 * change /proc/pid/exe link: only local root should
1770 * be allowed to.
1771 */
1772 if (prctl_map->exe_fd != (u32)-1) {
1773 struct user_namespace *ns = current_user_ns();
1774 const struct cred *cred = current_cred();
1775
1776 if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
1777 !gid_eq(cred->gid, make_kgid(ns, 0)))
1778 goto out;
1779 }
1780
1781 error = 0;
1782out:
1783 return error;
1784}
1785
1786static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
1787{
1788 struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
1789 unsigned long user_auxv[AT_VECTOR_SIZE];
1790 struct mm_struct *mm = current->mm;
1791 int error;
1792
1793 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
1794 BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);
1795
1796 if (opt == PR_SET_MM_MAP_SIZE)
1797 return put_user((unsigned int)sizeof(prctl_map),
1798 (unsigned int __user *)addr);
1799
1800 if (data_size != sizeof(prctl_map))
1801 return -EINVAL;
1802
1803 if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
1804 return -EFAULT;
1805
1806 error = validate_prctl_map(&prctl_map);
1807 if (error)
1808 return error;
1809
1810 if (prctl_map.auxv_size) {
1811 memset(user_auxv, 0, sizeof(user_auxv));
1812 if (copy_from_user(user_auxv,
1813 (const void __user *)prctl_map.auxv,
1814 prctl_map.auxv_size))
1815 return -EFAULT;
1816
1817 /* Last entry must be AT_NULL as specification requires */
1818 user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
1819 user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
1820 }
1821
1822 down_write(&mm->mmap_sem);
1823 if (prctl_map.exe_fd != (u32)-1)
1824 error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
1825 downgrade_write(&mm->mmap_sem);
1826 if (error)
1827 goto out;
1828
1829 /*
1830 * We don't validate if these members are pointing to
1831 * real present VMAs because application may have correspond
1832 * VMAs already unmapped and kernel uses these members for statistics
1833 * output in procfs mostly, except
1834 *
1835 * - @start_brk/@brk which are used in do_brk but kernel lookups
1836 * for VMAs when updating these memvers so anything wrong written
1837 * here cause kernel to swear at userspace program but won't lead
1838 * to any problem in kernel itself
1839 */
1840
1841 mm->start_code = prctl_map.start_code;
1842 mm->end_code = prctl_map.end_code;
1843 mm->start_data = prctl_map.start_data;
1844 mm->end_data = prctl_map.end_data;
1845 mm->start_brk = prctl_map.start_brk;
1846 mm->brk = prctl_map.brk;
1847 mm->start_stack = prctl_map.start_stack;
1848 mm->arg_start = prctl_map.arg_start;
1849 mm->arg_end = prctl_map.arg_end;
1850 mm->env_start = prctl_map.env_start;
1851 mm->env_end = prctl_map.env_end;
1852
1853 /*
1854 * Note this update of @saved_auxv is lockless thus
1855 * if someone reads this member in procfs while we're
1856 * updating -- it may get partly updated results. It's
1857 * known and acceptable trade off: we leave it as is to
1858 * not introduce additional locks here making the kernel
1859 * more complex.
1860 */
1861 if (prctl_map.auxv_size)
1862 memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
1863
1864 error = 0;
1865out:
1866 up_read(&mm->mmap_sem);
1867 return error;
1868}
1869#endif /* CONFIG_CHECKPOINT_RESTORE */
1870
1690static int prctl_set_mm(int opt, unsigned long addr, 1871static int prctl_set_mm(int opt, unsigned long addr,
1691 unsigned long arg4, unsigned long arg5) 1872 unsigned long arg4, unsigned long arg5)
1692{ 1873{
@@ -1694,9 +1875,16 @@ static int prctl_set_mm(int opt, unsigned long addr,
1694 struct vm_area_struct *vma; 1875 struct vm_area_struct *vma;
1695 int error; 1876 int error;
1696 1877
1697 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) 1878 if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
1879 opt != PR_SET_MM_MAP &&
1880 opt != PR_SET_MM_MAP_SIZE)))
1698 return -EINVAL; 1881 return -EINVAL;
1699 1882
1883#ifdef CONFIG_CHECKPOINT_RESTORE
1884 if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
1885 return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
1886#endif
1887
1700 if (!capable(CAP_SYS_RESOURCE)) 1888 if (!capable(CAP_SYS_RESOURCE))
1701 return -EPERM; 1889 return -EPERM;
1702 1890