diff options
Diffstat (limited to 'kernel/sys.c')
-rw-r--r-- | kernel/sys.c | 190 |
1 files changed, 189 insertions, 1 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index 14222a1699c0..f7030b060018 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1687,6 +1687,187 @@ exit: | |||
1687 | return err; | 1687 | return err; |
1688 | } | 1688 | } |
1689 | 1689 | ||
1690 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
1691 | /* | ||
1692 | * WARNING: we don't require any capability here so be very careful | ||
1693 | * in what is allowed for modification from userspace. | ||
1694 | */ | ||
1695 | static int validate_prctl_map(struct prctl_mm_map *prctl_map) | ||
1696 | { | ||
1697 | unsigned long mmap_max_addr = TASK_SIZE; | ||
1698 | struct mm_struct *mm = current->mm; | ||
1699 | int error = -EINVAL, i; | ||
1700 | |||
1701 | static const unsigned char offsets[] = { | ||
1702 | offsetof(struct prctl_mm_map, start_code), | ||
1703 | offsetof(struct prctl_mm_map, end_code), | ||
1704 | offsetof(struct prctl_mm_map, start_data), | ||
1705 | offsetof(struct prctl_mm_map, end_data), | ||
1706 | offsetof(struct prctl_mm_map, start_brk), | ||
1707 | offsetof(struct prctl_mm_map, brk), | ||
1708 | offsetof(struct prctl_mm_map, start_stack), | ||
1709 | offsetof(struct prctl_mm_map, arg_start), | ||
1710 | offsetof(struct prctl_mm_map, arg_end), | ||
1711 | offsetof(struct prctl_mm_map, env_start), | ||
1712 | offsetof(struct prctl_mm_map, env_end), | ||
1713 | }; | ||
1714 | |||
1715 | /* | ||
1716 | * Make sure the members are not somewhere outside | ||
1717 | * of allowed address space. | ||
1718 | */ | ||
1719 | for (i = 0; i < ARRAY_SIZE(offsets); i++) { | ||
1720 | u64 val = *(u64 *)((char *)prctl_map + offsets[i]); | ||
1721 | |||
1722 | if ((unsigned long)val >= mmap_max_addr || | ||
1723 | (unsigned long)val < mmap_min_addr) | ||
1724 | goto out; | ||
1725 | } | ||
1726 | |||
1727 | /* | ||
1728 | * Make sure the pairs are ordered. | ||
1729 | */ | ||
1730 | #define __prctl_check_order(__m1, __op, __m2) \ | ||
1731 | ((unsigned long)prctl_map->__m1 __op \ | ||
1732 | (unsigned long)prctl_map->__m2) ? 0 : -EINVAL | ||
1733 | error = __prctl_check_order(start_code, <, end_code); | ||
1734 | error |= __prctl_check_order(start_data, <, end_data); | ||
1735 | error |= __prctl_check_order(start_brk, <=, brk); | ||
1736 | error |= __prctl_check_order(arg_start, <=, arg_end); | ||
1737 | error |= __prctl_check_order(env_start, <=, env_end); | ||
1738 | if (error) | ||
1739 | goto out; | ||
1740 | #undef __prctl_check_order | ||
1741 | |||
1742 | error = -EINVAL; | ||
1743 | |||
1744 | /* | ||
1745 | * @brk should be after @end_data in traditional maps. | ||
1746 | */ | ||
1747 | if (prctl_map->start_brk <= prctl_map->end_data || | ||
1748 | prctl_map->brk <= prctl_map->end_data) | ||
1749 | goto out; | ||
1750 | |||
1751 | /* | ||
1752 | * Neither we should allow to override limits if they set. | ||
1753 | */ | ||
1754 | if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk, | ||
1755 | prctl_map->start_brk, prctl_map->end_data, | ||
1756 | prctl_map->start_data)) | ||
1757 | goto out; | ||
1758 | |||
1759 | /* | ||
1760 | * Someone is trying to cheat the auxv vector. | ||
1761 | */ | ||
1762 | if (prctl_map->auxv_size) { | ||
1763 | if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv)) | ||
1764 | goto out; | ||
1765 | } | ||
1766 | |||
1767 | /* | ||
1768 | * Finally, make sure the caller has the rights to | ||
1769 | * change /proc/pid/exe link: only local root should | ||
1770 | * be allowed to. | ||
1771 | */ | ||
1772 | if (prctl_map->exe_fd != (u32)-1) { | ||
1773 | struct user_namespace *ns = current_user_ns(); | ||
1774 | const struct cred *cred = current_cred(); | ||
1775 | |||
1776 | if (!uid_eq(cred->uid, make_kuid(ns, 0)) || | ||
1777 | !gid_eq(cred->gid, make_kgid(ns, 0))) | ||
1778 | goto out; | ||
1779 | } | ||
1780 | |||
1781 | error = 0; | ||
1782 | out: | ||
1783 | return error; | ||
1784 | } | ||
1785 | |||
1786 | static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) | ||
1787 | { | ||
1788 | struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; | ||
1789 | unsigned long user_auxv[AT_VECTOR_SIZE]; | ||
1790 | struct mm_struct *mm = current->mm; | ||
1791 | int error; | ||
1792 | |||
1793 | BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); | ||
1794 | BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); | ||
1795 | |||
1796 | if (opt == PR_SET_MM_MAP_SIZE) | ||
1797 | return put_user((unsigned int)sizeof(prctl_map), | ||
1798 | (unsigned int __user *)addr); | ||
1799 | |||
1800 | if (data_size != sizeof(prctl_map)) | ||
1801 | return -EINVAL; | ||
1802 | |||
1803 | if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) | ||
1804 | return -EFAULT; | ||
1805 | |||
1806 | error = validate_prctl_map(&prctl_map); | ||
1807 | if (error) | ||
1808 | return error; | ||
1809 | |||
1810 | if (prctl_map.auxv_size) { | ||
1811 | memset(user_auxv, 0, sizeof(user_auxv)); | ||
1812 | if (copy_from_user(user_auxv, | ||
1813 | (const void __user *)prctl_map.auxv, | ||
1814 | prctl_map.auxv_size)) | ||
1815 | return -EFAULT; | ||
1816 | |||
1817 | /* Last entry must be AT_NULL as specification requires */ | ||
1818 | user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL; | ||
1819 | user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; | ||
1820 | } | ||
1821 | |||
1822 | down_write(&mm->mmap_sem); | ||
1823 | if (prctl_map.exe_fd != (u32)-1) | ||
1824 | error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); | ||
1825 | downgrade_write(&mm->mmap_sem); | ||
1826 | if (error) | ||
1827 | goto out; | ||
1828 | |||
1829 | /* | ||
1830 | * We don't validate if these members are pointing to | ||
1831 | * real present VMAs because application may have correspond | ||
1832 | * VMAs already unmapped and kernel uses these members for statistics | ||
1833 | * output in procfs mostly, except | ||
1834 | * | ||
1835 | * - @start_brk/@brk which are used in do_brk but kernel lookups | ||
1836 | * for VMAs when updating these memvers so anything wrong written | ||
1837 | * here cause kernel to swear at userspace program but won't lead | ||
1838 | * to any problem in kernel itself | ||
1839 | */ | ||
1840 | |||
1841 | mm->start_code = prctl_map.start_code; | ||
1842 | mm->end_code = prctl_map.end_code; | ||
1843 | mm->start_data = prctl_map.start_data; | ||
1844 | mm->end_data = prctl_map.end_data; | ||
1845 | mm->start_brk = prctl_map.start_brk; | ||
1846 | mm->brk = prctl_map.brk; | ||
1847 | mm->start_stack = prctl_map.start_stack; | ||
1848 | mm->arg_start = prctl_map.arg_start; | ||
1849 | mm->arg_end = prctl_map.arg_end; | ||
1850 | mm->env_start = prctl_map.env_start; | ||
1851 | mm->env_end = prctl_map.env_end; | ||
1852 | |||
1853 | /* | ||
1854 | * Note this update of @saved_auxv is lockless thus | ||
1855 | * if someone reads this member in procfs while we're | ||
1856 | * updating -- it may get partly updated results. It's | ||
1857 | * known and acceptable trade off: we leave it as is to | ||
1858 | * not introduce additional locks here making the kernel | ||
1859 | * more complex. | ||
1860 | */ | ||
1861 | if (prctl_map.auxv_size) | ||
1862 | memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); | ||
1863 | |||
1864 | error = 0; | ||
1865 | out: | ||
1866 | up_read(&mm->mmap_sem); | ||
1867 | return error; | ||
1868 | } | ||
1869 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
1870 | |||
1690 | static int prctl_set_mm(int opt, unsigned long addr, | 1871 | static int prctl_set_mm(int opt, unsigned long addr, |
1691 | unsigned long arg4, unsigned long arg5) | 1872 | unsigned long arg4, unsigned long arg5) |
1692 | { | 1873 | { |
@@ -1694,9 +1875,16 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1694 | struct vm_area_struct *vma; | 1875 | struct vm_area_struct *vma; |
1695 | int error; | 1876 | int error; |
1696 | 1877 | ||
1697 | if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) | 1878 | if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV && |
1879 | opt != PR_SET_MM_MAP && | ||
1880 | opt != PR_SET_MM_MAP_SIZE))) | ||
1698 | return -EINVAL; | 1881 | return -EINVAL; |
1699 | 1882 | ||
1883 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
1884 | if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE) | ||
1885 | return prctl_set_mm_map(opt, (const void __user *)addr, arg4); | ||
1886 | #endif | ||
1887 | |||
1700 | if (!capable(CAP_SYS_RESOURCE)) | 1888 | if (!capable(CAP_SYS_RESOURCE)) |
1701 | return -EPERM; | 1889 | return -EPERM; |
1702 | 1890 | ||