diff options
author | Davidlohr Bueso <dave@stgolabs.net> | 2015-04-16 15:47:59 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-17 09:04:07 -0400 |
commit | 6e399cd144d8500ffb5d40fa6848890e2580a80a (patch) | |
tree | 930e8ec3664bc6b14c562f38acdc511ea7e938b8 /kernel/sys.c | |
parent | 90f31d0ea88880f780574f3d0bb1a227c4c66ca3 (diff) |
prctl: avoid using mmap_sem for exe_file serialization
Oleg cleverly suggested using xchg() to set the new mm->exe_file instead
of calling set_mm_exe_file() which requires some form of serialization --
mmap_sem in this case. For archs that do not have atomic rmw instructions
we still fall back to a spinlock alternative, so this should always be
safe. As such, we only need the mmap_sem for looking up the backing
vm_file, which can be done with the lock held for reading. Naturally, this means we
need to manually deal with both the new and old file reference counting,
and we need not worry about the MMF_EXE_FILE_CHANGED bits, which can
probably be deleted in the future anyway.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r-- | kernel/sys.c | 47 |
1 file changed, 28 insertions, 19 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index 3be344902316..a4e372b798a5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1649,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
1649 | return mask; | 1649 | return mask; |
1650 | } | 1650 | } |
1651 | 1651 | ||
1652 | static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) | 1652 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) |
1653 | { | 1653 | { |
1654 | struct fd exe; | 1654 | struct fd exe; |
1655 | struct file *old_exe, *exe_file; | ||
1655 | struct inode *inode; | 1656 | struct inode *inode; |
1656 | int err; | 1657 | int err; |
1657 | 1658 | ||
1658 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
1659 | |||
1660 | exe = fdget(fd); | 1659 | exe = fdget(fd); |
1661 | if (!exe.file) | 1660 | if (!exe.file) |
1662 | return -EBADF; | 1661 | return -EBADF; |
@@ -1680,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) | |||
1680 | /* | 1679 | /* |
1681 | * Forbid mm->exe_file change if old file still mapped. | 1680 | * Forbid mm->exe_file change if old file still mapped. |
1682 | */ | 1681 | */ |
1682 | exe_file = get_mm_exe_file(mm); | ||
1683 | err = -EBUSY; | 1683 | err = -EBUSY; |
1684 | if (mm->exe_file) { | 1684 | if (exe_file) { |
1685 | struct vm_area_struct *vma; | 1685 | struct vm_area_struct *vma; |
1686 | 1686 | ||
1687 | for (vma = mm->mmap; vma; vma = vma->vm_next) | 1687 | down_read(&mm->mmap_sem); |
1688 | if (vma->vm_file && | 1688 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
1689 | path_equal(&vma->vm_file->f_path, | 1689 | if (!vma->vm_file) |
1690 | &mm->exe_file->f_path)) | 1690 | continue; |
1691 | goto exit; | 1691 | if (path_equal(&vma->vm_file->f_path, |
1692 | &exe_file->f_path)) | ||
1693 | goto exit_err; | ||
1694 | } | ||
1695 | |||
1696 | up_read(&mm->mmap_sem); | ||
1697 | fput(exe_file); | ||
1692 | } | 1698 | } |
1693 | 1699 | ||
1694 | /* | 1700 | /* |
@@ -1702,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) | |||
1702 | goto exit; | 1708 | goto exit; |
1703 | 1709 | ||
1704 | err = 0; | 1710 | err = 0; |
1705 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ | 1711 | /* set the new file, lockless */ |
1712 | get_file(exe.file); | ||
1713 | old_exe = xchg(&mm->exe_file, exe.file); | ||
1714 | if (old_exe) | ||
1715 | fput(old_exe); | ||
1706 | exit: | 1716 | exit: |
1707 | fdput(exe); | 1717 | fdput(exe); |
1708 | return err; | 1718 | return err; |
1719 | exit_err: | ||
1720 | up_read(&mm->mmap_sem); | ||
1721 | fput(exe_file); | ||
1722 | goto exit; | ||
1709 | } | 1723 | } |
1710 | 1724 | ||
1711 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1725 | #ifdef CONFIG_CHECKPOINT_RESTORE |
@@ -1840,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data | |||
1840 | user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; | 1854 | user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; |
1841 | } | 1855 | } |
1842 | 1856 | ||
1843 | down_write(&mm->mmap_sem); | ||
1844 | if (prctl_map.exe_fd != (u32)-1) | 1857 | if (prctl_map.exe_fd != (u32)-1) |
1845 | error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); | 1858 | error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); |
1846 | downgrade_write(&mm->mmap_sem); | 1859 | down_read(&mm->mmap_sem); |
1847 | if (error) | 1860 | if (error) |
1848 | goto out; | 1861 | goto out; |
1849 | 1862 | ||
@@ -1909,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1909 | if (!capable(CAP_SYS_RESOURCE)) | 1922 | if (!capable(CAP_SYS_RESOURCE)) |
1910 | return -EPERM; | 1923 | return -EPERM; |
1911 | 1924 | ||
1912 | if (opt == PR_SET_MM_EXE_FILE) { | 1925 | if (opt == PR_SET_MM_EXE_FILE) |
1913 | down_write(&mm->mmap_sem); | 1926 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); |
1914 | error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr); | ||
1915 | up_write(&mm->mmap_sem); | ||
1916 | return error; | ||
1917 | } | ||
1918 | 1927 | ||
1919 | if (addr >= TASK_SIZE || addr < mmap_min_addr) | 1928 | if (addr >= TASK_SIZE || addr < mmap_min_addr) |
1920 | return -EINVAL; | 1929 | return -EINVAL; |