aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
authorDavidlohr Bueso <dave@stgolabs.net>2015-04-16 15:47:59 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-17 09:04:07 -0400
commit6e399cd144d8500ffb5d40fa6848890e2580a80a (patch)
tree930e8ec3664bc6b14c562f38acdc511ea7e938b8 /kernel/sys.c
parent90f31d0ea88880f780574f3d0bb1a227c4c66ca3 (diff)
prctl: avoid using mmap_sem for exe_file serialization
Oleg cleverly suggested using xchg() to set the new mm->exe_file instead of calling set_mm_exe_file() which requires some form of serialization -- mmap_sem in this case. For archs that do not have atomic rmw instructions we still fallback to a spinlock alternative, so this should always be safe. As such, we only need the mmap_sem for looking up the backing vm_file, which can be done sharing the lock. Naturally, this means we need to manually deal with both the new and old file reference counting, and we need not worry about the MMF_EXE_FILE_CHANGED bits, which can probably be deleted in the future anyway. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Suggested-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c47
1 files changed, 28 insertions, 19 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index 3be344902316..a4e372b798a5 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1649,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask)
1649 return mask; 1649 return mask;
1650} 1650}
1651 1651
1652static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) 1652static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1653{ 1653{
1654 struct fd exe; 1654 struct fd exe;
1655 struct file *old_exe, *exe_file;
1655 struct inode *inode; 1656 struct inode *inode;
1656 int err; 1657 int err;
1657 1658
1658 VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
1659
1660 exe = fdget(fd); 1659 exe = fdget(fd);
1661 if (!exe.file) 1660 if (!exe.file)
1662 return -EBADF; 1661 return -EBADF;
@@ -1680,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
1680 /* 1679 /*
1681 * Forbid mm->exe_file change if old file still mapped. 1680 * Forbid mm->exe_file change if old file still mapped.
1682 */ 1681 */
1682 exe_file = get_mm_exe_file(mm);
1683 err = -EBUSY; 1683 err = -EBUSY;
1684 if (mm->exe_file) { 1684 if (exe_file) {
1685 struct vm_area_struct *vma; 1685 struct vm_area_struct *vma;
1686 1686
1687 for (vma = mm->mmap; vma; vma = vma->vm_next) 1687 down_read(&mm->mmap_sem);
1688 if (vma->vm_file && 1688 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1689 path_equal(&vma->vm_file->f_path, 1689 if (!vma->vm_file)
1690 &mm->exe_file->f_path)) 1690 continue;
1691 goto exit; 1691 if (path_equal(&vma->vm_file->f_path,
1692 &exe_file->f_path))
1693 goto exit_err;
1694 }
1695
1696 up_read(&mm->mmap_sem);
1697 fput(exe_file);
1692 } 1698 }
1693 1699
1694 /* 1700 /*
@@ -1702,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
1702 goto exit; 1708 goto exit;
1703 1709
1704 err = 0; 1710 err = 0;
1705 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ 1711 /* set the new file, lockless */
1712 get_file(exe.file);
1713 old_exe = xchg(&mm->exe_file, exe.file);
1714 if (old_exe)
1715 fput(old_exe);
1706exit: 1716exit:
1707 fdput(exe); 1717 fdput(exe);
1708 return err; 1718 return err;
1719exit_err:
1720 up_read(&mm->mmap_sem);
1721 fput(exe_file);
1722 goto exit;
1709} 1723}
1710 1724
1711#ifdef CONFIG_CHECKPOINT_RESTORE 1725#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -1840,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
1840 user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; 1854 user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
1841 } 1855 }
1842 1856
1843 down_write(&mm->mmap_sem);
1844 if (prctl_map.exe_fd != (u32)-1) 1857 if (prctl_map.exe_fd != (u32)-1)
1845 error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); 1858 error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
1846 downgrade_write(&mm->mmap_sem); 1859 down_read(&mm->mmap_sem);
1847 if (error) 1860 if (error)
1848 goto out; 1861 goto out;
1849 1862
@@ -1909,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
1909 if (!capable(CAP_SYS_RESOURCE)) 1922 if (!capable(CAP_SYS_RESOURCE))
1910 return -EPERM; 1923 return -EPERM;
1911 1924
1912 if (opt == PR_SET_MM_EXE_FILE) { 1925 if (opt == PR_SET_MM_EXE_FILE)
1913 down_write(&mm->mmap_sem); 1926 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
1914 error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
1915 up_write(&mm->mmap_sem);
1916 return error;
1917 }
1918 1927
1919 if (addr >= TASK_SIZE || addr < mmap_min_addr) 1928 if (addr >= TASK_SIZE || addr < mmap_min_addr)
1920 return -EINVAL; 1929 return -EINVAL;