aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
author: Dave Hansen <haveblue@us.ibm.com> 2008-02-15 17:37:48 -0500
committer: Al Viro <viro@zeniv.linux.org.uk> 2008-04-19 00:29:25 -0400
commit: 4a3fd211ccfc08a88edc824300e25a87785c6a5f (patch)
tree: 99f1a76a99fa78464b8de731f7fdb5bcc9667a5e /fs/namei.c
parent: 42a74f206b914db13ee1f5ae932dcd91a77c8579 (diff)
[PATCH] r/o bind mounts: elevate write count for open()s
This is the first really tricky patch in the series. It elevates the writer count on a mount each time a non-special file is opened for write.

We used to do this in may_open(), but Miklos pointed out that __dentry_open() is used as well to create filps. This will cover even those cases, while a call in may_open() would not have.

There is also an elevated count around the vfs_create() call in open_namei(). See the comments for more details, but we need this to fix a 'create, remount, fail r/w open()' race.

Some filesystems forgo the use of normal vfs calls to create struct files. Make sure that these users elevate the mnt writer count because they will get __fput(), and we need to make sure they're balanced.

Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 75
1 files changed, 65 insertions, 10 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 83c843b3fea3..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1623 return -EACCES; 1623 return -EACCES;
1624 1624
1625 flag &= ~O_TRUNC; 1625 flag &= ~O_TRUNC;
1626 } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) 1626 }
1627 return -EROFS;
1628 1627
1629 error = vfs_permission(nd, acc_mode); 1628 error = vfs_permission(nd, acc_mode);
1630 if (error) 1629 if (error)
@@ -1724,18 +1723,32 @@ static inline int open_to_namei_flags(int flag)
1724 return flag; 1723 return flag;
1725} 1724}
1726 1725
1726static int open_will_write_to_fs(int flag, struct inode *inode)
1727{
1728 /*
1729 * We'll never write to the fs underlying
1730 * a device file.
1731 */
1732 if (special_file(inode->i_mode))
1733 return 0;
1734 return (flag & O_TRUNC);
1735}
1736
1727/* 1737/*
1728 * Note that the low bits of "flag" aren't the same as in the open 1738 * Note that the low bits of the passed in "open_flag"
1729 * system call. See open_to_namei_flags(). 1739 * are not the same as in the local variable "flag". See
1740 * open_to_namei_flags() for more details.
1730 */ 1741 */
1731struct file *do_filp_open(int dfd, const char *pathname, 1742struct file *do_filp_open(int dfd, const char *pathname,
1732 int open_flag, int mode) 1743 int open_flag, int mode)
1733{ 1744{
1745 struct file *filp;
1734 struct nameidata nd; 1746 struct nameidata nd;
1735 int acc_mode, error; 1747 int acc_mode, error;
1736 struct path path; 1748 struct path path;
1737 struct dentry *dir; 1749 struct dentry *dir;
1738 int count = 0; 1750 int count = 0;
1751 int will_write;
1739 int flag = open_to_namei_flags(open_flag); 1752 int flag = open_to_namei_flags(open_flag);
1740 1753
1741 acc_mode = ACC_MODE(flag); 1754 acc_mode = ACC_MODE(flag);
@@ -1791,17 +1804,30 @@ do_last:
1791 } 1804 }
1792 1805
1793 if (IS_ERR(nd.intent.open.file)) { 1806 if (IS_ERR(nd.intent.open.file)) {
1794 mutex_unlock(&dir->d_inode->i_mutex);
1795 error = PTR_ERR(nd.intent.open.file); 1807 error = PTR_ERR(nd.intent.open.file);
1796 goto exit_dput; 1808 goto exit_mutex_unlock;
1797 } 1809 }
1798 1810
1799 /* Negative dentry, just create the file */ 1811 /* Negative dentry, just create the file */
1800 if (!path.dentry->d_inode) { 1812 if (!path.dentry->d_inode) {
1801 error = __open_namei_create(&nd, &path, flag, mode); 1813 /*
1814 * This write is needed to ensure that a
1815 * rw->ro transition does not occur between
1816 * the time when the file is created and when
1817 * a permanent write count is taken through
1818 * the 'struct file' in nameidata_to_filp().
1819 */
1820 error = mnt_want_write(nd.path.mnt);
1802 if (error) 1821 if (error)
1822 goto exit_mutex_unlock;
1823 error = __open_namei_create(&nd, &path, flag, mode);
1824 if (error) {
1825 mnt_drop_write(nd.path.mnt);
1803 goto exit; 1826 goto exit;
1804 return nameidata_to_filp(&nd, open_flag); 1827 }
1828 filp = nameidata_to_filp(&nd, open_flag);
1829 mnt_drop_write(nd.path.mnt);
1830 return filp;
1805 } 1831 }
1806 1832
1807 /* 1833 /*
@@ -1831,11 +1857,40 @@ do_last:
1831 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1857 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1832 goto exit; 1858 goto exit;
1833ok: 1859ok:
1860 /*
1861 * Consider:
1862 * 1. may_open() truncates a file
1863 * 2. a rw->ro mount transition occurs
1864 * 3. nameidata_to_filp() fails due to
1865 * the ro mount.
1866 * That would be inconsistent, and should
1867 * be avoided. Taking this mnt write here
1868 * ensures that (2) can not occur.
1869 */
1870 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
1871 if (will_write) {
1872 error = mnt_want_write(nd.path.mnt);
1873 if (error)
1874 goto exit;
1875 }
1834 error = may_open(&nd, acc_mode, flag); 1876 error = may_open(&nd, acc_mode, flag);
1835 if (error) 1877 if (error) {
1878 if (will_write)
1879 mnt_drop_write(nd.path.mnt);
1836 goto exit; 1880 goto exit;
1837 return nameidata_to_filp(&nd, open_flag); 1881 }
1882 filp = nameidata_to_filp(&nd, open_flag);
1883 /*
1884 * It is now safe to drop the mnt write
1885 * because the filp has had a write taken
1886 * on its behalf.
1887 */
1888 if (will_write)
1889 mnt_drop_write(nd.path.mnt);
1890 return filp;
1838 1891
1892exit_mutex_unlock:
1893 mutex_unlock(&dir->d_inode->i_mutex);
1839exit_dput: 1894exit_dput:
1840 path_put_conditional(&path, &nd); 1895 path_put_conditional(&path, &nd);
1841exit: 1896exit: