diff options
author | Dave Hansen <haveblue@us.ibm.com> | 2008-02-15 17:37:48 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2008-04-19 00:29:25 -0400 |
commit | 4a3fd211ccfc08a88edc824300e25a87785c6a5f (patch) | |
tree | 99f1a76a99fa78464b8de731f7fdb5bcc9667a5e /fs/namei.c | |
parent | 42a74f206b914db13ee1f5ae932dcd91a77c8579 (diff) |
[PATCH] r/o bind mounts: elevate write count for open()s
This is the first really tricky patch in the series. It elevates the writer
count on a mount each time a non-special file is opened for write.
We used to do this in may_open(), but Miklos pointed out that __dentry_open()
is also used to create filps. Taking the count in __dentry_open() covers even
those cases, while a call in may_open() would not have.
There is also an elevated count around the vfs_create() call in open_namei().
See the comments for more details, but we need this to fix a 'create, remount,
fail r/w open()' race.
Some filesystems forgo the use of normal vfs calls to create
struct files. Make sure that these users elevate the mnt
writer count because they will get __fput(), and we need
to make sure they're balanced.
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 75 |
1 files changed, 65 insertions, 10 deletions
diff --git a/fs/namei.c b/fs/namei.c index 83c843b3fea3..e179f71bfcb0 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1623 | return -EACCES; | 1623 | return -EACCES; |
1624 | 1624 | ||
1625 | flag &= ~O_TRUNC; | 1625 | flag &= ~O_TRUNC; |
1626 | } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) | 1626 | } |
1627 | return -EROFS; | ||
1628 | 1627 | ||
1629 | error = vfs_permission(nd, acc_mode); | 1628 | error = vfs_permission(nd, acc_mode); |
1630 | if (error) | 1629 | if (error) |
@@ -1724,18 +1723,32 @@ static inline int open_to_namei_flags(int flag) | |||
1724 | return flag; | 1723 | return flag; |
1725 | } | 1724 | } |
1726 | 1725 | ||
1726 | static int open_will_write_to_fs(int flag, struct inode *inode) | ||
1727 | { | ||
1728 | /* | ||
1729 | * We'll never write to the fs underlying | ||
1730 | * a device file. | ||
1731 | */ | ||
1732 | if (special_file(inode->i_mode)) | ||
1733 | return 0; | ||
1734 | return (flag & O_TRUNC); | ||
1735 | } | ||
1736 | |||
1727 | /* | 1737 | /* |
1728 | * Note that the low bits of "flag" aren't the same as in the open | 1738 | * Note that the low bits of the passed in "open_flag" |
1729 | * system call. See open_to_namei_flags(). | 1739 | * are not the same as in the local variable "flag". See |
1740 | * open_to_namei_flags() for more details. | ||
1730 | */ | 1741 | */ |
1731 | struct file *do_filp_open(int dfd, const char *pathname, | 1742 | struct file *do_filp_open(int dfd, const char *pathname, |
1732 | int open_flag, int mode) | 1743 | int open_flag, int mode) |
1733 | { | 1744 | { |
1745 | struct file *filp; | ||
1734 | struct nameidata nd; | 1746 | struct nameidata nd; |
1735 | int acc_mode, error; | 1747 | int acc_mode, error; |
1736 | struct path path; | 1748 | struct path path; |
1737 | struct dentry *dir; | 1749 | struct dentry *dir; |
1738 | int count = 0; | 1750 | int count = 0; |
1751 | int will_write; | ||
1739 | int flag = open_to_namei_flags(open_flag); | 1752 | int flag = open_to_namei_flags(open_flag); |
1740 | 1753 | ||
1741 | acc_mode = ACC_MODE(flag); | 1754 | acc_mode = ACC_MODE(flag); |
@@ -1791,17 +1804,30 @@ do_last: | |||
1791 | } | 1804 | } |
1792 | 1805 | ||
1793 | if (IS_ERR(nd.intent.open.file)) { | 1806 | if (IS_ERR(nd.intent.open.file)) { |
1794 | mutex_unlock(&dir->d_inode->i_mutex); | ||
1795 | error = PTR_ERR(nd.intent.open.file); | 1807 | error = PTR_ERR(nd.intent.open.file); |
1796 | goto exit_dput; | 1808 | goto exit_mutex_unlock; |
1797 | } | 1809 | } |
1798 | 1810 | ||
1799 | /* Negative dentry, just create the file */ | 1811 | /* Negative dentry, just create the file */ |
1800 | if (!path.dentry->d_inode) { | 1812 | if (!path.dentry->d_inode) { |
1801 | error = __open_namei_create(&nd, &path, flag, mode); | 1813 | /* |
1814 | * This write is needed to ensure that a | ||
1815 | * ro->rw transition does not occur between | ||
1816 | * the time when the file is created and when | ||
1817 | * a permanent write count is taken through | ||
1818 | * the 'struct file' in nameidata_to_filp(). | ||
1819 | */ | ||
1820 | error = mnt_want_write(nd.path.mnt); | ||
1802 | if (error) | 1821 | if (error) |
1822 | goto exit_mutex_unlock; | ||
1823 | error = __open_namei_create(&nd, &path, flag, mode); | ||
1824 | if (error) { | ||
1825 | mnt_drop_write(nd.path.mnt); | ||
1803 | goto exit; | 1826 | goto exit; |
1804 | return nameidata_to_filp(&nd, open_flag); | 1827 | } |
1828 | filp = nameidata_to_filp(&nd, open_flag); | ||
1829 | mnt_drop_write(nd.path.mnt); | ||
1830 | return filp; | ||
1805 | } | 1831 | } |
1806 | 1832 | ||
1807 | /* | 1833 | /* |
@@ -1831,11 +1857,40 @@ do_last: | |||
1831 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) | 1857 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) |
1832 | goto exit; | 1858 | goto exit; |
1833 | ok: | 1859 | ok: |
1860 | /* | ||
1861 | * Consider: | ||
1862 | * 1. may_open() truncates a file | ||
1863 | * 2. a rw->ro mount transition occurs | ||
1864 | * 3. nameidata_to_filp() fails due to | ||
1865 | * the ro mount. | ||
1866 | * That would be inconsistent, and should | ||
1867 | * be avoided. Taking this mnt write here | ||
1868 | * ensures that (2) can not occur. | ||
1869 | */ | ||
1870 | will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); | ||
1871 | if (will_write) { | ||
1872 | error = mnt_want_write(nd.path.mnt); | ||
1873 | if (error) | ||
1874 | goto exit; | ||
1875 | } | ||
1834 | error = may_open(&nd, acc_mode, flag); | 1876 | error = may_open(&nd, acc_mode, flag); |
1835 | if (error) | 1877 | if (error) { |
1878 | if (will_write) | ||
1879 | mnt_drop_write(nd.path.mnt); | ||
1836 | goto exit; | 1880 | goto exit; |
1837 | return nameidata_to_filp(&nd, open_flag); | 1881 | } |
1882 | filp = nameidata_to_filp(&nd, open_flag); | ||
1883 | /* | ||
1884 | * It is now safe to drop the mnt write | ||
1885 | * because the filp has had a write taken | ||
1886 | * on its behalf. | ||
1887 | */ | ||
1888 | if (will_write) | ||
1889 | mnt_drop_write(nd.path.mnt); | ||
1890 | return filp; | ||
1838 | 1891 | ||
1892 | exit_mutex_unlock: | ||
1893 | mutex_unlock(&dir->d_inode->i_mutex); | ||
1839 | exit_dput: | 1894 | exit_dput: |
1840 | path_put_conditional(&path, &nd); | 1895 | path_put_conditional(&path, &nd); |
1841 | exit: | 1896 | exit: |