From 1858efd471624ecb37e6b5462cab8076f47d1cee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 13:14:21 -0500 Subject: minimal fix for do_filp_open() race failure exits on the no-O_CREAT side of do_filp_open() merge with those of the O_CREAT one; unfortunately, if do_path_lookup() returns -ESTALE, we'll get to out_filp:, notice that we are about to return -ESTALE without having tried to create the sucker with LOOKUP_REVAL and jump right into the O_CREAT side of code. And proceed to try and create a file. Usually that'll fail with -ESTALE again, but we can race and get that attempt of pathname resolution to succeed. open() without O_CREAT really shouldn't end up creating files, races or not. The real fix is to rearchitect the whole do_filp_open(), but for now splitting the failure exits will do. Signed-off-by: Al Viro --- fs/namei.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0087cf9c2c6b..a5e844fe4b28 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2455,22 +2455,29 @@ struct file *do_filp_open(int dfd, const char *pathname, /* !O_CREAT, simple open */ error = do_path_lookup(dfd, pathname, flags, &nd); if (unlikely(error)) - goto out_filp; + goto out_filp2; error = -ELOOP; if (!(nd.flags & LOOKUP_FOLLOW)) { if (nd.inode->i_op->follow_link) - goto out_path; + goto out_path2; } error = -ENOTDIR; if (nd.flags & LOOKUP_DIRECTORY) { if (!nd.inode->i_op->lookup) - goto out_path; + goto out_path2; } audit_inode(pathname, nd.path.dentry); filp = finish_open(&nd, open_flag, acc_mode); +out2: release_open_intent(&nd); return filp; +out_path2: + path_put(&nd.path); +out_filp2: + filp = ERR_PTR(error); + goto out2; + creat: /* OK, have to create the file. Find the parent. 
*/ error = path_init_rcu(dfd, pathname, -- cgit v1.2.2 From dfef6dcd35cb4a251f6322ca9b2c06f0bb1aa1f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2011 01:25:28 -0500 Subject: unfuck proc_sysctl ->d_compare() a) struct inode is not going to be freed under ->d_compare(); however, the thing PROC_I(inode)->sysctl points to just might. Fortunately, it's enough to make freeing that sucker delayed, provided that we don't step on its ->unregistering, clear the pointer to it in PROC_I(inode) before dropping the reference and check if it's NULL in ->d_compare(). b) I'm not sure that we *can* walk into NULL inode here (we recheck dentry->seq between verifying that it's still hashed / fetching dentry->d_inode and passing it to ->d_compare() and there are no negative hashed dentries in /proc/sys/*), but if we can walk into that, we really should not have ->d_compare() return 0 on it! That said, I really suspect that this check can be simply killed. Nick? Signed-off-by: Al Viro --- fs/proc/inode.c | 8 ++++++-- fs/proc/proc_sysctl.c | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 176ce4cda68a..d6a7ca1fdac5 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -27,6 +27,7 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; + struct ctl_table_header *head; truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); @@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode) de = PROC_I(inode)->pde; if (de) pde_put(de); - if (PROC_I(inode)->sysctl) - sysctl_head_put(PROC_I(inode)->sysctl); + head = PROC_I(inode)->sysctl; + if (head) { + rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); + sysctl_head_put(head); + } } struct vfsmount *proc_mnt; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 09a1f92a34ef..8eb2522111c5 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -408,15 +408,18 @@ static int proc_sys_compare(const 
struct dentry *parent, const struct dentry *dentry, const struct inode *inode, unsigned int len, const char *str, const struct qstr *name) { + struct ctl_table_header *head; /* Although proc doesn't have negative dentries, rcu-walk means * that inode here can be NULL */ + /* AV: can it, indeed? */ if (!inode) - return 0; + return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; - return !sysctl_is_seen(PROC_I(inode)->sysctl); + head = rcu_dereference(PROC_I(inode)->sysctl); + return !head || !sysctl_is_seen(head); } static const struct dentry_operations proc_sys_dentry_operations = { -- cgit v1.2.2 From b306419ae08d9def53f2142a37cc0a58622307a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2011 21:16:28 -0500 Subject: nd->inode is not set on the second attempt in path_walk() We leave it at whatever it had been pointing to after the first link_path_walk() had failed with -ESTALE. Things do not work well after that... Signed-off-by: Al Viro --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index a5e844fe4b28..a4689eb2df28 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1546,6 +1546,7 @@ static int path_walk(const char *name, struct nameidata *nd) /* nd->path had been dropped */ current->total_link_count = 0; nd->path = save; + nd->inode = save.dentry->d_inode; path_get(&nd->path); nd->flags |= LOOKUP_REVAL; result = link_path_walk(name, nd); -- cgit v1.2.2