From 5529680981807b44abf3be30fb6d612ff04f68ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:09 -0700 Subject: [PATCH] NFS: split nfsi->flags into two fields Certain bits in nfsi->flags can be manipulated with atomic bitops, and some are better manipulated via logical bitmask operations. This patch splits the flags field into two. The next patch introduces atomic bitops for one of the fields. Test plan: Millions of fsx ops on SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 16 ++++++++------- fs/nfs/file.c | 5 +++-- fs/nfs/inode.c | 61 +++++++++++++++++++++++++++++--------------------------- fs/nfs/nfs3acl.c | 2 +- fs/nfs/read.c | 4 ++-- 5 files changed, 47 insertions(+), 41 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b38a57e78a63..5732e13cd0da 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. @@ -462,7 +462,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -608,7 +608,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 || nfs_attribute_timeout(dir)) return 0; return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); @@ -1575,11 +1575,12 @@ out: int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != cred || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) + || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) return -ENOENT; memcpy(res, cache, sizeof(*res)); return 0; @@ -1587,14 +1588,15 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != set->cred) { if (cache->cred) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } - NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5621ba9885f4..f6b9eda925c5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -134,9 +134,10 @@ nfs_file_release(struct inode *inode, struct file *filp) */ static int nfs_revalidate_file(struct inode *inode, struct file *filp) { + struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; @@ -164,7 +165,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bb7ca022bcb2..622184553516 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,9 +620,9 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +632,7 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); - NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; } /* @@ -841,7 +841,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -872,8 +872,7 @@ nfs_wait_on_inode(struct inode *inode, int flag) int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; if (__IS_FLG(inode, MS_NOATIME)) @@ -1019,7 +1018,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) struct nfs_fattr fattr; struct nfs_inode *nfsi = NFS_I(inode); unsigned long verifier; - unsigned int flags; + unsigned long cache_validity; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1036,7 +1035,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out_nowait; if (NFS_ATTRTIMEO(inode) == 0) continue; - if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) continue; status = NFS_STALE(inode) ? -ESTALE : 0; goto out_nowait; @@ -1065,18 +1064,21 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } - flags = nfsi->flags; - nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; + cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). */ if (verifier == nfsi->cache_change_attribute) - nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); - /* Do the page cache invalidation */ + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + nfs_revalidate_mapping(inode, inode->i_mapping); - if (flags & NFS_INO_INVALID_ACL) + + if (cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1107,7 +1109,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); @@ -1122,14 +1124,14 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { if (S_ISREG(inode->i_mode)) { if (filemap_fdatawrite(mapping) == 0) filemap_fdatawait(mapping); nfs_wb_all(inode); } invalidate_inode_pages2(mapping); - nfsi->flags &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ @@ -1164,10 +1166,10 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1200,9 +1202,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; if (nfsi->change_attr != fattr->change_attr) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } } @@ -1227,28 +1229,28 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } if (cur_size != new_isize) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!timespec_equal(&inode->i_atime, &fattr->atime)) - nfsi->flags |= NFS_INO_INVALID_ATIME; + nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; return 0; @@ -1384,7 +1386,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ)) - nfsi->flags |= invalid; + nfsi->cache_validity |= invalid; return 0; out_changed: @@ -1961,7 +1963,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); if (!nfsi) return NULL; - nfsi->flags = 0; + nfsi->flags = 0UL; + nfsi->cache_validity = 0UL; #ifdef CONFIG_NFS_V3_ACL nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 1b7a3ef2f813..a020e650ffc2 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6f866b8aa2d5..90df0500ca1b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +473,7 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } - NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; data->complete(data, status); } -- cgit v1.2.2 From 412d582ec1dd59aab2353f8cb7e74f2c79cd20b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:11 -0700 Subject: [PATCH] NFS: use atomic bitops to manipulate flags in nfsi->flags Introduce atomic bitops to manipulate the bits in the nfs_inode structure's "flags" field. Using bitops means we can use a generic wait_on_bit call instead of an ad hoc locking scheme in fs/nfs/inode.c, so we can remove the "nfs_i_wait" field from nfs_inode at the same time. The other new flags field will continue to use bitmask and logic AND and OR. This permits several flags to be set at the same time efficiently. The following patch adds a spin lock to protect these flags, and this spin lock will later cover other fields in the nfs_inode structure, amortizing the cost of using this type of serialization. Test plan: Millions of fsx ops on SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 4 ++-- fs/nfs/inode.c | 69 ++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5732e13cd0da..27cf5577f239 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -182,7 +182,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); desc->plus = 0; goto again; } @@ -545,7 +545,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } if (res == -ETOOSMALL && desc->plus) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); nfs_zap_caches(inode); desc->plus = 0; desc->entry->eof = 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 622184553516..ee27578277f3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -739,7 +739,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) - NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -849,26 +849,43 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } } +static int nfs_wait_schedule(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /* * Wait for the inode to get unlocked. - * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). */ -static int -nfs_wait_on_inode(struct inode *inode, int flag) +static int nfs_wait_on_inode(struct inode *inode) { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_inode *nfsi = NFS_I(inode); - + sigset_t oldmask; int error; - if (!(NFS_FLAGS(inode) & flag)) - return 0; + atomic_inc(&inode->i_count); - error = nfs_wait_event(clnt, nfsi->nfs_i_wait, - !(NFS_FLAGS(inode) & flag)); + rpc_clnt_sigmask(clnt, &oldmask); + error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, + nfs_wait_schedule, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); iput(inode); + return error; } +static void nfs_wake_up_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); +} + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -1029,18 +1046,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (NFS_STALE(inode)) goto out_nowait; - while (NFS_REVALIDATING(inode)) { - status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) - goto out_nowait; - if (NFS_ATTRTIMEO(inode) == 0) - continue; - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) - continue; - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; + status = nfs_wait_on_inode(inode); + if (status < 0) + goto out; + if (NFS_STALE(inode)) { + status = -ESTALE; + /* Do we trust the cached ESTALE? */ + if (NFS_ATTRTIMEO(inode) != 0) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { + /* no */ + } else + goto out; + } } - NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; /* Protect against RPC races by saving the change attribute */ verifier = nfs_save_change_attribute(inode); @@ -1052,7 +1070,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); } goto out; } @@ -1083,9 +1101,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); -out: - NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&nfsi->nfs_i_wait); + out: + nfs_wake_up_inode(inode); + out_nowait: unlock_kernel(); return status; @@ -1404,7 +1422,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign */ nfs_invalidate_inode(inode); out_err: - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); return -ESTALE; } @@ -1996,7 +2014,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; - init_waitqueue_head(&nfsi->nfs_i_wait); nfs4_init_once(nfsi); } } -- cgit v1.2.2 From dc59250c6ebed099a9bc0a11298e2281dd896657 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:12 -0700 Subject: [PATCH] NFS: Introduce the use of inode->i_lock to protect fields in nfsi Down the road we want to eliminate the use of the global kernel lock entirely from the NFS client. To do this, we need to protect the fields in the nfs_inode structure adequately. Start by serializing updates to the "cache_validity" field. Note this change addresses an SMP hang found by njw@osdl.org, where processes deadlock because nfs_end_data_update and nfs_revalidate_mapping update the "cache_validity" field without proper serialization. Test plan: Millions of fsx ops on SMP clients. Run Nick Wilson's breaknfs program on large SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 7 +++++++ fs/nfs/inode.c | 34 +++++++++++++++++++++++++++++++--- fs/nfs/nfs3acl.c | 2 ++ fs/nfs/read.c | 4 ++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 27cf5577f239..147cbf9261ce 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. @@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -1596,7 +1600,10 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } + /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ee27578277f3..541b418327c8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -615,6 +615,8 @@ nfs_zap_caches(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + spin_lock(&inode->i_lock); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -623,6 +625,8 @@ nfs_zap_caches(struct inode *inode) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + + spin_unlock(&inode->i_lock); } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } /* @@ -841,7 +847,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -1082,6 +1090,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } + spin_lock(&inode->i_lock); cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; @@ -1091,6 +1100,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) */ if (verifier == nfsi->cache_change_attribute) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + spin_unlock(&inode->i_lock); nfs_revalidate_mapping(inode, inode->i_mapping); @@ -1149,12 +1159,16 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfs_wb_all(inode); } invalidate_inode_pages2(mapping); + + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ nfsi->cache_change_attribute++; } + spin_unlock(&inode->i_lock); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1184,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ + spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1212,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if (nfs_have_delegation(inode, FMODE_READ)) return 0; + spin_lock(&inode->i_lock); + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1226,13 +1244,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } } - if ((fattr->valid & NFS_ATTR_FATTR) == 0) + if ((fattr->valid & NFS_ATTR_FATTR) == 0) { + spin_unlock(&inode->i_lock); return 0; + } /* Has the inode gone and changed behind our back? */ if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); return -EIO; + } cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); @@ -1271,6 +1293,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; + spin_unlock(&inode->i_lock); return 0; } @@ -1309,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign goto out_err; } + spin_lock(&inode->i_lock); + /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); goto out_changed; + } /* * Update the read time so we don't revalidate too often. @@ -1406,6 +1433,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; + spin_unlock(&inode->i_lock); return 0; out_changed: /* diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index a020e650ffc2..6a5bbc0ae941 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 90df0500ca1b..6ceb1d471f20 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } + spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); data->complete(data, status); } -- cgit v1.2.2 From cc314eef0128a807e50fa03baf2d0abc0647952c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Aug 2005 18:02:56 -0700 Subject: Fix nasty ncpfs symlink handling bug. This bug could cause oopses and page state corruption, because ncpfs used the generic page-cache symlink handlign functions. But those functions only work if the page cache is guaranteed to be "stable", ie a page that was installed when the symlink walk was started has to still be installed in the page cache at the end of the walk. We could have fixed ncpfs to not use the generic helper routines, but it is in many ways much cleaner to instead improve on the symlink walking helper routines so that they don't require that absolute stability. We do this by allowing "follow_link()" to return a error-pointer as a cookie, which is fed back to the cleanup "put_link()" routine. This also simplifies NFS symlink handling. Signed-off-by: Linus Torvalds --- fs/nfs/symlink.c | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 35f106599144..18dc95b0b646 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -27,26 +27,14 @@ /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. - * - * At the beginning of the page we store pointer to struct page in question, - * simplifying nfs_put_link() (if inode got invalidated we can't find the page - * to be freed via pagecache lookup). - * The NUL-terminated string follows immediately thereafter. */ -struct nfs_symlink { - struct page *page; - char body[0]; -}; - static int nfs_symlink_filler(struct inode *inode, struct page *page) { - const unsigned int pgbase = offsetof(struct nfs_symlink, body); - const unsigned int pglen = PAGE_SIZE - pgbase; int error; lock_kernel(); - error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen); + error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); unlock_kernel(); if (error < 0) goto error; @@ -60,11 +48,10 @@ error: return -EIO; } -static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - struct nfs_symlink *p; void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); if (err) goto read_failed; @@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) err = ERR_PTR(-EIO); goto getlink_read_error; } - p = kmap(page); - p->page = page; - nd_set_link(nd, p->body); - return 0; + nd_set_link(nd, kmap(page)); + return page; getlink_read_error: page_cache_release(page); read_failed: nd_set_link(nd, err); - return 0; + return NULL; } -static void nfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - char *s = nd_get_link(nd); - if (!IS_ERR(s)) { - struct nfs_symlink *p; - struct page *page; - - p = container_of(s, struct nfs_symlink, body[0]); - page = p->page; - + if (cookie) { + struct page *page = cookie; kunmap(page); page_cache_release(page); } -- cgit v1.2.2 From 01c314a0c0f6367960a7cb1ffb5796560ccaa1c1 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Fri, 19 Aug 2005 17:57:48 -0700 Subject: [PATCH] NFSv4: unbalanced BKL in nfs_atomic_lookup() Added missing unlock_kernel() to NFSv4 atomic lookup. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 147cbf9261ce..2df639f143e8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -939,6 +939,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry error = nfs_revalidate_inode(NFS_SERVER(dir), dir); if (error < 0) { res = ERR_PTR(error); + unlock_kernel(); goto out; } -- cgit v1.2.2