about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chuck Lever <cel@citi.umich.edu> 2005-08-18 14:24:12 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2005-08-18 15:53:57 -0400
commit dc59250c6ebed099a9bc0a11298e2281dd896657 (patch)
tree 80c294437c0868d90abfa617d873370e6dbe6565
parent 412d582ec1dd59aab2353f8cb7e74f2c79cd20b9 (diff)
[PATCH] NFS: Introduce the use of inode->i_lock to protect fields in nfsi
Down the road we want to eliminate the use of the global kernel lock entirely from the NFS client. To do this, we need to protect the fields in the nfs_inode structure adequately. Start by serializing updates to the "cache_validity" field.

Note this change addresses an SMP hang found by njw@osdl.org, where processes deadlock because nfs_end_data_update and nfs_revalidate_mapping update the "cache_validity" field without proper serialization.

Test plan: Millions of fsx ops on SMP clients. Run Nick Wilson's breaknfs program on large SMP clients.

Signed-off-by: Chuck Lever <cel@netapp.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/nfs/dir.c 7
-rw-r--r-- fs/nfs/inode.c 34
-rw-r--r-- fs/nfs/nfs3acl.c 2
-rw-r--r-- fs/nfs/read.c 4
-rw-r--r-- include/linux/nfs_fs.h 5
5 files changed, 48 insertions, 4 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 27cf5577f239..147cbf9261ce 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -189,7 +189,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
189 goto error; 189 goto error;
190 } 190 }
191 SetPageUptodate(page); 191 SetPageUptodate(page);
192 spin_lock(&inode->i_lock);
192 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 193 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
194 spin_unlock(&inode->i_lock);
193 /* Ensure consistent page alignment of the data. 195 /* Ensure consistent page alignment of the data.
194 * Note: assumes we have exclusive access to this mapping either 196 * Note: assumes we have exclusive access to this mapping either
195 * through inode->i_sem or some other mechanism. 197 * through inode->i_sem or some other mechanism.
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
462 page, 464 page,
463 NFS_SERVER(inode)->dtsize, 465 NFS_SERVER(inode)->dtsize,
464 desc->plus); 466 desc->plus);
467 spin_lock(&inode->i_lock);
465 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 468 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
469 spin_unlock(&inode->i_lock);
466 desc->page = page; 470 desc->page = page;
467 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 471 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
468 if (desc->error >= 0) { 472 if (desc->error >= 0) {
@@ -1596,7 +1600,10 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1596 put_rpccred(cache->cred); 1600 put_rpccred(cache->cred);
1597 cache->cred = get_rpccred(set->cred); 1601 cache->cred = get_rpccred(set->cred);
1598 } 1602 }
1603 /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
1604 spin_lock(&inode->i_lock);
1599 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; 1605 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1606 spin_unlock(&inode->i_lock);
1600 cache->jiffies = set->jiffies; 1607 cache->jiffies = set->jiffies;
1601 cache->mask = set->mask; 1608 cache->mask = set->mask;
1602} 1609}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ee27578277f3..541b418327c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -615,6 +615,8 @@ nfs_zap_caches(struct inode *inode)
615 struct nfs_inode *nfsi = NFS_I(inode); 615 struct nfs_inode *nfsi = NFS_I(inode);
616 int mode = inode->i_mode; 616 int mode = inode->i_mode;
617 617
618 spin_lock(&inode->i_lock);
619
618 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); 620 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
619 NFS_ATTRTIMEO_UPDATE(inode) = jiffies; 621 NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
620 622
@@ -623,6 +625,8 @@ nfs_zap_caches(struct inode *inode)
623 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 625 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
624 else 626 else
625 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 627 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
628
629 spin_unlock(&inode->i_lock);
626} 630}
627 631
628static void nfs_zap_acl_cache(struct inode *inode) 632static void nfs_zap_acl_cache(struct inode *inode)
@@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode)
632 clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; 636 clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
633 if (clear_acl_cache != NULL) 637 if (clear_acl_cache != NULL)
634 clear_acl_cache(inode); 638 clear_acl_cache(inode);
639 spin_lock(&inode->i_lock);
635 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; 640 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
641 spin_unlock(&inode->i_lock);
636} 642}
637 643
638/* 644/*
@@ -841,7 +847,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
841 inode->i_uid = attr->ia_uid; 847 inode->i_uid = attr->ia_uid;
842 if ((attr->ia_valid & ATTR_GID) != 0) 848 if ((attr->ia_valid & ATTR_GID) != 0)
843 inode->i_gid = attr->ia_gid; 849 inode->i_gid = attr->ia_gid;
850 spin_lock(&inode->i_lock);
844 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 851 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
852 spin_unlock(&inode->i_lock);
845 } 853 }
846 if ((attr->ia_valid & ATTR_SIZE) != 0) { 854 if ((attr->ia_valid & ATTR_SIZE) != 0) {
847 inode->i_size = attr->ia_size; 855 inode->i_size = attr->ia_size;
@@ -1082,6 +1090,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1082 (long long)NFS_FILEID(inode), status); 1090 (long long)NFS_FILEID(inode), status);
1083 goto out; 1091 goto out;
1084 } 1092 }
1093 spin_lock(&inode->i_lock);
1085 cache_validity = nfsi->cache_validity; 1094 cache_validity = nfsi->cache_validity;
1086 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; 1095 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
1087 1096
@@ -1091,6 +1100,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1091 */ 1100 */
1092 if (verifier == nfsi->cache_change_attribute) 1101 if (verifier == nfsi->cache_change_attribute)
1093 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); 1102 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
1103 spin_unlock(&inode->i_lock);
1094 1104
1095 nfs_revalidate_mapping(inode, inode->i_mapping); 1105 nfs_revalidate_mapping(inode, inode->i_mapping);
1096 1106
@@ -1149,12 +1159,16 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1149 nfs_wb_all(inode); 1159 nfs_wb_all(inode);
1150 } 1160 }
1151 invalidate_inode_pages2(mapping); 1161 invalidate_inode_pages2(mapping);
1162
1163 spin_lock(&inode->i_lock);
1152 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; 1164 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
1153 if (S_ISDIR(inode->i_mode)) { 1165 if (S_ISDIR(inode->i_mode)) {
1154 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 1166 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
1155 /* This ensures we revalidate child dentries */ 1167 /* This ensures we revalidate child dentries */
1156 nfsi->cache_change_attribute++; 1168 nfsi->cache_change_attribute++;
1157 } 1169 }
1170 spin_unlock(&inode->i_lock);
1171
1158 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", 1172 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
1159 inode->i_sb->s_id, 1173 inode->i_sb->s_id,
1160 (long long)NFS_FILEID(inode)); 1174 (long long)NFS_FILEID(inode));
@@ -1184,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode)
1184 1198
1185 if (!nfs_have_delegation(inode, FMODE_READ)) { 1199 if (!nfs_have_delegation(inode, FMODE_READ)) {
1186 /* Mark the attribute cache for revalidation */ 1200 /* Mark the attribute cache for revalidation */
1201 spin_lock(&inode->i_lock);
1187 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 1202 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1188 /* Directories and symlinks: invalidate page cache too */ 1203 /* Directories and symlinks: invalidate page cache too */
1189 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 1204 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1190 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1205 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
1206 spin_unlock(&inode->i_lock);
1191 } 1207 }
1192 nfsi->cache_change_attribute ++; 1208 nfsi->cache_change_attribute ++;
1193 atomic_dec(&nfsi->data_updates); 1209 atomic_dec(&nfsi->data_updates);
@@ -1212,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1212 if (nfs_have_delegation(inode, FMODE_READ)) 1228 if (nfs_have_delegation(inode, FMODE_READ))
1213 return 0; 1229 return 0;
1214 1230
1231 spin_lock(&inode->i_lock);
1232
1215 /* Are we in the process of updating data on the server? */ 1233 /* Are we in the process of updating data on the server? */
1216 data_unstable = nfs_caches_unstable(inode); 1234 data_unstable = nfs_caches_unstable(inode);
1217 1235
@@ -1226,13 +1244,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1226 } 1244 }
1227 } 1245 }
1228 1246
1229 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1247 if ((fattr->valid & NFS_ATTR_FATTR) == 0) {
1248 spin_unlock(&inode->i_lock);
1230 return 0; 1249 return 0;
1250 }
1231 1251
1232 /* Has the inode gone and changed behind our back? */ 1252 /* Has the inode gone and changed behind our back? */
1233 if (nfsi->fileid != fattr->fileid 1253 if (nfsi->fileid != fattr->fileid
1234 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1254 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1255 spin_unlock(&inode->i_lock);
1235 return -EIO; 1256 return -EIO;
1257 }
1236 1258
1237 cur_size = i_size_read(inode); 1259 cur_size = i_size_read(inode);
1238 new_isize = nfs_size_to_loff_t(fattr->size); 1260 new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1271,6 +1293,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1271 nfsi->cache_validity |= NFS_INO_INVALID_ATIME; 1293 nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
1272 1294
1273 nfsi->read_cache_jiffies = fattr->timestamp; 1295 nfsi->read_cache_jiffies = fattr->timestamp;
1296 spin_unlock(&inode->i_lock);
1274 return 0; 1297 return 0;
1275} 1298}
1276 1299
@@ -1309,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
1309 goto out_err; 1332 goto out_err;
1310 } 1333 }
1311 1334
1335 spin_lock(&inode->i_lock);
1336
1312 /* 1337 /*
1313 * Make sure the inode's type hasn't changed. 1338 * Make sure the inode's type hasn't changed.
1314 */ 1339 */
1315 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1340 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1341 spin_unlock(&inode->i_lock);
1316 goto out_changed; 1342 goto out_changed;
1343 }
1317 1344
1318 /* 1345 /*
1319 * Update the read time so we don't revalidate too often. 1346 * Update the read time so we don't revalidate too often.
@@ -1406,6 +1433,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
1406 if (!nfs_have_delegation(inode, FMODE_READ)) 1433 if (!nfs_have_delegation(inode, FMODE_READ))
1407 nfsi->cache_validity |= invalid; 1434 nfsi->cache_validity |= invalid;
1408 1435
1436 spin_unlock(&inode->i_lock);
1409 return 0; 1437 return 0;
1410 out_changed: 1438 out_changed:
1411 /* 1439 /*
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index a020e650ffc2..6a5bbc0ae941 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
308 nfs_begin_data_update(inode); 308 nfs_begin_data_update(inode);
309 status = rpc_call(server->client_acl, ACLPROC3_SETACL, 309 status = rpc_call(server->client_acl, ACLPROC3_SETACL,
310 &args, &fattr, 0); 310 &args, &fattr, 0);
311 spin_lock(&inode->i_lock);
311 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; 312 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
313 spin_unlock(&inode->i_lock);
312 nfs_end_data_update(inode); 314 nfs_end_data_update(inode);
313 dprintk("NFS reply setacl: %d\n", status); 315 dprintk("NFS reply setacl: %d\n", status);
314 316
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 90df0500ca1b..6ceb1d471f20 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
140 if (rdata->res.eof != 0 || result == 0) 140 if (rdata->res.eof != 0 || result == 0)
141 break; 141 break;
142 } while (count); 142 } while (count);
143 spin_lock(&inode->i_lock);
143 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 144 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
145 spin_unlock(&inode->i_lock);
144 146
145 if (count) 147 if (count)
146 memclear_highpage_flush(page, rdata->args.pgbase, count); 148 memclear_highpage_flush(page, rdata->args.pgbase, count);
@@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task)
473 } 475 }
474 task->tk_status = -EIO; 476 task->tk_status = -EIO;
475 } 477 }
478 spin_lock(&data->inode->i_lock);
476 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; 479 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
480 spin_unlock(&data->inode->i_lock);
477 data->complete(data, status); 481 data->complete(data, status);
478} 482}
479 483
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index deef9567788a..9a6047ff1b25 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -238,8 +238,11 @@ static inline int nfs_caches_unstable(struct inode *inode)
238 238
239static inline void NFS_CACHEINV(struct inode *inode) 239static inline void NFS_CACHEINV(struct inode *inode)
240{ 240{
241 if (!nfs_caches_unstable(inode)) 241 if (!nfs_caches_unstable(inode)) {
242 spin_lock(&inode->i_lock);
242 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; 243 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
244 spin_unlock(&inode->i_lock);
245 }
243} 246}
244 247
245static inline int nfs_server_capable(struct inode *inode, int cap) 248static inline int nfs_server_capable(struct inode *inode, int cap)