diff options
author | Chuck Lever <cel@citi.umich.edu> | 2005-08-18 14:24:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-18 15:53:57 -0400 |
commit | dc59250c6ebed099a9bc0a11298e2281dd896657 (patch) | |
tree | 80c294437c0868d90abfa617d873370e6dbe6565 | |
parent | 412d582ec1dd59aab2353f8cb7e74f2c79cd20b9 (diff) |
[PATCH] NFS: Introduce the use of inode->i_lock to protect fields in nfsi
Eventually we want to eliminate the use of the global kernel lock (the BKL)
entirely from the NFS client. To do this, we need to protect the fields in the
nfs_inode structure adequately. Start by serializing updates to the
"cache_validity" field with inode->i_lock.
Note that this change addresses an SMP hang found by njw@osdl.org, where
processes deadlock because nfs_end_data_update() and nfs_revalidate_mapping()
update the "cache_validity" field without proper serialization.
Test plan:
Millions of fsx ops on SMP clients. Run Nick Wilson's breaknfs program on
large SMP clients.
Signed-off-by: Chuck Lever <cel@netapp.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/nfs/dir.c | 7 | ||||
-rw-r--r-- | fs/nfs/inode.c | 34 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 2 | ||||
-rw-r--r-- | fs/nfs/read.c | 4 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 5 |
5 files changed, 48 insertions, 4 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 27cf5577f239..147cbf9261ce 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -189,7 +189,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
189 | goto error; | 189 | goto error; |
190 | } | 190 | } |
191 | SetPageUptodate(page); | 191 | SetPageUptodate(page); |
192 | spin_lock(&inode->i_lock); | ||
192 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | 193 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; |
194 | spin_unlock(&inode->i_lock); | ||
193 | /* Ensure consistent page alignment of the data. | 195 | /* Ensure consistent page alignment of the data. |
194 | * Note: assumes we have exclusive access to this mapping either | 196 | * Note: assumes we have exclusive access to this mapping either |
195 | * through inode->i_sem or some other mechanism. | 197 | * through inode->i_sem or some other mechanism. |
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
462 | page, | 464 | page, |
463 | NFS_SERVER(inode)->dtsize, | 465 | NFS_SERVER(inode)->dtsize, |
464 | desc->plus); | 466 | desc->plus); |
467 | spin_lock(&inode->i_lock); | ||
465 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | 468 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; |
469 | spin_unlock(&inode->i_lock); | ||
466 | desc->page = page; | 470 | desc->page = page; |
467 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 471 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
468 | if (desc->error >= 0) { | 472 | if (desc->error >= 0) { |
@@ -1596,7 +1600,10 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) | |||
1596 | put_rpccred(cache->cred); | 1600 | put_rpccred(cache->cred); |
1597 | cache->cred = get_rpccred(set->cred); | 1601 | cache->cred = get_rpccred(set->cred); |
1598 | } | 1602 | } |
1603 | /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ | ||
1604 | spin_lock(&inode->i_lock); | ||
1599 | nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; | 1605 | nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; |
1606 | spin_unlock(&inode->i_lock); | ||
1600 | cache->jiffies = set->jiffies; | 1607 | cache->jiffies = set->jiffies; |
1601 | cache->mask = set->mask; | 1608 | cache->mask = set->mask; |
1602 | } | 1609 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ee27578277f3..541b418327c8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -615,6 +615,8 @@ nfs_zap_caches(struct inode *inode) | |||
615 | struct nfs_inode *nfsi = NFS_I(inode); | 615 | struct nfs_inode *nfsi = NFS_I(inode); |
616 | int mode = inode->i_mode; | 616 | int mode = inode->i_mode; |
617 | 617 | ||
618 | spin_lock(&inode->i_lock); | ||
619 | |||
618 | NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); | 620 | NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); |
619 | NFS_ATTRTIMEO_UPDATE(inode) = jiffies; | 621 | NFS_ATTRTIMEO_UPDATE(inode) = jiffies; |
620 | 622 | ||
@@ -623,6 +625,8 @@ nfs_zap_caches(struct inode *inode) | |||
623 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | 625 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; |
624 | else | 626 | else |
625 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | 627 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; |
628 | |||
629 | spin_unlock(&inode->i_lock); | ||
626 | } | 630 | } |
627 | 631 | ||
628 | static void nfs_zap_acl_cache(struct inode *inode) | 632 | static void nfs_zap_acl_cache(struct inode *inode) |
@@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode) | |||
632 | clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; | 636 | clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; |
633 | if (clear_acl_cache != NULL) | 637 | if (clear_acl_cache != NULL) |
634 | clear_acl_cache(inode); | 638 | clear_acl_cache(inode); |
639 | spin_lock(&inode->i_lock); | ||
635 | NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; | 640 | NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; |
641 | spin_unlock(&inode->i_lock); | ||
636 | } | 642 | } |
637 | 643 | ||
638 | /* | 644 | /* |
@@ -841,7 +847,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
841 | inode->i_uid = attr->ia_uid; | 847 | inode->i_uid = attr->ia_uid; |
842 | if ((attr->ia_valid & ATTR_GID) != 0) | 848 | if ((attr->ia_valid & ATTR_GID) != 0) |
843 | inode->i_gid = attr->ia_gid; | 849 | inode->i_gid = attr->ia_gid; |
850 | spin_lock(&inode->i_lock); | ||
844 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 851 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
852 | spin_unlock(&inode->i_lock); | ||
845 | } | 853 | } |
846 | if ((attr->ia_valid & ATTR_SIZE) != 0) { | 854 | if ((attr->ia_valid & ATTR_SIZE) != 0) { |
847 | inode->i_size = attr->ia_size; | 855 | inode->i_size = attr->ia_size; |
@@ -1082,6 +1090,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
1082 | (long long)NFS_FILEID(inode), status); | 1090 | (long long)NFS_FILEID(inode), status); |
1083 | goto out; | 1091 | goto out; |
1084 | } | 1092 | } |
1093 | spin_lock(&inode->i_lock); | ||
1085 | cache_validity = nfsi->cache_validity; | 1094 | cache_validity = nfsi->cache_validity; |
1086 | nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; | 1095 | nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; |
1087 | 1096 | ||
@@ -1091,6 +1100,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
1091 | */ | 1100 | */ |
1092 | if (verifier == nfsi->cache_change_attribute) | 1101 | if (verifier == nfsi->cache_change_attribute) |
1093 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); | 1102 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); |
1103 | spin_unlock(&inode->i_lock); | ||
1094 | 1104 | ||
1095 | nfs_revalidate_mapping(inode, inode->i_mapping); | 1105 | nfs_revalidate_mapping(inode, inode->i_mapping); |
1096 | 1106 | ||
@@ -1149,12 +1159,16 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) | |||
1149 | nfs_wb_all(inode); | 1159 | nfs_wb_all(inode); |
1150 | } | 1160 | } |
1151 | invalidate_inode_pages2(mapping); | 1161 | invalidate_inode_pages2(mapping); |
1162 | |||
1163 | spin_lock(&inode->i_lock); | ||
1152 | nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; | 1164 | nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; |
1153 | if (S_ISDIR(inode->i_mode)) { | 1165 | if (S_ISDIR(inode->i_mode)) { |
1154 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); | 1166 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); |
1155 | /* This ensures we revalidate child dentries */ | 1167 | /* This ensures we revalidate child dentries */ |
1156 | nfsi->cache_change_attribute++; | 1168 | nfsi->cache_change_attribute++; |
1157 | } | 1169 | } |
1170 | spin_unlock(&inode->i_lock); | ||
1171 | |||
1158 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", | 1172 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", |
1159 | inode->i_sb->s_id, | 1173 | inode->i_sb->s_id, |
1160 | (long long)NFS_FILEID(inode)); | 1174 | (long long)NFS_FILEID(inode)); |
@@ -1184,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode) | |||
1184 | 1198 | ||
1185 | if (!nfs_have_delegation(inode, FMODE_READ)) { | 1199 | if (!nfs_have_delegation(inode, FMODE_READ)) { |
1186 | /* Mark the attribute cache for revalidation */ | 1200 | /* Mark the attribute cache for revalidation */ |
1201 | spin_lock(&inode->i_lock); | ||
1187 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 1202 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; |
1188 | /* Directories and symlinks: invalidate page cache too */ | 1203 | /* Directories and symlinks: invalidate page cache too */ |
1189 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 1204 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
1190 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 1205 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
1206 | spin_unlock(&inode->i_lock); | ||
1191 | } | 1207 | } |
1192 | nfsi->cache_change_attribute ++; | 1208 | nfsi->cache_change_attribute ++; |
1193 | atomic_dec(&nfsi->data_updates); | 1209 | atomic_dec(&nfsi->data_updates); |
@@ -1212,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1212 | if (nfs_have_delegation(inode, FMODE_READ)) | 1228 | if (nfs_have_delegation(inode, FMODE_READ)) |
1213 | return 0; | 1229 | return 0; |
1214 | 1230 | ||
1231 | spin_lock(&inode->i_lock); | ||
1232 | |||
1215 | /* Are we in the process of updating data on the server? */ | 1233 | /* Are we in the process of updating data on the server? */ |
1216 | data_unstable = nfs_caches_unstable(inode); | 1234 | data_unstable = nfs_caches_unstable(inode); |
1217 | 1235 | ||
@@ -1226,13 +1244,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1226 | } | 1244 | } |
1227 | } | 1245 | } |
1228 | 1246 | ||
1229 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 1247 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) { |
1248 | spin_unlock(&inode->i_lock); | ||
1230 | return 0; | 1249 | return 0; |
1250 | } | ||
1231 | 1251 | ||
1232 | /* Has the inode gone and changed behind our back? */ | 1252 | /* Has the inode gone and changed behind our back? */ |
1233 | if (nfsi->fileid != fattr->fileid | 1253 | if (nfsi->fileid != fattr->fileid |
1234 | || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) | 1254 | || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { |
1255 | spin_unlock(&inode->i_lock); | ||
1235 | return -EIO; | 1256 | return -EIO; |
1257 | } | ||
1236 | 1258 | ||
1237 | cur_size = i_size_read(inode); | 1259 | cur_size = i_size_read(inode); |
1238 | new_isize = nfs_size_to_loff_t(fattr->size); | 1260 | new_isize = nfs_size_to_loff_t(fattr->size); |
@@ -1271,6 +1293,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1271 | nfsi->cache_validity |= NFS_INO_INVALID_ATIME; | 1293 | nfsi->cache_validity |= NFS_INO_INVALID_ATIME; |
1272 | 1294 | ||
1273 | nfsi->read_cache_jiffies = fattr->timestamp; | 1295 | nfsi->read_cache_jiffies = fattr->timestamp; |
1296 | spin_unlock(&inode->i_lock); | ||
1274 | return 0; | 1297 | return 0; |
1275 | } | 1298 | } |
1276 | 1299 | ||
@@ -1309,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign | |||
1309 | goto out_err; | 1332 | goto out_err; |
1310 | } | 1333 | } |
1311 | 1334 | ||
1335 | spin_lock(&inode->i_lock); | ||
1336 | |||
1312 | /* | 1337 | /* |
1313 | * Make sure the inode's type hasn't changed. | 1338 | * Make sure the inode's type hasn't changed. |
1314 | */ | 1339 | */ |
1315 | if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) | 1340 | if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { |
1341 | spin_unlock(&inode->i_lock); | ||
1316 | goto out_changed; | 1342 | goto out_changed; |
1343 | } | ||
1317 | 1344 | ||
1318 | /* | 1345 | /* |
1319 | * Update the read time so we don't revalidate too often. | 1346 | * Update the read time so we don't revalidate too often. |
@@ -1406,6 +1433,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign | |||
1406 | if (!nfs_have_delegation(inode, FMODE_READ)) | 1433 | if (!nfs_have_delegation(inode, FMODE_READ)) |
1407 | nfsi->cache_validity |= invalid; | 1434 | nfsi->cache_validity |= invalid; |
1408 | 1435 | ||
1436 | spin_unlock(&inode->i_lock); | ||
1409 | return 0; | 1437 | return 0; |
1410 | out_changed: | 1438 | out_changed: |
1411 | /* | 1439 | /* |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index a020e650ffc2..6a5bbc0ae941 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
308 | nfs_begin_data_update(inode); | 308 | nfs_begin_data_update(inode); |
309 | status = rpc_call(server->client_acl, ACLPROC3_SETACL, | 309 | status = rpc_call(server->client_acl, ACLPROC3_SETACL, |
310 | &args, &fattr, 0); | 310 | &args, &fattr, 0); |
311 | spin_lock(&inode->i_lock); | ||
311 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; | 312 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; |
313 | spin_unlock(&inode->i_lock); | ||
312 | nfs_end_data_update(inode); | 314 | nfs_end_data_update(inode); |
313 | dprintk("NFS reply setacl: %d\n", status); | 315 | dprintk("NFS reply setacl: %d\n", status); |
314 | 316 | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 90df0500ca1b..6ceb1d471f20 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
140 | if (rdata->res.eof != 0 || result == 0) | 140 | if (rdata->res.eof != 0 || result == 0) |
141 | break; | 141 | break; |
142 | } while (count); | 142 | } while (count); |
143 | spin_lock(&inode->i_lock); | ||
143 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | 144 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; |
145 | spin_unlock(&inode->i_lock); | ||
144 | 146 | ||
145 | if (count) | 147 | if (count) |
146 | memclear_highpage_flush(page, rdata->args.pgbase, count); | 148 | memclear_highpage_flush(page, rdata->args.pgbase, count); |
@@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task) | |||
473 | } | 475 | } |
474 | task->tk_status = -EIO; | 476 | task->tk_status = -EIO; |
475 | } | 477 | } |
478 | spin_lock(&data->inode->i_lock); | ||
476 | NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; | 479 | NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; |
480 | spin_unlock(&data->inode->i_lock); | ||
477 | data->complete(data, status); | 481 | data->complete(data, status); |
478 | } | 482 | } |
479 | 483 | ||
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index deef9567788a..9a6047ff1b25 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -238,8 +238,11 @@ static inline int nfs_caches_unstable(struct inode *inode) | |||
238 | 238 | ||
239 | static inline void NFS_CACHEINV(struct inode *inode) | 239 | static inline void NFS_CACHEINV(struct inode *inode) |
240 | { | 240 | { |
241 | if (!nfs_caches_unstable(inode)) | 241 | if (!nfs_caches_unstable(inode)) { |
242 | spin_lock(&inode->i_lock); | ||
242 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; | 243 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; |
244 | spin_unlock(&inode->i_lock); | ||
245 | } | ||
243 | } | 246 | } |
244 | 247 | ||
245 | static inline int nfs_server_capable(struct inode *inode, int cap) | 248 | static inline int nfs_server_capable(struct inode *inode, int cap) |