aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2006-03-20 13:44:51 -0500
committerTrond Myklebust <Trond.Myklebust@netapp.com>2006-03-20 13:44:51 -0500
commitc42de9dd67250fe984e0e31c9b542d721af6454b (patch)
treea4079b25a044f7124837f572e5342dc7614ca75d
parent7d46a49f512e8d10b23353781a8ba85edd4fa640 (diff)
NFS: Fix a race in nfs_sync_inode()
Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/write.c72
-rw-r--r--include/linux/nfs_fs.h10
3 files changed, 56 insertions, 30 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a0cda53461b3..60aac58270a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
137static int 137static int
138nfs_write_inode(struct inode *inode, int sync) 138nfs_write_inode(struct inode *inode, int sync)
139{ 139{
140 int flags = sync ? FLUSH_WAIT : 0; 140 int flags = sync ? FLUSH_SYNC : 0;
141 int ret; 141 int ret;
142 142
143 ret = nfs_commit_inode(inode, flags); 143 ret = nfs_commit_inode(inode, flags);
@@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1051 int err; 1051 int err;
1052 1052
1053 /* Flush out writes to the server in order to update c/mtime */ 1053 /* Flush out writes to the server in order to update c/mtime */
1054 nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); 1054 nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
1055 1055
1056 /* 1056 /*
1057 * We may force a getattr if the user cares about atime. 1057 * We may force a getattr if the user cares about atime.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2beb1268bb1b..3f5225404c97 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
539 * 539 *
540 * Interruptible by signals only if mounted with intr flag. 540 * Interruptible by signals only if mounted with intr flag.
541 */ 541 */
542static int 542static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
543nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
544{ 543{
545 struct nfs_inode *nfsi = NFS_I(inode); 544 struct nfs_inode *nfsi = NFS_I(inode);
546 struct nfs_page *req; 545 struct nfs_page *req;
@@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
553 else 552 else
554 idx_end = idx_start + npages - 1; 553 idx_end = idx_start + npages - 1;
555 554
556 spin_lock(&nfsi->req_lock);
557 next = idx_start; 555 next = idx_start;
558 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { 556 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
559 if (req->wb_index > idx_end) 557 if (req->wb_index > idx_end)
@@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
566 spin_unlock(&nfsi->req_lock); 564 spin_unlock(&nfsi->req_lock);
567 error = nfs_wait_on_request(req); 565 error = nfs_wait_on_request(req);
568 nfs_release_request(req); 566 nfs_release_request(req);
567 spin_lock(&nfsi->req_lock);
569 if (error < 0) 568 if (error < 0)
570 return error; 569 return error;
571 spin_lock(&nfsi->req_lock);
572 res++; 570 res++;
573 } 571 }
574 spin_unlock(&nfsi->req_lock);
575 return res; 572 return res;
576} 573}
577 574
575static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
576{
577 struct nfs_inode *nfsi = NFS_I(inode);
578 int ret;
579
580 spin_lock(&nfsi->req_lock);
581 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
582 spin_unlock(&nfsi->req_lock);
583 return ret;
584}
585
578/* 586/*
579 * nfs_scan_dirty - Scan an inode for dirty requests 587 * nfs_scan_dirty - Scan an inode for dirty requests
580 * @inode: NFS inode to scan 588 * @inode: NFS inode to scan
@@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
626 } 634 }
627 return res; 635 return res;
628} 636}
637#else
638static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
639{
640 return 0;
641}
629#endif 642#endif
630 643
631static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) 644static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
1421 .rpc_call_done = nfs_commit_done, 1434 .rpc_call_done = nfs_commit_done,
1422 .rpc_release = nfs_commit_release, 1435 .rpc_release = nfs_commit_release,
1423}; 1436};
1437#else
1438static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1439{
1440 return 0;
1441}
1424#endif 1442#endif
1425 1443
1426static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, 1444static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
1460} 1478}
1461#endif 1479#endif
1462 1480
1463int nfs_sync_inode(struct inode *inode, unsigned long idx_start, 1481int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1464 unsigned int npages, int how) 1482 unsigned int npages, int how)
1465{ 1483{
1484 struct nfs_inode *nfsi = NFS_I(inode);
1485 LIST_HEAD(head);
1466 int nocommit = how & FLUSH_NOCOMMIT; 1486 int nocommit = how & FLUSH_NOCOMMIT;
1467 int wait = how & FLUSH_WAIT; 1487 int pages, ret;
1468 int error;
1469
1470 how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
1471 1488
1489 how &= ~FLUSH_NOCOMMIT;
1490 spin_lock(&nfsi->req_lock);
1472 do { 1491 do {
1473 if (wait) { 1492 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1474 error = nfs_wait_on_requests(inode, idx_start, npages); 1493 if (ret != 0)
1475 if (error != 0)
1476 continue;
1477 }
1478 error = nfs_flush_inode(inode, idx_start, npages, how);
1479 if (error != 0)
1480 continue; 1494 continue;
1481 if (!nocommit) 1495 pages = nfs_scan_dirty(inode, &head, idx_start, npages);
1482 error = nfs_commit_inode(inode, how); 1496 if (pages != 0) {
1483 } while (error > 0); 1497 spin_unlock(&nfsi->req_lock);
1484 return error; 1498 ret = nfs_flush_list(inode, &head, pages, how);
1499 spin_lock(&nfsi->req_lock);
1500 continue;
1501 }
1502 if (nocommit)
1503 break;
1504 pages = nfs_scan_commit(inode, &head, 0, 0);
1505 if (pages == 0)
1506 break;
1507 spin_unlock(&nfsi->req_lock);
1508 ret = nfs_commit_list(inode, &head, how);
1509 spin_lock(&nfsi->req_lock);
1510 } while (ret >= 0);
1511 spin_unlock(&nfsi->req_lock);
1512 return ret;
1485} 1513}
1486 1514
1487int nfs_init_writepagecache(void) 1515int nfs_init_writepagecache(void)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 55de0770df4a..cbebd7d1b9e8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -56,9 +56,7 @@
56 * When flushing a cluster of dirty pages, there can be different 56 * When flushing a cluster of dirty pages, there can be different
57 * strategies: 57 * strategies:
58 */ 58 */
59#define FLUSH_AGING 0 /* only flush old buffers */
60#define FLUSH_SYNC 1 /* file being synced, or contention */ 59#define FLUSH_SYNC 1 /* file being synced, or contention */
61#define FLUSH_WAIT 2 /* wait for completion */
62#define FLUSH_STABLE 4 /* commit to stable storage */ 60#define FLUSH_STABLE 4 /* commit to stable storage */
63#define FLUSH_LOWPRI 8 /* low priority background flush */ 61#define FLUSH_LOWPRI 8 /* low priority background flush */
64#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ 62#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
@@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p);
419 * Try to write back everything synchronously (but check the 417 * Try to write back everything synchronously (but check the
420 * return value!) 418 * return value!)
421 */ 419 */
422extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); 420extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
423#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 421#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
424extern int nfs_commit_inode(struct inode *, int); 422extern int nfs_commit_inode(struct inode *, int);
425extern void nfs_commit_release(void *wdata); 423extern void nfs_commit_release(void *wdata);
@@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
440static inline int 438static inline int
441nfs_wb_all(struct inode *inode) 439nfs_wb_all(struct inode *inode)
442{ 440{
443 int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); 441 int error = nfs_sync_inode_wait(inode, 0, 0, 0);
444 return (error < 0) ? error : 0; 442 return (error < 0) ? error : 0;
445} 443}
446 444
@@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode)
449 */ 447 */
450static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) 448static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
451{ 449{
452 int error = nfs_sync_inode(inode, page->index, 1, 450 int error = nfs_sync_inode_wait(inode, page->index, 1,
453 how | FLUSH_WAIT | FLUSH_STABLE); 451 how | FLUSH_STABLE);
454 return (error < 0) ? error : 0; 452 return (error < 0) ? error : 0;
455} 453}
456 454