aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Trond Myklebust <trond.myklebust@primarydata.com> 2015-07-09 12:40:01 -0400
committer Trond Myklebust <trond.myklebust@primarydata.com> 2015-07-11 10:16:16 -0400
commit 7f27392cd4cbcadeeaff9eedebcaec8fae9aec8e (patch)
tree c7533a097acf5501b5a0451cc713feb3bcefc597
parent df9cecc1a36d70ec09d34e83bad452064754fdc4 (diff)
pNFS: Fix races between return-on-close and layoutreturn.
If one or more of the layout segments reports an error during I/O, then we may have to send a layoutreturn to report the error back to the NFS metadata server.

This patch ensures that the return-on-close code can detect the outstanding layoutreturn, and not preempt it.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- fs/nfs/nfs4proc.c 2
-rw-r--r-- fs/nfs/pnfs.c 63
2 files changed, 35 insertions, 30 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 671498ca36d7..c5c9e0d070f8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata)
7972 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 7972 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
7973 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); 7973 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
7974 pnfs_clear_layoutreturn_waitbit(lo); 7974 pnfs_clear_layoutreturn_waitbit(lo);
7975 clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
7976 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
7977 lo->plh_block_lgets--; 7975 lo->plh_block_lgets--;
7978 spin_unlock(&lo->plh_inode->i_lock); 7976 spin_unlock(&lo->plh_inode->i_lock);
7979 pnfs_free_lseg_list(&freeme); 7977 pnfs_free_lseg_list(&freeme);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8e9f467e409c..27e2bcaa88da 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
352{ 352{
353 struct pnfs_layout_segment *s; 353 struct pnfs_layout_segment *s;
354 354
355 if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 355 if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
356 return false; 356 return false;
357 357
358 list_for_each_entry(s, &lo->plh_segs, pls_list) 358 list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
362 return true; 362 return true;
363} 363}
364 364
365static bool
366pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
367{
368 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
369 return false;
370 lo->plh_return_iomode = 0;
371 lo->plh_block_lgets++;
372 pnfs_get_layout_hdr(lo);
373 clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
374 return true;
375}
376
365static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, 377static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
366 struct pnfs_layout_hdr *lo, struct inode *inode) 378 struct pnfs_layout_hdr *lo, struct inode *inode)
367{ 379{
@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
372 if (pnfs_layout_need_return(lo, lseg)) { 384 if (pnfs_layout_need_return(lo, lseg)) {
373 nfs4_stateid stateid; 385 nfs4_stateid stateid;
374 enum pnfs_iomode iomode; 386 enum pnfs_iomode iomode;
387 bool send;
375 388
376 stateid = lo->plh_stateid; 389 stateid = lo->plh_stateid;
377 iomode = lo->plh_return_iomode; 390 iomode = lo->plh_return_iomode;
378 /* decreased in pnfs_send_layoutreturn() */ 391 send = pnfs_prepare_layoutreturn(lo);
379 lo->plh_block_lgets++;
380 lo->plh_return_iomode = 0;
381 spin_unlock(&inode->i_lock); 392 spin_unlock(&inode->i_lock);
382 pnfs_get_layout_hdr(lo); 393 if (send) {
383 394 /* Send an async layoutreturn so we dont deadlock */
384 /* Send an async layoutreturn so we dont deadlock */ 395 pnfs_send_layoutreturn(lo, stateid, iomode, false);
385 pnfs_send_layoutreturn(lo, stateid, iomode, false); 396 }
386 } else 397 } else
387 spin_unlock(&inode->i_lock); 398 spin_unlock(&inode->i_lock);
388} 399}
@@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
924 clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 935 clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
925 smp_mb__after_atomic(); 936 smp_mb__after_atomic();
926 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 937 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
938 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
927} 939}
928 940
929static int 941static int
@@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino)
978 LIST_HEAD(tmp_list); 990 LIST_HEAD(tmp_list);
979 nfs4_stateid stateid; 991 nfs4_stateid stateid;
980 int status = 0, empty; 992 int status = 0, empty;
993 bool send;
981 994
982 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 995 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
983 996
@@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino)
1007 /* Don't send a LAYOUTRETURN if list was initially empty */ 1020 /* Don't send a LAYOUTRETURN if list was initially empty */
1008 if (empty) { 1021 if (empty) {
1009 spin_unlock(&ino->i_lock); 1022 spin_unlock(&ino->i_lock);
1010 pnfs_put_layout_hdr(lo);
1011 dprintk("NFS: %s no layout segments to return\n", __func__); 1023 dprintk("NFS: %s no layout segments to return\n", __func__);
1012 goto out; 1024 goto out_put_layout_hdr;
1013 } 1025 }
1014 1026
1015 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 1027 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
1016 lo->plh_block_lgets++; 1028 send = pnfs_prepare_layoutreturn(lo);
1017 spin_unlock(&ino->i_lock); 1029 spin_unlock(&ino->i_lock);
1018 pnfs_free_lseg_list(&tmp_list); 1030 pnfs_free_lseg_list(&tmp_list);
1019 1031 if (send)
1020 status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1032 status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
1033out_put_layout_hdr:
1034 pnfs_put_layout_hdr(lo);
1021out: 1035out:
1022 dprintk("<-- %s status: %d\n", __func__, status); 1036 dprintk("<-- %s status: %d\n", __func__, status);
1023 return status; 1037 return status;
@@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino)
1097out_noroc: 1111out_noroc:
1098 if (lo) { 1112 if (lo) {
1099 stateid = lo->plh_stateid; 1113 stateid = lo->plh_stateid;
1100 layoutreturn = 1114 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1101 test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1115 &lo->plh_flags))
1102 &lo->plh_flags); 1116 layoutreturn = pnfs_prepare_layoutreturn(lo);
1103 if (layoutreturn) {
1104 lo->plh_block_lgets++;
1105 pnfs_get_layout_hdr(lo);
1106 }
1107 } 1117 }
1108 spin_unlock(&ino->i_lock); 1118 spin_unlock(&ino->i_lock);
1109 if (layoutreturn) { 1119 if (layoutreturn) {
@@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
1163 */ 1173 */
1164 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 1174 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
1165 stateid = lo->plh_stateid; 1175 stateid = lo->plh_stateid;
1166 layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1176 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1167 &lo->plh_flags); 1177 &lo->plh_flags))
1168 if (layoutreturn) { 1178 layoutreturn = pnfs_prepare_layoutreturn(lo);
1169 lo->plh_block_lgets++; 1179 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
1170 pnfs_get_layout_hdr(lo); 1180 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
1171 }
1172 1181
1173 spin_unlock(&ino->i_lock); 1182 spin_unlock(&ino->i_lock);
1174 if (layoutreturn) { 1183 if (layoutreturn) {
1175 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
1176 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); 1184 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
1177 return true; 1185 return true;
1178 } 1186 }
@@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
1693 spin_lock(&inode->i_lock); 1701 spin_lock(&inode->i_lock);
1694 /* set failure bit so that pnfs path will be retried later */ 1702 /* set failure bit so that pnfs path will be retried later */
1695 pnfs_layout_set_fail_bit(lo, iomode); 1703 pnfs_layout_set_fail_bit(lo, iomode);
1696 set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
1697 if (lo->plh_return_iomode == 0) 1704 if (lo->plh_return_iomode == 0)
1698 lo->plh_return_iomode = range.iomode; 1705 lo->plh_return_iomode = range.iomode;
1699 else if (lo->plh_return_iomode != range.iomode) 1706 else if (lo->plh_return_iomode != range.iomode)