diff options
| author | Trond Myklebust <trond.myklebust@primarydata.com> | 2015-07-09 12:40:01 -0400 |
|---|---|---|
| committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2015-07-11 10:16:16 -0400 |
| commit | 7f27392cd4cbcadeeaff9eedebcaec8fae9aec8e (patch) | |
| tree | c7533a097acf5501b5a0451cc713feb3bcefc597 | |
| parent | df9cecc1a36d70ec09d34e83bad452064754fdc4 (diff) | |
pNFS: Fix races between return-on-close and layoutreturn.
If one or more of the layout segments reports an error during I/O, then
we may have to send a layoutreturn to report the error back to the NFS
metadata server.
This patch ensures that the return-on-close code can detect the
outstanding layoutreturn and avoid preempting it.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
| -rw-r--r-- | fs/nfs/nfs4proc.c | 2 | ||||
| -rw-r--r-- | fs/nfs/pnfs.c | 63 |
2 files changed, 35 insertions, 30 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 671498ca36d7..c5c9e0d070f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
| 7972 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 7972 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
| 7973 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); | 7973 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); |
| 7974 | pnfs_clear_layoutreturn_waitbit(lo); | 7974 | pnfs_clear_layoutreturn_waitbit(lo); |
| 7975 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | ||
| 7976 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | ||
| 7977 | lo->plh_block_lgets--; | 7975 | lo->plh_block_lgets--; |
| 7978 | spin_unlock(&lo->plh_inode->i_lock); | 7976 | spin_unlock(&lo->plh_inode->i_lock); |
| 7979 | pnfs_free_lseg_list(&freeme); | 7977 | pnfs_free_lseg_list(&freeme); |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8e9f467e409c..27e2bcaa88da 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, | |||
| 352 | { | 352 | { |
| 353 | struct pnfs_layout_segment *s; | 353 | struct pnfs_layout_segment *s; |
| 354 | 354 | ||
| 355 | if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | 355 | if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) |
| 356 | return false; | 356 | return false; |
| 357 | 357 | ||
| 358 | list_for_each_entry(s, &lo->plh_segs, pls_list) | 358 | list_for_each_entry(s, &lo->plh_segs, pls_list) |
| @@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, | |||
| 362 | return true; | 362 | return true; |
| 363 | } | 363 | } |
| 364 | 364 | ||
| 365 | static bool | ||
| 366 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) | ||
| 367 | { | ||
| 368 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | ||
| 369 | return false; | ||
| 370 | lo->plh_return_iomode = 0; | ||
| 371 | lo->plh_block_lgets++; | ||
| 372 | pnfs_get_layout_hdr(lo); | ||
| 373 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | ||
| 374 | return true; | ||
| 375 | } | ||
| 376 | |||
| 365 | static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, | 377 | static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, |
| 366 | struct pnfs_layout_hdr *lo, struct inode *inode) | 378 | struct pnfs_layout_hdr *lo, struct inode *inode) |
| 367 | { | 379 | { |
| @@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, | |||
| 372 | if (pnfs_layout_need_return(lo, lseg)) { | 384 | if (pnfs_layout_need_return(lo, lseg)) { |
| 373 | nfs4_stateid stateid; | 385 | nfs4_stateid stateid; |
| 374 | enum pnfs_iomode iomode; | 386 | enum pnfs_iomode iomode; |
| 387 | bool send; | ||
| 375 | 388 | ||
| 376 | stateid = lo->plh_stateid; | 389 | stateid = lo->plh_stateid; |
| 377 | iomode = lo->plh_return_iomode; | 390 | iomode = lo->plh_return_iomode; |
| 378 | /* decreased in pnfs_send_layoutreturn() */ | 391 | send = pnfs_prepare_layoutreturn(lo); |
| 379 | lo->plh_block_lgets++; | ||
| 380 | lo->plh_return_iomode = 0; | ||
| 381 | spin_unlock(&inode->i_lock); | 392 | spin_unlock(&inode->i_lock); |
| 382 | pnfs_get_layout_hdr(lo); | 393 | if (send) { |
| 383 | 394 | /* Send an async layoutreturn so we dont deadlock */ | |
| 384 | /* Send an async layoutreturn so we dont deadlock */ | 395 | pnfs_send_layoutreturn(lo, stateid, iomode, false); |
| 385 | pnfs_send_layoutreturn(lo, stateid, iomode, false); | 396 | } |
| 386 | } else | 397 | } else |
| 387 | spin_unlock(&inode->i_lock); | 398 | spin_unlock(&inode->i_lock); |
| 388 | } | 399 | } |
| @@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | |||
| 924 | clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); | 935 | clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); |
| 925 | smp_mb__after_atomic(); | 936 | smp_mb__after_atomic(); |
| 926 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | 937 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); |
| 938 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | ||
| 927 | } | 939 | } |
| 928 | 940 | ||
| 929 | static int | 941 | static int |
| @@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino) | |||
| 978 | LIST_HEAD(tmp_list); | 990 | LIST_HEAD(tmp_list); |
| 979 | nfs4_stateid stateid; | 991 | nfs4_stateid stateid; |
| 980 | int status = 0, empty; | 992 | int status = 0, empty; |
| 993 | bool send; | ||
| 981 | 994 | ||
| 982 | dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); | 995 | dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); |
| 983 | 996 | ||
| @@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino) | |||
| 1007 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 1020 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
| 1008 | if (empty) { | 1021 | if (empty) { |
| 1009 | spin_unlock(&ino->i_lock); | 1022 | spin_unlock(&ino->i_lock); |
| 1010 | pnfs_put_layout_hdr(lo); | ||
| 1011 | dprintk("NFS: %s no layout segments to return\n", __func__); | 1023 | dprintk("NFS: %s no layout segments to return\n", __func__); |
| 1012 | goto out; | 1024 | goto out_put_layout_hdr; |
| 1013 | } | 1025 | } |
| 1014 | 1026 | ||
| 1015 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | 1027 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); |
| 1016 | lo->plh_block_lgets++; | 1028 | send = pnfs_prepare_layoutreturn(lo); |
| 1017 | spin_unlock(&ino->i_lock); | 1029 | spin_unlock(&ino->i_lock); |
| 1018 | pnfs_free_lseg_list(&tmp_list); | 1030 | pnfs_free_lseg_list(&tmp_list); |
| 1019 | 1031 | if (send) | |
| 1020 | status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | 1032 | status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); |
| 1033 | out_put_layout_hdr: | ||
| 1034 | pnfs_put_layout_hdr(lo); | ||
| 1021 | out: | 1035 | out: |
| 1022 | dprintk("<-- %s status: %d\n", __func__, status); | 1036 | dprintk("<-- %s status: %d\n", __func__, status); |
| 1023 | return status; | 1037 | return status; |
| @@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino) | |||
| 1097 | out_noroc: | 1111 | out_noroc: |
| 1098 | if (lo) { | 1112 | if (lo) { |
| 1099 | stateid = lo->plh_stateid; | 1113 | stateid = lo->plh_stateid; |
| 1100 | layoutreturn = | 1114 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, |
| 1101 | test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1115 | &lo->plh_flags)) |
| 1102 | &lo->plh_flags); | 1116 | layoutreturn = pnfs_prepare_layoutreturn(lo); |
| 1103 | if (layoutreturn) { | ||
| 1104 | lo->plh_block_lgets++; | ||
| 1105 | pnfs_get_layout_hdr(lo); | ||
| 1106 | } | ||
| 1107 | } | 1117 | } |
| 1108 | spin_unlock(&ino->i_lock); | 1118 | spin_unlock(&ino->i_lock); |
| 1109 | if (layoutreturn) { | 1119 | if (layoutreturn) { |
| @@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) | |||
| 1163 | */ | 1173 | */ |
| 1164 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); | 1174 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); |
| 1165 | stateid = lo->plh_stateid; | 1175 | stateid = lo->plh_stateid; |
| 1166 | layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1176 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, |
| 1167 | &lo->plh_flags); | 1177 | &lo->plh_flags)) |
| 1168 | if (layoutreturn) { | 1178 | layoutreturn = pnfs_prepare_layoutreturn(lo); |
| 1169 | lo->plh_block_lgets++; | 1179 | if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) |
| 1170 | pnfs_get_layout_hdr(lo); | 1180 | rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); |
| 1171 | } | ||
| 1172 | 1181 | ||
| 1173 | spin_unlock(&ino->i_lock); | 1182 | spin_unlock(&ino->i_lock); |
| 1174 | if (layoutreturn) { | 1183 | if (layoutreturn) { |
| 1175 | rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); | ||
| 1176 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); | 1184 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); |
| 1177 | return true; | 1185 | return true; |
| 1178 | } | 1186 | } |
| @@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, | |||
| 1693 | spin_lock(&inode->i_lock); | 1701 | spin_lock(&inode->i_lock); |
| 1694 | /* set failure bit so that pnfs path will be retried later */ | 1702 | /* set failure bit so that pnfs path will be retried later */ |
| 1695 | pnfs_layout_set_fail_bit(lo, iomode); | 1703 | pnfs_layout_set_fail_bit(lo, iomode); |
| 1696 | set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||
| 1697 | if (lo->plh_return_iomode == 0) | 1704 | if (lo->plh_return_iomode == 0) |
| 1698 | lo->plh_return_iomode = range.iomode; | 1705 | lo->plh_return_iomode = range.iomode; |
| 1699 | else if (lo->plh_return_iomode != range.iomode) | 1706 | else if (lo->plh_return_iomode != range.iomode) |
