aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Trond Myklebust <trond.myklebust@primarydata.com> 2016-01-04 13:19:55 -0500
committer: Trond Myklebust <trond.myklebust@primarydata.com> 2016-01-04 13:19:55 -0500
commit: 942e3d72a62dcfe5bf1569b179174718bbbcfbc3 (patch)
tree: b13189dfe2f9a86123aee14ef365d9e9729d46cb
parent: 58baac0ac7cc13a690f6f7cea23accaf84e843a0 (diff)
parent: 506c0d68269e90d354b3cbfc7523611b026c88d0 (diff)
Merge branch 'pnfs_generic'
* pnfs_generic:
  NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
  NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
  NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
  NFSv4.1/pNFS: Fix a race in initiate_file_draining()
  NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
  NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
  NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
  NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
  NFS: Relax requirements in nfs_flush_incompatible
  NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
  NFS: Allow multiple commit requests in flight per file
  NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
  NFSv4: List stateid information in the callback tracepoints
  NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL
  NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1
  pNFS: If we have to delay the layout callback, mark the layout for return
  NFSv4.1/pNFS: Add a helper to mark the layout as returned
  pNFS: Ensure nfs4_layoutget_prepare returns the correct error
-rw-r--r-- fs/nfs/callback_proc.c 52
-rw-r--r-- fs/nfs/direct.c 33
-rw-r--r-- fs/nfs/file.c 2
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 13
-rw-r--r-- fs/nfs/internal.h 7
-rw-r--r-- fs/nfs/nfs4proc.c 12
-rw-r--r-- fs/nfs/nfs4trace.h 69
-rw-r--r-- fs/nfs/nfstrace.h 1
-rw-r--r-- fs/nfs/pagelist.c 6
-rw-r--r-- fs/nfs/pnfs.c 82
-rw-r--r-- fs/nfs/pnfs.h 33
-rw-r--r-- fs/nfs/pnfs_nfs.c 10
-rw-r--r-- fs/nfs/write.c 88
-rw-r--r-- include/linux/nfs_fs.h 1
-rw-r--r-- include/linux/nfs_xdr.h 4
15 files changed, 296 insertions, 117 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 807eb6ef4f91..f0939d097406 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
83 83
84 res = htonl(NFS4ERR_BADHANDLE); 84 res = htonl(NFS4ERR_BADHANDLE);
85 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 85 inode = nfs_delegation_find_inode(cps->clp, &args->fh);
86 if (inode == NULL) 86 if (inode == NULL) {
87 trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
88 &args->stateid, -ntohl(res));
87 goto out; 89 goto out;
90 }
88 /* Set up a helper thread to actually return the delegation */ 91 /* Set up a helper thread to actually return the delegation */
89 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { 92 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
90 case 0: 93 case 0:
@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
96 default: 99 default:
97 res = htonl(NFS4ERR_RESOURCE); 100 res = htonl(NFS4ERR_RESOURCE);
98 } 101 }
99 trace_nfs4_recall_delegation(inode, -ntohl(res)); 102 trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
103 &args->stateid, -ntohl(res));
100 iput(inode); 104 iput(inode);
101out: 105out:
102 dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); 106 dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
160 return lo; 164 return lo;
161} 165}
162 166
167/*
168 * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
169 */
170static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
171 const nfs4_stateid *new)
172{
173 u32 oldseq, newseq;
174
175 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
176 newseq = be32_to_cpu(new->seqid);
177
178 if (newseq > oldseq + 1)
179 return false;
180 return true;
181}
182
163static u32 initiate_file_draining(struct nfs_client *clp, 183static u32 initiate_file_draining(struct nfs_client *clp,
164 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
165{ 185{
@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp,
169 LIST_HEAD(free_me_list); 189 LIST_HEAD(free_me_list);
170 190
171 lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); 191 lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
172 if (!lo) 192 if (!lo) {
193 trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
194 &args->cbl_stateid, -rv);
173 goto out; 195 goto out;
196 }
174 197
175 ino = lo->plh_inode; 198 ino = lo->plh_inode;
176 199
177 spin_lock(&ino->i_lock); 200 spin_lock(&ino->i_lock);
201 if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
202 rv = NFS4ERR_DELAY;
203 goto unlock;
204 }
178 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); 205 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
179 spin_unlock(&ino->i_lock); 206 spin_unlock(&ino->i_lock);
180 207
181 pnfs_layoutcommit_inode(ino, false); 208 pnfs_layoutcommit_inode(ino, false);
182 209
183 spin_lock(&ino->i_lock); 210 spin_lock(&ino->i_lock);
184 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 211 /*
185 pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, 212 * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
186 &args->cbl_range)) { 213 */
214 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
187 rv = NFS4ERR_DELAY; 215 rv = NFS4ERR_DELAY;
188 goto unlock; 216 goto unlock;
189 } 217 }
190 218
219 if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
220 &args->cbl_range)) {
221 rv = NFS4_OK;
222 goto unlock;
223 }
224
191 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { 225 if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
192 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, 226 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
193 &args->cbl_range); 227 &args->cbl_range);
194 } 228 }
229 pnfs_mark_layout_returned_if_empty(lo);
195unlock: 230unlock:
196 spin_unlock(&ino->i_lock); 231 spin_unlock(&ino->i_lock);
197 pnfs_free_lseg_list(&free_me_list); 232 pnfs_free_lseg_list(&free_me_list);
233 /* Free all lsegs that are attached to commit buckets */
234 nfs_commit_inode(ino, 0);
198 pnfs_put_layout_hdr(lo); 235 pnfs_put_layout_hdr(lo);
199 trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); 236 trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
237 &args->cbl_stateid, -rv);
200 iput(ino); 238 iput(ino);
201out: 239out:
202 return rv; 240 return rv;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2e7142bcb4c8..7ab7ec9f4eed 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
117 return atomic_dec_and_test(&dreq->io_count); 117 return atomic_dec_and_test(&dreq->io_count);
118} 118}
119 119
120void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
121{
122 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
123}
124EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
125
126static void 120static void
127nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) 121nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
128{ 122{
@@ -735,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
735 nfs_direct_write_complete(dreq, data->inode); 729 nfs_direct_write_complete(dreq, data->inode);
736} 730}
737 731
738static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) 732static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
733 struct nfs_page *req)
739{ 734{
740 /* There is no lock to clear */ 735 struct nfs_direct_req *dreq = cinfo->dreq;
736
737 spin_lock(&dreq->lock);
738 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
739 spin_unlock(&dreq->lock);
740 nfs_mark_request_commit(req, NULL, cinfo, 0);
741} 741}
742 742
743static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { 743static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
744 .completion = nfs_direct_commit_complete, 744 .completion = nfs_direct_commit_complete,
745 .error_cleanup = nfs_direct_error_cleanup, 745 .resched_write = nfs_direct_resched_write,
746}; 746};
747 747
748static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 748static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -847,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
847 } 847 }
848} 848}
849 849
850static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
851{
852 struct nfs_direct_req *dreq = hdr->dreq;
853
854 spin_lock(&dreq->lock);
855 if (dreq->error == 0) {
856 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
857 /* fake unstable write to let common nfs resend pages */
858 hdr->verf.committed = NFS_UNSTABLE;
859 hdr->good_bytes = hdr->args.count;
860 }
861 spin_unlock(&dreq->lock);
862}
863
850static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { 864static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
851 .error_cleanup = nfs_write_sync_pgio_error, 865 .error_cleanup = nfs_write_sync_pgio_error,
852 .init_hdr = nfs_direct_pgio_init, 866 .init_hdr = nfs_direct_pgio_init,
853 .completion = nfs_direct_write_completion, 867 .completion = nfs_direct_write_completion,
868 .reschedule_io = nfs_direct_write_reschedule_io,
854}; 869};
855 870
856 871
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f188dd071dfc..178ec8da028f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
514 * so it will not block due to pages that will shortly be freeable. 514 * so it will not block due to pages that will shortly be freeable.
515 */ 515 */
516 nfsi = NFS_I(mapping->host); 516 nfsi = NFS_I(mapping->host);
517 if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { 517 if (atomic_read(&nfsi->commit_info.rpcs_out)) {
518 *writeback = true; 518 *writeback = true;
519 return; 519 return;
520 } 520 }
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 2981cd190bfd..18c329b84ffb 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -941,18 +941,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
941 hdr->args.count, 941 hdr->args.count,
942 (unsigned long long)hdr->args.offset); 942 (unsigned long long)hdr->args.offset);
943 943
944 if (!hdr->dreq) { 944 hdr->completion_ops->reschedule_io(hdr);
945 struct nfs_open_context *ctx;
946
947 ctx = nfs_list_entry(hdr->pages.next)->wb_context;
948 set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
949 hdr->completion_ops->error_cleanup(&hdr->pages);
950 } else {
951 nfs_direct_set_resched_writes(hdr->dreq);
952 /* fake unstable write to let common nfs resend pages */
953 hdr->verf.committed = NFS_UNSTABLE;
954 hdr->good_bytes = hdr->args.count;
955 }
956 return; 945 return;
957 } 946 }
958 947
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 68f773dc226e..ee81792d2886 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -264,6 +264,12 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
264 return desc->pg_mirror_count > 1; 264 return desc->pg_mirror_count > 1;
265} 265}
266 266
267static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
268 const struct nfs_open_context *ctx2)
269{
270 return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
271}
272
267/* nfs2xdr.c */ 273/* nfs2xdr.c */
268extern struct rpc_procinfo nfs_procedures[]; 274extern struct rpc_procinfo nfs_procedures[];
269extern int nfs2_decode_dirent(struct xdr_stream *, 275extern int nfs2_decode_dirent(struct xdr_stream *,
@@ -519,7 +525,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
519 inode_dio_wait(inode); 525 inode_dio_wait(inode);
520} 526}
521extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 527extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
522extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
523 528
524/* nfs4proc.c */ 529/* nfs4proc.c */
525extern void __nfs4_read_done_cb(struct nfs_pgio_header *); 530extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index adae525edec4..5e5062c9b92b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7776,6 +7776,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
7776 struct nfs4_layoutget *lgp = calldata; 7776 struct nfs4_layoutget *lgp = calldata;
7777 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 7777 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
7778 struct nfs4_session *session = nfs4_get_session(server); 7778 struct nfs4_session *session = nfs4_get_session(server);
7779 int ret;
7779 7780
7780 dprintk("--> %s\n", __func__); 7781 dprintk("--> %s\n", __func__);
7781 /* Note the is a race here, where a CB_LAYOUTRECALL can come in 7782 /* Note the is a race here, where a CB_LAYOUTRECALL can come in
@@ -7786,12 +7787,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
7786 if (nfs41_setup_sequence(session, &lgp->args.seq_args, 7787 if (nfs41_setup_sequence(session, &lgp->args.seq_args,
7787 &lgp->res.seq_res, task)) 7788 &lgp->res.seq_res, task))
7788 return; 7789 return;
7789 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 7790 ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
7790 NFS_I(lgp->args.inode)->layout, 7791 NFS_I(lgp->args.inode)->layout,
7791 &lgp->args.range, 7792 &lgp->args.range,
7792 lgp->args.ctx->state)) { 7793 lgp->args.ctx->state);
7793 rpc_exit(task, NFS4_OK); 7794 if (ret < 0)
7794 } 7795 rpc_exit(task, ret);
7795} 7796}
7796 7797
7797static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) 7798static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
@@ -8073,9 +8074,10 @@ static void nfs4_layoutreturn_release(void *calldata)
8073 8074
8074 dprintk("--> %s\n", __func__); 8075 dprintk("--> %s\n", __func__);
8075 spin_lock(&lo->plh_inode->i_lock); 8076 spin_lock(&lo->plh_inode->i_lock);
8077 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
8078 pnfs_mark_layout_returned_if_empty(lo);
8076 if (lrp->res.lrs_present) 8079 if (lrp->res.lrs_present)
8077 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 8080 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
8078 pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
8079 pnfs_clear_layoutreturn_waitbit(lo); 8081 pnfs_clear_layoutreturn_waitbit(lo);
8080 lo->plh_block_lgets--; 8082 lo->plh_block_lgets--;
8081 spin_unlock(&lo->plh_inode->i_lock); 8083 spin_unlock(&lo->plh_inode->i_lock);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index d08d0c84b778..88b6b14ce71b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -982,7 +982,6 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
982DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); 982DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
983DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); 983DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
984#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ 984#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
985DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation);
986 985
987DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, 986DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
988 TP_PROTO( 987 TP_PROTO(
@@ -1145,8 +1144,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
1145 ), \ 1144 ), \
1146 TP_ARGS(clp, fhandle, inode, error)) 1145 TP_ARGS(clp, fhandle, inode, error))
1147DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); 1146DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr);
1148DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode);
1149 1147
1148DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
1149 TP_PROTO(
1150 const struct nfs_client *clp,
1151 const struct nfs_fh *fhandle,
1152 const struct inode *inode,
1153 const nfs4_stateid *stateid,
1154 int error
1155 ),
1156
1157 TP_ARGS(clp, fhandle, inode, stateid, error),
1158
1159 TP_STRUCT__entry(
1160 __field(int, error)
1161 __field(dev_t, dev)
1162 __field(u32, fhandle)
1163 __field(u64, fileid)
1164 __string(dstaddr, clp ?
1165 rpc_peeraddr2str(clp->cl_rpcclient,
1166 RPC_DISPLAY_ADDR) : "unknown")
1167 __field(int, stateid_seq)
1168 __field(u32, stateid_hash)
1169 ),
1170
1171 TP_fast_assign(
1172 __entry->error = error;
1173 __entry->fhandle = nfs_fhandle_hash(fhandle);
1174 if (inode != NULL) {
1175 __entry->fileid = NFS_FILEID(inode);
1176 __entry->dev = inode->i_sb->s_dev;
1177 } else {
1178 __entry->fileid = 0;
1179 __entry->dev = 0;
1180 }
1181 __assign_str(dstaddr, clp ?
1182 rpc_peeraddr2str(clp->cl_rpcclient,
1183 RPC_DISPLAY_ADDR) : "unknown")
1184 __entry->stateid_seq =
1185 be32_to_cpu(stateid->seqid);
1186 __entry->stateid_hash =
1187 nfs_stateid_hash(stateid);
1188 ),
1189
1190 TP_printk(
1191 "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
1192 "stateid=%d:0x%08x dstaddr=%s",
1193 __entry->error,
1194 show_nfsv4_errors(__entry->error),
1195 MAJOR(__entry->dev), MINOR(__entry->dev),
1196 (unsigned long long)__entry->fileid,
1197 __entry->fhandle,
1198 __entry->stateid_seq, __entry->stateid_hash,
1199 __get_str(dstaddr)
1200 )
1201);
1202
1203#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \
1204 DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \
1205 TP_PROTO( \
1206 const struct nfs_client *clp, \
1207 const struct nfs_fh *fhandle, \
1208 const struct inode *inode, \
1209 const nfs4_stateid *stateid, \
1210 int error \
1211 ), \
1212 TP_ARGS(clp, fhandle, inode, stateid, error))
1213DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
1214DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);
1150 1215
1151DECLARE_EVENT_CLASS(nfs4_idmap_event, 1216DECLARE_EVENT_CLASS(nfs4_idmap_event,
1152 TP_PROTO( 1217 TP_PROTO(
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 59f838cdc009..9f80a086b612 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -39,7 +39,6 @@
39 { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ 39 { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
40 { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ 40 { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
41 { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ 41 { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
42 { 1 << NFS_INO_COMMIT, "COMMIT" }, \
43 { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ 42 { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
44 { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) 43 { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
45 44
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7c71b71016b5..eeddbf0bf4c4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -899,12 +899,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
899 pgio->pg_mirrors_dynamic = NULL; 899 pgio->pg_mirrors_dynamic = NULL;
900} 900}
901 901
902static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
903 const struct nfs_open_context *ctx2)
904{
905 return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
906}
907
908static bool nfs_match_lock_context(const struct nfs_lock_context *l1, 902static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
909 const struct nfs_lock_context *l2) 903 const struct nfs_lock_context *l2)
910{ 904{
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 113c3b327e24..a3592cc34a20 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
53static LIST_HEAD(pnfs_modules_tbl); 53static LIST_HEAD(pnfs_modules_tbl);
54 54
55static int 55static int
56pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 56pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
57 enum pnfs_iomode iomode, bool sync); 57 enum pnfs_iomode iomode, bool sync);
58 58
59/* Return the registered pnfs layout driver module matching given id */ 59/* Return the registered pnfs layout driver module matching given id */
@@ -385,13 +385,13 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
385 enum pnfs_iomode iomode; 385 enum pnfs_iomode iomode;
386 bool send; 386 bool send;
387 387
388 stateid = lo->plh_stateid; 388 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
389 iomode = lo->plh_return_iomode; 389 iomode = lo->plh_return_iomode;
390 send = pnfs_prepare_layoutreturn(lo); 390 send = pnfs_prepare_layoutreturn(lo);
391 spin_unlock(&inode->i_lock); 391 spin_unlock(&inode->i_lock);
392 if (send) { 392 if (send) {
393 /* Send an async layoutreturn so we dont deadlock */ 393 /* Send an async layoutreturn so we dont deadlock */
394 pnfs_send_layoutreturn(lo, stateid, iomode, false); 394 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
395 } 395 }
396 } else 396 } else
397 spin_unlock(&inode->i_lock); 397 spin_unlock(&inode->i_lock);
@@ -566,10 +566,10 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
566int 566int
567pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 567pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
568 struct list_head *tmp_list, 568 struct list_head *tmp_list,
569 struct pnfs_layout_range *recall_range) 569 const struct pnfs_layout_range *recall_range)
570{ 570{
571 struct pnfs_layout_segment *lseg, *next; 571 struct pnfs_layout_segment *lseg, *next;
572 int invalid = 0, removed = 0; 572 int remaining = 0;
573 573
574 dprintk("%s:Begin lo %p\n", __func__, lo); 574 dprintk("%s:Begin lo %p\n", __func__, lo);
575 575
@@ -582,11 +582,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
582 "offset %llu length %llu\n", __func__, 582 "offset %llu length %llu\n", __func__,
583 lseg, lseg->pls_range.iomode, lseg->pls_range.offset, 583 lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
584 lseg->pls_range.length); 584 lseg->pls_range.length);
585 invalid++; 585 if (!mark_lseg_invalid(lseg, tmp_list))
586 removed += mark_lseg_invalid(lseg, tmp_list); 586 remaining++;
587 } 587 }
588 dprintk("%s:Return %i\n", __func__, invalid - removed); 588 dprintk("%s:Return %i\n", __func__, remaining);
589 return invalid - removed; 589 return remaining;
590} 590}
591 591
592/* note free_me must contain lsegs from a single layout_hdr */ 592/* note free_me must contain lsegs from a single layout_hdr */
@@ -702,6 +702,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
702 ret = -EAGAIN; 702 ret = -EAGAIN;
703 spin_unlock(&inode->i_lock); 703 spin_unlock(&inode->i_lock);
704 pnfs_free_lseg_list(&lseg_list); 704 pnfs_free_lseg_list(&lseg_list);
705 /* Free all lsegs that are attached to commit buckets */
706 nfs_commit_inode(inode, 0);
705 pnfs_put_layout_hdr(lo); 707 pnfs_put_layout_hdr(lo);
706 iput(inode); 708 iput(inode);
707 } 709 }
@@ -825,7 +827,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
825 827
826int 828int
827pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 829pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
828 struct pnfs_layout_range *range, 830 const struct pnfs_layout_range *range,
829 struct nfs4_state *open_state) 831 struct nfs4_state *open_state)
830{ 832{
831 int status = 0; 833 int status = 0;
@@ -860,7 +862,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
860static struct pnfs_layout_segment * 862static struct pnfs_layout_segment *
861send_layoutget(struct pnfs_layout_hdr *lo, 863send_layoutget(struct pnfs_layout_hdr *lo,
862 struct nfs_open_context *ctx, 864 struct nfs_open_context *ctx,
863 struct pnfs_layout_range *range, 865 const struct pnfs_layout_range *range,
864 gfp_t gfp_flags) 866 gfp_t gfp_flags)
865{ 867{
866 struct inode *ino = lo->plh_inode; 868 struct inode *ino = lo->plh_inode;
@@ -893,7 +895,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
893 lgp->args.minlength = i_size - range->offset; 895 lgp->args.minlength = i_size - range->offset;
894 } 896 }
895 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 897 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
896 lgp->args.range = *range; 898 pnfs_copy_range(&lgp->args.range, range);
897 lgp->args.type = server->pnfs_curr_ld->id; 899 lgp->args.type = server->pnfs_curr_ld->id;
898 lgp->args.inode = ino; 900 lgp->args.inode = ino;
899 lgp->args.ctx = get_nfs_open_context(ctx); 901 lgp->args.ctx = get_nfs_open_context(ctx);
@@ -936,7 +938,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
936} 938}
937 939
938static int 940static int
939pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 941pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
940 enum pnfs_iomode iomode, bool sync) 942 enum pnfs_iomode iomode, bool sync)
941{ 943{
942 struct inode *ino = lo->plh_inode; 944 struct inode *ino = lo->plh_inode;
@@ -953,7 +955,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
953 goto out; 955 goto out;
954 } 956 }
955 957
956 lrp->args.stateid = stateid; 958 nfs4_stateid_copy(&lrp->args.stateid, stateid);
957 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 959 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
958 lrp->args.inode = ino; 960 lrp->args.inode = ino;
959 lrp->args.range.iomode = iomode; 961 lrp->args.range.iomode = iomode;
@@ -996,7 +998,7 @@ _pnfs_return_layout(struct inode *ino)
996 dprintk("NFS: %s no layout to return\n", __func__); 998 dprintk("NFS: %s no layout to return\n", __func__);
997 goto out; 999 goto out;
998 } 1000 }
999 stateid = nfsi->layout->plh_stateid; 1001 nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
1000 /* Reference matched in nfs4_layoutreturn_release */ 1002 /* Reference matched in nfs4_layoutreturn_release */
1001 pnfs_get_layout_hdr(lo); 1003 pnfs_get_layout_hdr(lo);
1002 empty = list_empty(&lo->plh_segs); 1004 empty = list_empty(&lo->plh_segs);
@@ -1024,7 +1026,7 @@ _pnfs_return_layout(struct inode *ino)
1024 spin_unlock(&ino->i_lock); 1026 spin_unlock(&ino->i_lock);
1025 pnfs_free_lseg_list(&tmp_list); 1027 pnfs_free_lseg_list(&tmp_list);
1026 if (send) 1028 if (send)
1027 status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1029 status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
1028out_put_layout_hdr: 1030out_put_layout_hdr:
1029 pnfs_put_layout_hdr(lo); 1031 pnfs_put_layout_hdr(lo);
1030out: 1032out:
@@ -1087,7 +1089,7 @@ bool pnfs_roc(struct inode *ino)
1087 goto out_noroc; 1089 goto out_noroc;
1088 } 1090 }
1089 1091
1090 stateid = lo->plh_stateid; 1092 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1091 /* always send layoutreturn if being marked so */ 1093 /* always send layoutreturn if being marked so */
1092 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1094 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1093 &lo->plh_flags)) 1095 &lo->plh_flags))
@@ -1114,7 +1116,7 @@ out_noroc:
1114 pnfs_free_lseg_list(&tmp_list); 1116 pnfs_free_lseg_list(&tmp_list);
1115 pnfs_layoutcommit_inode(ino, true); 1117 pnfs_layoutcommit_inode(ino, true);
1116 if (layoutreturn) 1118 if (layoutreturn)
1117 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1119 pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
1118 return roc; 1120 return roc;
1119} 1121}
1120 1122
@@ -1139,6 +1141,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
1139 1141
1140 spin_lock(&ino->i_lock); 1142 spin_lock(&ino->i_lock);
1141 lo = NFS_I(ino)->layout; 1143 lo = NFS_I(ino)->layout;
1144 pnfs_mark_layout_returned_if_empty(lo);
1142 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1145 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
1143 lo->plh_barrier = barrier; 1146 lo->plh_barrier = barrier;
1144 spin_unlock(&ino->i_lock); 1147 spin_unlock(&ino->i_lock);
@@ -1734,16 +1737,29 @@ out_forget_reply:
1734} 1737}
1735 1738
1736static void 1739static void
1740pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
1741{
1742 if (lo->plh_return_iomode == iomode)
1743 return;
1744 if (lo->plh_return_iomode != 0)
1745 iomode = IOMODE_ANY;
1746 lo->plh_return_iomode = iomode;
1747}
1748
1749int
1737pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1750pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1738 struct list_head *tmp_list, 1751 struct list_head *tmp_list,
1739 struct pnfs_layout_range *return_range) 1752 const struct pnfs_layout_range *return_range)
1740{ 1753{
1741 struct pnfs_layout_segment *lseg, *next; 1754 struct pnfs_layout_segment *lseg, *next;
1755 int remaining = 0;
1742 1756
1743 dprintk("%s:Begin lo %p\n", __func__, lo); 1757 dprintk("%s:Begin lo %p\n", __func__, lo);
1744 1758
1745 if (list_empty(&lo->plh_segs)) 1759 if (list_empty(&lo->plh_segs))
1746 return; 1760 return 0;
1761
1762 assert_spin_locked(&lo->plh_inode->i_lock);
1747 1763
1748 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1764 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
1749 if (should_free_lseg(&lseg->pls_range, return_range)) { 1765 if (should_free_lseg(&lseg->pls_range, return_range)) {
@@ -1753,10 +1769,13 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1753 lseg->pls_range.offset, 1769 lseg->pls_range.offset,
1754 lseg->pls_range.length); 1770 lseg->pls_range.length);
1755 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1771 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
1756 mark_lseg_invalid(lseg, tmp_list); 1772 pnfs_set_plh_return_iomode(lo, return_range->iomode);
1773 if (!mark_lseg_invalid(lseg, tmp_list))
1774 remaining++;
1757 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1775 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
1758 &lo->plh_flags); 1776 &lo->plh_flags);
1759 } 1777 }
1778 return remaining;
1760} 1779}
1761 1780
1762void pnfs_error_mark_layout_for_return(struct inode *inode, 1781void pnfs_error_mark_layout_for_return(struct inode *inode,
@@ -1769,19 +1788,28 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
1769 .length = NFS4_MAX_UINT64, 1788 .length = NFS4_MAX_UINT64,
1770 }; 1789 };
1771 LIST_HEAD(free_me); 1790 LIST_HEAD(free_me);
1791 bool return_now = false;
1772 1792
1773 spin_lock(&inode->i_lock); 1793 spin_lock(&inode->i_lock);
1774 if (lo->plh_return_iomode == 0) 1794 pnfs_set_plh_return_iomode(lo, range.iomode);
1775 lo->plh_return_iomode = range.iomode;
1776 else if (lo->plh_return_iomode != range.iomode)
1777 lo->plh_return_iomode = IOMODE_ANY;
1778 /* 1795 /*
1779 * mark all matching lsegs so that we are sure to have no live 1796 * mark all matching lsegs so that we are sure to have no live
1780 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1797 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
1781 * for how it works. 1798 * for how it works.
1782 */ 1799 */
1783 pnfs_mark_matching_lsegs_return(lo, &free_me, &range); 1800 if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) {
1784 spin_unlock(&inode->i_lock); 1801 nfs4_stateid stateid;
1802 enum pnfs_iomode iomode = lo->plh_return_iomode;
1803
1804 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1805 return_now = pnfs_prepare_layoutreturn(lo);
1806 spin_unlock(&inode->i_lock);
1807 if (return_now)
1808 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
1809 } else {
1810 spin_unlock(&inode->i_lock);
1811 nfs_commit_inode(inode, 0);
1812 }
1785 pnfs_free_lseg_list(&free_me); 1813 pnfs_free_lseg_list(&free_me);
1786} 1814}
1787EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 1815EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6916ff4e86f9..9f4e2a47f4aa 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -260,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
260 bool update_barrier); 260 bool update_barrier);
261int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, 261int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
262 struct pnfs_layout_hdr *lo, 262 struct pnfs_layout_hdr *lo,
263 struct pnfs_layout_range *range, 263 const struct pnfs_layout_range *range,
264 struct nfs4_state *open_state); 264 struct nfs4_state *open_state);
265int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 265int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
266 struct list_head *tmp_list, 266 struct list_head *tmp_list,
267 struct pnfs_layout_range *recall_range); 267 const struct pnfs_layout_range *recall_range);
268int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
269 struct list_head *tmp_list,
270 const struct pnfs_layout_range *recall_range);
268bool pnfs_roc(struct inode *ino); 271bool pnfs_roc(struct inode *ino);
269void pnfs_roc_release(struct inode *ino); 272void pnfs_roc_release(struct inode *ino);
270void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 273void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
@@ -388,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
388 return lseg; 391 return lseg;
389} 392}
390 393
394static inline bool
395pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
396{
397 return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
398}
399
391/* Return true if a layout driver is being used for this mountpoint */ 400/* Return true if a layout driver is being used for this mountpoint */
392static inline int pnfs_enabled_sb(struct nfs_server *nfss) 401static inline int pnfs_enabled_sb(struct nfs_server *nfss)
393{ 402{
@@ -535,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end)
535 return 1 + end - offset; 544 return 1 + end - offset;
536} 545}
537 546
547/**
548 * pnfs_mark_layout_returned_if_empty - marks the layout as returned
549 * @lo: layout header
550 *
551 * Note: Caller must hold inode->i_lock
552 */
553static inline void
554pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
555{
556 if (list_empty(&lo->plh_segs))
557 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
558}
559
560static inline void
561pnfs_copy_range(struct pnfs_layout_range *dst,
562 const struct pnfs_layout_range *src)
563{
564 memcpy(dst, src, sizeof(*dst));
565}
566
538extern unsigned int layoutstats_timer; 567extern unsigned int layoutstats_timer;
539 568
540#ifdef NFS_DEBUG 569#ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 24655b807d44..81ac6480f9e7 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
266 } else { 266 } else {
267 nfs_retry_commit(mds_pages, NULL, cinfo, 0); 267 nfs_retry_commit(mds_pages, NULL, cinfo, 0);
268 pnfs_generic_retry_commit(cinfo, 0); 268 pnfs_generic_retry_commit(cinfo, 0);
269 cinfo->completion_ops->error_cleanup(NFS_I(inode));
270 return -ENOMEM; 269 return -ENOMEM;
271 } 270 }
272 } 271 }
273 272
274 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); 273 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
275 274
276 if (nreq == 0) { 275 if (nreq == 0)
277 cinfo->completion_ops->error_cleanup(NFS_I(inode));
278 goto out; 276 goto out;
279 }
280 277
281 atomic_add(nreq, &cinfo->mds->rpcs_out); 278 atomic_add(nreq, &cinfo->mds->rpcs_out);
282 279
@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
871 buckets = cinfo->ds->buckets; 868 buckets = cinfo->ds->buckets;
872 list = &buckets[ds_commit_idx].written; 869 list = &buckets[ds_commit_idx].written;
873 if (list_empty(list)) { 870 if (list_empty(list)) {
871 if (!pnfs_is_valid_lseg(lseg)) {
872 spin_unlock(cinfo->lock);
873 cinfo->completion_ops->resched_write(cinfo, req);
874 return;
875 }
874 /* Non-empty buckets hold a reference on the lseg. That ref 876 /* Non-empty buckets hold a reference on the lseg. That ref
875 * is normally transferred to the COMMIT call and released 877 * is normally transferred to the COMMIT call and released
876 * there. It could also be released if the last req is pulled 878 * there. It could also be released if the last req is pulled
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4d254232d728..94828b3f8c95 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,6 +21,8 @@
21#include <linux/nfs_page.h> 21#include <linux/nfs_page.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/export.h> 23#include <linux/export.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
24 26
25#include <asm/uaccess.h> 27#include <asm/uaccess.h>
26 28
@@ -1155,7 +1157,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1155 if (req == NULL) 1157 if (req == NULL)
1156 return 0; 1158 return 0;
1157 l_ctx = req->wb_lock_context; 1159 l_ctx = req->wb_lock_context;
1158 do_flush = req->wb_page != page || req->wb_context != ctx; 1160 do_flush = req->wb_page != page ||
1161 !nfs_match_open_context(req->wb_context, ctx);
1159 /* for now, flush if more than 1 request in page_group */ 1162 /* for now, flush if more than 1 request in page_group */
1160 do_flush |= req->wb_this_page != req; 1163 do_flush |= req->wb_this_page != req;
1161 if (l_ctx && flctx && 1164 if (l_ctx && flctx &&
@@ -1353,9 +1356,15 @@ static void nfs_async_write_error(struct list_head *head)
1353 } 1356 }
1354} 1357}
1355 1358
1359static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
1360{
1361 nfs_async_write_error(&hdr->pages);
1362}
1363
1356static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { 1364static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1357 .error_cleanup = nfs_async_write_error, 1365 .error_cleanup = nfs_async_write_error,
1358 .completion = nfs_write_completion, 1366 .completion = nfs_write_completion,
1367 .reschedule_io = nfs_async_write_reschedule_io,
1359}; 1368};
1360 1369
1361void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1370void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -1556,27 +1565,29 @@ static void nfs_writeback_result(struct rpc_task *task,
1556 } 1565 }
1557} 1566}
1558 1567
1568static int nfs_wait_atomic_killable(atomic_t *key)
1569{
1570 if (fatal_signal_pending(current))
1571 return -ERESTARTSYS;
1572 freezable_schedule_unsafe();
1573 return 0;
1574}
1559 1575
1560static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1576static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1561{ 1577{
1562 int ret; 1578 return wait_on_atomic_t(&cinfo->rpcs_out,
1579 nfs_wait_atomic_killable, TASK_KILLABLE);
1580}
1563 1581
1564 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1582static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
1565 return 1; 1583{
1566 if (!may_wait) 1584 atomic_inc(&cinfo->rpcs_out);
1567 return 0;
1568 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1569 NFS_INO_COMMIT,
1570 nfs_wait_bit_killable,
1571 TASK_KILLABLE);
1572 return (ret < 0) ? ret : 1;
1573} 1585}
1574 1586
1575static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1587static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1576{ 1588{
1577 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1589 if (atomic_dec_and_test(&cinfo->rpcs_out))
1578 smp_mb__after_atomic(); 1590 wake_up_atomic_t(&cinfo->rpcs_out);
1579 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1580} 1591}
1581 1592
1582void nfs_commitdata_release(struct nfs_commit_data *data) 1593void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1693,6 +1704,13 @@ void nfs_retry_commit(struct list_head *page_list,
1693} 1704}
1694EXPORT_SYMBOL_GPL(nfs_retry_commit); 1705EXPORT_SYMBOL_GPL(nfs_retry_commit);
1695 1706
1707static void
1708nfs_commit_resched_write(struct nfs_commit_info *cinfo,
1709 struct nfs_page *req)
1710{
1711 __set_page_dirty_nobuffers(req->wb_page);
1712}
1713
1696/* 1714/*
1697 * Commit dirty pages 1715 * Commit dirty pages
1698 */ 1716 */
@@ -1714,7 +1732,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1714 data->mds_ops, how, 0); 1732 data->mds_ops, how, 0);
1715 out_bad: 1733 out_bad:
1716 nfs_retry_commit(head, NULL, cinfo, 0); 1734 nfs_retry_commit(head, NULL, cinfo, 0);
1717 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1718 return -ENOMEM; 1735 return -ENOMEM;
1719} 1736}
1720 1737
@@ -1776,8 +1793,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
1776 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 1793 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
1777 1794
1778 nfs_init_cinfo(&cinfo, data->inode, data->dreq); 1795 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1779 if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) 1796 nfs_commit_end(cinfo.mds);
1780 nfs_commit_clear_lock(NFS_I(data->inode));
1781} 1797}
1782 1798
1783static void nfs_commit_release(void *calldata) 1799static void nfs_commit_release(void *calldata)
@@ -1796,7 +1812,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1796 1812
1797static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { 1813static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1798 .completion = nfs_commit_release_pages, 1814 .completion = nfs_commit_release_pages,
1799 .error_cleanup = nfs_commit_clear_lock, 1815 .resched_write = nfs_commit_resched_write,
1800}; 1816};
1801 1817
1802int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 1818int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1815,30 +1831,25 @@ int nfs_commit_inode(struct inode *inode, int how)
1815 LIST_HEAD(head); 1831 LIST_HEAD(head);
1816 struct nfs_commit_info cinfo; 1832 struct nfs_commit_info cinfo;
1817 int may_wait = how & FLUSH_SYNC; 1833 int may_wait = how & FLUSH_SYNC;
1834 int error = 0;
1818 int res; 1835 int res;
1819 1836
1820 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1821 if (res <= 0)
1822 goto out_mark_dirty;
1823 nfs_init_cinfo_from_inode(&cinfo, inode); 1837 nfs_init_cinfo_from_inode(&cinfo, inode);
1838 nfs_commit_begin(cinfo.mds);
1824 res = nfs_scan_commit(inode, &head, &cinfo); 1839 res = nfs_scan_commit(inode, &head, &cinfo);
1825 if (res) { 1840 if (res)
1826 int error;
1827
1828 error = nfs_generic_commit_list(inode, &head, how, &cinfo); 1841 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1829 if (error < 0) 1842 nfs_commit_end(cinfo.mds);
1830 return error; 1843 if (error < 0)
1831 if (!may_wait) 1844 goto out_error;
1832 goto out_mark_dirty; 1845 if (!may_wait)
1833 error = wait_on_bit_action(&NFS_I(inode)->flags, 1846 goto out_mark_dirty;
1834 NFS_INO_COMMIT, 1847 error = wait_on_commit(cinfo.mds);
1835 nfs_wait_bit_killable, 1848 if (error < 0)
1836 TASK_KILLABLE); 1849 return error;
1837 if (error < 0)
1838 return error;
1839 } else
1840 nfs_commit_clear_lock(NFS_I(inode));
1841 return res; 1850 return res;
1851out_error:
1852 res = error;
1842 /* Note: If we exit without ensuring that the commit is complete, 1853 /* Note: If we exit without ensuring that the commit is complete,
1843 * we must mark the inode as dirty. Otherwise, future calls to 1854 * we must mark the inode as dirty. Otherwise, future calls to
1844 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure 1855 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
@@ -1848,6 +1859,7 @@ out_mark_dirty:
1848 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1859 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1849 return res; 1860 return res;
1850} 1861}
1862EXPORT_SYMBOL_GPL(nfs_commit_inode);
1851 1863
1852int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1864int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1853{ 1865{
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b88fc46cfbb8..9eee972863a7 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -216,7 +216,6 @@ struct nfs_inode {
216#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ 216#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
217#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 217#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
218#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 218#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
219#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
220#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 219#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
221#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ 220#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
222#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ 221#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7b30ac0c7def..791098a08a87 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1421,11 +1421,12 @@ struct nfs_mds_commit_info {
1421 struct list_head list; 1421 struct list_head list;
1422}; 1422};
1423 1423
1424struct nfs_commit_info;
1424struct nfs_commit_data; 1425struct nfs_commit_data;
1425struct nfs_inode; 1426struct nfs_inode;
1426struct nfs_commit_completion_ops { 1427struct nfs_commit_completion_ops {
1427 void (*error_cleanup) (struct nfs_inode *nfsi);
1428 void (*completion) (struct nfs_commit_data *data); 1428 void (*completion) (struct nfs_commit_data *data);
1429 void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
1429}; 1430};
1430 1431
1431struct nfs_commit_info { 1432struct nfs_commit_info {
@@ -1462,6 +1463,7 @@ struct nfs_pgio_completion_ops {
1462 void (*error_cleanup)(struct list_head *head); 1463 void (*error_cleanup)(struct list_head *head);
1463 void (*init_hdr)(struct nfs_pgio_header *hdr); 1464 void (*init_hdr)(struct nfs_pgio_header *hdr);
1464 void (*completion)(struct nfs_pgio_header *hdr); 1465 void (*completion)(struct nfs_pgio_header *hdr);
1466 void (*reschedule_io)(struct nfs_pgio_header *hdr);
1465}; 1467};
1466 1468
1467struct nfs_unlinkdata { 1469struct nfs_unlinkdata {