aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/nfs4filelayout.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/nfs4filelayout.c')
-rw-r--r--fs/nfs/nfs4filelayout.c693
1 files changed, 459 insertions, 234 deletions
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9ea8a31..474c6305afd9 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,84 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
82 BUG(); 82 BUG();
83} 83}
84 84
85static void filelayout_reset_write(struct nfs_write_data *data)
86{
87 struct nfs_pgio_header *hdr = data->header;
88 struct inode *inode = hdr->inode;
89 struct rpc_task *task = &data->task;
90
91 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
92 dprintk("%s Reset task %5u for i/o through MDS "
93 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
94 data->task.tk_pid,
95 inode->i_sb->s_id,
96 (long long)NFS_FILEID(inode),
97 data->args.count,
98 (unsigned long long)data->args.offset);
99
100 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
101 &hdr->pages,
102 hdr->completion_ops);
103 }
104 /* balance nfs_get_client in filelayout_write_pagelist */
105 nfs_put_client(data->ds_clp);
106 data->ds_clp = NULL;
107}
108
109static void filelayout_reset_read(struct nfs_read_data *data)
110{
111 struct nfs_pgio_header *hdr = data->header;
112 struct inode *inode = hdr->inode;
113 struct rpc_task *task = &data->task;
114
115 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
116 dprintk("%s Reset task %5u for i/o through MDS "
117 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
118 data->task.tk_pid,
119 inode->i_sb->s_id,
120 (long long)NFS_FILEID(inode),
121 data->args.count,
122 (unsigned long long)data->args.offset);
123
124 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
125 &hdr->pages,
126 hdr->completion_ops);
127 }
128 /* balance nfs_get_client in filelayout_read_pagelist */
129 nfs_put_client(data->ds_clp);
130 data->ds_clp = NULL;
131}
132
85static int filelayout_async_handle_error(struct rpc_task *task, 133static int filelayout_async_handle_error(struct rpc_task *task,
86 struct nfs4_state *state, 134 struct nfs4_state *state,
87 struct nfs_client *clp, 135 struct nfs_client *clp,
88 int *reset) 136 struct pnfs_layout_segment *lseg)
89{ 137{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode); 138 struct inode *inode = lseg->pls_layout->plh_inode;
139 struct nfs_server *mds_server = NFS_SERVER(inode);
140 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
91 struct nfs_client *mds_client = mds_server->nfs_client; 141 struct nfs_client *mds_client = mds_server->nfs_client;
142 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
92 143
93 if (task->tk_status >= 0) 144 if (task->tk_status >= 0)
94 return 0; 145 return 0;
95 *reset = 0;
96 146
97 switch (task->tk_status) { 147 switch (task->tk_status) {
98 /* MDS state errors */ 148 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED: 149 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED: 150 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID: 151 case -NFS4ERR_BAD_STATEID:
152 if (state == NULL)
153 break;
102 nfs_remove_bad_delegation(state->inode); 154 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE: 155 case -NFS4ERR_OPENMODE:
156 if (state == NULL)
157 break;
104 nfs4_schedule_stateid_recovery(mds_server, state); 158 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery; 159 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED: 160 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state); 161 if (state != NULL)
162 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client); 163 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery; 164 goto wait_on_recovery;
110 /* DS session errors */ 165 /* DS session errors */
@@ -127,11 +182,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
127 break; 182 break;
128 case -NFS4ERR_RETRY_UNCACHED_REP: 183 case -NFS4ERR_RETRY_UNCACHED_REP:
129 break; 184 break;
185 /* Invalidate Layout errors */
186 case -NFS4ERR_PNFS_NO_LAYOUT:
187 case -ESTALE: /* mapped NFS4ERR_STALE */
188 case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
189 case -EISDIR: /* mapped NFS4ERR_ISDIR */
190 case -NFS4ERR_FHEXPIRED:
191 case -NFS4ERR_WRONG_TYPE:
192 dprintk("%s Invalid layout error %d\n", __func__,
193 task->tk_status);
194 /*
195 * Destroy layout so new i/o will get a new layout.
196 * Layout will not be destroyed until all current lseg
197 * references are put. Mark layout as invalid to resend failed
198 * i/o and all i/o waiting on the slot table to the MDS until
199 * layout is destroyed and a new valid layout is obtained.
200 */
201 set_bit(NFS_LAYOUT_INVALID,
202 &NFS_I(state->inode)->layout->plh_flags);
203 pnfs_destroy_layout(NFS_I(state->inode));
204 rpc_wake_up(&tbl->slot_tbl_waitq);
205 goto reset;
206 /* RPC connection errors */
207 case -ECONNREFUSED:
208 case -EHOSTDOWN:
209 case -EHOSTUNREACH:
210 case -ENETUNREACH:
211 case -EIO:
212 case -ETIMEDOUT:
213 case -EPIPE:
214 dprintk("%s DS connection error %d\n", __func__,
215 task->tk_status);
216 if (!filelayout_test_devid_invalid(devid))
217 _pnfs_return_layout(state->inode);
218 filelayout_mark_devid_invalid(devid);
219 rpc_wake_up(&tbl->slot_tbl_waitq);
220 nfs4_ds_disconnect(clp);
221 /* fall through */
130 default: 222 default:
131 dprintk("%s DS error. Retry through MDS %d\n", __func__, 223reset:
224 dprintk("%s Retry through MDS. Error %d\n", __func__,
132 task->tk_status); 225 task->tk_status);
133 *reset = 1; 226 return -NFS4ERR_RESET_TO_MDS;
134 break;
135 } 227 }
136out: 228out:
137 task->tk_status = 0; 229 task->tk_status = 0;
@@ -148,18 +240,17 @@ wait_on_recovery:
148static int filelayout_read_done_cb(struct rpc_task *task, 240static int filelayout_read_done_cb(struct rpc_task *task,
149 struct nfs_read_data *data) 241 struct nfs_read_data *data)
150{ 242{
151 int reset = 0; 243 struct nfs_pgio_header *hdr = data->header;
244 int err;
152 245
153 dprintk("%s DS read\n", __func__); 246 err = filelayout_async_handle_error(task, data->args.context->state,
247 data->ds_clp, hdr->lseg);
154 248
155 if (filelayout_async_handle_error(task, data->args.context->state, 249 switch (err) {
156 data->ds_clp, &reset) == -EAGAIN) { 250 case -NFS4ERR_RESET_TO_MDS:
157 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 251 filelayout_reset_read(data);
158 __func__, data->ds_clp, data->ds_clp->cl_session); 252 return task->tk_status;
159 if (reset) { 253 case -EAGAIN:
160 pnfs_set_lo_fail(data->lseg);
161 nfs4_reset_read(task, data);
162 }
163 rpc_restart_call_prepare(task); 254 rpc_restart_call_prepare(task);
164 return -EAGAIN; 255 return -EAGAIN;
165 } 256 }
@@ -175,13 +266,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
175static void 266static void
176filelayout_set_layoutcommit(struct nfs_write_data *wdata) 267filelayout_set_layoutcommit(struct nfs_write_data *wdata)
177{ 268{
178 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || 269 struct nfs_pgio_header *hdr = wdata->header;
270
271 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
179 wdata->res.verf->committed == NFS_FILE_SYNC) 272 wdata->res.verf->committed == NFS_FILE_SYNC)
180 return; 273 return;
181 274
182 pnfs_set_layoutcommit(wdata); 275 pnfs_set_layoutcommit(wdata);
183 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 276 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
184 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); 277 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
185} 278}
186 279
187/* 280/*
@@ -191,8 +284,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
191 */ 284 */
192static void filelayout_read_prepare(struct rpc_task *task, void *data) 285static void filelayout_read_prepare(struct rpc_task *task, void *data)
193{ 286{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 287 struct nfs_read_data *rdata = data;
195 288
289 if (filelayout_reset_to_mds(rdata->header->lseg)) {
290 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
291 filelayout_reset_read(rdata);
292 rpc_exit(task, 0);
293 return;
294 }
196 rdata->read_done_cb = filelayout_read_done_cb; 295 rdata->read_done_cb = filelayout_read_done_cb;
197 296
198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 297 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +304,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
205 304
206static void filelayout_read_call_done(struct rpc_task *task, void *data) 305static void filelayout_read_call_done(struct rpc_task *task, void *data)
207{ 306{
208 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 307 struct nfs_read_data *rdata = data;
209 308
210 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 309 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
211 310
311 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags))
312 return;
313
212 /* Note this may cause RPC to be resent */ 314 /* Note this may cause RPC to be resent */
213 rdata->mds_ops->rpc_call_done(task, data); 315 rdata->header->mds_ops->rpc_call_done(task, data);
214} 316}
215 317
216static void filelayout_read_count_stats(struct rpc_task *task, void *data) 318static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{ 319{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 320 struct nfs_read_data *rdata = data;
219 321
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); 322 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
221} 323}
222 324
223static void filelayout_read_release(void *data) 325static void filelayout_read_release(void *data)
224{ 326{
225 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 327 struct nfs_read_data *rdata = data;
226 328
227 put_lseg(rdata->lseg); 329 if (!test_bit(NFS_IOHDR_REDO, &rdata->header->flags))
228 rdata->mds_ops->rpc_release(data); 330 nfs_put_client(rdata->ds_clp);
331 rdata->header->mds_ops->rpc_release(data);
229} 332}
230 333
231static int filelayout_write_done_cb(struct rpc_task *task, 334static int filelayout_write_done_cb(struct rpc_task *task,
232 struct nfs_write_data *data) 335 struct nfs_write_data *data)
233{ 336{
234 int reset = 0; 337 struct nfs_pgio_header *hdr = data->header;
235 338 int err;
236 if (filelayout_async_handle_error(task, data->args.context->state, 339
237 data->ds_clp, &reset) == -EAGAIN) { 340 err = filelayout_async_handle_error(task, data->args.context->state,
238 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 341 data->ds_clp, hdr->lseg);
239 __func__, data->ds_clp, data->ds_clp->cl_session); 342
240 if (reset) { 343 switch (err) {
241 pnfs_set_lo_fail(data->lseg); 344 case -NFS4ERR_RESET_TO_MDS:
242 nfs4_reset_write(task, data); 345 filelayout_reset_write(data);
243 } 346 return task->tk_status;
347 case -EAGAIN:
244 rpc_restart_call_prepare(task); 348 rpc_restart_call_prepare(task);
245 return -EAGAIN; 349 return -EAGAIN;
246 } 350 }
@@ -250,7 +354,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
250} 354}
251 355
252/* Fake up some data that will cause nfs_commit_release to retry the writes. */ 356/* Fake up some data that will cause nfs_commit_release to retry the writes. */
253static void prepare_to_resend_writes(struct nfs_write_data *data) 357static void prepare_to_resend_writes(struct nfs_commit_data *data)
254{ 358{
255 struct nfs_page *first = nfs_list_entry(data->pages.next); 359 struct nfs_page *first = nfs_list_entry(data->pages.next);
256 360
@@ -261,19 +365,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
261} 365}
262 366
263static int filelayout_commit_done_cb(struct rpc_task *task, 367static int filelayout_commit_done_cb(struct rpc_task *task,
264 struct nfs_write_data *data) 368 struct nfs_commit_data *data)
265{ 369{
266 int reset = 0; 370 int err;
267 371
268 if (filelayout_async_handle_error(task, data->args.context->state, 372 err = filelayout_async_handle_error(task, NULL, data->ds_clp,
269 data->ds_clp, &reset) == -EAGAIN) { 373 data->lseg);
270 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 374
271 __func__, data->ds_clp, data->ds_clp->cl_session); 375 switch (err) {
272 if (reset) { 376 case -NFS4ERR_RESET_TO_MDS:
273 prepare_to_resend_writes(data); 377 prepare_to_resend_writes(data);
274 pnfs_set_lo_fail(data->lseg); 378 return -EAGAIN;
275 } else 379 case -EAGAIN:
276 rpc_restart_call_prepare(task); 380 rpc_restart_call_prepare(task);
277 return -EAGAIN; 381 return -EAGAIN;
278 } 382 }
279 383
@@ -282,8 +386,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
282 386
283static void filelayout_write_prepare(struct rpc_task *task, void *data) 387static void filelayout_write_prepare(struct rpc_task *task, void *data)
284{ 388{
285 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 389 struct nfs_write_data *wdata = data;
286 390
391 if (filelayout_reset_to_mds(wdata->header->lseg)) {
392 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
393 filelayout_reset_write(wdata);
394 rpc_exit(task, 0);
395 return;
396 }
287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 397 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
288 &wdata->args.seq_args, &wdata->res.seq_res, 398 &wdata->args.seq_args, &wdata->res.seq_res,
289 task)) 399 task))
@@ -294,36 +404,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
294 404
295static void filelayout_write_call_done(struct rpc_task *task, void *data) 405static void filelayout_write_call_done(struct rpc_task *task, void *data)
296{ 406{
297 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 407 struct nfs_write_data *wdata = data;
408
409 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags))
410 return;
298 411
299 /* Note this may cause RPC to be resent */ 412 /* Note this may cause RPC to be resent */
300 wdata->mds_ops->rpc_call_done(task, data); 413 wdata->header->mds_ops->rpc_call_done(task, data);
301} 414}
302 415
303static void filelayout_write_count_stats(struct rpc_task *task, void *data) 416static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{ 417{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 418 struct nfs_write_data *wdata = data;
306 419
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); 420 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
308} 421}
309 422
310static void filelayout_write_release(void *data) 423static void filelayout_write_release(void *data)
311{ 424{
312 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 425 struct nfs_write_data *wdata = data;
426
427 if (!test_bit(NFS_IOHDR_REDO, &wdata->header->flags))
428 nfs_put_client(wdata->ds_clp);
429 wdata->header->mds_ops->rpc_release(data);
430}
431
432static void filelayout_commit_prepare(struct rpc_task *task, void *data)
433{
434 struct nfs_commit_data *wdata = data;
313 435
314 put_lseg(wdata->lseg); 436 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
315 wdata->mds_ops->rpc_release(data); 437 &wdata->args.seq_args, &wdata->res.seq_res,
438 task))
439 return;
440
441 rpc_call_start(task);
442}
443
444static void filelayout_write_commit_done(struct rpc_task *task, void *data)
445{
446 struct nfs_commit_data *wdata = data;
447
448 /* Note this may cause RPC to be resent */
449 wdata->mds_ops->rpc_call_done(task, data);
450}
451
452static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
453{
454 struct nfs_commit_data *cdata = data;
455
456 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
316} 457}
317 458
318static void filelayout_commit_release(void *data) 459static void filelayout_commit_release(void *calldata)
319{ 460{
320 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 461 struct nfs_commit_data *data = calldata;
321 462
322 nfs_commit_release_pages(wdata); 463 data->completion_ops->completion(data);
323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 464 put_lseg(data->lseg);
324 nfs_commit_clear_lock(NFS_I(wdata->inode)); 465 nfs_put_client(data->ds_clp);
325 put_lseg(wdata->lseg); 466 nfs_commitdata_release(data);
326 nfs_commitdata_release(wdata);
327} 467}
328 468
329static const struct rpc_call_ops filelayout_read_call_ops = { 469static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +481,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
341}; 481};
342 482
343static const struct rpc_call_ops filelayout_commit_call_ops = { 483static const struct rpc_call_ops filelayout_commit_call_ops = {
344 .rpc_call_prepare = filelayout_write_prepare, 484 .rpc_call_prepare = filelayout_commit_prepare,
345 .rpc_call_done = filelayout_write_call_done, 485 .rpc_call_done = filelayout_write_commit_done,
346 .rpc_count_stats = filelayout_write_count_stats, 486 .rpc_count_stats = filelayout_commit_count_stats,
347 .rpc_release = filelayout_commit_release, 487 .rpc_release = filelayout_commit_release,
348}; 488};
349 489
350static enum pnfs_try_status 490static enum pnfs_try_status
351filelayout_read_pagelist(struct nfs_read_data *data) 491filelayout_read_pagelist(struct nfs_read_data *data)
352{ 492{
353 struct pnfs_layout_segment *lseg = data->lseg; 493 struct nfs_pgio_header *hdr = data->header;
494 struct pnfs_layout_segment *lseg = hdr->lseg;
354 struct nfs4_pnfs_ds *ds; 495 struct nfs4_pnfs_ds *ds;
355 loff_t offset = data->args.offset; 496 loff_t offset = data->args.offset;
356 u32 j, idx; 497 u32 j, idx;
@@ -358,25 +499,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
358 int status; 499 int status;
359 500
360 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 501 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
361 __func__, data->inode->i_ino, 502 __func__, hdr->inode->i_ino,
362 data->args.pgbase, (size_t)data->args.count, offset); 503 data->args.pgbase, (size_t)data->args.count, offset);
363 504
364 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
365 return PNFS_NOT_ATTEMPTED;
366
367 /* Retrieve the correct rpc_client for the byte range */ 505 /* Retrieve the correct rpc_client for the byte range */
368 j = nfs4_fl_calc_j_index(lseg, offset); 506 j = nfs4_fl_calc_j_index(lseg, offset);
369 idx = nfs4_fl_calc_ds_index(lseg, j); 507 idx = nfs4_fl_calc_ds_index(lseg, j);
370 ds = nfs4_fl_prepare_ds(lseg, idx); 508 ds = nfs4_fl_prepare_ds(lseg, idx);
371 if (!ds) { 509 if (!ds)
372 /* Either layout fh index faulty, or ds connect failed */
373 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
374 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
375 return PNFS_NOT_ATTEMPTED; 510 return PNFS_NOT_ATTEMPTED;
376 } 511 dprintk("%s USE DS: %s cl_count %d\n", __func__,
377 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 512 ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
378 513
379 /* No multipath support. Use first DS */ 514 /* No multipath support. Use first DS */
515 atomic_inc(&ds->ds_clp->cl_count);
380 data->ds_clp = ds->ds_clp; 516 data->ds_clp = ds->ds_clp;
381 fh = nfs4_fl_select_ds_fh(lseg, j); 517 fh = nfs4_fl_select_ds_fh(lseg, j);
382 if (fh) 518 if (fh)
@@ -386,8 +522,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
386 data->mds_offset = offset; 522 data->mds_offset = offset;
387 523
388 /* Perform an asynchronous read to ds */ 524 /* Perform an asynchronous read to ds */
389 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, 525 status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
390 &filelayout_read_call_ops); 526 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
391 BUG_ON(status != 0); 527 BUG_ON(status != 0);
392 return PNFS_ATTEMPTED; 528 return PNFS_ATTEMPTED;
393} 529}
@@ -396,32 +532,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
396static enum pnfs_try_status 532static enum pnfs_try_status
397filelayout_write_pagelist(struct nfs_write_data *data, int sync) 533filelayout_write_pagelist(struct nfs_write_data *data, int sync)
398{ 534{
399 struct pnfs_layout_segment *lseg = data->lseg; 535 struct nfs_pgio_header *hdr = data->header;
536 struct pnfs_layout_segment *lseg = hdr->lseg;
400 struct nfs4_pnfs_ds *ds; 537 struct nfs4_pnfs_ds *ds;
401 loff_t offset = data->args.offset; 538 loff_t offset = data->args.offset;
402 u32 j, idx; 539 u32 j, idx;
403 struct nfs_fh *fh; 540 struct nfs_fh *fh;
404 int status; 541 int status;
405 542
406 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
407 return PNFS_NOT_ATTEMPTED;
408
409 /* Retrieve the correct rpc_client for the byte range */ 543 /* Retrieve the correct rpc_client for the byte range */
410 j = nfs4_fl_calc_j_index(lseg, offset); 544 j = nfs4_fl_calc_j_index(lseg, offset);
411 idx = nfs4_fl_calc_ds_index(lseg, j); 545 idx = nfs4_fl_calc_ds_index(lseg, j);
412 ds = nfs4_fl_prepare_ds(lseg, idx); 546 ds = nfs4_fl_prepare_ds(lseg, idx);
413 if (!ds) { 547 if (!ds)
414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
418 return PNFS_NOT_ATTEMPTED; 548 return PNFS_NOT_ATTEMPTED;
419 } 549 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
420 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 550 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
421 data->inode->i_ino, sync, (size_t) data->args.count, offset, 551 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
422 ds->ds_remotestr);
423 552
424 data->write_done_cb = filelayout_write_done_cb; 553 data->write_done_cb = filelayout_write_done_cb;
554 atomic_inc(&ds->ds_clp->cl_count);
425 data->ds_clp = ds->ds_clp; 555 data->ds_clp = ds->ds_clp;
426 fh = nfs4_fl_select_ds_fh(lseg, j); 556 fh = nfs4_fl_select_ds_fh(lseg, j);
427 if (fh) 557 if (fh)
@@ -433,8 +563,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
433 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 563 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
434 564
435 /* Perform an asynchronous write */ 565 /* Perform an asynchronous write */
436 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, 566 status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
437 &filelayout_write_call_ops, sync); 567 &filelayout_write_call_ops, sync,
568 RPC_TASK_SOFTCONN);
438 BUG_ON(status != 0); 569 BUG_ON(status != 0);
439 return PNFS_ATTEMPTED; 570 return PNFS_ATTEMPTED;
440} 571}
@@ -650,10 +781,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
650 781
651 dprintk("--> %s\n", __func__); 782 dprintk("--> %s\n", __func__);
652 nfs4_fl_put_deviceid(fl->dsaddr); 783 nfs4_fl_put_deviceid(fl->dsaddr);
653 kfree(fl->commit_buckets); 784 /* This assumes a single RW lseg */
785 if (lseg->pls_range.iomode == IOMODE_RW) {
786 struct nfs4_filelayout *flo;
787
788 flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
789 flo->commit_info.nbuckets = 0;
790 kfree(flo->commit_info.buckets);
791 flo->commit_info.buckets = NULL;
792 }
654 _filelayout_free_lseg(fl); 793 _filelayout_free_lseg(fl);
655} 794}
656 795
796static int
797filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
798 struct nfs_commit_info *cinfo,
799 gfp_t gfp_flags)
800{
801 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
802 struct pnfs_commit_bucket *buckets;
803 int size;
804
805 if (fl->commit_through_mds)
806 return 0;
807 if (cinfo->ds->nbuckets != 0) {
808 /* This assumes there is only one IOMODE_RW lseg. What
809 * we really want to do is have a layout_hdr level
810 * dictionary of <multipath_list4, fh> keys, each
811 * associated with a struct list_head, populated by calls
812 * to filelayout_write_pagelist().
813 * */
814 return 0;
815 }
816
817 size = (fl->stripe_type == STRIPE_SPARSE) ?
818 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
819
820 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
821 gfp_flags);
822 if (!buckets)
823 return -ENOMEM;
824 else {
825 int i;
826
827 spin_lock(cinfo->lock);
828 if (cinfo->ds->nbuckets != 0)
829 kfree(buckets);
830 else {
831 cinfo->ds->buckets = buckets;
832 cinfo->ds->nbuckets = size;
833 for (i = 0; i < size; i++) {
834 INIT_LIST_HEAD(&buckets[i].written);
835 INIT_LIST_HEAD(&buckets[i].committing);
836 }
837 }
838 spin_unlock(cinfo->lock);
839 return 0;
840 }
841}
842
657static struct pnfs_layout_segment * 843static struct pnfs_layout_segment *
658filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 844filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 struct nfs4_layoutget_res *lgr, 845 struct nfs4_layoutget_res *lgr,
@@ -673,29 +859,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
673 _filelayout_free_lseg(fl); 859 _filelayout_free_lseg(fl);
674 return NULL; 860 return NULL;
675 } 861 }
676
677 /* This assumes there is only one IOMODE_RW lseg. What
678 * we really want to do is have a layout_hdr level
679 * dictionary of <multipath_list4, fh> keys, each
680 * associated with a struct list_head, populated by calls
681 * to filelayout_write_pagelist().
682 * */
683 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
684 int i;
685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
687
688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
689 if (!fl->commit_buckets) {
690 filelayout_free_lseg(&fl->generic_hdr);
691 return NULL;
692 }
693 fl->number_of_buckets = size;
694 for (i = 0; i < size; i++) {
695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
698 }
699 return &fl->generic_hdr; 862 return &fl->generic_hdr;
700} 863}
701 864
@@ -716,8 +879,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
716 !nfs_generic_pg_test(pgio, prev, req)) 879 !nfs_generic_pg_test(pgio, prev, req))
717 return false; 880 return false;
718 881
719 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 882 p_stripe = (u64)req_offset(prev);
720 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 883 r_stripe = (u64)req_offset(req);
721 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 884 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
722 885
723 do_div(p_stripe, stripe_unit); 886 do_div(p_stripe, stripe_unit);
@@ -732,6 +895,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
732{ 895{
733 BUG_ON(pgio->pg_lseg != NULL); 896 BUG_ON(pgio->pg_lseg != NULL);
734 897
898 if (req->wb_offset != req->wb_pgbase) {
899 /*
900 * Handling unaligned pages is difficult, because have to
901 * somehow split a req in two in certain cases in the
902 * pg.test code. Avoid this by just not using pnfs
903 * in this case.
904 */
905 nfs_pageio_reset_read_mds(pgio);
906 return;
907 }
735 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 908 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
736 req->wb_context, 909 req->wb_context,
737 0, 910 0,
@@ -747,8 +920,13 @@ static void
747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 920filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
748 struct nfs_page *req) 921 struct nfs_page *req)
749{ 922{
923 struct nfs_commit_info cinfo;
924 int status;
925
750 BUG_ON(pgio->pg_lseg != NULL); 926 BUG_ON(pgio->pg_lseg != NULL);
751 927
928 if (req->wb_offset != req->wb_pgbase)
929 goto out_mds;
752 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 930 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
753 req->wb_context, 931 req->wb_context,
754 0, 932 0,
@@ -757,7 +935,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
757 GFP_NOFS); 935 GFP_NOFS);
758 /* If no lseg, fall back to write through mds */ 936 /* If no lseg, fall back to write through mds */
759 if (pgio->pg_lseg == NULL) 937 if (pgio->pg_lseg == NULL)
760 nfs_pageio_reset_write_mds(pgio); 938 goto out_mds;
939 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
940 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
941 if (status < 0) {
942 put_lseg(pgio->pg_lseg);
943 pgio->pg_lseg = NULL;
944 goto out_mds;
945 }
946 return;
947out_mds:
948 nfs_pageio_reset_write_mds(pgio);
761} 949}
762 950
763static const struct nfs_pageio_ops filelayout_pg_read_ops = { 951static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +972,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
784 * If this will make the bucket empty, it will need to put the lseg reference. 972 * If this will make the bucket empty, it will need to put the lseg reference.
785 */ 973 */
786static void 974static void
787filelayout_clear_request_commit(struct nfs_page *req) 975filelayout_clear_request_commit(struct nfs_page *req,
976 struct nfs_commit_info *cinfo)
788{ 977{
789 struct pnfs_layout_segment *freeme = NULL; 978 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791 979
792 spin_lock(&inode->i_lock); 980 spin_lock(cinfo->lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 981 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out; 982 goto out;
983 cinfo->ds->nwritten--;
795 if (list_is_singular(&req->wb_list)) { 984 if (list_is_singular(&req->wb_list)) {
796 struct pnfs_layout_segment *lseg; 985 struct pnfs_commit_bucket *bucket;
797 986
798 /* From here we can find the bucket, but for the moment, 987 bucket = list_first_entry(&req->wb_list,
799 * since there is only one relevant lseg... 988 struct pnfs_commit_bucket,
800 */ 989 written);
801 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 990 freeme = bucket->wlseg;
802 if (lseg->pls_range.iomode == IOMODE_RW) { 991 bucket->wlseg = NULL;
803 freeme = lseg;
804 break;
805 }
806 }
807 } 992 }
808out: 993out:
809 nfs_request_remove_commit_list(req); 994 nfs_request_remove_commit_list(req, cinfo);
810 spin_unlock(&inode->i_lock); 995 spin_unlock(cinfo->lock);
811 put_lseg(freeme); 996 put_lseg(freeme);
812} 997}
813 998
814static struct list_head * 999static struct list_head *
815filelayout_choose_commit_list(struct nfs_page *req, 1000filelayout_choose_commit_list(struct nfs_page *req,
816 struct pnfs_layout_segment *lseg) 1001 struct pnfs_layout_segment *lseg,
1002 struct nfs_commit_info *cinfo)
817{ 1003{
818 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 1004 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
819 u32 i, j; 1005 u32 i, j;
820 struct list_head *list; 1006 struct list_head *list;
1007 struct pnfs_commit_bucket *buckets;
821 1008
822 if (fl->commit_through_mds) 1009 if (fl->commit_through_mds)
823 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; 1010 return &cinfo->mds->list;
824 1011
825 /* Note that we are calling nfs4_fl_calc_j_index on each page 1012 /* Note that we are calling nfs4_fl_calc_j_index on each page
826 * that ends up being committed to a data server. An attractive 1013 * that ends up being committed to a data server. An attractive
@@ -828,31 +1015,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
828 * to store the value calculated in filelayout_write_pagelist 1015 * to store the value calculated in filelayout_write_pagelist
829 * and just use that here. 1016 * and just use that here.
830 */ 1017 */
831 j = nfs4_fl_calc_j_index(lseg, 1018 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
832 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
833 i = select_bucket_index(fl, j); 1019 i = select_bucket_index(fl, j);
834 list = &fl->commit_buckets[i].written; 1020 buckets = cinfo->ds->buckets;
1021 list = &buckets[i].written;
835 if (list_empty(list)) { 1022 if (list_empty(list)) {
836 /* Non-empty buckets hold a reference on the lseg. That ref 1023 /* Non-empty buckets hold a reference on the lseg. That ref
837 * is normally transferred to the COMMIT call and released 1024 * is normally transferred to the COMMIT call and released
838 * there. It could also be released if the last req is pulled 1025 * there. It could also be released if the last req is pulled
839 * off due to a rewrite, in which case it will be done in 1026 * off due to a rewrite, in which case it will be done in
840 * filelayout_remove_commit_req 1027 * filelayout_clear_request_commit
841 */ 1028 */
842 get_lseg(lseg); 1029 buckets[i].wlseg = get_lseg(lseg);
843 } 1030 }
844 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1031 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1032 cinfo->ds->nwritten++;
845 return list; 1033 return list;
846} 1034}
847 1035
848static void 1036static void
849filelayout_mark_request_commit(struct nfs_page *req, 1037filelayout_mark_request_commit(struct nfs_page *req,
850 struct pnfs_layout_segment *lseg) 1038 struct pnfs_layout_segment *lseg,
1039 struct nfs_commit_info *cinfo)
851{ 1040{
852 struct list_head *list; 1041 struct list_head *list;
853 1042
854 list = filelayout_choose_commit_list(req, lseg); 1043 list = filelayout_choose_commit_list(req, lseg, cinfo);
855 nfs_request_add_commit_list(req, list); 1044 nfs_request_add_commit_list(req, list, cinfo);
856} 1045}
857 1046
858static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1047static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1069,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
880 return flseg->fh_array[i]; 1069 return flseg->fh_array[i];
881} 1070}
882 1071
883static int filelayout_initiate_commit(struct nfs_write_data *data, int how) 1072static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
884{ 1073{
885 struct pnfs_layout_segment *lseg = data->lseg; 1074 struct pnfs_layout_segment *lseg = data->lseg;
886 struct nfs4_pnfs_ds *ds; 1075 struct nfs4_pnfs_ds *ds;
@@ -890,135 +1079,137 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
890 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1079 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
891 ds = nfs4_fl_prepare_ds(lseg, idx); 1080 ds = nfs4_fl_prepare_ds(lseg, idx);
892 if (!ds) { 1081 if (!ds) {
893 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
894 __func__);
895 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
896 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
897 prepare_to_resend_writes(data); 1082 prepare_to_resend_writes(data);
898 filelayout_commit_release(data); 1083 filelayout_commit_release(data);
899 return -EAGAIN; 1084 return -EAGAIN;
900 } 1085 }
901 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 1086 dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
902 data->write_done_cb = filelayout_commit_done_cb; 1087 data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
1088 data->commit_done_cb = filelayout_commit_done_cb;
1089 atomic_inc(&ds->ds_clp->cl_count);
903 data->ds_clp = ds->ds_clp; 1090 data->ds_clp = ds->ds_clp;
904 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1091 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
905 if (fh) 1092 if (fh)
906 data->args.fh = fh; 1093 data->args.fh = fh;
907 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, 1094 return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
908 &filelayout_commit_call_ops, how); 1095 &filelayout_commit_call_ops, how,
909} 1096 RPC_TASK_SOFTCONN);
910
911/*
912 * This is only useful while we are using whole file layouts.
913 */
914static struct pnfs_layout_segment *
915find_only_write_lseg_locked(struct inode *inode)
916{
917 struct pnfs_layout_segment *lseg;
918
919 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
920 if (lseg->pls_range.iomode == IOMODE_RW)
921 return lseg;
922 return NULL;
923}
924
925static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
926{
927 struct pnfs_layout_segment *rv;
928
929 spin_lock(&inode->i_lock);
930 rv = find_only_write_lseg_locked(inode);
931 if (rv)
932 get_lseg(rv);
933 spin_unlock(&inode->i_lock);
934 return rv;
935} 1097}
936 1098
937static int 1099static int
938filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, 1100transfer_commit_list(struct list_head *src, struct list_head *dst,
939 spinlock_t *lock) 1101 struct nfs_commit_info *cinfo, int max)
940{ 1102{
941 struct list_head *src = &bucket->written;
942 struct list_head *dst = &bucket->committing;
943 struct nfs_page *req, *tmp; 1103 struct nfs_page *req, *tmp;
944 int ret = 0; 1104 int ret = 0;
945 1105
946 list_for_each_entry_safe(req, tmp, src, wb_list) { 1106 list_for_each_entry_safe(req, tmp, src, wb_list) {
947 if (!nfs_lock_request(req)) 1107 if (!nfs_lock_request(req))
948 continue; 1108 continue;
949 if (cond_resched_lock(lock)) 1109 if (cond_resched_lock(cinfo->lock))
950 list_safe_reset_next(req, tmp, wb_list); 1110 list_safe_reset_next(req, tmp, wb_list);
951 nfs_request_remove_commit_list(req); 1111 nfs_request_remove_commit_list(req, cinfo);
952 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1112 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
953 nfs_list_add_request(req, dst); 1113 nfs_list_add_request(req, dst);
954 ret++; 1114 ret++;
955 if (ret == max) 1115 if ((ret == max) && !cinfo->dreq)
956 break; 1116 break;
957 } 1117 }
958 return ret; 1118 return ret;
959} 1119}
960 1120
1121static int
1122filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1123 struct nfs_commit_info *cinfo,
1124 int max)
1125{
1126 struct list_head *src = &bucket->written;
1127 struct list_head *dst = &bucket->committing;
1128 int ret;
1129
1130 ret = transfer_commit_list(src, dst, cinfo, max);
1131 if (ret) {
1132 cinfo->ds->nwritten -= ret;
1133 cinfo->ds->ncommitting += ret;
1134 bucket->clseg = bucket->wlseg;
1135 if (list_empty(src))
1136 bucket->wlseg = NULL;
1137 else
1138 get_lseg(bucket->clseg);
1139 }
1140 return ret;
1141}
1142
961/* Move reqs from written to committing lists, returning count of number moved. 1143/* Move reqs from written to committing lists, returning count of number moved.
962 * Note called with i_lock held. 1144 * Note called with cinfo->lock held.
963 */ 1145 */
964static int filelayout_scan_commit_lists(struct inode *inode, int max, 1146static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
965 spinlock_t *lock) 1147 int max)
966{ 1148{
967 struct pnfs_layout_segment *lseg;
968 struct nfs4_filelayout_segment *fl;
969 int i, rv = 0, cnt; 1149 int i, rv = 0, cnt;
970 1150
971 lseg = find_only_write_lseg_locked(inode); 1151 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
972 if (!lseg) 1152 cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
973 goto out_done; 1153 cinfo, max);
974 fl = FILELAYOUT_LSEG(lseg);
975 if (fl->commit_through_mds)
976 goto out_done;
977 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
978 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
979 max, lock);
980 max -= cnt; 1154 max -= cnt;
981 rv += cnt; 1155 rv += cnt;
982 } 1156 }
983out_done:
984 return rv; 1157 return rv;
985} 1158}
986 1159
1160/* Pull everything off the committing lists and dump into @dst */
1161static void filelayout_recover_commit_reqs(struct list_head *dst,
1162 struct nfs_commit_info *cinfo)
1163{
1164 struct pnfs_commit_bucket *b;
1165 int i;
1166
1167 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1168 * only called on single thread per dreq.
1169 * Can't take the lock because need to do put_lseg
1170 */
1171 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1172 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1173 BUG_ON(!list_empty(&b->written));
1174 put_lseg(b->wlseg);
1175 b->wlseg = NULL;
1176 }
1177 }
1178 cinfo->ds->nwritten = 0;
1179}
1180
987static unsigned int 1181static unsigned int
988alloc_ds_commits(struct inode *inode, struct list_head *list) 1182alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
989{ 1183{
990 struct pnfs_layout_segment *lseg; 1184 struct pnfs_ds_commit_info *fl_cinfo;
991 struct nfs4_filelayout_segment *fl; 1185 struct pnfs_commit_bucket *bucket;
992 struct nfs_write_data *data; 1186 struct nfs_commit_data *data;
993 int i, j; 1187 int i, j;
994 unsigned int nreq = 0; 1188 unsigned int nreq = 0;
995 1189
996 /* Won't need this when non-whole file layout segments are supported 1190 fl_cinfo = cinfo->ds;
997 * instead we will use a pnfs_layout_hdr structure */ 1191 bucket = fl_cinfo->buckets;
998 lseg = find_only_write_lseg(inode); 1192 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
999 if (!lseg) 1193 if (list_empty(&bucket->committing))
1000 return 0;
1001 fl = FILELAYOUT_LSEG(lseg);
1002 for (i = 0; i < fl->number_of_buckets; i++) {
1003 if (list_empty(&fl->commit_buckets[i].committing))
1004 continue; 1194 continue;
1005 data = nfs_commitdata_alloc(); 1195 data = nfs_commitdata_alloc();
1006 if (!data) 1196 if (!data)
1007 break; 1197 break;
1008 data->ds_commit_index = i; 1198 data->ds_commit_index = i;
1009 data->lseg = lseg; 1199 data->lseg = bucket->clseg;
1200 bucket->clseg = NULL;
1010 list_add(&data->pages, list); 1201 list_add(&data->pages, list);
1011 nreq++; 1202 nreq++;
1012 } 1203 }
1013 1204
1014 /* Clean up on error */ 1205 /* Clean up on error */
1015 for (j = i; j < fl->number_of_buckets; j++) { 1206 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1016 if (list_empty(&fl->commit_buckets[i].committing)) 1207 if (list_empty(&bucket->committing))
1017 continue; 1208 continue;
1018 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); 1209 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1019 put_lseg(lseg); /* associated with emptying bucket */ 1210 put_lseg(bucket->clseg);
1211 bucket->clseg = NULL;
1020 } 1212 }
1021 put_lseg(lseg);
1022 /* Caller will clean up entries put on list */ 1213 /* Caller will clean up entries put on list */
1023 return nreq; 1214 return nreq;
1024} 1215}
@@ -1026,9 +1217,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
1026/* This follows nfs_commit_list pretty closely */ 1217/* This follows nfs_commit_list pretty closely */
1027static int 1218static int
1028filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1219filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1029 int how) 1220 int how, struct nfs_commit_info *cinfo)
1030{ 1221{
1031 struct nfs_write_data *data, *tmp; 1222 struct nfs_commit_data *data, *tmp;
1032 LIST_HEAD(list); 1223 LIST_HEAD(list);
1033 unsigned int nreq = 0; 1224 unsigned int nreq = 0;
1034 1225
@@ -1039,30 +1230,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1039 list_add(&data->pages, &list); 1230 list_add(&data->pages, &list);
1040 nreq++; 1231 nreq++;
1041 } else 1232 } else
1042 nfs_retry_commit(mds_pages, NULL); 1233 nfs_retry_commit(mds_pages, NULL, cinfo);
1043 } 1234 }
1044 1235
1045 nreq += alloc_ds_commits(inode, &list); 1236 nreq += alloc_ds_commits(cinfo, &list);
1046 1237
1047 if (nreq == 0) { 1238 if (nreq == 0) {
1048 nfs_commit_clear_lock(NFS_I(inode)); 1239 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1049 goto out; 1240 goto out;
1050 } 1241 }
1051 1242
1052 atomic_add(nreq, &NFS_I(inode)->commits_outstanding); 1243 atomic_add(nreq, &cinfo->mds->rpcs_out);
1053 1244
1054 list_for_each_entry_safe(data, tmp, &list, pages) { 1245 list_for_each_entry_safe(data, tmp, &list, pages) {
1055 list_del_init(&data->pages); 1246 list_del_init(&data->pages);
1056 if (!data->lseg) { 1247 if (!data->lseg) {
1057 nfs_init_commit(data, mds_pages, NULL); 1248 nfs_init_commit(data, mds_pages, NULL, cinfo);
1058 nfs_initiate_commit(data, NFS_CLIENT(inode), 1249 nfs_initiate_commit(NFS_CLIENT(inode), data,
1059 data->mds_ops, how); 1250 data->mds_ops, how, 0);
1060 } else { 1251 } else {
1061 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); 1252 struct pnfs_commit_bucket *buckets;
1253
1254 buckets = cinfo->ds->buckets;
1255 nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
1062 filelayout_initiate_commit(data, how); 1256 filelayout_initiate_commit(data, how);
1063 } 1257 }
1064 } 1258 }
1065out: 1259out:
1260 cinfo->ds->ncommitting = 0;
1066 return PNFS_ATTEMPTED; 1261 return PNFS_ATTEMPTED;
1067} 1262}
1068 1263
@@ -1072,17 +1267,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
1072 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1267 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
1073} 1268}
1074 1269
1270static struct pnfs_layout_hdr *
1271filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1272{
1273 struct nfs4_filelayout *flo;
1274
1275 flo = kzalloc(sizeof(*flo), gfp_flags);
1276 return &flo->generic_hdr;
1277}
1278
1279static void
1280filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
1281{
1282 kfree(FILELAYOUT_FROM_HDR(lo));
1283}
1284
1285static struct pnfs_ds_commit_info *
1286filelayout_get_ds_info(struct inode *inode)
1287{
1288 struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1289
1290 if (layout == NULL)
1291 return NULL;
1292 else
1293 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
1294}
1295
1075static struct pnfs_layoutdriver_type filelayout_type = { 1296static struct pnfs_layoutdriver_type filelayout_type = {
1076 .id = LAYOUT_NFSV4_1_FILES, 1297 .id = LAYOUT_NFSV4_1_FILES,
1077 .name = "LAYOUT_NFSV4_1_FILES", 1298 .name = "LAYOUT_NFSV4_1_FILES",
1078 .owner = THIS_MODULE, 1299 .owner = THIS_MODULE,
1300 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1301 .free_layout_hdr = filelayout_free_layout_hdr,
1079 .alloc_lseg = filelayout_alloc_lseg, 1302 .alloc_lseg = filelayout_alloc_lseg,
1080 .free_lseg = filelayout_free_lseg, 1303 .free_lseg = filelayout_free_lseg,
1081 .pg_read_ops = &filelayout_pg_read_ops, 1304 .pg_read_ops = &filelayout_pg_read_ops,
1082 .pg_write_ops = &filelayout_pg_write_ops, 1305 .pg_write_ops = &filelayout_pg_write_ops,
1306 .get_ds_info = &filelayout_get_ds_info,
1083 .mark_request_commit = filelayout_mark_request_commit, 1307 .mark_request_commit = filelayout_mark_request_commit,
1084 .clear_request_commit = filelayout_clear_request_commit, 1308 .clear_request_commit = filelayout_clear_request_commit,
1085 .scan_commit_lists = filelayout_scan_commit_lists, 1309 .scan_commit_lists = filelayout_scan_commit_lists,
1310 .recover_commit_reqs = filelayout_recover_commit_reqs,
1086 .commit_pagelist = filelayout_commit_pagelist, 1311 .commit_pagelist = filelayout_commit_pagelist,
1087 .read_pagelist = filelayout_read_pagelist, 1312 .read_pagelist = filelayout_read_pagelist,
1088 .write_pagelist = filelayout_write_pagelist, 1313 .write_pagelist = filelayout_write_pagelist,