author     Fred Isaman <iisaman@citi.umich.edu>              2011-07-30 20:52:54 -0400
committer  Trond Myklebust <Trond.Myklebust@netapp.com>      2011-07-31 12:18:17 -0400
commit     650e2d39bd8f6b99f39b5009dbed9fbd3bb65e54 (patch)
tree       e078728da5ac6b40af0206ca9903b95e49731cb2 /fs
parent     9549ec01b0dcf1c1eb277cba60067236b3f48508 (diff)
pnfsblock: bl_write_pagelist
Note: When the upper layer's read/write request cannot be fulfilled, the block layout driver shouldn't silently mark the page as being in error. It should do what it can and leave the rest to the upper layer. To do so, we should set rdata/wdata->res.count properly. When the upper layer resends the read/write request to finish the remaining part of the request, pgbase is the position where we should start.

[pnfsblock: bl_write_pagelist support functions]
[pnfsblock: bl_write_pagelist adjust for missing PG_USE_PNFS]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
[pnfsblock: handle errors when read or write pagelist.]
Signed-off-by: Zhang Jingwang <yyalone@gmail.com>
[pnfs-block: use new write_pagelist api]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
[SQUASHME: pnfsblock: mds_offset is set in the generic layer]
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
[pnfsblock: mark IO error with NFS_LAYOUT_{RW|RO}_FAILED]
Signed-off-by: Peng Tao <peng_tao@emc.com>
[pnfsblock: SQUASHME: adjust to API change]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
[pnfsblock: fixup blksize alignment in bl_setup_layoutcommit]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
[pnfsblock: bl_write_pagelist adjust for missing PG_USE_PNFS]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
[pnfsblock: handle errors when read or write pagelist.]
Signed-off-by: Zhang Jingwang <yyalone@gmail.com>
[pnfs-block: use new write_pagelist api]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
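For illustration only, the partial-completion accounting described in the note can be sketched in plain userspace C. Everything below is an assumption made for the example (the hard-coded 512-byte sectors, 4 KiB pages, and the sample offsets); it is not kernel API, it merely mirrors the res.count clamping that the patch adds at the end of bl_write_pagelist.

/*
 * Standalone sketch of the res.count accounting, assuming 512-byte
 * sectors (SECTOR_SHIFT == 9) and 4 KiB pages. Not kernel code.
 */
#include <stdio.h>

#define SECTOR_SHIFT       9
#define PAGE_CACHE_SHIFT   12
#define PAGE_CACHE_MASK    (~((1LL << PAGE_CACHE_SHIFT) - 1))
#define PAGE_CACHE_SECTORS (1 << (PAGE_CACHE_SHIFT - SECTOR_SHIFT))

int main(void)
{
        long long offset = 4196;  /* write starts 100 bytes into page 1 */
        long long count = 6000;   /* bytes requested by the upper layer */
        int npages = 2;           /* pages actually covered by submitted bios */

        /* Start at the first sector of the page containing 'offset'. */
        long long isect = (offset & PAGE_CACHE_MASK) >> SECTOR_SHIFT;

        /* Each submitted page advances the position by a page worth of sectors. */
        for (int i = 0; i < npages; i++)
                isect += PAGE_CACHE_SECTORS;

        /* Report only what was covered, clamped to the original request. */
        long long res_count = (isect << SECTOR_SHIFT) - offset;
        if (count < res_count)
                res_count = count;

        printf("res.count = %lld\n", res_count);  /* 6000 for these values */
        return 0;
}

With npages reduced to 1 (say an error stopped the second bio), res_count comes out as 8192 - 4196 = 3996, and the upper layer can resend the remainder starting from that boundary, which is where pgbase comes in.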
Diffstat (limited to 'fs')
-rw-r--r--   fs/nfs/blocklayout/blocklayout.c   129
1 file changed, 126 insertions, 3 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index facb5ba21204..2e373826db80 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -74,6 +74,19 @@ static int is_hole(struct pnfs_block_extent *be, sector_t isect)
 	return !bl_is_sector_init(be->be_inval, isect);
 }
 
+/* Given the be associated with isect, determine if page data can be
+ * written to disk.
+ */
+static int is_writable(struct pnfs_block_extent *be, sector_t isect)
+{
+	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
+		return 1;
+	else if (be->be_state != PNFS_BLOCK_INVALID_DATA)
+		return 0;
+	else
+		return bl_is_sector_init(be->be_inval, isect);
+}
+
 /* The data we are handed might be spread across several bios. We need
  * to track when the last one is finished.
  */
@@ -316,11 +329,121 @@ out:
 	return PNFS_NOT_ATTEMPTED;
 }
 
+/* This is basically copied from mpage_end_io_read */
+static void bl_end_io_write(struct bio *bio, int err)
+{
+	struct parallel_io *par = bio->bi_private;
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+
+	if (!uptodate) {
+		if (!wdata->pnfs_error)
+			wdata->pnfs_error = -EIO;
+		bl_set_lo_fail(wdata->lseg);
+	}
+	bio_put(bio);
+	put_parallel(par);
+}
+
+/* Function scheduled for call during bl_end_par_io_write,
+ * it marks sectors as written and extends the commitlist.
+ */
+static void bl_write_cleanup(struct work_struct *work)
+{
+	struct rpc_task *task;
+	struct nfs_write_data *wdata;
+	dprintk("%s enter\n", __func__);
+	task = container_of(work, struct rpc_task, u.tk_work);
+	wdata = container_of(task, struct nfs_write_data, task);
+	pnfs_ld_write_done(wdata);
+}
+
+/* Called when last of bios associated with a bl_write_pagelist call finishes */
+static void
+bl_end_par_io_write(void *data)
+{
+	struct nfs_write_data *wdata = data;
+
+	/* STUB - ignoring error handling */
+	wdata->task.tk_status = 0;
+	wdata->verf.committed = NFS_FILE_SYNC;
+	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
+	schedule_work(&wdata->task.u.tk_work);
+}
+
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata,
-		  int sync)
+bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 {
-	return PNFS_NOT_ATTEMPTED;
+	int i;
+	struct bio *bio = NULL;
+	struct pnfs_block_extent *be = NULL;
+	sector_t isect, extent_length = 0;
+	struct parallel_io *par;
+	loff_t offset = wdata->args.offset;
+	size_t count = wdata->args.count;
+	struct page **pages = wdata->args.pages;
+	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
+
+	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
+	 * We want to write each, and if there is an error remove it from
+	 * list and call
+	 * nfs_retry_request(req) to have it redone using nfs.
+	 * QUEST? Do as block or per req? Think have to do per block
+	 * as part of end_bio
+	 */
+	par = alloc_parallel(wdata);
+	if (!par)
+		return PNFS_NOT_ATTEMPTED;
+	par->call_ops = *wdata->mds_ops;
+	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
+	par->pnfs_callback = bl_end_par_io_write;
+	/* At this point, have to be more careful with error handling */
+
+	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
+	for (i = pg_index; i < wdata->npages; i++) {
+		if (!extent_length) {
+			/* We've used up the previous extent */
+			bl_put_extent(be);
+			bio = bl_submit_bio(WRITE, bio);
+			/* Get the next one */
+			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+						isect, NULL);
+			if (!be || !is_writable(be, isect)) {
+				wdata->pnfs_error = -ENOMEM;
+				goto out;
+			}
+			extent_length = be->be_length -
+					(isect - be->be_f_offset);
+		}
+		for (;;) {
+			if (!bio) {
+				bio = bio_alloc(GFP_NOIO, wdata->npages - i);
+				if (!bio) {
+					wdata->pnfs_error = -ENOMEM;
+					goto out;
+				}
+				bio->bi_sector = isect - be->be_f_offset +
+						 be->be_v_offset;
+				bio->bi_bdev = be->be_mdev;
+				bio->bi_end_io = bl_end_io_write;
+				bio->bi_private = par;
+			}
+			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
+				break;
+			bio = bl_submit_bio(WRITE, bio);
+		}
+		isect += PAGE_CACHE_SECTORS;
+		extent_length -= PAGE_CACHE_SECTORS;
+	}
+	wdata->res.count = (isect << SECTOR_SHIFT) - (offset);
+	if (count < wdata->res.count)
+		wdata->res.count = count;
+out:
+	bl_put_extent(be);
+	bl_submit_bio(WRITE, bio);
+	put_parallel(par);
+	return PNFS_ATTEMPTED;
 }
 
 /* FIXME - range ignored */
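As a quick sanity check on the is_writable() helper added in the first hunk, its decision table can be exercised with a small userspace sketch. The enum below is a stand-in for the PNFS_BLOCK_* extent states and bl_is_sector_init() is reduced to a plain flag; both are assumptions made only for this example.

/* Userspace sketch of the is_writable() decision above. Not kernel code. */
#include <stdio.h>

enum ext_state { READ_DATA, READWRITE_DATA, INVALID_DATA, NONE_DATA };

static int writable(enum ext_state state, int sector_initialized)
{
        if (state == READWRITE_DATA)
                return 1;                  /* read-write extents are always writable */
        else if (state != INVALID_DATA)
                return 0;                  /* read-only and none extents never are */
        else
                return sector_initialized; /* invalid-data: only once the sector is initialized */
}

int main(void)
{
        printf("rw=%d ro=%d inval(init)=%d inval(uninit)=%d\n",
               writable(READWRITE_DATA, 0), writable(READ_DATA, 0),
               writable(INVALID_DATA, 1), writable(INVALID_DATA, 0));
        return 0;  /* prints rw=1 ro=0 inval(init)=1 inval(uninit)=0 */
}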