aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
author Fred Isaman <iisaman@citi.umich.edu> 2011-07-30 20:52:51 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2011-07-31 12:18:17 -0400
commit 90ace12ac42f65d1f077c5ef5ec2efafdcac338f (patch)
tree b68267bbb609498f1005c56fe957251d92e14d78 /fs/nfs
parent 9f3770422c771da32c1d14e650c695eec27dbd1d (diff)
pnfsblock: encode_layoutcommit
In the blocklayout driver, two things happen during layoutcommit/cleanup: 1. The modified extents are encoded. 2. On cleanup, the extents are put back on the layout's rw extents list, for reads. In the new system, where the actual XDR encoding is done in encode_layoutcommit() directly into the xdr buffer, these are the new commit stages: 1. On setup_layoutcommit, the range is adjusted as before and a structure is allocated for communication with bl_encode_layoutcommit and bl_cleanup_layoutcommit (the generic layer provides a void pointer to hang it on). 2. bl_encode_layoutcommit is called to do the actual encoding directly into xdr. The commit-extent-list is not freed; it is stored on the above structure. FIXME: The code is not yet converted to the new XDR cleanup. 3. On cleanup, the commit-extent-list is put back by a call to set_to_rw() as before, but without the XDR decoding of the list that was previously needed, and the commit-extent-list is then freed. Finally, the allocated structure is freed. [rm inode and pnfs_layout_hdr args from cleanup_layoutcommit()] [pnfsblock: get rid of deprecated xdr macros] Signed-off-by: Jim Rees <rees@umich.edu> Signed-off-by: Peng Tao <peng_tao@emc.com> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu> [blocklayout: encode_layoutcommit implementation] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> [pnfsblock: fix bug setting up layoutcommit.] Signed-off-by: Tao Guo <guotao@nrchpc.ac.cn> [pnfsblock: prevent commit list corruption] [pnfsblock: fix layoutcommit with an empty opaque] Signed-off-by: Fred Isaman <iisaman@citi.umich.edu> Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Benny Halevy <bhalevy@tonian.com> Signed-off-by: Jim Rees <rees@umich.edu> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 2
-rw-r--r-- fs/nfs/blocklayout/blocklayout.h 12
-rw-r--r-- fs/nfs/blocklayout/extents.c 176
3 files changed, 146 insertions, 44 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 8c29a189f09b..d096835cfd6b 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -155,6 +155,8 @@ static void
155bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, 155bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
156 const struct nfs4_layoutcommit_args *arg) 156 const struct nfs4_layoutcommit_args *arg)
157{ 157{
158 dprintk("%s enter\n", __func__);
159 encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
158} 160}
159 161
160static void 162static void
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index fcf47b55b5ce..3caaefce85a5 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -91,6 +91,15 @@ struct pnfs_block_extent {
91 struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */ 91 struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
92}; 92};
93 93
94/* Shortened extent used by LAYOUTCOMMIT */
95struct pnfs_block_short_extent {
96 struct list_head bse_node;
97 struct nfs4_deviceid bse_devid;
98 struct block_device *bse_mdev;
99 sector_t bse_f_offset; /* the starting offset in the file */
100 sector_t bse_length; /* the size of the extent */
101};
102
94static inline void 103static inline void
95BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) 104BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
96{ 105{
@@ -184,6 +193,9 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
184void bl_put_extent(struct pnfs_block_extent *be); 193void bl_put_extent(struct pnfs_block_extent *be);
185struct pnfs_block_extent *bl_alloc_extent(void); 194struct pnfs_block_extent *bl_alloc_extent(void);
186int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); 195int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
196int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
197 struct xdr_stream *xdr,
198 const struct nfs4_layoutcommit_args *arg);
187int bl_add_merge_extent(struct pnfs_block_layout *bl, 199int bl_add_merge_extent(struct pnfs_block_layout *bl,
188 struct pnfs_block_extent *new); 200 struct pnfs_block_extent *new);
189 201
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 292aadfd4d46..84bf24087720 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -286,6 +286,49 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
286 return -ENOMEM; 286 return -ENOMEM;
287} 287}
288 288
289/* Marks sectors in [offset, offset+length) as having been written to disk.
290 * All lengths should be block aligned.
291 */
292static int mark_written_sectors(struct pnfs_inval_markings *marks,
293 sector_t offset, sector_t length)
294{
295 int status;
296
297 dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
298 (u64)offset, (u64)length);
299 spin_lock(&marks->im_lock);
300 status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
301 spin_unlock(&marks->im_lock);
302 return status;
303}
304
305static void print_short_extent(struct pnfs_block_short_extent *be)
306{
307 dprintk("PRINT SHORT EXTENT extent %p\n", be);
308 if (be) {
309 dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset);
310 dprintk(" be_length %llu\n", (u64)be->bse_length);
311 }
312}
313
314static void print_clist(struct list_head *list, unsigned int count)
315{
316 struct pnfs_block_short_extent *be;
317 unsigned int i = 0;
318
319 ifdebug(FACILITY) {
320 printk(KERN_DEBUG "****************\n");
321 printk(KERN_DEBUG "Extent list looks like:\n");
322 list_for_each_entry(be, list, bse_node) {
323 i++;
324 print_short_extent(be);
325 }
326 if (i != count)
327 printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count);
328 printk(KERN_DEBUG "****************\n");
329 }
330}
331
289static void print_bl_extent(struct pnfs_block_extent *be) 332static void print_bl_extent(struct pnfs_block_extent *be)
290{ 333{
291 dprintk("PRINT EXTENT extent %p\n", be); 334 dprintk("PRINT EXTENT extent %p\n", be);
@@ -378,65 +421,67 @@ bl_add_merge_extent(struct pnfs_block_layout *bl,
378 /* Scan for proper place to insert, extending new to the left 421 /* Scan for proper place to insert, extending new to the left
379 * as much as possible. 422 * as much as possible.
380 */ 423 */
381 list_for_each_entry_safe(be, tmp, list, be_node) { 424 list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
382 if (new->be_f_offset < be->be_f_offset) 425 if (new->be_f_offset >= be->be_f_offset + be->be_length)
383 break; 426 break;
384 if (end <= be->be_f_offset + be->be_length) { 427 if (new->be_f_offset >= be->be_f_offset) {
385 /* new is a subset of existing be*/ 428 if (end <= be->be_f_offset + be->be_length) {
429 /* new is a subset of existing be*/
430 if (extents_consistent(be, new)) {
431 dprintk("%s: new is subset, ignoring\n",
432 __func__);
433 bl_put_extent(new);
434 return 0;
435 } else {
436 goto out_err;
437 }
438 } else {
439 /* |<-- be -->|
440 * |<-- new -->| */
441 if (extents_consistent(be, new)) {
442 /* extend new to fully replace be */
443 new->be_length += new->be_f_offset -
444 be->be_f_offset;
445 new->be_f_offset = be->be_f_offset;
446 new->be_v_offset = be->be_v_offset;
447 dprintk("%s: removing %p\n", __func__, be);
448 list_del(&be->be_node);
449 bl_put_extent(be);
450 } else {
451 goto out_err;
452 }
453 }
454 } else if (end >= be->be_f_offset + be->be_length) {
455 /* new extent overlap existing be */
386 if (extents_consistent(be, new)) { 456 if (extents_consistent(be, new)) {
387 dprintk("%s: new is subset, ignoring\n", 457 /* extend new to fully replace be */
388 __func__); 458 dprintk("%s: removing %p\n", __func__, be);
389 bl_put_extent(new); 459 list_del(&be->be_node);
390 return 0; 460 bl_put_extent(be);
391 } else 461 } else {
392 goto out_err; 462 goto out_err;
393 } else if (new->be_f_offset <= 463 }
394 be->be_f_offset + be->be_length) { 464 } else if (end > be->be_f_offset) {
395 /* new overlaps or abuts existing be */ 465 /* |<-- be -->|
396 if (extents_consistent(be, new)) { 466 *|<-- new -->| */
467 if (extents_consistent(new, be)) {
397 /* extend new to fully replace be */ 468 /* extend new to fully replace be */
398 new->be_length += new->be_f_offset - 469 new->be_length += be->be_f_offset + be->be_length -
399 be->be_f_offset; 470 new->be_f_offset - new->be_length;
400 new->be_f_offset = be->be_f_offset;
401 new->be_v_offset = be->be_v_offset;
402 dprintk("%s: removing %p\n", __func__, be); 471 dprintk("%s: removing %p\n", __func__, be);
403 list_del(&be->be_node); 472 list_del(&be->be_node);
404 bl_put_extent(be); 473 bl_put_extent(be);
405 } else if (new->be_f_offset != 474 } else {
406 be->be_f_offset + be->be_length)
407 goto out_err; 475 goto out_err;
476 }
408 } 477 }
409 } 478 }
410 /* Note that if we never hit the above break, be will not point to a 479 /* Note that if we never hit the above break, be will not point to a
411 * valid extent. However, in that case &be->be_node==list. 480 * valid extent. However, in that case &be->be_node==list.
412 */ 481 */
413 list_add_tail(&new->be_node, &be->be_node); 482 list_add(&new->be_node, &be->be_node);
414 dprintk("%s: inserting new\n", __func__); 483 dprintk("%s: inserting new\n", __func__);
415 print_elist(list); 484 print_elist(list);
416 /* Scan forward for overlaps. If we find any, extend new and
417 * remove the overlapped extent.
418 */
419 be = list_prepare_entry(new, list, be_node);
420 list_for_each_entry_safe_continue(be, tmp, list, be_node) {
421 if (end < be->be_f_offset)
422 break;
423 /* new overlaps or abuts existing be */
424 if (extents_consistent(be, new)) {
425 if (end < be->be_f_offset + be->be_length) {
426 /* extend new to fully cover be */
427 end = be->be_f_offset + be->be_length;
428 new->be_length = end - new->be_f_offset;
429 }
430 dprintk("%s: removing %p\n", __func__, be);
431 list_del(&be->be_node);
432 bl_put_extent(be);
433 } else if (end != be->be_f_offset) {
434 list_del(&new->be_node);
435 goto out_err;
436 }
437 }
438 dprintk("%s: after merging\n", __func__);
439 print_elist(list);
440 /* FIXME - The per-list consistency checks have all been done, 485 /* FIXME - The per-list consistency checks have all been done,
441 * should now check cross-list consistency. 486 * should now check cross-list consistency.
442 */ 487 */
@@ -494,6 +539,49 @@ bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
494 return ret; 539 return ret;
495} 540}
496 541
542int
543encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
544 struct xdr_stream *xdr,
545 const struct nfs4_layoutcommit_args *arg)
546{
547 struct pnfs_block_short_extent *lce, *save;
548 unsigned int count = 0;
549 __be32 *p, *xdr_start;
550
551 dprintk("%s enter\n", __func__);
552 /* BUG - creation of bl_commit is buggy - need to wait for
553 * entire block to be marked WRITTEN before it can be added.
554 */
555 spin_lock(&bl->bl_ext_lock);
556 /* Want to adjust for possible truncate */
557 /* We now want to adjust argument range */
558
559 /* XDR encode the ranges found */
560 xdr_start = xdr_reserve_space(xdr, 8);
561 if (!xdr_start)
562 goto out;
563 list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
564 p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
565 if (!p)
566 break;
567 p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE);
568 p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT);
569 p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
570 p = xdr_encode_hyper(p, 0LL);
571 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
572 list_del(&lce->bse_node);
573 list_add_tail(&lce->bse_node, &bl->bl_committing);
574 bl->bl_count--;
575 count++;
576 }
577 xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
578 xdr_start[1] = cpu_to_be32(count);
579out:
580 spin_unlock(&bl->bl_ext_lock);
581 dprintk("%s found %i ranges\n", __func__, count);
582 return 0;
583}
584
497/* Helper function to set_to_rw that initialize a new extent */ 585/* Helper function to set_to_rw that initialize a new extent */
498static void 586static void
499_prep_new_extent(struct pnfs_block_extent *new, 587_prep_new_extent(struct pnfs_block_extent *new,