diff options
author | Christoph Hellwig <hch@lst.de> | 2014-09-10 20:36:30 -0400 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2014-09-12 13:22:45 -0400 |
commit | 34dc93c2fc04da0d01acf8a1660b4ab276208af7 (patch) | |
tree | 89c6c0dbc294682703f7172e1285a8bf5245ce52 | |
parent | d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 (diff) |
pnfs/blocklayout: allocate separate pages for the layoutcommit payload
Instead of overflowing the XDR send buffer with our extent list, allocate
pages and pre-encode the layoutupdate payload into them. We optimistically
allocate a single page using alloc_page and only switch to vmalloc when we
have more extents outstanding than fit into that page. Currently there is
only a single test case (xfstests generic/113) which can reproduce large
enough extent lists for this to occur.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 15 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 8 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extent_tree.c | 102 |
3 files changed, 91 insertions, 34 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 3e1f1afc6db4..cf10a6e291e4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -500,21 +500,16 @@ bl_return_range(struct pnfs_layout_hdr *lo, | |||
500 | err = ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); | 500 | err = ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); |
501 | } | 501 | } |
502 | 502 | ||
503 | static void | 503 | static int |
504 | bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, | 504 | bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg) |
505 | const struct nfs4_layoutcommit_args *arg) | ||
506 | { | 505 | { |
507 | dprintk("%s enter\n", __func__); | 506 | return ext_tree_prepare_commit(arg); |
508 | ext_tree_encode_commit(BLK_LO2EXT(lo), xdr); | ||
509 | } | 507 | } |
510 | 508 | ||
511 | static void | 509 | static void |
512 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | 510 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) |
513 | { | 511 | { |
514 | struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; | 512 | ext_tree_mark_committed(&lcdata->args, lcdata->res.status); |
515 | |||
516 | dprintk("%s enter\n", __func__); | ||
517 | ext_tree_mark_committed(BLK_LO2EXT(lo), lcdata->res.status); | ||
518 | } | 513 | } |
519 | 514 | ||
520 | static int | 515 | static int |
@@ -670,7 +665,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
670 | .alloc_lseg = bl_alloc_lseg, | 665 | .alloc_lseg = bl_alloc_lseg, |
671 | .free_lseg = bl_free_lseg, | 666 | .free_lseg = bl_free_lseg, |
672 | .return_range = bl_return_range, | 667 | .return_range = bl_return_range, |
673 | .encode_layoutcommit = bl_encode_layoutcommit, | 668 | .prepare_layoutcommit = bl_prepare_layoutcommit, |
674 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, | 669 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, |
675 | .set_layoutdriver = bl_set_layoutdriver, | 670 | .set_layoutdriver = bl_set_layoutdriver, |
676 | .alloc_deviceid_node = bl_alloc_deviceid_node, | 671 | .alloc_deviceid_node = bl_alloc_deviceid_node, |
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 19fae5e4c90b..9757f3eabdd2 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -72,6 +72,9 @@ struct pnfs_block_extent { | |||
72 | unsigned int be_tag; | 72 | unsigned int be_tag; |
73 | }; | 73 | }; |
74 | 74 | ||
75 | /* on the wire size of the extent */ | ||
76 | #define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) | ||
77 | |||
75 | struct pnfs_block_layout { | 78 | struct pnfs_block_layout { |
76 | struct pnfs_layout_hdr bl_layout; | 79 | struct pnfs_layout_hdr bl_layout; |
77 | struct rb_root bl_ext_rw; | 80 | struct rb_root bl_ext_rw; |
@@ -129,8 +132,7 @@ int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | |||
129 | sector_t len); | 132 | sector_t len); |
130 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, | 133 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, |
131 | struct pnfs_block_extent *ret, bool rw); | 134 | struct pnfs_block_extent *ret, bool rw); |
132 | int ext_tree_encode_commit(struct pnfs_block_layout *bl, | 135 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); |
133 | struct xdr_stream *xdr); | 136 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); |
134 | void ext_tree_mark_committed(struct pnfs_block_layout *bl, int status); | ||
135 | 137 | ||
136 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 138 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 43e891b3e0b6..1b6009ee75ce 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -462,19 +462,25 @@ out: | |||
462 | return err; | 462 | return err; |
463 | } | 463 | } |
464 | 464 | ||
465 | int | 465 | static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, |
466 | ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr) | 466 | size_t buffer_size) |
467 | { | 467 | { |
468 | struct pnfs_block_extent *be; | 468 | if (arg->layoutupdate_pages != &arg->layoutupdate_page) { |
469 | unsigned int count = 0; | 469 | int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i; |
470 | __be32 *p, *xdr_start; | ||
471 | int ret = 0; | ||
472 | 470 | ||
473 | dprintk("%s enter\n", __func__); | 471 | for (i = 0; i < nr_pages; i++) |
472 | put_page(arg->layoutupdate_pages[i]); | ||
473 | kfree(arg->layoutupdate_pages); | ||
474 | } else { | ||
475 | put_page(arg->layoutupdate_page); | ||
476 | } | ||
477 | } | ||
474 | 478 | ||
475 | xdr_start = xdr_reserve_space(xdr, 8); | 479 | static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, |
476 | if (!xdr_start) | 480 | size_t buffer_size, size_t *count) |
477 | return -ENOSPC; | 481 | { |
482 | struct pnfs_block_extent *be; | ||
483 | int ret = 0; | ||
478 | 484 | ||
479 | spin_lock(&bl->bl_ext_lock); | 485 | spin_lock(&bl->bl_ext_lock); |
480 | for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { | 486 | for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { |
@@ -482,12 +488,11 @@ ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr) | |||
482 | be->be_tag != EXTENT_WRITTEN) | 488 | be->be_tag != EXTENT_WRITTEN) |
483 | continue; | 489 | continue; |
484 | 490 | ||
485 | p = xdr_reserve_space(xdr, 7 * sizeof(__be32) + | 491 | (*count)++; |
486 | NFS4_DEVICEID4_SIZE); | 492 | if (*count * BL_EXTENT_SIZE > buffer_size) { |
487 | if (!p) { | 493 | /* keep counting.. */ |
488 | printk("%s: out of space for extent list\n", __func__); | ||
489 | ret = -ENOSPC; | 494 | ret = -ENOSPC; |
490 | break; | 495 | continue; |
491 | } | 496 | } |
492 | 497 | ||
493 | p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, | 498 | p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, |
@@ -498,25 +503,80 @@ ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr) | |||
498 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | 503 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); |
499 | 504 | ||
500 | be->be_tag = EXTENT_COMMITTING; | 505 | be->be_tag = EXTENT_COMMITTING; |
501 | count++; | ||
502 | } | 506 | } |
503 | spin_unlock(&bl->bl_ext_lock); | 507 | spin_unlock(&bl->bl_ext_lock); |
504 | 508 | ||
505 | xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4); | ||
506 | xdr_start[1] = cpu_to_be32(count); | ||
507 | |||
508 | dprintk("%s found %i ranges\n", __func__, count); | ||
509 | return ret; | 509 | return ret; |
510 | } | 510 | } |
511 | 511 | ||
512 | int | ||
513 | ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) | ||
514 | { | ||
515 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
516 | size_t count = 0, buffer_size = PAGE_SIZE; | ||
517 | __be32 *start_p; | ||
518 | int ret; | ||
519 | |||
520 | dprintk("%s enter\n", __func__); | ||
521 | |||
522 | arg->layoutupdate_page = alloc_page(GFP_NOFS); | ||
523 | if (!arg->layoutupdate_page) | ||
524 | return -ENOMEM; | ||
525 | start_p = page_address(arg->layoutupdate_page); | ||
526 | arg->layoutupdate_pages = &arg->layoutupdate_page; | ||
527 | |||
528 | retry: | ||
529 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); | ||
530 | if (unlikely(ret)) { | ||
531 | ext_tree_free_commitdata(arg, buffer_size); | ||
532 | |||
533 | buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
534 | count = 0; | ||
535 | |||
536 | arg->layoutupdate_pages = | ||
537 | kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE), | ||
538 | sizeof(struct page *), GFP_NOFS); | ||
539 | if (!arg->layoutupdate_pages) | ||
540 | return -ENOMEM; | ||
541 | |||
542 | start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); | ||
543 | if (!start_p) { | ||
544 | kfree(arg->layoutupdate_pages); | ||
545 | return -ENOMEM; | ||
546 | } | ||
547 | |||
548 | goto retry; | ||
549 | } | ||
550 | |||
551 | *start_p = cpu_to_be32(count); | ||
552 | arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
553 | |||
554 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { | ||
555 | __be32 *p = start_p; | ||
556 | int i = 0; | ||
557 | |||
558 | for (p = start_p; | ||
559 | p < start_p + arg->layoutupdate_len; | ||
560 | p += PAGE_SIZE) { | ||
561 | arg->layoutupdate_pages[i++] = vmalloc_to_page(p); | ||
562 | } | ||
563 | } | ||
564 | |||
565 | dprintk("%s found %zu ranges\n", __func__, count); | ||
566 | return 0; | ||
567 | } | ||
568 | |||
512 | void | 569 | void |
513 | ext_tree_mark_committed(struct pnfs_block_layout *bl, int status) | 570 | ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status) |
514 | { | 571 | { |
572 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
515 | struct rb_root *root = &bl->bl_ext_rw; | 573 | struct rb_root *root = &bl->bl_ext_rw; |
516 | struct pnfs_block_extent *be; | 574 | struct pnfs_block_extent *be; |
517 | 575 | ||
518 | dprintk("%s status %d\n", __func__, status); | 576 | dprintk("%s status %d\n", __func__, status); |
519 | 577 | ||
578 | ext_tree_free_commitdata(arg, arg->layoutupdate_len); | ||
579 | |||
520 | spin_lock(&bl->bl_ext_lock); | 580 | spin_lock(&bl->bl_ext_lock); |
521 | for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { | 581 | for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { |
522 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | 582 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || |