aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2014-09-10 20:36:30 -0400
committer: Trond Myklebust <trond.myklebust@primarydata.com> 2014-09-12 13:22:45 -0400
commit: 34dc93c2fc04da0d01acf8a1660b4ab276208af7 (patch)
tree: 89c6c0dbc294682703f7172e1285a8bf5245ce52
parent: d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 (diff)
pnfs/blocklayout: allocate separate pages for the layoutcommit payload
Instead of overflowing the XDR send buffer with our extent list, allocate pages and pre-encode the layoutupdate payload into them. We optimistically allocate a single page using alloc_page and only switch to vmalloc when more extents are outstanding than fit in that page. Currently there is only a single test case (xfstests generic/113) which can reproduce extent lists large enough for this to occur. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 15
-rw-r--r-- fs/nfs/blocklayout/blocklayout.h 8
-rw-r--r-- fs/nfs/blocklayout/extent_tree.c 102
3 files changed, 91 insertions, 34 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 3e1f1afc6db4..cf10a6e291e4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -500,21 +500,16 @@ bl_return_range(struct pnfs_layout_hdr *lo,
500 err = ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); 500 err = ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end);
501} 501}
502 502
503static void 503static int
504bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, 504bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
505 const struct nfs4_layoutcommit_args *arg)
506{ 505{
507 dprintk("%s enter\n", __func__); 506 return ext_tree_prepare_commit(arg);
508 ext_tree_encode_commit(BLK_LO2EXT(lo), xdr);
509} 507}
510 508
511static void 509static void
512bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) 510bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
513{ 511{
514 struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; 512 ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
515
516 dprintk("%s enter\n", __func__);
517 ext_tree_mark_committed(BLK_LO2EXT(lo), lcdata->res.status);
518} 513}
519 514
520static int 515static int
@@ -670,7 +665,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
670 .alloc_lseg = bl_alloc_lseg, 665 .alloc_lseg = bl_alloc_lseg,
671 .free_lseg = bl_free_lseg, 666 .free_lseg = bl_free_lseg,
672 .return_range = bl_return_range, 667 .return_range = bl_return_range,
673 .encode_layoutcommit = bl_encode_layoutcommit, 668 .prepare_layoutcommit = bl_prepare_layoutcommit,
674 .cleanup_layoutcommit = bl_cleanup_layoutcommit, 669 .cleanup_layoutcommit = bl_cleanup_layoutcommit,
675 .set_layoutdriver = bl_set_layoutdriver, 670 .set_layoutdriver = bl_set_layoutdriver,
676 .alloc_deviceid_node = bl_alloc_deviceid_node, 671 .alloc_deviceid_node = bl_alloc_deviceid_node,
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 19fae5e4c90b..9757f3eabdd2 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -72,6 +72,9 @@ struct pnfs_block_extent {
72 unsigned int be_tag; 72 unsigned int be_tag;
73}; 73};
74 74
75/* on the wire size of the extent */
76#define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)
77
75struct pnfs_block_layout { 78struct pnfs_block_layout {
76 struct pnfs_layout_hdr bl_layout; 79 struct pnfs_layout_hdr bl_layout;
77 struct rb_root bl_ext_rw; 80 struct rb_root bl_ext_rw;
@@ -129,8 +132,7 @@ int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
129 sector_t len); 132 sector_t len);
130bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, 133bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
131 struct pnfs_block_extent *ret, bool rw); 134 struct pnfs_block_extent *ret, bool rw);
132int ext_tree_encode_commit(struct pnfs_block_layout *bl, 135int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
133 struct xdr_stream *xdr); 136void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
134void ext_tree_mark_committed(struct pnfs_block_layout *bl, int status);
135 137
136#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ 138#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 43e891b3e0b6..1b6009ee75ce 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -462,19 +462,25 @@ out:
462 return err; 462 return err;
463} 463}
464 464
465int 465static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
466ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr) 466 size_t buffer_size)
467{ 467{
468 struct pnfs_block_extent *be; 468 if (arg->layoutupdate_pages != &arg->layoutupdate_page) {
469 unsigned int count = 0; 469 int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i;
470 __be32 *p, *xdr_start;
471 int ret = 0;
472 470
473 dprintk("%s enter\n", __func__); 471 for (i = 0; i < nr_pages; i++)
472 put_page(arg->layoutupdate_pages[i]);
473 kfree(arg->layoutupdate_pages);
474 } else {
475 put_page(arg->layoutupdate_page);
476 }
477}
474 478
475 xdr_start = xdr_reserve_space(xdr, 8); 479static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
476 if (!xdr_start) 480 size_t buffer_size, size_t *count)
477 return -ENOSPC; 481{
482 struct pnfs_block_extent *be;
483 int ret = 0;
478 484
479 spin_lock(&bl->bl_ext_lock); 485 spin_lock(&bl->bl_ext_lock);
480 for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { 486 for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
@@ -482,12 +488,11 @@ ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr)
482 be->be_tag != EXTENT_WRITTEN) 488 be->be_tag != EXTENT_WRITTEN)
483 continue; 489 continue;
484 490
485 p = xdr_reserve_space(xdr, 7 * sizeof(__be32) + 491 (*count)++;
486 NFS4_DEVICEID4_SIZE); 492 if (*count * BL_EXTENT_SIZE > buffer_size) {
487 if (!p) { 493 /* keep counting.. */
488 printk("%s: out of space for extent list\n", __func__);
489 ret = -ENOSPC; 494 ret = -ENOSPC;
490 break; 495 continue;
491 } 496 }
492 497
493 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, 498 p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
@@ -498,25 +503,80 @@ ext_tree_encode_commit(struct pnfs_block_layout *bl, struct xdr_stream *xdr)
498 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); 503 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
499 504
500 be->be_tag = EXTENT_COMMITTING; 505 be->be_tag = EXTENT_COMMITTING;
501 count++;
502 } 506 }
503 spin_unlock(&bl->bl_ext_lock); 507 spin_unlock(&bl->bl_ext_lock);
504 508
505 xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
506 xdr_start[1] = cpu_to_be32(count);
507
508 dprintk("%s found %i ranges\n", __func__, count);
509 return ret; 509 return ret;
510} 510}
511 511
512int
513ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
514{
515 struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
516 size_t count = 0, buffer_size = PAGE_SIZE;
517 __be32 *start_p;
518 int ret;
519
520 dprintk("%s enter\n", __func__);
521
522 arg->layoutupdate_page = alloc_page(GFP_NOFS);
523 if (!arg->layoutupdate_page)
524 return -ENOMEM;
525 start_p = page_address(arg->layoutupdate_page);
526 arg->layoutupdate_pages = &arg->layoutupdate_page;
527
528retry:
529 ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
530 if (unlikely(ret)) {
531 ext_tree_free_commitdata(arg, buffer_size);
532
533 buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count;
534 count = 0;
535
536 arg->layoutupdate_pages =
537 kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE),
538 sizeof(struct page *), GFP_NOFS);
539 if (!arg->layoutupdate_pages)
540 return -ENOMEM;
541
542 start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL);
543 if (!start_p) {
544 kfree(arg->layoutupdate_pages);
545 return -ENOMEM;
546 }
547
548 goto retry;
549 }
550
551 *start_p = cpu_to_be32(count);
552 arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count;
553
554 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
555 __be32 *p = start_p;
556 int i = 0;
557
558 for (p = start_p;
559 p < start_p + arg->layoutupdate_len;
560 p += PAGE_SIZE) {
561 arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
562 }
563 }
564
565 dprintk("%s found %zu ranges\n", __func__, count);
566 return 0;
567}
568
512void 569void
513ext_tree_mark_committed(struct pnfs_block_layout *bl, int status) 570ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status)
514{ 571{
572 struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
515 struct rb_root *root = &bl->bl_ext_rw; 573 struct rb_root *root = &bl->bl_ext_rw;
516 struct pnfs_block_extent *be; 574 struct pnfs_block_extent *be;
517 575
518 dprintk("%s status %d\n", __func__, status); 576 dprintk("%s status %d\n", __func__, status);
519 577
578 ext_tree_free_commitdata(arg, arg->layoutupdate_len);
579
520 spin_lock(&bl->bl_ext_lock); 580 spin_lock(&bl->bl_ext_lock);
521 for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { 581 for (be = ext_tree_first(root); be; be = ext_tree_next(be)) {
522 if (be->be_state != PNFS_BLOCK_INVALID_DATA || 582 if (be->be_state != PNFS_BLOCK_INVALID_DATA ||