about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Benny Halevy <bhalevy@panasas.com> 2011-05-22 12:47:26 -0400
committer Boaz Harrosh <bharrosh@panasas.com> 2011-05-29 13:52:32 -0400
commit fb3296eb4636763918edef2d22e45b85b15d4518 (patch)
tree 31c85255ccfa03cbca2d1e4e52028e489276a73b
parent f7da7a129d57bfe0f74573dc03531c63e1360fae (diff)
pnfs: Use byte-range for layoutget
Add offset and count parameters to pnfs_update_layout and use them to get the layout in the pageio path.

Order cached layout segments in the following order:
* offset (ascending)
* length (descending)
* iomode (RW before READ)

Test the byte range against the layout segment in use in pnfs_{read,write}_pg_test so as not to coalesce pages that do not use the same layout segment.

[fix lseg ordering]
[clean up pnfs_find_lseg lseg arg]
[remove unnecessary FIXME]
[fix ordering in pnfs_insert_layout]
[clean up pnfs_insert_layout]

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
-rw-r--r-- fs/nfs/pnfs.c 165
-rw-r--r-- fs/nfs/pnfs.h 6
-rw-r--r-- fs/nfs/read.c 8
-rw-r--r-- fs/nfs/write.c 8
4 files changed, 142 insertions, 45 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f57f5281a520..c2f09e9b670e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -261,6 +261,65 @@ put_lseg(struct pnfs_layout_segment *lseg)
261} 261}
262EXPORT_SYMBOL_GPL(put_lseg); 262EXPORT_SYMBOL_GPL(put_lseg);
263 263
264static inline u64
265end_offset(u64 start, u64 len)
266{
267 u64 end;
268
269 end = start + len;
270 return end >= start ? end : NFS4_MAX_UINT64;
271}
272
273/* last octet in a range */
274static inline u64
275last_byte_offset(u64 start, u64 len)
276{
277 u64 end;
278
279 BUG_ON(!len);
280 end = start + len;
281 return end > start ? end - 1 : NFS4_MAX_UINT64;
282}
283
284/*
285 * is l2 fully contained in l1?
286 * start1 end1
287 * [----------------------------------)
288 * start2 end2
289 * [----------------)
290 */
291static inline int
292lo_seg_contained(struct pnfs_layout_range *l1,
293 struct pnfs_layout_range *l2)
294{
295 u64 start1 = l1->offset;
296 u64 end1 = end_offset(start1, l1->length);
297 u64 start2 = l2->offset;
298 u64 end2 = end_offset(start2, l2->length);
299
300 return (start1 <= start2) && (end1 >= end2);
301}
302
303/*
304 * is l1 and l2 intersecting?
305 * start1 end1
306 * [----------------------------------)
307 * start2 end2
308 * [----------------)
309 */
310static inline int
311lo_seg_intersecting(struct pnfs_layout_range *l1,
312 struct pnfs_layout_range *l2)
313{
314 u64 start1 = l1->offset;
315 u64 end1 = end_offset(start1, l1->length);
316 u64 start2 = l2->offset;
317 u64 end2 = end_offset(start2, l2->length);
318
319 return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
320 (end2 == NFS4_MAX_UINT64 || end2 > start1);
321}
322
264static bool 323static bool
265should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 324should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
266{ 325{
@@ -467,7 +526,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
467static struct pnfs_layout_segment * 526static struct pnfs_layout_segment *
468send_layoutget(struct pnfs_layout_hdr *lo, 527send_layoutget(struct pnfs_layout_hdr *lo,
469 struct nfs_open_context *ctx, 528 struct nfs_open_context *ctx,
470 u32 iomode, 529 struct pnfs_layout_range *range,
471 gfp_t gfp_flags) 530 gfp_t gfp_flags)
472{ 531{
473 struct inode *ino = lo->plh_inode; 532 struct inode *ino = lo->plh_inode;
@@ -499,11 +558,11 @@ send_layoutget(struct pnfs_layout_hdr *lo,
499 goto out_err_free; 558 goto out_err_free;
500 } 559 }
501 560
502 lgp->args.minlength = NFS4_MAX_UINT64; 561 lgp->args.minlength = PAGE_CACHE_SIZE;
562 if (lgp->args.minlength > range->length)
563 lgp->args.minlength = range->length;
503 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 564 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
504 lgp->args.range.iomode = iomode; 565 lgp->args.range = *range;
505 lgp->args.range.offset = 0;
506 lgp->args.range.length = NFS4_MAX_UINT64;
507 lgp->args.type = server->pnfs_curr_ld->id; 566 lgp->args.type = server->pnfs_curr_ld->id;
508 lgp->args.inode = ino; 567 lgp->args.inode = ino;
509 lgp->args.ctx = get_nfs_open_context(ctx); 568 lgp->args.ctx = get_nfs_open_context(ctx);
@@ -518,7 +577,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
518 nfs4_proc_layoutget(lgp); 577 nfs4_proc_layoutget(lgp);
519 if (!lseg) { 578 if (!lseg) {
520 /* remember that LAYOUTGET failed and suspend trying */ 579 /* remember that LAYOUTGET failed and suspend trying */
521 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 580 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
522 } 581 }
523 582
524 /* free xdr pages */ 583 /* free xdr pages */
@@ -625,10 +684,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
625 * are seen first. 684 * are seen first.
626 */ 685 */
627static s64 686static s64
628cmp_layout(u32 iomode1, u32 iomode2) 687cmp_layout(struct pnfs_layout_range *l1,
688 struct pnfs_layout_range *l2)
629{ 689{
690 s64 d;
691
692 /* high offset > low offset */
693 d = l1->offset - l2->offset;
694 if (d)
695 return d;
696
697 /* short length > long length */
698 d = l2->length - l1->length;
699 if (d)
700 return d;
701
630 /* read > read/write */ 702 /* read > read/write */
631 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); 703 return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
632} 704}
633 705
634static void 706static void
@@ -636,13 +708,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
636 struct pnfs_layout_segment *lseg) 708 struct pnfs_layout_segment *lseg)
637{ 709{
638 struct pnfs_layout_segment *lp; 710 struct pnfs_layout_segment *lp;
639 int found = 0;
640 711
641 dprintk("%s:Begin\n", __func__); 712 dprintk("%s:Begin\n", __func__);
642 713
643 assert_spin_locked(&lo->plh_inode->i_lock); 714 assert_spin_locked(&lo->plh_inode->i_lock);
644 list_for_each_entry(lp, &lo->plh_segs, pls_list) { 715 list_for_each_entry(lp, &lo->plh_segs, pls_list) {
645 if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) 716 if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
646 continue; 717 continue;
647 list_add_tail(&lseg->pls_list, &lp->pls_list); 718 list_add_tail(&lseg->pls_list, &lp->pls_list);
648 dprintk("%s: inserted lseg %p " 719 dprintk("%s: inserted lseg %p "
@@ -652,16 +723,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
652 lseg->pls_range.offset, lseg->pls_range.length, 723 lseg->pls_range.offset, lseg->pls_range.length,
653 lp, lp->pls_range.iomode, lp->pls_range.offset, 724 lp, lp->pls_range.iomode, lp->pls_range.offset,
654 lp->pls_range.length); 725 lp->pls_range.length);
655 found = 1; 726 goto out;
656 break;
657 }
658 if (!found) {
659 list_add_tail(&lseg->pls_list, &lo->plh_segs);
660 dprintk("%s: inserted lseg %p "
661 "iomode %d offset %llu length %llu at tail\n",
662 __func__, lseg, lseg->pls_range.iomode,
663 lseg->pls_range.offset, lseg->pls_range.length);
664 } 727 }
728 list_add_tail(&lseg->pls_list, &lo->plh_segs);
729 dprintk("%s: inserted lseg %p "
730 "iomode %d offset %llu length %llu at tail\n",
731 __func__, lseg, lseg->pls_range.iomode,
732 lseg->pls_range.offset, lseg->pls_range.length);
733out:
665 get_layout_hdr(lo); 734 get_layout_hdr(lo);
666 735
667 dprintk("%s:Return\n", __func__); 736 dprintk("%s:Return\n", __func__);
@@ -721,16 +790,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
721 * READ RW true 790 * READ RW true
722 */ 791 */
723static int 792static int
724is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) 793is_matching_lseg(struct pnfs_layout_range *ls_range,
794 struct pnfs_layout_range *range)
725{ 795{
726 return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); 796 struct pnfs_layout_range range1;
797
798 if ((range->iomode == IOMODE_RW &&
799 ls_range->iomode != IOMODE_RW) ||
800 !lo_seg_intersecting(ls_range, range))
801 return 0;
802
803 /* range1 covers only the first byte in the range */
804 range1 = *range;
805 range1.length = 1;
806 return lo_seg_contained(ls_range, &range1);
727} 807}
728 808
729/* 809/*
730 * lookup range in layout 810 * lookup range in layout
731 */ 811 */
732static struct pnfs_layout_segment * 812static struct pnfs_layout_segment *
733pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) 813pnfs_find_lseg(struct pnfs_layout_hdr *lo,
814 struct pnfs_layout_range *range)
734{ 815{
735 struct pnfs_layout_segment *lseg, *ret = NULL; 816 struct pnfs_layout_segment *lseg, *ret = NULL;
736 817
@@ -739,11 +820,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
739 assert_spin_locked(&lo->plh_inode->i_lock); 820 assert_spin_locked(&lo->plh_inode->i_lock);
740 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 821 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
741 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 822 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
742 is_matching_lseg(lseg, iomode)) { 823 is_matching_lseg(&lseg->pls_range, range)) {
743 ret = get_lseg(lseg); 824 ret = get_lseg(lseg);
744 break; 825 break;
745 } 826 }
746 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 827 if (cmp_layout(range, &lseg->pls_range) > 0)
747 break; 828 break;
748 } 829 }
749 830
@@ -759,9 +840,16 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
759struct pnfs_layout_segment * 840struct pnfs_layout_segment *
760pnfs_update_layout(struct inode *ino, 841pnfs_update_layout(struct inode *ino,
761 struct nfs_open_context *ctx, 842 struct nfs_open_context *ctx,
843 loff_t pos,
844 u64 count,
762 enum pnfs_iomode iomode, 845 enum pnfs_iomode iomode,
763 gfp_t gfp_flags) 846 gfp_t gfp_flags)
764{ 847{
848 struct pnfs_layout_range arg = {
849 .iomode = iomode,
850 .offset = pos,
851 .length = count,
852 };
765 struct nfs_inode *nfsi = NFS_I(ino); 853 struct nfs_inode *nfsi = NFS_I(ino);
766 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 854 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
767 struct pnfs_layout_hdr *lo; 855 struct pnfs_layout_hdr *lo;
@@ -789,7 +877,7 @@ pnfs_update_layout(struct inode *ino,
789 goto out_unlock; 877 goto out_unlock;
790 878
791 /* Check to see if the layout for the given range already exists */ 879 /* Check to see if the layout for the given range already exists */
792 lseg = pnfs_find_lseg(lo, iomode); 880 lseg = pnfs_find_lseg(lo, &arg);
793 if (lseg) 881 if (lseg)
794 goto out_unlock; 882 goto out_unlock;
795 883
@@ -811,7 +899,7 @@ pnfs_update_layout(struct inode *ino,
811 spin_unlock(&clp->cl_lock); 899 spin_unlock(&clp->cl_lock);
812 } 900 }
813 901
814 lseg = send_layoutget(lo, ctx, iomode, gfp_flags); 902 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
815 if (!lseg && first) { 903 if (!lseg && first) {
816 spin_lock(&clp->cl_lock); 904 spin_lock(&clp->cl_lock);
817 list_del_init(&lo->plh_layouts); 905 list_del_init(&lo->plh_layouts);
@@ -838,17 +926,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
838 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 926 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
839 int status = 0; 927 int status = 0;
840 928
841 /* Verify we got what we asked for.
842 * Note that because the xdr parsing only accepts a single
843 * element array, this can fail even if the server is behaving
844 * correctly.
845 */
846 if (lgp->args.range.iomode > res->range.iomode ||
847 res->range.offset != 0 ||
848 res->range.length != NFS4_MAX_UINT64) {
849 status = -EINVAL;
850 goto out;
851 }
852 /* Inject layout blob into I/O device driver */ 929 /* Inject layout blob into I/O device driver */
853 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 930 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
854 if (!lseg || IS_ERR(lseg)) { 931 if (!lseg || IS_ERR(lseg)) {
@@ -903,9 +980,14 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
903 /* This is first coelesce call for a series of nfs_pages */ 980 /* This is first coelesce call for a series of nfs_pages */
904 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 981 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
905 prev->wb_context, 982 prev->wb_context,
983 req_offset(req),
984 pgio->pg_count,
906 IOMODE_READ, 985 IOMODE_READ,
907 GFP_KERNEL); 986 GFP_KERNEL);
908 } 987 } else if (pgio->pg_lseg &&
988 req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
989 pgio->pg_lseg->pls_range.length))
990 return 0;
909 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 991 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
910} 992}
911 993
@@ -926,9 +1008,14 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
926 /* This is first coelesce call for a series of nfs_pages */ 1008 /* This is first coelesce call for a series of nfs_pages */
927 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1009 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
928 prev->wb_context, 1010 prev->wb_context,
1011 req_offset(req),
1012 pgio->pg_count,
929 IOMODE_RW, 1013 IOMODE_RW,
930 GFP_NOFS); 1014 GFP_NOFS);
931 } 1015 } else if (pgio->pg_lseg &&
1016 req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
1017 pgio->pg_lseg->pls_range.length))
1018 return 0;
932 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 1019 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
933} 1020}
934 1021
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 5b083d295334..78f8a4a171b3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -131,7 +131,8 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
131void put_lseg(struct pnfs_layout_segment *lseg); 131void put_lseg(struct pnfs_layout_segment *lseg);
132struct pnfs_layout_segment * 132struct pnfs_layout_segment *
133pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 133pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
134 enum pnfs_iomode access_type, gfp_t gfp_flags); 134 loff_t pos, u64 count, enum pnfs_iomode access_type,
135 gfp_t gfp_flags);
135void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 136void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
136void unset_pnfs_layoutdriver(struct nfs_server *); 137void unset_pnfs_layoutdriver(struct nfs_server *);
137enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 138enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -271,7 +272,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
271 272
272static inline struct pnfs_layout_segment * 273static inline struct pnfs_layout_segment *
273pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 274pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
274 enum pnfs_iomode access_type, gfp_t gfp_flags) 275 loff_t pos, u64 count, enum pnfs_iomode access_type,
276 gfp_t gfp_flags)
275{ 277{
276 return NULL; 278 return NULL;
277} 279}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2bcf0dc306a1..540c8bc93f95 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,9 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
288 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
289 289
290 BUG_ON(desc->pg_lseg != NULL); 290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); 291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
292 req_offset(req), desc->pg_count,
293 IOMODE_READ, GFP_KERNEL);
292 ClearPageError(page); 294 ClearPageError(page);
293 offset = 0; 295 offset = 0;
294 nbytes = desc->pg_count; 296 nbytes = desc->pg_count;
@@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
351 } 353 }
352 req = nfs_list_entry(data->pages.next); 354 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages)) 355 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); 356 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
357 req_offset(req), desc->pg_count,
358 IOMODE_READ, GFP_KERNEL);
355 359
356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 360 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
357 0, lseg); 361 0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 49c715b4ac92..7edb72f27c22 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
939 atomic_set(&req->wb_complete, requests); 939 atomic_set(&req->wb_complete, requests);
940 940
941 BUG_ON(desc->pg_lseg); 941 BUG_ON(desc->pg_lseg);
942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); 942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
943 req_offset(req), desc->pg_count,
944 IOMODE_RW, GFP_NOFS);
943 ClearPageError(page); 945 ClearPageError(page);
944 offset = 0; 946 offset = 0;
945 nbytes = desc->pg_count; 947 nbytes = desc->pg_count;
@@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1013 } 1015 }
1014 req = nfs_list_entry(data->pages.next); 1016 req = nfs_list_entry(data->pages.next);
1015 if ((!lseg) && list_is_singular(&data->pages)) 1017 if ((!lseg) && list_is_singular(&data->pages))
1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); 1018 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
1019 req_offset(req), desc->pg_count,
1020 IOMODE_RW, GFP_NOFS);
1017 1021
1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1022 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1023 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))