author		Fred Isaman <iisaman@netapp.com>		2011-01-06 06:36:30 -0500
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2011-01-06 14:46:32 -0500
commit		43f1b3da8b35d706d6c47880fc211d2519b4a587
tree		67fdbaaab9658cf79b17fa905c62a59d7355d613 /fs/nfs/pnfs.c
parent		f2a625616045fe46e1d5fceebdd825f5acdecdb7
pnfs: add CB_LAYOUTRECALL handling
This is the heart of the wave 2 submission. Add the code to trigger the
drain and forget of any affected layouts. In addition, we set a "barrier",
below which any LAYOUTGET reply is ignored. This compensates for the fact
that we do not wait for outstanding LAYOUTGETs to complete, as per section
12.5.5.2.1 of RFC 5661.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
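The barrier test added by this patch relies on serial-number arithmetic:
subtracting two unsigned 32-bit seqids and casting the difference to a
signed int gives a wraparound-safe ordering test, valid as long as the two
values stay within 2**31 of each other. Below is a minimal user-space
sketch of that comparison; the helper name and sample values are
illustrative only, not kernel API:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the patch's test:
 *   (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0
 * A reply whose seqid is at or below the barrier is treated as stale
 * and forgotten.
 */
static bool seqid_at_or_below_barrier(uint32_t seqid, uint32_t barrier)
{
	return (int32_t)(barrier - seqid) >= 0;
}

int main(void)
{
	/* Plain case: seqid 5 is behind a barrier of 7 -> ignored. */
	printf("%d\n", seqid_at_or_below_barrier(5, 7));           /* 1 */
	/* Wraparound case: seqid 2 is "ahead of" barrier 0xfffffffe,
	 * even though it is numerically smaller -> accepted. */
	printf("%d\n", seqid_at_or_below_barrier(2, 0xfffffffeU)); /* 0 */
	return 0;
}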
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r--	fs/nfs/pnfs.c	83
1 file changed, 64 insertions(+), 19 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 32b66468e5db..bf4186b8f2fc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  */
 
 /* Need to hold i_lock if caller does not already hold reference */
-static void
+void
 get_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	atomic_inc(&lo->plh_refcount);
@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 		/* List does not take a reference, so no need for put here */
 		list_del_init(&lseg->pls_layout->plh_layouts);
 		spin_unlock(&clp->cl_lock);
+		clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
 	}
 	list_add(&lseg->pls_list, tmp_list);
 	return 1;
@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 /* Returns count of number of matching invalid lsegs remaining in list
  * after call.
  */
-static int
+int
 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 			    struct list_head *tmp_list,
 			    u32 iomode)
@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	return invalid - removed;
 }
 
-static void
+void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 }
 
 /* update lo->plh_stateid with new if is more recent */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-			const nfs4_stateid *new)
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+			bool update_barrier)
 {
 	u32 oldseq, newseq;
 
 	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
 	newseq = be32_to_cpu(new->stateid.seqid);
-	if ((int)(newseq - oldseq) > 0)
+	if ((int)(newseq - oldseq) > 0) {
 		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+		if (update_barrier) {
+			u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+			if ((int)(new_barrier - lo->plh_barrier))
+				lo->plh_barrier = new_barrier;
+		} else {
+			/* Because of wraparound, we want to keep the barrier
+			 * "close" to the current seqids. It needs to be
+			 * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a little leeway
+			 * and bring it to within 2**30.
+			 * NOTE - and yes, this is all unsigned arithmetic.
+			 */
+			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+				lo->plh_barrier = newseq - (1 << 30);
+		}
+	}
 }
 
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
-pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+			int lget)
 {
-	return (list_empty(&lo->plh_segs) &&
+	if ((stateid) &&
+	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+		return true;
+	return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+		(list_empty(&lo->plh_segs) &&
 		 (atomic_read(&lo->plh_outstanding) > lget));
 }
 
@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	if (pnfs_layoutgets_blocked(lo, 1)) {
+	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
 		status = -EAGAIN;
 	} else if (list_empty(&lo->plh_segs)) {
 		int seq;
@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino)
 	atomic_set(&lo->plh_refcount, 1);
 	INIT_LIST_HEAD(&lo->plh_layouts);
 	INIT_LIST_HEAD(&lo->plh_segs);
+	INIT_LIST_HEAD(&lo->plh_bulk_recall);
 	lo->plh_inode = ino;
 	return lo;
 }
@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	}
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_has_layout(lo, iomode);
-	if (lseg) {
-		dprintk("%s: Using cached lseg %p for iomode %d)\n",
-			__func__, lseg, iomode);
+	/* Do we even need to bother with this? */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 	}
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_find_lseg(lo, iomode);
+	if (lseg)
+		goto out_unlock;
 
 	/* if LAYOUTGET already failed once we don't try again */
 	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
 		goto out_unlock;
 
-	if (pnfs_layoutgets_blocked(lo, 0))
+	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
 
@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino,
 			spin_lock(&clp->cl_lock);
 			list_del_init(&lo->plh_layouts);
 			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 		}
 		spin_unlock(&ino->i_lock);
 	}
@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct pnfs_layout_segment *lseg;
 	struct inode *ino = lo->plh_inode;
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 
 	/* Verify we got what we asked for.
@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 
 	spin_lock(&ino->i_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s forget reply due to recall\n", __func__);
+		goto out_forget_reply;
+	}
+
+	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+		dprintk("%s forget reply due to state\n", __func__);
+		goto out_forget_reply;
+	}
 	init_lseg(lo, lseg);
 	lseg->pls_range = res->range;
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 
 	/* Done processing layoutget. Set the layout stateid */
-	pnfs_set_layout_stateid(lo, &res->stateid);
+	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
 out:
 	return status;
+
+out_forget_reply:
+	spin_unlock(&ino->i_lock);
+	lseg->pls_layout = lo;
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	goto out;
 }
 
 /*
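
The wraparound "leeway" rule in pnfs_set_layout_stateid() above can be
exercised with a small standalone sketch: once the barrier has fallen more
than 3 * 2**29 behind the current seqid (in unsigned arithmetic), it is
pulled forward to exactly 2**30 behind, keeping it well inside the 2**31
window where the signed comparison still means "behind". The helper name
and sample values below are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Same unsigned arithmetic as the kernel hunk:
 *   if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
 *       lo->plh_barrier = newseq - (1 << 30);
 */
static uint32_t clamp_barrier(uint32_t newseq, uint32_t barrier)
{
	if ((newseq - barrier) > (3u << 29))	/* gap beyond 0x60000000 */
		barrier = newseq - (1u << 30);	/* bring to 2**30 behind */
	return barrier;
}

int main(void)
{
	/* Barrier 0x10000000 is 0x80000000 behind seqid 0x90000000,
	 * which exceeds 0x60000000, so it is pulled up to
	 * 0x90000000 - 0x40000000 = 0x50000000. */
	printf("0x%08x\n", clamp_barrier(0x90000000u, 0x10000000u));
	return 0;
}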