diff options
author | Fred Isaman <iisaman@netapp.com> | 2011-01-06 06:36:30 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2011-01-06 14:46:32 -0500 |
commit | 43f1b3da8b35d706d6c47880fc211d2519b4a587 (patch) | |
tree | 67fdbaaab9658cf79b17fa905c62a59d7355d613 /fs/nfs/pnfs.c | |
parent | f2a625616045fe46e1d5fceebdd825f5acdecdb7 (diff) |
pnfs: add CB_LAYOUTRECALL handling
This is the heart of the wave 2 submission. Add the code to trigger
drain and forget of any affected layouts. In addition, we set a
"barrier", below which any LAYOUTGET reply is ignored. This is to
compensate for the fact that we do not wait for outstanding LAYOUTGETs
to complete as per section 12.5.5.2.1 of RFC 5661.
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r-- | fs/nfs/pnfs.c | 83 |
1 files changed, 64 insertions, 19 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 32b66468e5db..bf4186b8f2fc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | |||
178 | */ | 178 | */ |
179 | 179 | ||
180 | /* Need to hold i_lock if caller does not already hold reference */ | 180 | /* Need to hold i_lock if caller does not already hold reference */ |
181 | static void | 181 | void |
182 | get_layout_hdr(struct pnfs_layout_hdr *lo) | 182 | get_layout_hdr(struct pnfs_layout_hdr *lo) |
183 | { | 183 | { |
184 | atomic_inc(&lo->plh_refcount); | 184 | atomic_inc(&lo->plh_refcount); |
@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, | |||
254 | /* List does not take a reference, so no need for put here */ | 254 | /* List does not take a reference, so no need for put here */ |
255 | list_del_init(&lseg->pls_layout->plh_layouts); | 255 | list_del_init(&lseg->pls_layout->plh_layouts); |
256 | spin_unlock(&clp->cl_lock); | 256 | spin_unlock(&clp->cl_lock); |
257 | clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); | ||
257 | } | 258 | } |
258 | list_add(&lseg->pls_list, tmp_list); | 259 | list_add(&lseg->pls_list, tmp_list); |
259 | return 1; | 260 | return 1; |
@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
287 | /* Returns count of number of matching invalid lsegs remaining in list | 288 | /* Returns count of number of matching invalid lsegs remaining in list |
288 | * after call. | 289 | * after call. |
289 | */ | 290 | */ |
290 | static int | 291 | int |
291 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 292 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
292 | struct list_head *tmp_list, | 293 | struct list_head *tmp_list, |
293 | u32 iomode) | 294 | u32 iomode) |
@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
310 | return invalid - removed; | 311 | return invalid - removed; |
311 | } | 312 | } |
312 | 313 | ||
313 | static void | 314 | void |
314 | pnfs_free_lseg_list(struct list_head *free_me) | 315 | pnfs_free_lseg_list(struct list_head *free_me) |
315 | { | 316 | { |
316 | struct pnfs_layout_segment *lseg, *tmp; | 317 | struct pnfs_layout_segment *lseg, *tmp; |
@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
363 | } | 364 | } |
364 | 365 | ||
365 | /* update lo->plh_stateid with new if is more recent */ | 366 | /* update lo->plh_stateid with new if is more recent */ |
366 | static void | 367 | void |
367 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | 368 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
368 | const nfs4_stateid *new) | 369 | bool update_barrier) |
369 | { | 370 | { |
370 | u32 oldseq, newseq; | 371 | u32 oldseq, newseq; |
371 | 372 | ||
372 | oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); | 373 | oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); |
373 | newseq = be32_to_cpu(new->stateid.seqid); | 374 | newseq = be32_to_cpu(new->stateid.seqid); |
374 | if ((int)(newseq - oldseq) > 0) | 375 | if ((int)(newseq - oldseq) > 0) { |
375 | memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); | 376 | memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); |
377 | if (update_barrier) { | ||
378 | u32 new_barrier = be32_to_cpu(new->stateid.seqid); | ||
379 | |||
380 | if ((int)(new_barrier - lo->plh_barrier)) | ||
381 | lo->plh_barrier = new_barrier; | ||
382 | } else { | ||
383 | /* Because of wraparound, we want to keep the barrier | ||
384 | * "close" to the current seqids. It needs to be | ||
385 | * within 2**31 to count as "behind", so if it | ||
386 | * gets too near that limit, give us a little leeway | ||
387 | * and bring it to within 2**30. | ||
388 | * NOTE - and yes, this is all unsigned arithmetic. | ||
389 | */ | ||
390 | if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) | ||
391 | lo->plh_barrier = newseq - (1 << 30); | ||
392 | } | ||
393 | } | ||
376 | } | 394 | } |
377 | 395 | ||
378 | /* lget is set to 1 if called from inside send_layoutget call chain */ | 396 | /* lget is set to 1 if called from inside send_layoutget call chain */ |
379 | static bool | 397 | static bool |
380 | pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget) | 398 | pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, |
381 | { | 399 | int lget) |
382 | return (list_empty(&lo->plh_segs) && | 400 | { |
401 | if ((stateid) && | ||
402 | (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) | ||
403 | return true; | ||
404 | return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | ||
405 | (list_empty(&lo->plh_segs) && | ||
383 | (atomic_read(&lo->plh_outstanding) > lget)); | 406 | (atomic_read(&lo->plh_outstanding) > lget)); |
384 | } | 407 | } |
385 | 408 | ||
@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
391 | 414 | ||
392 | dprintk("--> %s\n", __func__); | 415 | dprintk("--> %s\n", __func__); |
393 | spin_lock(&lo->plh_inode->i_lock); | 416 | spin_lock(&lo->plh_inode->i_lock); |
394 | if (pnfs_layoutgets_blocked(lo, 1)) { | 417 | if (pnfs_layoutgets_blocked(lo, NULL, 1)) { |
395 | status = -EAGAIN; | 418 | status = -EAGAIN; |
396 | } else if (list_empty(&lo->plh_segs)) { | 419 | } else if (list_empty(&lo->plh_segs)) { |
397 | int seq; | 420 | int seq; |
@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino) | |||
510 | atomic_set(&lo->plh_refcount, 1); | 533 | atomic_set(&lo->plh_refcount, 1); |
511 | INIT_LIST_HEAD(&lo->plh_layouts); | 534 | INIT_LIST_HEAD(&lo->plh_layouts); |
512 | INIT_LIST_HEAD(&lo->plh_segs); | 535 | INIT_LIST_HEAD(&lo->plh_segs); |
536 | INIT_LIST_HEAD(&lo->plh_bulk_recall); | ||
513 | lo->plh_inode = ino; | 537 | lo->plh_inode = ino; |
514 | return lo; | 538 | return lo; |
515 | } | 539 | } |
@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | |||
561 | * lookup range in layout | 585 | * lookup range in layout |
562 | */ | 586 | */ |
563 | static struct pnfs_layout_segment * | 587 | static struct pnfs_layout_segment * |
564 | pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) | 588 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) |
565 | { | 589 | { |
566 | struct pnfs_layout_segment *lseg, *ret = NULL; | 590 | struct pnfs_layout_segment *lseg, *ret = NULL; |
567 | 591 | ||
@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino, | |||
606 | goto out_unlock; | 630 | goto out_unlock; |
607 | } | 631 | } |
608 | 632 | ||
609 | /* Check to see if the layout for the given range already exists */ | 633 | /* Do we even need to bother with this? */ |
610 | lseg = pnfs_has_layout(lo, iomode); | 634 | if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || |
611 | if (lseg) { | 635 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
612 | dprintk("%s: Using cached lseg %p for iomode %d)\n", | 636 | dprintk("%s matches recall, use MDS\n", __func__); |
613 | __func__, lseg, iomode); | ||
614 | goto out_unlock; | 637 | goto out_unlock; |
615 | } | 638 | } |
639 | /* Check to see if the layout for the given range already exists */ | ||
640 | lseg = pnfs_find_lseg(lo, iomode); | ||
641 | if (lseg) | ||
642 | goto out_unlock; | ||
616 | 643 | ||
617 | /* if LAYOUTGET already failed once we don't try again */ | 644 | /* if LAYOUTGET already failed once we don't try again */ |
618 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) | 645 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) |
619 | goto out_unlock; | 646 | goto out_unlock; |
620 | 647 | ||
621 | if (pnfs_layoutgets_blocked(lo, 0)) | 648 | if (pnfs_layoutgets_blocked(lo, NULL, 0)) |
622 | goto out_unlock; | 649 | goto out_unlock; |
623 | atomic_inc(&lo->plh_outstanding); | 650 | atomic_inc(&lo->plh_outstanding); |
624 | 651 | ||
@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino, | |||
641 | spin_lock(&clp->cl_lock); | 668 | spin_lock(&clp->cl_lock); |
642 | list_del_init(&lo->plh_layouts); | 669 | list_del_init(&lo->plh_layouts); |
643 | spin_unlock(&clp->cl_lock); | 670 | spin_unlock(&clp->cl_lock); |
671 | clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | ||
644 | } | 672 | } |
645 | spin_unlock(&ino->i_lock); | 673 | spin_unlock(&ino->i_lock); |
646 | } | 674 | } |
@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
662 | struct nfs4_layoutget_res *res = &lgp->res; | 690 | struct nfs4_layoutget_res *res = &lgp->res; |
663 | struct pnfs_layout_segment *lseg; | 691 | struct pnfs_layout_segment *lseg; |
664 | struct inode *ino = lo->plh_inode; | 692 | struct inode *ino = lo->plh_inode; |
693 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | ||
665 | int status = 0; | 694 | int status = 0; |
666 | 695 | ||
667 | /* Verify we got what we asked for. | 696 | /* Verify we got what we asked for. |
@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
688 | } | 717 | } |
689 | 718 | ||
690 | spin_lock(&ino->i_lock); | 719 | spin_lock(&ino->i_lock); |
720 | if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || | ||
721 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | ||
722 | dprintk("%s forget reply due to recall\n", __func__); | ||
723 | goto out_forget_reply; | ||
724 | } | ||
725 | |||
726 | if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { | ||
727 | dprintk("%s forget reply due to state\n", __func__); | ||
728 | goto out_forget_reply; | ||
729 | } | ||
691 | init_lseg(lo, lseg); | 730 | init_lseg(lo, lseg); |
692 | lseg->pls_range = res->range; | 731 | lseg->pls_range = res->range; |
693 | *lgp->lsegpp = lseg; | 732 | *lgp->lsegpp = lseg; |
694 | pnfs_insert_layout(lo, lseg); | 733 | pnfs_insert_layout(lo, lseg); |
695 | 734 | ||
696 | /* Done processing layoutget. Set the layout stateid */ | 735 | /* Done processing layoutget. Set the layout stateid */ |
697 | pnfs_set_layout_stateid(lo, &res->stateid); | 736 | pnfs_set_layout_stateid(lo, &res->stateid, false); |
698 | spin_unlock(&ino->i_lock); | 737 | spin_unlock(&ino->i_lock); |
699 | out: | 738 | out: |
700 | return status; | 739 | return status; |
740 | |||
741 | out_forget_reply: | ||
742 | spin_unlock(&ino->i_lock); | ||
743 | lseg->pls_layout = lo; | ||
744 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | ||
745 | goto out; | ||
701 | } | 746 | } |
702 | 747 | ||
703 | /* | 748 | /* |