Diffstat (limited to 'fs/nfs/pnfs.c')
 -rw-r--r--   fs/nfs/pnfs.c   471
 1 file changed, 381 insertions, 90 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0a5dda4d85c2..4f802b02fbb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -34,6 +34,7 @@
 #include "pnfs.h"
 #include "iostat.h"
 #include "nfs4trace.h"
+#include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS
 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -50,6 +51,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
+static int
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+		       enum pnfs_iomode iomode, bool sync);
+
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
 find_pnfs_driver_locked(u32 id)
@@ -238,6 +243,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 	struct inode *inode = lo->plh_inode;
 
 	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+		if (!list_empty(&lo->plh_segs))
+			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
 		pnfs_detach_layout_hdr(lo);
 		spin_unlock(&inode->i_lock);
 		pnfs_free_layout_hdr(lo);
@@ -337,6 +344,48 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 		rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
+			struct pnfs_layout_segment *lseg)
+{
+	struct pnfs_layout_segment *s;
+
+	if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+		return false;
+
+	list_for_each_entry(s, &lo->plh_segs, pls_list)
+		if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+			return false;
+
+	return true;
+}
+
+static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
+		struct pnfs_layout_hdr *lo, struct inode *inode)
+{
+	lo = lseg->pls_layout;
+	inode = lo->plh_inode;
+
+	spin_lock(&inode->i_lock);
+	if (pnfs_layout_need_return(lo, lseg)) {
+		nfs4_stateid stateid;
+		enum pnfs_iomode iomode;
+
+		stateid = lo->plh_stateid;
+		iomode = lo->plh_return_iomode;
+		/* decreased in pnfs_send_layoutreturn() */
+		lo->plh_block_lgets++;
+		lo->plh_return_iomode = 0;
+		spin_unlock(&inode->i_lock);
+		pnfs_get_layout_hdr(lo);
+
+		/* Send an async layoutreturn so we dont deadlock */
+		pnfs_send_layoutreturn(lo, stateid, iomode, false);
+	} else
+		spin_unlock(&inode->i_lock);
+}
+
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -349,8 +398,17 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
 		atomic_read(&lseg->pls_refcount),
 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+
+	/* Handle the case where refcount != 1 */
+	if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
+		return;
+
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
+	/* Do we need a layoutreturn? */
+	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+		pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
+
 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 		pnfs_get_layout_hdr(lo);
 		pnfs_layout_remove_lseg(lo, lseg);
@@ -543,6 +601,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		pnfs_get_layout_hdr(lo);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
+		pnfs_clear_retry_layoutget(lo);
 		spin_unlock(&nfsi->vfs_inode.i_lock);
 		pnfs_free_lseg_list(&tmp_list);
 		pnfs_put_layout_hdr(lo);
@@ -740,25 +799,37 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
 	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
 }
 
+static bool
+pnfs_layout_returning(const struct pnfs_layout_hdr *lo,
+		      struct pnfs_layout_range *range)
+{
+	return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
+		(lo->plh_return_iomode == IOMODE_ANY ||
+		 lo->plh_return_iomode == range->iomode);
+}
+
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
-pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget)
+pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo,
+			struct pnfs_layout_range *range, int lget)
 {
 	return lo->plh_block_lgets ||
 		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 		(list_empty(&lo->plh_segs) &&
-		 (atomic_read(&lo->plh_outstanding) > lget));
+		 (atomic_read(&lo->plh_outstanding) > lget)) ||
+		pnfs_layout_returning(lo, range);
 }
 
 int
 pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+			      struct pnfs_layout_range *range,
 			      struct nfs4_state *open_state)
 {
 	int status = 0;
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	if (pnfs_layoutgets_blocked(lo, 1)) {
+	if (pnfs_layoutgets_blocked(lo, range, 1)) {
 		status = -EAGAIN;
 	} else if (!nfs4_valid_open_stateid(open_state)) {
 		status = -EBADF;
@@ -825,7 +896,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 			pnfs_layout_io_set_failed(lo, range->iomode);
 		}
 		return NULL;
-	}
+	} else
+		pnfs_layout_clear_fail_bit(lo,
+				pnfs_iomode_to_fail_bit(range->iomode));
 
 	return lseg;
 }
@@ -845,6 +918,49 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
 	}
 }
 
+void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
+{
+	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
+}
+
+static int
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+		       enum pnfs_iomode iomode, bool sync)
+{
+	struct inode *ino = lo->plh_inode;
+	struct nfs4_layoutreturn *lrp;
+	int status = 0;
+
+	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
+	if (unlikely(lrp == NULL)) {
+		status = -ENOMEM;
+		spin_lock(&ino->i_lock);
+		lo->plh_block_lgets--;
+		pnfs_clear_layoutreturn_waitbit(lo);
+		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+		spin_unlock(&ino->i_lock);
+		pnfs_put_layout_hdr(lo);
+		goto out;
+	}
+
+	lrp->args.stateid = stateid;
+	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
+	lrp->args.inode = ino;
+	lrp->args.range.iomode = iomode;
+	lrp->args.range.offset = 0;
+	lrp->args.range.length = NFS4_MAX_UINT64;
+	lrp->args.layout = lo;
+	lrp->clp = NFS_SERVER(ino)->nfs_client;
+	lrp->cred = lo->plh_lc_cred;
+
+	status = nfs4_proc_layoutreturn(lrp, sync);
+out:
+	dprintk("<-- %s status: %d\n", __func__, status);
+	return status;
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -859,7 +975,6 @@ _pnfs_return_layout(struct inode *ino)
 	struct pnfs_layout_hdr *lo = NULL;
 	struct nfs_inode *nfsi = NFS_I(ino);
 	LIST_HEAD(tmp_list);
-	struct nfs4_layoutreturn *lrp;
 	nfs4_stateid stateid;
 	int status = 0, empty;
 
@@ -901,24 +1016,7 @@ _pnfs_return_layout(struct inode *ino)
 	spin_unlock(&ino->i_lock);
 	pnfs_free_lseg_list(&tmp_list);
 
-	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
-	if (unlikely(lrp == NULL)) {
-		status = -ENOMEM;
-		spin_lock(&ino->i_lock);
-		lo->plh_block_lgets--;
-		spin_unlock(&ino->i_lock);
-		pnfs_put_layout_hdr(lo);
-		goto out;
-	}
-
-	lrp->args.stateid = stateid;
-	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
-	lrp->args.inode = ino;
-	lrp->args.layout = lo;
-	lrp->clp = NFS_SERVER(ino)->nfs_client;
-	lrp->cred = lo->plh_lc_cred;
-
-	status = nfs4_proc_layoutreturn(lrp);
+	status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
 out:
 	dprintk("<-- %s status: %d\n", __func__, status);
 	return status;
@@ -954,31 +1052,60 @@ pnfs_commit_and_return_layout(struct inode *inode)
 
 bool pnfs_roc(struct inode *ino)
 {
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg, *tmp;
+	nfs4_stateid stateid;
 	LIST_HEAD(tmp_list);
-	bool found = false;
+	bool found = false, layoutreturn = false;
 
 	spin_lock(&ino->i_lock);
-	lo = NFS_I(ino)->layout;
+	lo = nfsi->layout;
 	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
 	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
-		goto out_nolayout;
+		goto out_noroc;
+
+	/* Don't return layout if we hold a delegation */
+	if (nfs4_check_delegation(ino, FMODE_READ))
+		goto out_noroc;
+
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		/* Don't return layout if there is open file state */
+		if (state != NULL && state->state != 0)
+			goto out_noroc;
+	}
+
+	pnfs_clear_retry_layoutget(lo);
 	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
 		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
 			mark_lseg_invalid(lseg, &tmp_list);
 			found = true;
 		}
 	if (!found)
-		goto out_nolayout;
+		goto out_noroc;
 	lo->plh_block_lgets++;
 	pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
 	spin_unlock(&ino->i_lock);
 	pnfs_free_lseg_list(&tmp_list);
 	return true;
 
-out_nolayout:
+out_noroc:
+	if (lo) {
+		stateid = lo->plh_stateid;
+		layoutreturn =
+			test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+					   &lo->plh_flags);
+		if (layoutreturn) {
+			lo->plh_block_lgets++;
+			pnfs_get_layout_hdr(lo);
+		}
+	}
 	spin_unlock(&ino->i_lock);
+	if (layoutreturn)
+		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
 	return false;
 }
 
@@ -1013,8 +1140,9 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg;
+	nfs4_stateid stateid;
 	u32 current_seqid;
-	bool found = false;
+	bool found = false, layoutreturn = false;
 
 	spin_lock(&ino->i_lock);
 	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
@@ -1031,7 +1159,21 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
 	 */
 	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
 out:
+	if (!found) {
+		stateid = lo->plh_stateid;
+		layoutreturn =
+			test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+					   &lo->plh_flags);
+		if (layoutreturn) {
+			lo->plh_block_lgets++;
+			pnfs_get_layout_hdr(lo);
+		}
+	}
 	spin_unlock(&ino->i_lock);
+	if (layoutreturn) {
+		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
+	}
 	return found;
 }
 
@@ -1178,6 +1320,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
 
 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+		    !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
 		    pnfs_lseg_range_match(&lseg->pls_range, range)) {
 			ret = pnfs_get_lseg(lseg);
 			break;
@@ -1266,6 +1409,35 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
 	return ret;
 }
 
+/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
+static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
+{
+	if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
+		return 1;
+	return nfs_wait_bit_killable(key);
+}
+
+static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
+{
+	/*
+	 * send layoutcommit as it can hold up layoutreturn due to lseg
+	 * reference
+	 */
+	pnfs_layoutcommit_inode(lo->plh_inode, false);
+	return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
+				   pnfs_layoutget_retry_bit_wait,
+				   TASK_UNINTERRUPTIBLE);
+}
+
+static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
+{
+	unsigned long *bitlock = &lo->plh_flags;
+
+	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
+	smp_mb__after_atomic();
+	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
+}
+
 /*
  * Layout segment is retreived from the server if not cached.
  * The appropriate layout segment is referenced and returned to the caller.
@@ -1296,6 +1468,8 @@ pnfs_update_layout(struct inode *ino,
 	if (pnfs_within_mdsthreshold(ctx, ino, iomode))
 		goto out;
 
+lookup_again:
+	first = false;
 	spin_lock(&ino->i_lock);
 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
 	if (lo == NULL) {
@@ -1310,27 +1484,62 @@ pnfs_update_layout(struct inode *ino,
 	}
 
 	/* if LAYOUTGET already failed once we don't try again */
-	if (pnfs_layout_io_test_failed(lo, iomode))
+	if (pnfs_layout_io_test_failed(lo, iomode) &&
+	    !pnfs_should_retry_layoutget(lo))
 		goto out_unlock;
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_find_lseg(lo, &arg);
-	if (lseg)
-		goto out_unlock;
+	first = list_empty(&lo->plh_segs);
+	if (first) {
+		/* The first layoutget for the file. Need to serialize per
+		 * RFC 5661 Errata 3208.
+		 */
+		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
+				     &lo->plh_flags)) {
+			spin_unlock(&ino->i_lock);
+			wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET,
+				    TASK_UNINTERRUPTIBLE);
+			pnfs_put_layout_hdr(lo);
+			goto lookup_again;
+		}
+	} else {
+		/* Check to see if the layout for the given range
+		 * already exists
+		 */
+		lseg = pnfs_find_lseg(lo, &arg);
+		if (lseg)
+			goto out_unlock;
+	}
+
+	/*
+	 * Because we free lsegs before sending LAYOUTRETURN, we need to wait
+	 * for LAYOUTRETURN even if first is true.
+	 */
+	if (!lseg && pnfs_should_retry_layoutget(lo) &&
+	    test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
+		spin_unlock(&ino->i_lock);
+		dprintk("%s wait for layoutreturn\n", __func__);
+		if (pnfs_prepare_to_retry_layoutget(lo)) {
+			if (first)
+				pnfs_clear_first_layoutget(lo);
+			pnfs_put_layout_hdr(lo);
+			dprintk("%s retrying\n", __func__);
+			goto lookup_again;
+		}
+		goto out_put_layout_hdr;
+	}
 
-	if (pnfs_layoutgets_blocked(lo, 0))
+	if (pnfs_layoutgets_blocked(lo, &arg, 0))
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
-
-	first = list_empty(&lo->plh_layouts) ? true : false;
 	spin_unlock(&ino->i_lock);
 
-	if (first) {
+	if (list_empty(&lo->plh_layouts)) {
 		/* The lo must be on the clp list if there is any
 		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
 		 */
 		spin_lock(&clp->cl_lock);
-		list_add_tail(&lo->plh_layouts, &server->layouts);
+		if (list_empty(&lo->plh_layouts))
+			list_add_tail(&lo->plh_layouts, &server->layouts);
 		spin_unlock(&clp->cl_lock);
 	}
 
@@ -1343,8 +1552,11 @@ pnfs_update_layout(struct inode *ino,
 		arg.length = PAGE_CACHE_ALIGN(arg.length);
 
 	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
+	pnfs_clear_retry_layoutget(lo);
 	atomic_dec(&lo->plh_outstanding);
 out_put_layout_hdr:
+	if (first)
+		pnfs_clear_first_layoutget(lo);
 	pnfs_put_layout_hdr(lo);
 out:
 	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
@@ -1393,7 +1605,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 		goto out_forget_reply;
 	}
 
-	if (pnfs_layoutgets_blocked(lo, 1)) {
+	if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) {
 		dprintk("%s forget reply due to state\n", __func__);
 		goto out_forget_reply;
 	}
@@ -1440,24 +1652,79 @@ out_forget_reply:
 	goto out;
 }
 
+static void
+pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
+				struct list_head *tmp_list,
+				struct pnfs_layout_range *return_range)
+{
+	struct pnfs_layout_segment *lseg, *next;
+
+	dprintk("%s:Begin lo %p\n", __func__, lo);
+
+	if (list_empty(&lo->plh_segs))
+		return;
+
+	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+		if (should_free_lseg(&lseg->pls_range, return_range)) {
+			dprintk("%s: marking lseg %p iomode %d "
+				"offset %llu length %llu\n", __func__,
+				lseg, lseg->pls_range.iomode,
+				lseg->pls_range.offset,
+				lseg->pls_range.length);
+			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
+			mark_lseg_invalid(lseg, tmp_list);
+		}
+}
+
+void pnfs_error_mark_layout_for_return(struct inode *inode,
+				       struct pnfs_layout_segment *lseg)
+{
+	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
+	int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
+	struct pnfs_layout_range range = {
+		.iomode = lseg->pls_range.iomode,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+	LIST_HEAD(free_me);
+
+	spin_lock(&inode->i_lock);
+	/* set failure bit so that pnfs path will be retried later */
+	pnfs_layout_set_fail_bit(lo, iomode);
+	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
+	if (lo->plh_return_iomode == 0)
+		lo->plh_return_iomode = range.iomode;
+	else if (lo->plh_return_iomode != range.iomode)
+		lo->plh_return_iomode = IOMODE_ANY;
+	/*
+	 * mark all matching lsegs so that we are sure to have no live
+	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
+	 * for how it works.
+	 */
+	pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
+	spin_unlock(&inode->i_lock);
+	pnfs_free_lseg_list(&free_me);
+}
+EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
+
 void
 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
 	u64 rd_size = req->wb_bytes;
 
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	if (pgio->pg_dreq == NULL)
-		rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
-	else
-		rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
-
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-					   req->wb_context,
-					   req_offset(req),
-					   rd_size,
-					   IOMODE_READ,
-					   GFP_KERNEL);
+	if (pgio->pg_lseg == NULL) {
+		if (pgio->pg_dreq == NULL)
+			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
+		else
+			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+						   req->wb_context,
+						   req_offset(req),
+						   rd_size,
+						   IOMODE_READ,
+						   GFP_KERNEL);
+	}
 	/* If no lseg, fall back to read through mds */
 	if (pgio->pg_lseg == NULL)
 		nfs_pageio_reset_read_mds(pgio);
@@ -1469,27 +1736,36 @@ void
 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			   struct nfs_page *req, u64 wb_size)
 {
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-					   req->wb_context,
-					   req_offset(req),
-					   wb_size,
-					   IOMODE_RW,
-					   GFP_NOFS);
+	if (pgio->pg_lseg == NULL)
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+						   req->wb_context,
+						   req_offset(req),
+						   wb_size,
+						   IOMODE_RW,
+						   GFP_NOFS);
 	/* If no lseg, fall back to write through mds */
 	if (pgio->pg_lseg == NULL)
 		nfs_pageio_reset_write_mds(pgio);
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
+void
+pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+	if (desc->pg_lseg) {
+		pnfs_put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
+
 /*
  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
  * of bytes (maximum @req->wb_bytes) that can be coalesced.
  */
 size_t
-pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
-		     struct nfs_page *req)
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+		     struct nfs_page *prev, struct nfs_page *req)
 {
 	unsigned int size;
 	u64 seg_end, req_start, seg_left;
@@ -1513,10 +1789,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
 			     pgio->pg_lseg->pls_range.length);
 	req_start = req_offset(req);
-	WARN_ON_ONCE(req_start > seg_end);
+	WARN_ON_ONCE(req_start >= seg_end);
 	/* start of request is past the last byte of this segment */
-	if (req_start >= seg_end)
+	if (req_start >= seg_end) {
+		/* reference the new lseg */
+		if (pgio->pg_ops->pg_cleanup)
+			pgio->pg_ops->pg_cleanup(pgio);
+		if (pgio->pg_ops->pg_init)
+			pgio->pg_ops->pg_init(pgio, req);
 		return 0;
+	}
 
 	/* adjust 'size' iff there are fewer bytes left in the
 	 * segment than what nfs_generic_pg_test returned */
@@ -1571,10 +1853,12 @@ static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_pgio_header *hdr)
 {
+	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
-		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
 		nfs_pageio_reset_write_mds(desc);
-		desc->pg_recoalesce = 1;
+		mirror->pg_recoalesce = 1;
 	}
 	nfs_pgio_data_destroy(hdr);
 }
@@ -1608,11 +1892,9 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
 
-	desc->pg_lseg = NULL;
 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
 	if (trypnfs == PNFS_NOT_ATTEMPTED)
 		pnfs_write_through_mds(desc, hdr);
-	pnfs_put_lseg(lseg);
 }
 
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
@@ -1625,24 +1907,23 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
+	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+
 	struct nfs_pgio_header *hdr;
 	int ret;
 
 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
 	if (!hdr) {
-		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-		pnfs_put_lseg(desc->pg_lseg);
-		desc->pg_lseg = NULL;
+		desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
 		return -ENOMEM;
 	}
 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	ret = nfs_generic_pgio(desc, hdr);
-	if (ret != 0) {
-		pnfs_put_lseg(desc->pg_lseg);
-		desc->pg_lseg = NULL;
-	} else
-		pnfs_do_write(desc, hdr, desc->pg_ioflags);
+	if (!ret)
+		pnfs_do_write(desc, hdr, desc->pg_ioflags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
@@ -1687,10 +1968,12 @@ static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_pgio_header *hdr)
 {
+	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
-		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
 		nfs_pageio_reset_read_mds(desc);
-		desc->pg_recoalesce = 1;
+		mirror->pg_recoalesce = 1;
 	}
 	nfs_pgio_data_destroy(hdr);
 }
@@ -1719,18 +2002,29 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
 	return trypnfs;
 }
 
+/* Resend all requests through pnfs. */
+int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
+{
+	struct nfs_pageio_descriptor pgio;
+
+	nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops);
+	return nfs_pageio_resend(&pgio, hdr);
+}
+EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
+
 static void
 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 	enum pnfs_try_status trypnfs;
+	int err = 0;
 
-	desc->pg_lseg = NULL;
 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
-	if (trypnfs == PNFS_NOT_ATTEMPTED)
+	if (trypnfs == PNFS_TRY_AGAIN)
+		err = pnfs_read_resend_pnfs(hdr);
+	if (trypnfs == PNFS_NOT_ATTEMPTED || err)
 		pnfs_read_through_mds(desc, hdr);
-	pnfs_put_lseg(lseg);
 }
 
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
@@ -1743,24 +2037,20 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
+	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+
 	struct nfs_pgio_header *hdr;
 	int ret;
 
 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
 	if (!hdr) {
-		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-		ret = -ENOMEM;
-		pnfs_put_lseg(desc->pg_lseg);
-		desc->pg_lseg = NULL;
-		return ret;
+		desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+		return -ENOMEM;
 	}
 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
 	ret = nfs_generic_pgio(desc, hdr);
-	if (ret != 0) {
-		pnfs_put_lseg(desc->pg_lseg);
-		desc->pg_lseg = NULL;
-	} else
+	if (!ret)
 		pnfs_do_read(desc, hdr);
 	return ret;
 }
@@ -1966,6 +2256,7 @@ clear_layoutcommitting:
 	pnfs_clear_layoutcommitting(inode);
 	goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
 
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
 {
