author     Fred Isaman <iisaman@netapp.com>              2011-01-06 06:36:30 -0500
committer  Trond Myklebust <Trond.Myklebust@netapp.com>  2011-01-06 14:46:32 -0500
commit     43f1b3da8b35d706d6c47880fc211d2519b4a587 (patch)
tree       67fdbaaab9658cf79b17fa905c62a59d7355d613 /fs
parent     f2a625616045fe46e1d5fceebdd825f5acdecdb7 (diff)
pnfs: add CB_LAYOUTRECALL handling

This is the heart of the wave 2 submission. Add the code to trigger drain
and forget of any affected layouts. In addition, we set a "barrier", below
which any LAYOUTGET reply is ignored. This compensates for the fact that we
do not wait for outstanding LAYOUTGETs to complete, as per section
12.5.5.2.1 of RFC 5661.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
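[Editorial sketch] The barrier described above comes down to serial-number comparison of the layout stateid's 32-bit seqid: a LAYOUTGET reply whose seqid is not strictly above the recorded barrier is forgotten rather than applied. Below is a minimal, standalone C sketch of that comparison under the same wraparound assumptions the patch makes. The struct and helper names (layout_state, seqid_after, raise_barrier, reply_usable) are illustrative stand-ins, not identifiers from the patch, which keeps the barrier in pnfs_layout_hdr.plh_barrier and performs the check in pnfs_layoutgets_blocked().

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the seqid/barrier pair kept per layout header. */
struct layout_state {
	uint32_t seqid;    /* seqid of the most recently accepted layout stateid */
	uint32_t barrier;  /* replies with seqid at or below this are dropped */
};

/* Signed difference of unsigned seqids tolerates 32-bit wraparound, provided
 * the two values stay within 2**31 of each other. */
static bool seqid_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

/* On CB_LAYOUTRECALL: raise the barrier to the recall stateid's seqid so that
 * replies to LAYOUTGETs issued before the recall are ignored. */
static void raise_barrier(struct layout_state *ls, uint32_t recall_seqid)
{
	if (seqid_after(recall_seqid, ls->barrier))
		ls->barrier = recall_seqid;
}

/* On a LAYOUTGET reply: accept it only if its seqid is strictly newer than
 * the barrier; otherwise forget the layout as if it were never received. */
static bool reply_usable(const struct layout_state *ls, uint32_t reply_seqid)
{
	return seqid_after(reply_seqid, ls->barrier);
}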
Diffstat (limited to 'fs')
-rw-r--r--  fs/nfs/callback_proc.c  119
-rw-r--r--  fs/nfs/nfs4_fs.h          1
-rw-r--r--  fs/nfs/pnfs.c            83
-rw-r--r--  fs/nfs/pnfs.h            11
4 files changed, 194 insertions, 20 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c1bb157e94b..6619c05b55a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,6 +12,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -107,10 +108,126 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 #if defined(CONFIG_NFS_V4_1)
 
+static u32 initiate_file_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	bool found = false;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	LIST_HEAD(free_me_list);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		if (nfs_compare_fh(&args->cbl_fh,
+				   &NFS_I(lo->plh_inode)->fh))
+			continue;
+		ino = igrab(lo->plh_inode);
+		if (!ino)
+			continue;
+		found = true;
+		/* Without this, layout can be freed as soon
+		 * as we release cl_lock.
+		 */
+		get_layout_hdr(lo);
+		break;
+	}
+	spin_unlock(&clp->cl_lock);
+	if (!found)
+		return NFS4ERR_NOMATCHING_LAYOUT;
+
+	spin_lock(&ino->i_lock);
+	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+	    mark_matching_lsegs_invalid(lo, &free_me_list,
+					args->cbl_range.iomode))
+		rv = NFS4ERR_DELAY;
+	else
+		rv = NFS4ERR_NOMATCHING_LAYOUT;
+	pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&free_me_list);
+	put_layout_hdr(lo);
+	iput(ino);
+	return rv;
+}
+
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	struct pnfs_layout_hdr *tmp;
+	LIST_HEAD(recall_list);
+	LIST_HEAD(free_me_list);
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		if ((args->cbl_recall_type == RETURN_FSID) &&
+		    memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+			   &args->cbl_fsid, sizeof(struct nfs_fsid)))
+			continue;
+		if (!igrab(lo->plh_inode))
+			continue;
+		get_layout_hdr(lo);
+		BUG_ON(!list_empty(&lo->plh_bulk_recall));
+		list_add(&lo->plh_bulk_recall, &recall_list);
+	}
+	spin_unlock(&clp->cl_lock);
+	list_for_each_entry_safe(lo, tmp,
+				 &recall_list, plh_bulk_recall) {
+		ino = lo->plh_inode;
+		spin_lock(&ino->i_lock);
+		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+			rv = NFS4ERR_DELAY;
+		list_del_init(&lo->plh_bulk_recall);
+		spin_unlock(&ino->i_lock);
+		put_layout_hdr(lo);
+		iput(ino);
+	}
+	pnfs_free_lseg_list(&free_me_list);
+	return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
+{
+	u32 res = NFS4ERR_DELAY;
+
+	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+	if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
+		goto out;
+	if (args->cbl_recall_type == RETURN_FILE)
+		res = initiate_file_draining(clp, args);
+	else
+		res = initiate_bulk_draining(clp, args);
+	clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+out:
+	dprintk("%s returning %i\n", __func__, res);
+	return res;
+
+}
+
 __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
 				  void *dummy, struct cb_process_state *cps)
 {
-	return cpu_to_be32(NFS4ERR_NOTSUPP); /* STUB */
+	u32 res;
+
+	dprintk("%s: -->\n", __func__);
+
+	if (cps->clp)
+		res = do_callback_layoutrecall(cps->clp, args);
+	else
+		res = NFS4ERR_OP_NOT_IN_SESSION;
+
+	dprintk("%s: exit with status = %d\n", __func__, res);
+	return cpu_to_be32(res);
 }
 
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3b3829c3098..8f169dc789d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 };
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 32b66468e5d..bf4186b8f2f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  */
 
 /* Need to hold i_lock if caller does not already hold reference */
-static void
+void
 get_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	atomic_inc(&lo->plh_refcount);
@@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 			/* List does not take a reference, so no need for put here */
 			list_del_init(&lseg->pls_layout->plh_layouts);
 			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
 		}
 		list_add(&lseg->pls_list, tmp_list);
 		return 1;
@@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 /* Returns count of number of matching invalid lsegs remaining in list
  * after call.
  */
-static int
+int
 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 			    struct list_head *tmp_list,
 			    u32 iomode)
@@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	return invalid - removed;
 }
 
-static void
+void
 pnfs_free_lseg_list(struct list_head *free_me)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
@@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 }
 
 /* update lo->plh_stateid with new if is more recent */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-			const nfs4_stateid *new)
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+			bool update_barrier)
 {
 	u32 oldseq, newseq;
 
 	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
 	newseq = be32_to_cpu(new->stateid.seqid);
-	if ((int)(newseq - oldseq) > 0)
+	if ((int)(newseq - oldseq) > 0) {
 		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+		if (update_barrier) {
+			u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+			if ((int)(new_barrier - lo->plh_barrier))
+				lo->plh_barrier = new_barrier;
+		} else {
+			/* Because of wraparound, we want to keep the barrier
+			 * "close" to the current seqids. It needs to be
+			 * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a litle leeway
+			 * and bring it to within 2**30.
+			 * NOTE - and yes, this is all unsigned arithmetic.
+			 */
+			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+				lo->plh_barrier = newseq - (1 << 30);
+		}
+	}
 }
 
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
-pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
-{
-	return (list_empty(&lo->plh_segs) &&
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+			int lget)
+{
+	if ((stateid) &&
+	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+		return true;
+	return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+		(list_empty(&lo->plh_segs) &&
 		(atomic_read(&lo->plh_outstanding) > lget));
 }
 
@@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	if (pnfs_layoutgets_blocked(lo, 1)) {
+	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
 		status = -EAGAIN;
 	} else if (list_empty(&lo->plh_segs)) {
 		int seq;
@@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino)
 	atomic_set(&lo->plh_refcount, 1);
 	INIT_LIST_HEAD(&lo->plh_layouts);
 	INIT_LIST_HEAD(&lo->plh_segs);
+	INIT_LIST_HEAD(&lo->plh_bulk_recall);
 	lo->plh_inode = ino;
 	return lo;
 }
@@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
@@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	}
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_has_layout(lo, iomode);
-	if (lseg) {
-		dprintk("%s: Using cached lseg %p for iomode %d)\n",
-			__func__, lseg, iomode);
+	/* Do we even need to bother with this? */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 	}
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_find_lseg(lo, iomode);
+	if (lseg)
+		goto out_unlock;
 
 	/* if LAYOUTGET already failed once we don't try again */
 	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
 		goto out_unlock;
 
-	if (pnfs_layoutgets_blocked(lo, 0))
+	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
 
@@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino,
 			spin_lock(&clp->cl_lock);
 			list_del_init(&lo->plh_layouts);
 			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 		}
 		spin_unlock(&ino->i_lock);
 	}
@@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct pnfs_layout_segment *lseg;
 	struct inode *ino = lo->plh_inode;
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 
 	/* Verify we got what we asked for.
@@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 
 	spin_lock(&ino->i_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s forget reply due to recall\n", __func__);
+		goto out_forget_reply;
+	}
+
+	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+		dprintk("%s forget reply due to state\n", __func__);
+		goto out_forget_reply;
+	}
 	init_lseg(lo, lseg);
 	lseg->pls_range = res->range;
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 
 	/* Done processing layoutget. Set the layout stateid */
-	pnfs_set_layout_stateid(lo, &res->stateid);
+	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
 out:
 	return status;
+
+out_forget_reply:
+	spin_unlock(&ino->i_lock);
+	lseg->pls_layout = lo;
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	goto out;
 }
 
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8aaab56b794..f91d0d45551 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -49,6 +49,7 @@ struct pnfs_layout_segment {
 enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
+	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
 	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */
 };
 
@@ -67,9 +68,11 @@ struct pnfs_layoutdriver_type {
 struct pnfs_layout_hdr {
 	atomic_t		plh_refcount;
 	struct list_head	plh_layouts;	/* other client layouts */
+	struct list_head	plh_bulk_recall; /* clnt list of bulk recalls */
 	struct list_head	plh_segs;	/* layout segments list */
 	nfs4_stateid		plh_stateid;
 	atomic_t		plh_outstanding; /* number of RPCs out */
+	u32			plh_barrier;	/* ignore lower seqids */
 	unsigned long		plh_flags;
 	struct inode		*plh_inode;
 };
@@ -139,18 +142,26 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
 
 /* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 		   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
 void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+			     const nfs4_stateid *new,
+			     bool update_barrier);
 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
 				  struct pnfs_layout_hdr *lo,
 				  struct nfs4_state *open_state);
+int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+				struct list_head *tmp_list,
+				u32 iomode);
 
 
 static inline int lo_fail_bit(u32 iomode)