aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Fred Isaman <iisaman@netapp.com> 2011-01-06 06:36:32 -0500
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2011-01-06 14:46:32 -0500
commit f7e8917a67980924651a9e244510e63ef05c7755 (patch)
tree fb00db5ef8b3b26793fae7a1186f1cd16205ab9d
parent 36840370845629e6cb4324d1dd4aff6778670503 (diff)
pnfs: layout roc code
A layout can request return-on-close (roc). How this interacts with the forgetful model of never sending LAYOUTRETURNs is a bit ambiguous. We forget any layouts marked roc, and wait for them to be completely forgotten before continuing with the close. In addition, to compensate for races with any in-flight LAYOUTGETs, and for the fact that we do not get any layout stateid back from the server, we set the barrier to the worst-case scenario of current_seqid + number of outstanding LAYOUTGETs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- fs/nfs/client.c 12
-rw-r--r-- fs/nfs/nfs4_fs.h 2
-rw-r--r-- fs/nfs/nfs4proc.c 21
-rw-r--r-- fs/nfs/nfs4state.c 7
-rw-r--r-- fs/nfs/pnfs.c 86
-rw-r--r-- fs/nfs/pnfs.h 29
-rw-r--r-- include/linux/nfs_fs_sb.h 1
7 files changed, 152 insertions, 6 deletions
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 11eb9934c747..684b67771199 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -244,6 +244,11 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
244 idr_remove(&cb_ident_idr, clp->cl_cb_ident); 244 idr_remove(&cb_ident_idr, clp->cl_cb_ident);
245} 245}
246 246
/*
 * Initialise the pNFS-related state of a freshly allocated nfs_server.
 *
 * Sets up server->roc_rpcwaitq, the RPC wait queue named "pNFS ROC".
 * This is the variant compiled in the CONFIG_NFS_V4 branch of the
 * surrounding #if/#else block.
 */
247static void pnfs_init_server(struct nfs_server *server)
248{
249 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
250}
251
247#else 252#else
248static void nfs4_shutdown_client(struct nfs_client *clp) 253static void nfs4_shutdown_client(struct nfs_client *clp)
249{ 254{
@@ -256,6 +261,11 @@ void nfs_cleanup_cb_ident_idr(void)
256static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 261static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
257{ 262{
258} 263}
264
/*
 * No-op counterpart of pnfs_init_server() for the #else branch that is
 * closed by "#endif CONFIG_NFS_V4": there is nothing to initialise here.
 */
265static void pnfs_init_server(struct nfs_server *server)
266{
267}
268
259#endif /* CONFIG_NFS_V4 */ 269#endif /* CONFIG_NFS_V4 */
260 270
261/* 271/*
@@ -1024,6 +1034,8 @@ static struct nfs_server *nfs_alloc_server(void)
1024 return NULL; 1034 return NULL;
1025 } 1035 }
1026 1036
1037 pnfs_init_server(server);
1038
1027 return server; 1039 return server;
1028} 1040}
1029 1041
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8f169dc789db..18d64cb5985b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -236,7 +236,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
236extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 236extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
237extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 237extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
238extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 238extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
239extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 239extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
240extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 240extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
241extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 241extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
242 struct nfs4_fs_locations *fs_locations, struct page *page); 242 struct nfs4_fs_locations *fs_locations, struct page *page);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a3549ce72ab2..88f590feeb72 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1839,6 +1839,8 @@ struct nfs4_closedata {
1839 struct nfs_closeres res; 1839 struct nfs_closeres res;
1840 struct nfs_fattr fattr; 1840 struct nfs_fattr fattr;
1841 unsigned long timestamp; 1841 unsigned long timestamp;
1842 bool roc;
1843 u32 roc_barrier;
1842}; 1844};
1843 1845
1844static void nfs4_free_closedata(void *data) 1846static void nfs4_free_closedata(void *data)
@@ -1846,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
1846 struct nfs4_closedata *calldata = data; 1848 struct nfs4_closedata *calldata = data;
1847 struct nfs4_state_owner *sp = calldata->state->owner; 1849 struct nfs4_state_owner *sp = calldata->state->owner;
1848 1850
1851 if (calldata->roc)
1852 pnfs_roc_release(calldata->state->inode);
1849 nfs4_put_open_state(calldata->state); 1853 nfs4_put_open_state(calldata->state);
1850 nfs_free_seqid(calldata->arg.seqid); 1854 nfs_free_seqid(calldata->arg.seqid);
1851 nfs4_put_state_owner(sp); 1855 nfs4_put_state_owner(sp);
@@ -1878,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1878 */ 1882 */
1879 switch (task->tk_status) { 1883 switch (task->tk_status) {
1880 case 0: 1884 case 0:
1885 if (calldata->roc)
1886 pnfs_roc_set_barrier(state->inode,
1887 calldata->roc_barrier);
1881 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 1888 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
1882 renew_lease(server, calldata->timestamp); 1889 renew_lease(server, calldata->timestamp);
1883 nfs4_close_clear_stateid_flags(state, 1890 nfs4_close_clear_stateid_flags(state,
@@ -1930,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1930 return; 1937 return;
1931 } 1938 }
1932 1939
1933 if (calldata->arg.fmode == 0) 1940 if (calldata->arg.fmode == 0) {
1934 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 1941 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
1942 if (calldata->roc &&
1943 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
1944 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
1945 task, NULL);
1946 return;
1947 }
1948 }
1935 1949
1936 nfs_fattr_init(calldata->res.fattr); 1950 nfs_fattr_init(calldata->res.fattr);
1937 calldata->timestamp = jiffies; 1951 calldata->timestamp = jiffies;
@@ -1959,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
1959 * 1973 *
1960 * NOTE: Caller must be holding the sp->so_owner semaphore! 1974 * NOTE: Caller must be holding the sp->so_owner semaphore!
1961 */ 1975 */
1962int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait) 1976int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
1963{ 1977{
1964 struct nfs_server *server = NFS_SERVER(state->inode); 1978 struct nfs_server *server = NFS_SERVER(state->inode);
1965 struct nfs4_closedata *calldata; 1979 struct nfs4_closedata *calldata;
@@ -1994,6 +2008,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1994 calldata->res.fattr = &calldata->fattr; 2008 calldata->res.fattr = &calldata->fattr;
1995 calldata->res.seqid = calldata->arg.seqid; 2009 calldata->res.seqid = calldata->arg.seqid;
1996 calldata->res.server = server; 2010 calldata->res.server = server;
2011 calldata->roc = roc;
1997 path_get(path); 2012 path_get(path);
1998 calldata->path = *path; 2013 calldata->path = *path;
1999 2014
@@ -2011,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
2011out_free_calldata: 2026out_free_calldata:
2012 kfree(calldata); 2027 kfree(calldata);
2013out: 2028out:
2029 if (roc)
2030 pnfs_roc_release(state->inode);
2014 nfs4_put_open_state(state); 2031 nfs4_put_open_state(state);
2015 nfs4_put_state_owner(sp); 2032 nfs4_put_state_owner(sp);
2016 return status; 2033 return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6891dedd80f1..286084f148e3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -606,8 +606,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
606 if (!call_close) { 606 if (!call_close) {
607 nfs4_put_open_state(state); 607 nfs4_put_open_state(state);
608 nfs4_put_state_owner(owner); 608 nfs4_put_state_owner(owner);
609 } else 609 } else {
610 nfs4_do_close(path, state, gfp_mask, wait); 610 bool roc = pnfs_roc(state->inode);
611
612 nfs4_do_close(path, state, gfp_mask, wait, roc);
613 }
611} 614}
612 615
613void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 616void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bf4186b8f2fc..bc4089769735 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -256,6 +256,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
256 spin_unlock(&clp->cl_lock); 256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); 257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 } 258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
259 list_add(&lseg->pls_list, tmp_list); 260 list_add(&lseg->pls_list, tmp_list);
260 return 1; 261 return 1;
261 } 262 }
@@ -401,7 +402,8 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
401 if ((stateid) && 402 if ((stateid) &&
402 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
403 return true; 404 return true;
404 return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 405 return lo->plh_block_lgets ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
405 (list_empty(&lo->plh_segs) && 407 (list_empty(&lo->plh_segs) &&
406 (atomic_read(&lo->plh_outstanding) > lget)); 408 (atomic_read(&lo->plh_outstanding) > lget));
407} 409}
@@ -474,6 +476,83 @@ send_layoutget(struct pnfs_layout_hdr *lo,
474 return lseg; 476 return lseg;
475} 477}
476 478
479bool pnfs_roc(struct inode *ino)
480{
481 struct pnfs_layout_hdr *lo;
482 struct pnfs_layout_segment *lseg, *tmp;
483 LIST_HEAD(tmp_list);
484 bool found = false;
485
486 spin_lock(&ino->i_lock);
487 lo = NFS_I(ino)->layout;
488 if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
489 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
490 goto out_nolayout;
491 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
492 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
493 mark_lseg_invalid(lseg, &tmp_list);
494 found = true;
495 }
496 if (!found)
497 goto out_nolayout;
498 lo->plh_block_lgets++;
499 get_layout_hdr(lo); /* matched in pnfs_roc_release */
500 spin_unlock(&ino->i_lock);
501 pnfs_free_lseg_list(&tmp_list);
502 return true;
503
504out_nolayout:
505 spin_unlock(&ino->i_lock);
506 return false;
507}
508
509void pnfs_roc_release(struct inode *ino)
510{
511 struct pnfs_layout_hdr *lo;
512
513 spin_lock(&ino->i_lock);
514 lo = NFS_I(ino)->layout;
515 lo->plh_block_lgets--;
516 put_layout_hdr_locked(lo);
517 spin_unlock(&ino->i_lock);
518}
519
520void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
521{
522 struct pnfs_layout_hdr *lo;
523
524 spin_lock(&ino->i_lock);
525 lo = NFS_I(ino)->layout;
526 if ((int)(barrier - lo->plh_barrier) > 0)
527 lo->plh_barrier = barrier;
528 spin_unlock(&ino->i_lock);
529}
530
531bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
532{
533 struct nfs_inode *nfsi = NFS_I(ino);
534 struct pnfs_layout_segment *lseg;
535 bool found = false;
536
537 spin_lock(&ino->i_lock);
538 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
539 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
540 found = true;
541 break;
542 }
543 if (!found) {
544 struct pnfs_layout_hdr *lo = nfsi->layout;
545 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
546
547 /* Since close does not return a layout stateid for use as
548 * a barrier, we choose the worst-case barrier.
549 */
550 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
551 }
552 spin_unlock(&ino->i_lock);
553 return found;
554}
555
477/* 556/*
478 * Compare two layout segments for sorting into layout cache. 557 * Compare two layout segments for sorting into layout cache.
479 * We want to preferentially return RW over RO layouts, so ensure those 558 * We want to preferentially return RW over RO layouts, so ensure those
@@ -732,6 +811,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
732 *lgp->lsegpp = lseg; 811 *lgp->lsegpp = lseg;
733 pnfs_insert_layout(lo, lseg); 812 pnfs_insert_layout(lo, lseg);
734 813
814 if (res->return_on_close) {
815 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
816 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
817 }
818
735 /* Done processing layoutget. Set the layout stateid */ 819 /* Done processing layoutget. Set the layout stateid */
736 pnfs_set_layout_stateid(lo, &res->stateid, false); 820 pnfs_set_layout_stateid(lo, &res->stateid, false);
737 spin_unlock(&ino->i_lock); 821 spin_unlock(&ino->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f91d0d45551c..e2612ea0cbed 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
32 32
33enum { 33enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */
35}; 36};
36 37
37struct pnfs_layout_segment { 38struct pnfs_layout_segment {
@@ -50,6 +51,7 @@ enum {
50 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ 51 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
51 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 52 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
52 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 53 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
54 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
53 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 55 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
54}; 56};
55 57
@@ -72,6 +74,7 @@ struct pnfs_layout_hdr {
72 struct list_head plh_segs; /* layout segments list */ 74 struct list_head plh_segs; /* layout segments list */
73 nfs4_stateid plh_stateid; 75 nfs4_stateid plh_stateid;
74 atomic_t plh_outstanding; /* number of RPCs out */ 76 atomic_t plh_outstanding; /* number of RPCs out */
77 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
75 u32 plh_barrier; /* ignore lower seqids */ 78 u32 plh_barrier; /* ignore lower seqids */
76 unsigned long plh_flags; 79 unsigned long plh_flags;
77 struct inode *plh_inode; 80 struct inode *plh_inode;
@@ -162,6 +165,10 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
162int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 165int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
163 struct list_head *tmp_list, 166 struct list_head *tmp_list,
164 u32 iomode); 167 u32 iomode);
168bool pnfs_roc(struct inode *ino);
169void pnfs_roc_release(struct inode *ino);
170void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
171bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
165 172
166 173
167static inline int lo_fail_bit(u32 iomode) 174static inline int lo_fail_bit(u32 iomode)
@@ -193,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
193 return NULL; 200 return NULL;
194} 201}
195 202
/*
 * Inline stub for pnfs_roc(): always reports that no return-on-close
 * handling is needed.
 * NOTE(review): presumably the variant used when pNFS support is not
 * built; the guarding preprocessor condition is outside this hunk.
 */
203static inline bool
204pnfs_roc(struct inode *ino)
205{
206 return false;
207}
207}
208
/*
 * Inline stub for pnfs_roc_release(): does nothing.
 * NOTE(review): presumably the variant used when pNFS support is not
 * built; the guarding preprocessor condition is outside this hunk.
 */
209static inline void
210pnfs_roc_release(struct inode *ino)
211{
212}
213
/*
 * Inline stub for pnfs_roc_set_barrier(): ignores both arguments.
 * NOTE(review): presumably the variant used when pNFS support is not
 * built; the guarding preprocessor condition is outside this hunk.
 */
214static inline void
215pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
216{
217}
218
/*
 * Inline stub for pnfs_roc_drain(): always returns false and never
 * writes *barrier.
 * NOTE(review): presumably the variant used when pNFS support is not
 * built; the guarding preprocessor condition is outside this hunk.
 */
219static inline bool
220pnfs_roc_drain(struct inode *ino, u32 *barrier)
221{
222 return false;
223}
223}
224
196static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) 225static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
197{ 226{
198} 227}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e93ada0565fc..7f20c0b47a91 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -149,6 +149,7 @@ struct nfs_server {
149 that are supported on this 149 that are supported on this
150 filesystem */ 150 filesystem */
151 struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ 151 struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
152 struct rpc_wait_queue roc_rpcwaitq;
152#endif 153#endif
153 void (*destroy)(struct nfs_server *); 154 void (*destroy)(struct nfs_server *);
154 155