author		Linus Torvalds <torvalds@linux-foundation.org>	2013-02-21 12:23:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-21 12:23:01 -0500
commit		2171ee8f43968e8d6a2b4712d495e352e881c446 (patch)
tree		354078e804c9d721b246c99f1c272d85b92acfb1 /fs
parent		9b9a72a8a370a1397fbb153d107e0d9fa8affb48 (diff)
parent		666b3d803a511fbc9bc5e5ea8ce66010cf03ea13 (diff)
Merge tag 'nfs-for-3.9-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client bugfixes from Trond Myklebust:

 - Fix an Oops in the pNFS layoutget code

 - Fix a number of NFSv4 and v4.1 state recovery deadlocks and hangs due
   to the interaction of the session drain lock and state management
   locks.

 - Remove task->tk_xprt, which was hiding a lot of RCU dereferencing bugs

 - Fix a long standing NFSv3 posix lock recovery bug.

 - Revert commit 324d003b0cd8 ("NFS: add nfs_sb_deactive_async to avoid
   deadlock").  It turned out that the root cause of the deadlock was due
   to interactions with the workqueues that have now been resolved.

* tag 'nfs-for-3.9-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (22 commits)
  NLM: Ensure that we resend all pending blocking locks after a reclaim
  umount oops when remove blocklayoutdriver first
  sunrpc: silence build warning in gss_fill_context
  nfs: remove kfree() redundant null checks
  NFSv4.1: Don't decode skipped layoutgets
  NFSv4.1: Fix bulk recall and destroy of layouts
  NFSv4.1: Fix an ABBA locking issue with session and state serialisation
  NFSv4: Fix a reboot recovery race when opening a file
  NFSv4: Ensure delegation recall and byte range lock removal don't conflict
  NFSv4: Fix up the return values of nfs4_open_delegation_recall
  NFSv4.1: Don't lose locks when a server reboots during delegation return
  NFSv4.1: Prevent deadlocks between state recovery and file locking
  NFSv4: Allow the state manager to mark an open_owner as being recovered
  SUNRPC: Add missing static declaration to _gss_mech_get_by_name
  Revert "NFS: add nfs_sb_deactive_async to avoid deadlock"
  SUNRPC: Nuke the tk_xprt macro
  SUNRPC: Avoid RCU dereferences in the transport bind and connect code
  SUNRPC: Fix an RCU dereference in xprt_reserve
  SUNRPC: Pass pointers to struct rpc_xprt to the congestion window
  SUNRPC: Fix an RCU dereference in xs_local_rpcbind
  ...
Diffstat (limited to 'fs')
-rw-r--r--	fs/lockd/clntproc.c			  3
-rw-r--r--	fs/nfs/blocklayout/blocklayout.c	  1
-rw-r--r--	fs/nfs/callback_proc.c			 61
-rw-r--r--	fs/nfs/delegation.c			154
-rw-r--r--	fs/nfs/delegation.h			  1
-rw-r--r--	fs/nfs/getroot.c			  3
-rw-r--r--	fs/nfs/inode.c				  5
-rw-r--r--	fs/nfs/internal.h			  1
-rw-r--r--	fs/nfs/nfs4_fs.h			  4
-rw-r--r--	fs/nfs/nfs4proc.c			133
-rw-r--r--	fs/nfs/nfs4state.c			 11
-rw-r--r--	fs/nfs/objlayout/objio_osd.c		  1
-rw-r--r--	fs/nfs/pnfs.c				150
-rw-r--r--	fs/nfs/pnfs.h				  7
-rw-r--r--	fs/nfs/super.c				 49
-rw-r--r--	fs/nfs/unlink.c				  5
16 files changed, 370 insertions, 219 deletions
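Editor's note: several hunks below (nfs4_fs.h, nfs4state.c, nfs4proc.c, delegation.c) serialise open/lock callers against the state manager through a new so_reclaim_seqcount/so_delegreturn_mutex pair on the open owner: the recovery pass bumps the sequence count while it runs, and lock/open paths sample it before issuing the RPC and retry (or trigger recovery) if it changed. The sketch below is a minimal userspace analogue of that retry pattern using C11 atomics; it is an illustration only, the helper names are made up, and it deliberately does not use the kernel's seqcount_t / raw_seqcount_begin() / read_seqcount_retry() API.

/*
 * Userspace sketch of the so_reclaim_seqcount retry pattern (hypothetical
 * names, not kernel code): the recovery "writer" brackets its pass with two
 * increments; "readers" sample the counter before the operation and treat a
 * change (or an odd value) as "recovery ran concurrently, retry".
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct state_owner {
	atomic_uint reclaim_seq;	/* even: idle, odd: recovery in progress */
};

/* writer side: bracket a state-recovery pass */
static void reclaim_begin(struct state_owner *sp)
{
	atomic_fetch_add_explicit(&sp->reclaim_seq, 1, memory_order_release);
}

static void reclaim_end(struct state_owner *sp)
{
	atomic_fetch_add_explicit(&sp->reclaim_seq, 1, memory_order_release);
}

/* reader side: sample before issuing the operation... */
static unsigned int reclaim_read_begin(struct state_owner *sp)
{
	return atomic_load_explicit(&sp->reclaim_seq, memory_order_acquire);
}

/* ...and decide afterwards whether the result may be stale */
static bool reclaim_read_retry(struct state_owner *sp, unsigned int seq)
{
	unsigned int now = atomic_load_explicit(&sp->reclaim_seq,
						memory_order_acquire);
	return (seq & 1) || now != seq;
}

int main(void)
{
	struct state_owner sp;
	unsigned int seq;

	atomic_init(&sp.reclaim_seq, 0);

	seq = reclaim_read_begin(&sp);
	/* ...a LOCK/OPEN RPC would be issued here... */
	reclaim_begin(&sp);	/* state recovery runs concurrently */
	reclaim_end(&sp);
	if (reclaim_read_retry(&sp, seq))
		printf("recovery ran concurrently: retry or recover state\n");
	return 0;
}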
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..52e5120bb159 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -550,6 +550,9 @@ again:
 		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
 		if (status < 0)
 			break;
+		/* Resend the blocking lock request after a server reboot */
+		if (resp->status ==  nlm_lck_denied_grace_period)
+			continue;
 		if (resp->status != nlm_lck_blocked)
 			break;
 	}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
 static struct pnfs_layoutdriver_type blocklayout_type = {
 	.id				= LAYOUT_BLOCK_VOLUME,
 	.name				= "LAYOUT_BLOCK_VOLUME",
+	.owner				= THIS_MODULE,
 	.read_pagelist			= bl_read_pagelist,
 	.write_pagelist			= bl_write_pagelist,
 	.alloc_layout_hdr		= bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 264d1aa935f2..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 static u32 initiate_bulk_draining(struct nfs_client *clp,
 				  struct cb_layoutrecallargs *args)
 {
-	struct nfs_server *server;
-	struct pnfs_layout_hdr *lo;
-	struct inode *ino;
-	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
-	struct pnfs_layout_hdr *tmp;
-	LIST_HEAD(recall_list);
-	LIST_HEAD(free_me_list);
-	struct pnfs_layout_range range = {
-		.iomode = IOMODE_ANY,
-		.offset = 0,
-		.length = NFS4_MAX_UINT64,
-	};
-
-	spin_lock(&clp->cl_lock);
-	rcu_read_lock();
-	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
-		if ((args->cbl_recall_type == RETURN_FSID) &&
-		    memcmp(&server->fsid, &args->cbl_fsid,
-			   sizeof(struct nfs_fsid)))
-			continue;
+	int stat;
 
-		list_for_each_entry(lo, &server->layouts, plh_layouts) {
-			ino = igrab(lo->plh_inode);
-			if (!ino)
-				continue;
-			spin_lock(&ino->i_lock);
-			/* Is this layout in the process of being freed? */
-			if (NFS_I(ino)->layout != lo) {
-				spin_unlock(&ino->i_lock);
-				iput(ino);
-				continue;
-			}
-			pnfs_get_layout_hdr(lo);
-			spin_unlock(&ino->i_lock);
-			list_add(&lo->plh_bulk_recall, &recall_list);
-		}
-	}
-	rcu_read_unlock();
-	spin_unlock(&clp->cl_lock);
-
-	list_for_each_entry_safe(lo, tmp,
-				 &recall_list, plh_bulk_recall) {
-		ino = lo->plh_inode;
-		spin_lock(&ino->i_lock);
-		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-		if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
-			rv = NFS4ERR_DELAY;
-		list_del_init(&lo->plh_bulk_recall);
-		spin_unlock(&ino->i_lock);
-		pnfs_free_lseg_list(&free_me_list);
-		pnfs_put_layout_hdr(lo);
-		iput(ino);
-	}
-	return rv;
+	if (args->cbl_recall_type == RETURN_FSID)
+		stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+	else
+		stat = pnfs_destroy_layouts_byclid(clp, true);
+	if (stat != 0)
+		return NFS4ERR_DELAY;
+	return NFS4ERR_NOMATCHING_LAYOUT;
 }
 
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
 	flags &= FMODE_READ|FMODE_WRITE;
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(inode)->delegation);
-	if (delegation != NULL && (delegation->type & flags) == flags) {
+	if (delegation != NULL && (delegation->type & flags) == flags &&
+	    !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
 		nfs_mark_delegation_referenced(delegation);
 		ret = 1;
 	}
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 	int status = 0;
 
 	if (inode->i_flock == NULL)
-		goto out;
+		return 0;
 
+	if (inode->i_flock == NULL)
+		goto out;
 	/* Protect inode->i_flock using the file locks lock */
 	lock_flocks();
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
+	struct nfs4_state_owner *sp;
 	struct nfs4_state *state;
+	unsigned int seq;
 	int err;
 
 again:
@@ -109,9 +114,16 @@ again:
 			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&inode->i_lock);
+		sp = state->owner;
+		/* Block nfs4_proc_unlck */
+		mutex_lock(&sp->so_delegreturn_mutex);
+		seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
 		err = nfs4_open_delegation_recall(ctx, state, stateid);
-		if (err >= 0)
+		if (!err)
 			err = nfs_delegation_claim_locks(ctx, state);
+		if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+			err = -EAGAIN;
+		mutex_unlock(&sp->so_delegreturn_mutex);
 		put_nfs_open_context(ctx);
 		if (err != 0)
 			return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
 }
 
 static struct nfs_delegation *
+nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
+{
+	struct nfs_delegation *ret = NULL;
+	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+	if (delegation == NULL)
+		goto out;
+	spin_lock(&delegation->lock);
+	if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+		ret = delegation;
+	spin_unlock(&delegation->lock);
+out:
+	return ret;
+}
+
+static struct nfs_delegation *
+nfs_start_delegation_return(struct nfs_inode *nfsi)
+{
+	struct nfs_delegation *delegation;
+
+	rcu_read_lock();
+	delegation = nfs_start_delegation_return_locked(nfsi);
+	rcu_read_unlock();
+	return delegation;
+}
+
+static void
+nfs_abort_delegation_return(struct nfs_delegation *delegation,
+		struct nfs_client *clp)
+{
+
+	spin_lock(&delegation->lock);
+	clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+	set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+	spin_unlock(&delegation->lock);
+	set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+}
+
+static struct nfs_delegation *
 nfs_detach_delegation_locked(struct nfs_inode *nfsi,
-		struct nfs_server *server)
+		struct nfs_delegation *delegation,
+		struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation =
+	struct nfs_delegation *deleg_cur =
 		rcu_dereference_protected(nfsi->delegation,
-				lockdep_is_held(&server->nfs_client->cl_lock));
+				lockdep_is_held(&clp->cl_lock));
 
-	if (delegation == NULL)
-		goto nomatch;
+	if (deleg_cur == NULL || delegation != deleg_cur)
+		return NULL;
 
 	spin_lock(&delegation->lock);
+	set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
 	list_del_rcu(&delegation->super_list);
 	delegation->inode = NULL;
 	nfsi->delegation_state = 0;
 	rcu_assign_pointer(nfsi->delegation, NULL);
 	spin_unlock(&delegation->lock);
 	return delegation;
-nomatch:
-	return NULL;
 }
 
 static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
-		struct nfs_server *server)
+		struct nfs_delegation *delegation,
+		struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
-	struct nfs_delegation *delegation;
 
 	spin_lock(&clp->cl_lock);
-	delegation = nfs_detach_delegation_locked(nfsi, server);
+	delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
 	spin_unlock(&clp->cl_lock);
 	return delegation;
 }
 
+static struct nfs_delegation *
+nfs_inode_detach_delegation(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_delegation *delegation;
+
+	delegation = nfs_start_delegation_return(nfsi);
+	if (delegation == NULL)
+		return NULL;
+	return nfs_detach_delegation(nfsi, delegation, server);
+}
+
 /**
  * nfs_inode_set_delegation - set up a delegation on an inode
  * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			delegation = NULL;
 			goto out;
 		}
-		freeme = nfs_detach_delegation_locked(nfsi, server);
+		freeme = nfs_detach_delegation_locked(nfsi,
+				old_delegation, clp);
+		if (freeme == NULL)
+			goto out;
 	}
 	list_add_rcu(&delegation->super_list, &server->delegations);
 	nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
 /*
  * Basic procedure for returning a delegation to the server
  */
-static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
 {
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int err;
 
-	/*
-	 * Guard against new delegated open/lock/unlock calls and against
-	 * state recovery
-	 */
-	down_write(&nfsi->rwsem);
-	err = nfs_delegation_claim_opens(inode, &delegation->stateid);
-	up_write(&nfsi->rwsem);
-	if (err)
+	if (delegation == NULL)
+		return 0;
+	do {
+		err = nfs_delegation_claim_opens(inode, &delegation->stateid);
+		if (!issync || err != -EAGAIN)
+			break;
+		/*
+		 * Guard against state recovery
+		 */
+		err = nfs4_wait_clnt_recover(clp);
+	} while (err == 0);
+
+	if (err) {
+		nfs_abort_delegation_return(delegation, clp);
+		goto out;
+	}
+	if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
 		goto out;
 
 	err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
 		inode = nfs_delegation_grab_inode(delegation);
 		if (inode == NULL)
 			continue;
-		delegation = nfs_detach_delegation(NFS_I(inode),
-								server);
+		delegation = nfs_start_delegation_return_locked(NFS_I(inode));
 		rcu_read_unlock();
 
-		if (delegation != NULL)
-			err = __nfs_inode_return_delegation(inode,
-						delegation, 0);
+		err = nfs_end_delegation_return(inode, delegation, 0);
 		iput(inode);
 		if (!err)
 			goto restart;
@@ -367,15 +441,11 @@ restart:
  */
 void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
-	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		delegation = nfs_detach_delegation(nfsi, server);
-		if (delegation != NULL)
-			nfs_do_return_delegation(inode, delegation, 0);
-	}
+	delegation = nfs_inode_detach_delegation(inode);
+	if (delegation != NULL)
+		nfs_do_return_delegation(inode, delegation, 0);
 }
 
 /**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
  */
 int nfs4_inode_return_delegation(struct inode *inode)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int err = 0;
 
 	nfs_wb_all(inode);
-	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		delegation = nfs_detach_delegation(nfsi, server);
-		if (delegation != NULL) {
-			err = __nfs_inode_return_delegation(inode, delegation, 1);
-		}
-	}
+	delegation = nfs_start_delegation_return(nfsi);
+	if (delegation != NULL)
+		err = nfs_end_delegation_return(inode, delegation, 1);
 	return err;
 }
 
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
 {
 	struct nfs_delegation *delegation;
 
-	delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
+	delegation = nfs_inode_detach_delegation(inode);
 	if (delegation) {
 		nfs_inode_find_state_and_recover(inode, &delegation->stateid);
 		nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
 		if (inode == NULL)
 			continue;
 		delegation = nfs_detach_delegation(NFS_I(inode),
-								server);
+						delegation, server);
 		rcu_read_unlock();
 
 		if (delegation != NULL)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
 	NFS_DELEGATION_NEED_RECLAIM = 0,
 	NFS_DELEGATION_RETURN,
 	NFS_DELEGATION_REFERENCED,
+	NFS_DELEGATION_RETURNING,
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
 	}
 	spin_unlock(&ret->d_lock);
 out:
-	if (name)
-		kfree(name);
+	kfree(name);
 	nfs_free_fattr(fsinfo.fattr);
 	return ret;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..6acc73c80d7f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	if (ctx->cred != NULL)
 		put_rpccred(ctx->cred);
 	dput(ctx->dentry);
-	if (is_sync)
-		nfs_sb_deactive(sb);
-	else
-		nfs_sb_deactive_async(sb);
+	nfs_sb_deactive(sb);
 	kfree(ctx->mdsthreshold);
 	kfree(ctx);
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 extern void nfs_sb_active(struct super_block *sb);
 extern void nfs_sb_deactive(struct super_block *sb);
-extern void nfs_sb_deactive_async(struct super_block *sb);
 
 /* namespace.c */
 #define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
 
 #define NFS4_MAX_LOOP_ON_RECOVER (10)
 
+#include <linux/seqlock.h>
+
 struct idmap;
 
 enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
 	unsigned long	     so_flags;
 	struct list_head     so_states;
 	struct nfs_seqid_counter so_seqid;
+	seqcount_t	     so_reclaim_seqcount;
+	struct mutex	     so_delegreturn_mutex;
 };
 
 enum {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf747ef86650..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
 		return 0;
 	if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
 		return 0;
+	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+		return 0;
 	nfs_mark_delegation_referenced(delegation);
 	return 1;
 }
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 
 	spin_lock(&deleg_cur->lock);
 	if (nfsi->delegation != deleg_cur ||
+	   test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
 	    (deleg_cur->type & fmode) != fmode)
 		goto no_delegation_unlock;
 
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
 		case -NFS4ERR_BAD_HIGH_SLOT:
 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 		case -NFS4ERR_DEADSESSION:
+			set_bit(NFS_DELEGATED_STATE, &state->flags);
 			nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+			err = -EAGAIN;
 			goto out;
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
+			set_bit(NFS_DELEGATED_STATE, &state->flags);
 		case -NFS4ERR_EXPIRED:
 			/* Don't recall a delegation if it was lost */
 			nfs4_schedule_lease_recovery(server->nfs_client);
+			err = -EAGAIN;
 			goto out;
-		case -ERESTARTSYS:
-			/*
-			 * The show must go on: exit, but mark the
-			 * stateid as needing recovery.
-			 */
 		case -NFS4ERR_DELEG_REVOKED:
 		case -NFS4ERR_ADMIN_REVOKED:
 		case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
 			err = 0;
 			goto out;
 		}
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	struct nfs4_state_owner *sp = data->owner;
 
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
-		return;
+		goto out_wait;
 	/*
 	 * Check if we still need to send an OPEN call, or if we can use
 	 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
 	rcu_read_unlock();
 out_no_action:
 	task->tk_action = NULL;
+out_wait:
 	nfs4_sequence_done(task, &data->o_res.seq_res);
 }
 
@@ -1845,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 		sattr->ia_valid |= ATTR_MTIME;
 }
 
+static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
+		fmode_t fmode,
+		int flags,
+		struct nfs4_state **res)
+{
+	struct nfs4_state_owner *sp = opendata->owner;
+	struct nfs_server *server = sp->so_server;
+	struct nfs4_state *state;
+	unsigned int seq;
+	int ret;
+
+	seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+
+	ret = _nfs4_proc_open(opendata);
+	if (ret != 0)
+		goto out;
+
+	state = nfs4_opendata_to_nfs4_state(opendata);
+	ret = PTR_ERR(state);
+	if (IS_ERR(state))
+		goto out;
+	if (server->caps & NFS_CAP_POSIX_LOCK)
+		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
+
+	ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
+	if (ret != 0)
+		goto out;
+
+	if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+		nfs4_schedule_stateid_recovery(server, state);
+		nfs4_wait_clnt_recover(server->nfs_client);
+	}
+	*res = state;
+out:
+	return ret;
+}
+
 /*
  * Returns a referenced nfs4_state
  */
@@ -1889,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
 	if (dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
 
-	status = _nfs4_proc_open(opendata);
-	if (status != 0)
-		goto err_opendata_put;
-
-	state = nfs4_opendata_to_nfs4_state(opendata);
-	status = PTR_ERR(state);
-	if (IS_ERR(state))
-		goto err_opendata_put;
-	if (server->caps & NFS_CAP_POSIX_LOCK)
-		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
-
-	status = nfs4_opendata_access(cred, opendata, state, fmode, flags);
+	status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
 	if (status != 0)
 		goto err_opendata_put;
 
@@ -2088,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
-	nfs_sb_deactive_async(sb);
+	nfs_sb_deactive(sb);
 	kfree(calldata);
 }
 
@@ -2150,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 
 	dprintk("%s: begin!\n", __func__);
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
-		return;
+		goto out_wait;
 
 	task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 	calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2172,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 
 	if (!call_close) {
 		/* Note: exit _without_ calling nfs4_close_done */
-		task->tk_action = NULL;
-		nfs4_sequence_done(task, &calldata->res.seq_res);
-		goto out;
+		goto out_no_action;
 	}
 
 	if (calldata->arg.fmode == 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
 		if (calldata->roc &&
 		    pnfs_roc_drain(inode, &calldata->roc_barrier, task))
-			goto out;
+			goto out_wait;
 	}
 
 	nfs_fattr_init(calldata->res.fattr);
@@ -2191,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 				&calldata->res.seq_res,
 				task) != 0)
 		nfs_release_seqid(calldata->arg.seqid);
-out:
 	dprintk("%s: done!\n", __func__);
+	return;
+out_no_action:
+	task->tk_action = NULL;
+out_wait:
+	nfs4_sequence_done(task, &calldata->res.seq_res);
 }
 
 static const struct rpc_call_ops nfs4_close_ops = {
@@ -4423,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 	struct nfs4_unlockdata *calldata = data;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
-		return;
+		goto out_wait;
 	if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
 		/* Note: exit _without_ running nfs4_locku_done */
-		task->tk_action = NULL;
-		nfs4_sequence_done(task, &calldata->res.seq_res);
-		return;
+		goto out_no_action;
 	}
 	calldata->timestamp = jiffies;
 	if (nfs4_setup_sequence(calldata->server,
@@ -4436,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 				&calldata->res.seq_res,
 				task) != 0)
 		nfs_release_seqid(calldata->arg.seqid);
+	return;
+out_no_action:
+	task->tk_action = NULL;
+out_wait:
+	nfs4_sequence_done(task, &calldata->res.seq_res);
 }
 
 static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4482,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct inode *inode = state->inode;
+	struct nfs4_state_owner *sp = state->owner;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_seqid *seqid;
 	struct nfs4_lock_state *lsp;
 	struct rpc_task *task;
@@ -4492,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	status = nfs4_set_lock_state(state, request);
 	/* Unlock _before_ we do the RPC call */
 	request->fl_flags |= FL_EXISTS;
+	/* Exclude nfs_delegation_claim_locks() */
+	mutex_lock(&sp->so_delegreturn_mutex);
+	/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
 	down_read(&nfsi->rwsem);
 	if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
 		up_read(&nfsi->rwsem);
+		mutex_unlock(&sp->so_delegreturn_mutex);
 		goto out;
 	}
 	up_read(&nfsi->rwsem);
+	mutex_unlock(&sp->so_delegreturn_mutex);
 	if (status != 0)
 		goto out;
 	/* Is this a delegated lock? */
@@ -4576,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
-		return;
+		goto out_wait;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
 		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4596,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	nfs_release_seqid(data->arg.open_seqid);
 out_release_lock_seqid:
 	nfs_release_seqid(data->arg.lock_seqid);
+out_wait:
+	nfs4_sequence_done(task, &data->res.seq_res);
 	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
 
@@ -4813,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
+	struct nfs4_state_owner *sp = state->owner;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	unsigned char fl_flags = request->fl_flags;
+	unsigned int seq;
 	int status = -ENOLCK;
 
 	if ((fl_flags & FL_POSIX) &&
@@ -4836,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 		status = do_vfs_lock(request->fl_file, request);
 		goto out_unlock;
 	}
+	seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+	up_read(&nfsi->rwsem);
 	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
 	if (status != 0)
+		goto out;
+	down_read(&nfsi->rwsem);
+	if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+		status = -NFS4ERR_DELAY;
 		goto out_unlock;
+	}
 	/* Note: we always want to sleep here! */
 	request->fl_flags = fl_flags | FL_SLEEP;
 	if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4945,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
 	case 0:
 	case -ESTALE:
 		goto out;
-	case -NFS4ERR_EXPIRED:
-		nfs4_schedule_stateid_recovery(server, state);
 	case -NFS4ERR_STALE_CLIENTID:
 	case -NFS4ERR_STALE_STATEID:
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+	case -NFS4ERR_EXPIRED:
 		nfs4_schedule_lease_recovery(server->nfs_client);
+		err = -EAGAIN;
 		goto out;
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
 	case -NFS4ERR_BAD_HIGH_SLOT:
 	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 	case -NFS4ERR_DEADSESSION:
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+		err = -EAGAIN;
 		goto out;
-	case -ERESTARTSYS:
-		/*
-		 * The show must go on: exit, but mark the
-		 * stateid as needing recovery.
-		 */
 	case -NFS4ERR_DELEG_REVOKED:
 	case -NFS4ERR_ADMIN_REVOKED:
 	case -NFS4ERR_BAD_STATEID:
@@ -4975,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
 		/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 		err = 0;
 		goto out;
-	case -NFS4ERR_DELAY:
-		break;
 	}
+	set_bit(NFS_DELEGATED_STATE, &state->flags);
 	err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 out:
@@ -6134,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status == 0)
 		status = task->tk_status;
-	if (status == 0)
+	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e61f68d5ef21..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -518,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
 	nfs4_init_seqid_counter(&sp->so_seqid);
 	atomic_set(&sp->so_count, 1);
 	INIT_LIST_HEAD(&sp->so_lru);
+	seqcount_init(&sp->so_reclaim_seqcount);
+	mutex_init(&sp->so_delegreturn_mutex);
 	return sp;
 }
 
@@ -1390,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
 	 * recovering after a network partition or a reboot from a
 	 * server that doesn't support a grace period.
 	 */
-restart:
 	spin_lock(&sp->so_lock);
+	write_seqcount_begin(&sp->so_reclaim_seqcount);
+restart:
 	list_for_each_entry(state, &sp->so_states, open_states) {
 		if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
 			continue;
@@ -1412,6 +1415,7 @@ restart:
 			}
 			spin_unlock(&state->state_lock);
 			nfs4_put_open_state(state);
+			spin_lock(&sp->so_lock);
 			goto restart;
 		}
 	}
@@ -1449,12 +1453,17 @@ restart:
 			goto out_err;
 		}
 		nfs4_put_open_state(state);
+		spin_lock(&sp->so_lock);
 		goto restart;
 	}
+	write_seqcount_end(&sp->so_reclaim_seqcount);
 	spin_unlock(&sp->so_lock);
 	return 0;
 out_err:
 	nfs4_put_open_state(state);
+	spin_lock(&sp->so_lock);
+	write_seqcount_end(&sp->so_reclaim_seqcount);
+	spin_unlock(&sp->so_lock);
 	return status;
 }
 
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
 	.flags                   = PNFS_LAYOUTRET_ON_SETATTR |
 				   PNFS_LAYOUTRET_ON_ERROR,
 
+	.owner		       	 = THIS_MODULE,
 	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
 	.free_layout_hdr         = objlayout_free_layout_hdr,
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d00260b08103..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 }
 EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
 
-/*
- * Called by the state manger to remove all layouts established under an
- * expired lease.
- */
-void
-pnfs_destroy_all_layouts(struct nfs_client *clp)
+static bool
+pnfs_layout_add_bulk_destroy_list(struct inode *inode,
+		struct list_head *layout_list)
 {
-	struct nfs_server *server;
 	struct pnfs_layout_hdr *lo;
-	LIST_HEAD(tmp_list);
+	bool ret = false;
 
-	nfs4_deviceid_mark_client_invalid(clp);
-	nfs4_deviceid_purge_client(clp);
+	spin_lock(&inode->i_lock);
+	lo = NFS_I(inode)->layout;
+	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
+		pnfs_get_layout_hdr(lo);
+		list_add(&lo->plh_bulk_destroy, layout_list);
+		ret = true;
+	}
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+/* Caller must hold rcu_read_lock and clp->cl_lock */
+static int
+pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
+		struct nfs_server *server,
+		struct list_head *layout_list)
+{
+	struct pnfs_layout_hdr *lo, *next;
+	struct inode *inode;
+
+	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
+		inode = igrab(lo->plh_inode);
+		if (inode == NULL)
+			continue;
+		list_del_init(&lo->plh_layouts);
+		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
+			continue;
+		rcu_read_unlock();
+		spin_unlock(&clp->cl_lock);
+		iput(inode);
+		spin_lock(&clp->cl_lock);
+		rcu_read_lock();
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static int
+pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
+		bool is_bulk_recall)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *inode;
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+	LIST_HEAD(lseg_list);
+	int ret = 0;
+
+	while (!list_empty(layout_list)) {
+		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
+				plh_bulk_destroy);
+		dprintk("%s freeing layout for inode %lu\n", __func__,
+			lo->plh_inode->i_ino);
+		inode = lo->plh_inode;
+		spin_lock(&inode->i_lock);
+		list_del_init(&lo->plh_bulk_destroy);
+		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
+		if (is_bulk_recall)
+			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+			ret = -EAGAIN;
+		spin_unlock(&inode->i_lock);
+		pnfs_free_lseg_list(&lseg_list);
+		pnfs_put_layout_hdr(lo);
+		iput(inode);
+	}
+	return ret;
+}
+
+int
+pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
+		struct nfs_fsid *fsid,
+		bool is_recall)
+{
+	struct nfs_server *server;
+	LIST_HEAD(layout_list);
 
 	spin_lock(&clp->cl_lock);
 	rcu_read_lock();
+restart:
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
-		if (!list_empty(&server->layouts))
-			list_splice_init(&server->layouts, &tmp_list);
+		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
+			continue;
+		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
+					server,
+					&layout_list) != 0)
+			goto restart;
 	}
 	rcu_read_unlock();
 	spin_unlock(&clp->cl_lock);
 
-	while (!list_empty(&tmp_list)) {
-		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
-				plh_layouts);
-		dprintk("%s freeing layout for inode %lu\n", __func__,
-			lo->plh_inode->i_ino);
-		list_del_init(&lo->plh_layouts);
-		pnfs_destroy_layout(NFS_I(lo->plh_inode));
+	if (list_empty(&layout_list))
+		return 0;
+	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+}
+
+int
+pnfs_destroy_layouts_byclid(struct nfs_client *clp,
+		bool is_recall)
+{
+	struct nfs_server *server;
+	LIST_HEAD(layout_list);
+
+	spin_lock(&clp->cl_lock);
+	rcu_read_lock();
+restart:
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
+					server,
+					&layout_list) != 0)
+			goto restart;
 	}
+	rcu_read_unlock();
+	spin_unlock(&clp->cl_lock);
+
+	if (list_empty(&layout_list))
+		return 0;
+	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+}
+
+/*
+ * Called by the state manger to remove all layouts established under an
+ * expired lease.
+ */
+void
+pnfs_destroy_all_layouts(struct nfs_client *clp)
+{
+	nfs4_deviceid_mark_client_invalid(clp);
+	nfs4_deviceid_purge_client(clp);
+
+	pnfs_destroy_layouts_byclid(clp, false);
 }
 
 /*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
 	atomic_set(&lo->plh_refcount, 1);
 	INIT_LIST_HEAD(&lo->plh_layouts);
 	INIT_LIST_HEAD(&lo->plh_segs);
-	INIT_LIST_HEAD(&lo->plh_bulk_recall);
+	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
 	lo->plh_inode = ino;
 	lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
 	return lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
 struct pnfs_layout_hdr {
 	atomic_t		plh_refcount;
 	struct list_head	plh_layouts;   /* other client layouts */
-	struct list_head	plh_bulk_recall; /* clnt list of bulk recalls */
+	struct list_head	plh_bulk_destroy;
 	struct list_head	plh_segs;      /* layout segments list */
 	nfs4_stateid		plh_stateid;
 	atomic_t		plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
+int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
+		struct nfs_fsid *fsid,
+		bool is_recall);
+int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
+		bool is_recall);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 			     const nfs4_stateid *new,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b056b1628722..befbae0cce41 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,7 +54,6 @@
 #include <linux/parser.h>
 #include <linux/nsproxy.h>
 #include <linux/rcupdate.h>
-#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -418,54 +417,6 @@ void nfs_sb_deactive(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(nfs_sb_deactive);
 
-static int nfs_deactivate_super_async_work(void *ptr)
-{
-	struct super_block *sb = ptr;
-
-	deactivate_super(sb);
-	module_put_and_exit(0);
-	return 0;
-}
-
-/*
- * same effect as deactivate_super, but will do final unmount in kthread
- * context
- */
-static void nfs_deactivate_super_async(struct super_block *sb)
-{
-	struct task_struct *task;
-	char buf[INET6_ADDRSTRLEN + 1];
-	struct nfs_server *server = NFS_SB(sb);
-	struct nfs_client *clp = server->nfs_client;
-
-	if (!atomic_add_unless(&sb->s_active, -1, 1)) {
-		rcu_read_lock();
-		snprintf(buf, sizeof(buf),
-			rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-		rcu_read_unlock();
-
-		__module_get(THIS_MODULE);
-		task = kthread_run(nfs_deactivate_super_async_work, sb,
-				"%s-deactivate-super", buf);
-		if (IS_ERR(task)) {
-			pr_err("%s: kthread_run: %ld\n",
-				__func__, PTR_ERR(task));
-			/* make synchronous call and hope for the best */
-			deactivate_super(sb);
-			module_put(THIS_MODULE);
-		}
-	}
-}
-
-void nfs_sb_deactive_async(struct super_block *sb)
-{
-	struct nfs_server *server = NFS_SB(sb);
-
-	if (atomic_dec_and_test(&server->active))
-		nfs_deactivate_super_async(sb);
-}
-EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
-
 /*
  * Deliver file system statistics to userspace
  */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
 
 	nfs_dec_sillycount(data->dir);
 	nfs_free_unlinkdata(data);
-	nfs_sb_deactive_async(sb);
+	nfs_sb_deactive(sb);
 }
 
 static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 	 * point dentry is definitely not a root, so we won't need
 	 * that anymore.
 	 */
-	if (devname_garbage)
-		kfree(devname_garbage);
+	kfree(devname_garbage);
 	return 0;
 out_unlock:
 	spin_unlock(&dentry->d_lock);