author     J. Bruce Fields <bfields@redhat.com>    2013-04-26 11:37:29 -0400
committer  J. Bruce Fields <bfields@redhat.com>    2013-04-26 11:37:43 -0400
commit     c85b03ab200f6d2c2f80588d96d03c1f8fcaedc3
tree       34f4f3200ef5db6ca93360879fffda050c04e1ca
parent     bf8d909705e9d9bac31d9b8eac6734d2b51332a7
parent     fd068b200fb86e8fa52368e6f6088d2ab297a5d7
Merge Trond's nfs-for-next
Merging Trond's nfs-for-next branch, mainly to get b7993cebb841b0da7a33e9d5ce301a9fd3209165 "SUNRPC: Allow rpc_create() to request that TCP slots be unlimited", which a small piece of the gss-proxy work depends on.
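
For context, the new flag is consumed in the fs/nfs/client.c hunk below; a caller that wants uncapped TCP slots would request them at rpc_create() time roughly as follows (a minimal sketch only, not part of this merge; the program, version and address values are placeholders):

    struct rpc_create_args args = {
        .net        = net,
        .protocol   = XPRT_TRANSPORT_TCP,
        .address    = (struct sockaddr *)&addr,
        .addrsize   = sizeof(addr),
        .servername = "localhost",
        .program    = &example_program,   /* placeholder rpc_program */
        .version    = 1,                  /* placeholder version */
        .authflavor = RPC_AUTH_NULL,
        /* new with this merge: do not cap the TCP slot table */
        .flags      = RPC_CLNT_CREATE_INFINITE_SLOTS,
    };
    struct rpc_clnt *clnt = rpc_create(&args);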
-rw-r--r--  fs/lockd/clntlock.c                 |   3
-rw-r--r--  fs/lockd/clntproc.c                 |   3
-rw-r--r--  fs/nfs/blocklayout/blocklayoutdm.c  |   4
-rw-r--r--  fs/nfs/callback.c                   |   3
-rw-r--r--  fs/nfs/callback_proc.c              |   2
-rw-r--r--  fs/nfs/client.c                     |   2
-rw-r--r--  fs/nfs/delegation.c                 | 121
-rw-r--r--  fs/nfs/delegation.h                 |   5
-rw-r--r--  fs/nfs/dir.c                        |   2
-rw-r--r--  fs/nfs/file.c                       |   9
-rw-r--r--  fs/nfs/idmap.c                      |  13
-rw-r--r--  fs/nfs/inode.c                      |   8
-rw-r--r--  fs/nfs/internal.h                   |   7
-rw-r--r--  fs/nfs/nfs4_fs.h                    |  16
-rw-r--r--  fs/nfs/nfs4client.c                 |  26
-rw-r--r--  fs/nfs/nfs4filelayout.c             |  35
-rw-r--r--  fs/nfs/nfs4proc.c                   | 440
-rw-r--r--  fs/nfs/nfs4state.c                  |  81
-rw-r--r--  fs/nfs/nfs4xdr.c                    | 137
-rw-r--r--  fs/nfs/pagelist.c                   |  53
-rw-r--r--  fs/nfs/pnfs.c                       |  83
-rw-r--r--  fs/nfs/pnfs.h                       |   6
-rw-r--r--  fs/nfs/read.c                       |   2
-rw-r--r--  fs/nfs/write.c                      |   2
-rw-r--r--  include/linux/nfs_fs.h              |   8
-rw-r--r--  include/linux/nfs_fs_sb.h           |   3
-rw-r--r--  include/linux/nfs_xdr.h             |   3
-rw-r--r--  include/linux/sunrpc/clnt.h         |   1
-rw-r--r--  include/linux/sunrpc/xprt.h         |   5
-rw-r--r--  net/sunrpc/Kconfig                  |   1
-rw-r--r--  net/sunrpc/clnt.c                   |  43
-rw-r--r--  net/sunrpc/sched.c                  |   9
-rw-r--r--  net/sunrpc/xprt.c                   |  61
-rw-r--r--  net/sunrpc/xprtsock.c               |  14
34 files changed, 863 insertions, 348 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 0796c45d0d4d..01bfe7662751 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -144,6 +144,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 			timeout);
 	if (ret < 0)
 		return -ERESTARTSYS;
+	/* Reset the lock status after a server reboot so we resend */
+	if (block->b_status == nlm_lck_denied_grace_period)
+		block->b_status = nlm_lck_blocked;
 	req->a_res.status = block->b_status;
 	return 0;
 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 7e529c3c45c0..9760ecb9b60f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -550,9 +550,6 @@ again:
 		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
 		if (status < 0)
 			break;
-		/* Resend the blocking lock request after a server reboot */
-		if (resp->status == nlm_lck_denied_grace_period)
-			continue;
 		if (resp->status != nlm_lck_blocked)
 			break;
 	}
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17b..6fc7b5cae92b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
 
 	bl_pipe_msg.bl_wq = &nn->bl_wq;
 	memset(msg, 0, sizeof(*msg));
-	msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+	msg->len = sizeof(bl_msg) + bl_msg.totallen;
+	msg->data = kzalloc(msg->len, GFP_NOFS);
 	if (!msg->data)
 		goto out;
 
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
 	memcpy(msg->data, &bl_msg, sizeof(bl_msg));
 	dataptr = (uint8_t *) msg->data;
 	memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
-	msg->len = sizeof(bl_msg) + bl_msg.totallen;
 
 	add_wait_queue(&nn->bl_wq, &wq);
 	if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 5088b57b078a..cff089a412c7 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -125,6 +125,9 @@ nfs41_callback_svc(void *vrqstp)
 	set_freezable();
 
 	while (!kthread_should_stop()) {
+		if (try_to_freeze())
+			continue;
+
 		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
 		if (!list_empty(&serv->sv_cb_list)) {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2960512792c2..a13d26ede254 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -500,7 +500,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
 			      &args->craa_type_mask))
 		pnfs_recall_all_layouts(cps->clp);
 	if (flags)
-		nfs_expire_all_delegation_types(cps->clp, flags);
+		nfs_expire_unused_delegation_types(cps->clp, flags);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 84d8eae203a7..c513b0cc835f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -593,6 +593,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 		args.flags |= RPC_CLNT_CREATE_DISCRTRY;
 	if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+	if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
+		args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
 
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 6390a4b5fee7..57db3244f4d9 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -64,17 +64,15 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
 	return ret;
 }
 
-static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
+static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct inode *inode = state->inode;
 	struct file_lock *fl;
 	int status = 0;
 
 	if (inode->i_flock == NULL)
-		return 0;
-
-	if (inode->i_flock == NULL)
 		goto out;
+
 	/* Protect inode->i_flock using the file locks lock */
 	lock_flocks();
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -83,7 +81,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 		if (nfs_file_open_context(fl->fl_file) != ctx)
 			continue;
 		unlock_flocks();
-		status = nfs4_lock_delegation_recall(state, fl);
+		status = nfs4_lock_delegation_recall(fl, state, stateid);
 		if (status < 0)
 			goto out;
 		lock_flocks();
@@ -120,7 +118,7 @@ again:
 		seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
 		err = nfs4_open_delegation_recall(ctx, state, stateid);
 		if (!err)
-			err = nfs_delegation_claim_locks(ctx, state);
+			err = nfs_delegation_claim_locks(ctx, state, stateid);
 		if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
 			err = -EAGAIN;
 		mutex_unlock(&sp->so_delegreturn_mutex);
@@ -389,6 +387,24 @@ out:
 	return err;
 }
 
+static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
+{
+	bool ret = false;
+
+	if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
+		ret = true;
+	if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
+		struct inode *inode;
+
+		spin_lock(&delegation->lock);
+		inode = delegation->inode;
+		if (inode && list_empty(&NFS_I(inode)->open_files))
+			ret = true;
+		spin_unlock(&delegation->lock);
+	}
+	return ret;
+}
+
 /**
  * nfs_client_return_marked_delegations - return previously marked delegations
  * @clp: nfs_client to process
@@ -411,8 +427,7 @@ restart:
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 		list_for_each_entry_rcu(delegation, &server->delegations,
 						super_list) {
-			if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
-						&delegation->flags))
+			if (!nfs_delegation_need_return(delegation))
 				continue;
 			inode = nfs_delegation_grab_inode(delegation);
 			if (inode == NULL)
@@ -471,6 +486,13 @@ int nfs4_inode_return_delegation(struct inode *inode)
 	return err;
 }
 
+static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+		struct nfs_delegation *delegation)
+{
+	set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+	set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+}
+
 static void nfs_mark_return_delegation(struct nfs_server *server,
 		struct nfs_delegation *delegation)
 {
@@ -478,6 +500,45 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
 	set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
 }
 
+static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
+{
+	struct nfs_delegation *delegation;
+	bool ret = false;
+
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+		nfs_mark_return_delegation(server, delegation);
+		ret = true;
+	}
+	return ret;
+}
+
+static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_server_mark_return_all_delegations(server);
+	rcu_read_unlock();
+}
+
+static void nfs_delegation_run_state_manager(struct nfs_client *clp)
+{
+	if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
+		nfs4_schedule_state_manager(clp);
+}
+
+/**
+ * nfs_expire_all_delegations
+ * @clp: client to process
+ *
+ */
+void nfs_expire_all_delegations(struct nfs_client *clp)
+{
+	nfs_client_mark_return_all_delegations(clp);
+	nfs_delegation_run_state_manager(clp);
+}
+
 /**
  * nfs_super_return_all_delegations - return delegations for one superblock
  * @sb: sb to process
@@ -486,24 +547,22 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
 void nfs_server_return_all_delegations(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
-	struct nfs_delegation *delegation;
+	bool need_wait;
 
 	if (clp == NULL)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
-		spin_lock(&delegation->lock);
-		set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
-		spin_unlock(&delegation->lock);
-	}
+	need_wait = nfs_server_mark_return_all_delegations(server);
 	rcu_read_unlock();
 
-	if (nfs_client_return_marked_delegations(clp) != 0)
+	if (need_wait) {
 		nfs4_schedule_state_manager(clp);
+		nfs4_wait_clnt_recover(clp);
+	}
 }
 
-static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+static void nfs_mark_return_unused_delegation_types(struct nfs_server *server,
 						 fmode_t flags)
 {
 	struct nfs_delegation *delegation;
@@ -512,27 +571,21 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
 		if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
 			continue;
 		if (delegation->type & flags)
-			nfs_mark_return_delegation(server, delegation);
+			nfs_mark_return_if_closed_delegation(server, delegation);
 	}
 }
 
-static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp,
 							fmode_t flags)
 {
 	struct nfs_server *server;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
-		nfs_mark_return_all_delegation_types(server, flags);
+		nfs_mark_return_unused_delegation_types(server, flags);
 	rcu_read_unlock();
 }
 
-static void nfs_delegation_run_state_manager(struct nfs_client *clp)
-{
-	if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
-		nfs4_schedule_state_manager(clp);
-}
-
 void nfs_remove_bad_delegation(struct inode *inode)
 {
 	struct nfs_delegation *delegation;
@@ -546,27 +599,17 @@ void nfs_remove_bad_delegation(struct inode *inode)
 EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
 
 /**
- * nfs_expire_all_delegation_types
+ * nfs_expire_unused_delegation_types
  * @clp: client to process
  * @flags: delegation types to expire
  *
  */
-void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags)
 {
-	nfs_client_mark_return_all_delegation_types(clp, flags);
+	nfs_client_mark_return_unused_delegation_types(clp, flags);
 	nfs_delegation_run_state_manager(clp);
 }
 
-/**
- * nfs_expire_all_delegations
- * @clp: client to process
- *
- */
-void nfs_expire_all_delegations(struct nfs_client *clp)
-{
-	nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
-}
-
 static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
 {
 	struct nfs_delegation *delegation;
@@ -574,7 +617,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
 	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
 			continue;
-		nfs_mark_return_delegation(server, delegation);
+		nfs_mark_return_if_closed_delegation(server, delegation);
 	}
 }
 
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d54d4fca6793..9a79c7a99d6d 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -28,6 +28,7 @@ struct nfs_delegation {
 enum {
 	NFS_DELEGATION_NEED_RECLAIM = 0,
 	NFS_DELEGATION_RETURN,
+	NFS_DELEGATION_RETURN_IF_CLOSED,
 	NFS_DELEGATION_REFERENCED,
 	NFS_DELEGATION_RETURNING,
 };
@@ -41,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_server_return_all_delegations(struct nfs_server *);
 void nfs_expire_all_delegations(struct nfs_client *clp);
-void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
+void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags);
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
 int nfs_client_return_marked_delegations(struct nfs_client *clp);
 int nfs_delegations_present(struct nfs_client *clp);
@@ -53,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
-int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
 bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f23f455be42b..e093e73178b7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 		goto no_open;
 	if (d_mountpoint(dentry))
 		goto no_open;
+	if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
+		goto no_open;
 
 	inode = dentry->d_inode;
 	parent = dget_parent(dentry);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 29f4a48a0ee6..a87a44f84113 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -744,6 +744,7 @@ static int
 do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
 	struct inode *inode = filp->f_mapping->host;
+	struct nfs_lock_context *l_ctx;
 	int status;
 
 	/*
@@ -752,6 +753,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	 */
 	nfs_sync_mapping(filp->f_mapping);
 
+	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
+	if (!IS_ERR(l_ctx)) {
+		status = nfs_iocounter_wait(&l_ctx->io_count);
+		nfs_put_lock_context(l_ctx);
+		if (status < 0)
+			return status;
+	}
+
 	/* NOTE: special case
 	 *	If we're signalled while cleaning up locks on process exit, we
 	 *	still need to complete the unlock.
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98dfa717..c516da5873fd 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
 	return ret;
 }
 
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
 {
-	return key_instantiate_and_link(key, data, strlen(data) + 1,
+	return key_instantiate_and_link(key, data, datalen,
 					id_resolver_cache->thread_keyring,
 					authkey);
 }
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
 					      struct key *key, struct key *authkey)
 {
 	char id_str[NFS_UINT_MAXLEN];
+	size_t len;
 	int ret = -ENOKEY;
 
 	/* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
 	case IDMAP_CONV_NAMETOID:
 		if (strcmp(upcall->im_name, im->im_name) != 0)
 			break;
-		sprintf(id_str, "%d", im->im_id);
-		ret = nfs_idmap_instantiate(key, authkey, id_str);
+		/* Note: here we store the NUL terminator too */
+		len = sprintf(id_str, "%d", im->im_id) + 1;
+		ret = nfs_idmap_instantiate(key, authkey, id_str, len);
 		break;
 	case IDMAP_CONV_IDTONAME:
 		if (upcall->im_id != im->im_id)
 			break;
-		ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+		len = strlen(im->im_name);
+		ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1f941674b089..c1c7a9d78722 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -561,20 +561,22 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 	l_ctx->lockowner.l_owner = current->files;
 	l_ctx->lockowner.l_pid = current->tgid;
 	INIT_LIST_HEAD(&l_ctx->list);
+	nfs_iocounter_init(&l_ctx->io_count);
 }
 
 static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
 {
-	struct nfs_lock_context *pos;
+	struct nfs_lock_context *head = &ctx->lock_context;
+	struct nfs_lock_context *pos = head;
 
-	list_for_each_entry(pos, &ctx->lock_context.list, list) {
+	do {
 		if (pos->lockowner.l_owner != current->files)
 			continue;
 		if (pos->lockowner.l_pid != current->tgid)
 			continue;
 		atomic_inc(&pos->count);
 		return pos;
-	}
+	} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
 	return NULL;
 }
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 541c9ebdbc5a..91e59a39fc08 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -229,6 +229,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 			   struct nfs_pgio_header *hdr,
 			   void (*release)(struct nfs_pgio_header *hdr));
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
+int nfs_iocounter_wait(struct nfs_io_counter *c);
+
+static inline void nfs_iocounter_init(struct nfs_io_counter *c)
+{
+	c->flags = 0;
+	atomic_set(&c->io_count, 0);
+}
 
 /* nfs2xdr.c */
 extern struct rpc_procinfo nfs_procedures[];
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 944c9a5c1039..553a83cc4106 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -36,6 +36,7 @@ enum nfs4_client_state {
 
 struct nfs4_minor_version_ops {
 	u32	minor_version;
+	unsigned init_caps;
 
 	int	(*call_sync)(struct rpc_clnt *clnt,
 			struct nfs_server *server,
@@ -143,12 +144,14 @@ struct nfs4_lock_state {
 enum {
 	LK_STATE_IN_USE,
 	NFS_DELEGATED_STATE,		/* Current stateid is delegation */
+	NFS_OPEN_STATE,			/* OPEN stateid is set */
 	NFS_O_RDONLY_STATE,		/* OPEN stateid has read-only state */
 	NFS_O_WRONLY_STATE,		/* OPEN stateid has write-only state */
 	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 	NFS_STATE_RECLAIM_REBOOT,	/* OPEN stateid server rebooted */
 	NFS_STATE_RECLAIM_NOGRACE,	/* OPEN stateid needs to recover state */
 	NFS_STATE_POSIX_LOCKS,		/* Posix locks are supported */
+	NFS_STATE_RECOVERY_FAILED,	/* OPEN stateid state recovery failed */
 };
 
 struct nfs4_state {
@@ -233,6 +236,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr
 extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
 extern int nfs4_release_lockowner(struct nfs4_lock_state *);
 extern const struct xattr_handler *nfs4_xattr_handlers[];
+extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
+		const struct nfs_open_context *ctx,
+		const struct nfs_lock_context *l_ctx,
+		fmode_t fmode);
 
 #if defined(CONFIG_NFS_V4_1)
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -347,13 +354,13 @@ extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
 extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
-extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
+extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_server_scope(struct nfs_client *,
 				      struct nfs41_server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
+extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
 		fmode_t, const struct nfs_lockowner *);
 
 extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
@@ -412,6 +419,11 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
 	return memcmp(dst, src, sizeof(*dst)) == 0;
 }
 
+static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
+{
+	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index ac4fc9a8fdbc..f4d4d4ec6bf7 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -198,6 +198,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
 
+	if (clp->cl_minorversion != 0)
+		__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
 	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
 	error = nfs_create_rpc_client(clp, timeparms, authflavour);
 	if (error < 0)
@@ -717,6 +719,19 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 	if (error < 0)
 		goto out;
 
+	/* Set the basic capabilities */
+	server->caps |= server->nfs_client->cl_mvops->init_caps;
+	if (server->flags & NFS_MOUNT_NORDIRPLUS)
+		server->caps &= ~NFS_CAP_READDIRPLUS;
+	/*
+	 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+	 * authentication.
+	 */
+	if (nfs4_disable_idmapping &&
+			server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+		server->caps |= NFS_CAP_UIDGID_NOMAP;
+
+
 	/* Probe the root fh to retrieve its FSID and filehandle */
 	error = nfs4_get_rootfh(server, mntfh);
 	if (error < 0)
@@ -760,9 +775,6 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
-	if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
-		server->caps |= NFS_CAP_READDIRPLUS;
 	server->options = data->options;
 
 	/* Get a client record */
@@ -779,13 +791,6 @@ static int nfs4_init_server(struct nfs_server *server,
 	if (error < 0)
 		goto error;
 
-	/*
-	 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
-	 * authentication.
-	 */
-	if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
-		server->caps |= NFS_CAP_UIDGID_NOMAP;
-
 	if (data->rsize)
 		server->rsize = nfs_block_size(data->rsize, NULL);
 	if (data->wsize)
@@ -863,7 +868,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 
 	/* Initialise the client representation from the parent server */
 	nfs_server_copy_userdata(server, parent_server);
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
 
 	/* Get a client representation.
 	 * Note: NFSv4 always uses TCP, */
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb044c109..22d10623f5ee 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
 {
 	if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
 		return;
-	clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
 	pnfs_return_layout(inode);
 }
 
@@ -159,11 +158,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -NFS4ERR_OPENMODE:
 		if (state == NULL)
 			break;
-		nfs4_schedule_stateid_recovery(mds_server, state);
+		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
+			goto out_bad_stateid;
 		goto wait_on_recovery;
 	case -NFS4ERR_EXPIRED:
-		if (state != NULL)
-			nfs4_schedule_stateid_recovery(mds_server, state);
+		if (state != NULL) {
+			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
+				goto out_bad_stateid;
+		}
 		nfs4_schedule_lease_recovery(mds_client);
 		goto wait_on_recovery;
 	/* DS session errors */
@@ -227,6 +229,9 @@ reset:
 out:
 	task->tk_status = 0;
 	return -EAGAIN;
+out_bad_stateid:
+	task->tk_status = -EIO;
+	return 0;
 wait_on_recovery:
 	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
@@ -300,6 +305,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs_read_data *rdata = data;
 
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
+		rpc_exit(task, -EIO);
+		return;
+	}
 	if (filelayout_reset_to_mds(rdata->header->lseg)) {
 		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
 		filelayout_reset_read(rdata);
@@ -308,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 	}
 	rdata->read_done_cb = filelayout_read_done_cb;
 
-	nfs41_setup_sequence(rdata->ds_clp->cl_session,
+	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args,
 			&rdata->res.seq_res,
-			task);
+			task))
+		return;
+	nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
+			rdata->args.lock_context, FMODE_READ);
 }
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
@@ -402,16 +414,23 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs_write_data *wdata = data;
 
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
+		rpc_exit(task, -EIO);
+		return;
+	}
 	if (filelayout_reset_to_mds(wdata->header->lseg)) {
 		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
 		filelayout_reset_write(wdata);
 		rpc_exit(task, 0);
 		return;
 	}
-	nfs41_setup_sequence(wdata->ds_clp->cl_session,
+	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
 			&wdata->args.seq_args,
 			&wdata->res.seq_res,
-			task);
+			task))
+		return;
+	nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
+			wdata->args.lock_context, FMODE_WRITE);
 }
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb0f901..c13144911d20 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -107,6 +107,8 @@ static int nfs4_map_errors(int err)
 		return -EPROTONOSUPPORT;
 	case -NFS4ERR_ACCESS:
 		return -EACCES;
+	case -NFS4ERR_FILE_OPEN:
+		return -EBUSY;
 	default:
 		dprintk("%s could not handle NFSv4 error %d\n",
 				__func__, -err);
@@ -295,19 +297,30 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 		}
 		if (state == NULL)
 			break;
-		nfs4_schedule_stateid_recovery(server, state);
+		ret = nfs4_schedule_stateid_recovery(server, state);
+		if (ret < 0)
+			break;
 		goto wait_on_recovery;
 	case -NFS4ERR_DELEG_REVOKED:
 	case -NFS4ERR_ADMIN_REVOKED:
 	case -NFS4ERR_BAD_STATEID:
+		if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) {
+			nfs_remove_bad_delegation(inode);
+			exception->retry = 1;
+			break;
+		}
 		if (state == NULL)
 			break;
-		nfs_remove_bad_delegation(state->inode);
-		nfs4_schedule_stateid_recovery(server, state);
+		ret = nfs4_schedule_stateid_recovery(server, state);
+		if (ret < 0)
+			break;
 		goto wait_on_recovery;
 	case -NFS4ERR_EXPIRED:
-		if (state != NULL)
-			nfs4_schedule_stateid_recovery(server, state);
+		if (state != NULL) {
+			ret = nfs4_schedule_stateid_recovery(server, state);
+			if (ret < 0)
+				break;
+		}
 	case -NFS4ERR_STALE_STATEID:
 	case -NFS4ERR_STALE_CLIENTID:
 		nfs4_schedule_lease_recovery(clp);
@@ -760,6 +773,35 @@ struct nfs4_opendata {
 	int cancelled;
 };
 
+static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
+		int err, struct nfs4_exception *exception)
+{
+	if (err != -EINVAL)
+		return false;
+	if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
+		return false;
+	server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1;
+	exception->retry = 1;
+	return true;
+}
+
+static enum open_claim_type4
+nfs4_map_atomic_open_claim(struct nfs_server *server,
+		enum open_claim_type4 claim)
+{
+	if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+		return claim;
+	switch (claim) {
+	default:
+		return claim;
+	case NFS4_OPEN_CLAIM_FH:
+		return NFS4_OPEN_CLAIM_NULL;
+	case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+		return NFS4_OPEN_CLAIM_DELEGATE_CUR;
+	case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+		return NFS4_OPEN_CLAIM_DELEGATE_PREV;
+	}
+}
 
 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
@@ -775,6 +817,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
 		const struct iattr *attrs,
+		enum open_claim_type4 claim,
 		gfp_t gfp_mask)
 {
 	struct dentry *parent = dget_parent(dentry);
@@ -793,7 +836,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->dir = parent;
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
-	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags;
 	p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
 	/* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
@@ -811,7 +853,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
-	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+	p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
+	switch (p->o_arg.claim) {
+	case NFS4_OPEN_CLAIM_NULL:
+	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+		p->o_arg.fh = NFS_FH(dir);
+		break;
+	case NFS4_OPEN_CLAIM_PREVIOUS:
+	case NFS4_OPEN_CLAIM_FH:
+	case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+	case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+		p->o_arg.fh = NFS_FH(dentry->d_inode);
+	}
 	if (attrs != NULL && attrs->ia_valid != 0) {
 		__be32 verf[2];
 
@@ -924,6 +978,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
 		nfs4_stateid_copy(&state->stateid, stateid);
 	nfs4_stateid_copy(&state->open_stateid, stateid);
+	set_bit(NFS_OPEN_STATE, &state->flags);
 	switch (fmode) {
 		case FMODE_READ:
 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -1193,11 +1248,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state)
+static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
+		struct nfs4_state *state, enum open_claim_type4 claim)
 {
 	struct nfs4_opendata *opendata;
 
-	opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
+	opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
+			NULL, claim, GFP_NOFS);
 	if (opendata == NULL)
 		return ERR_PTR(-ENOMEM);
 	opendata->state = state;
@@ -1233,6 +1290,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
 
 	/* memory barrier prior to reading state->n_* */
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+	clear_bit(NFS_OPEN_STATE, &state->flags);
 	smp_rmb();
 	if (state->n_rdwr != 0) {
 		clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1283,11 +1341,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
 	fmode_t delegation_type = 0;
 	int status;
 
-	opendata = nfs4_open_recoverdata_alloc(ctx, state);
+	opendata = nfs4_open_recoverdata_alloc(ctx, state,
+			NFS4_OPEN_CLAIM_PREVIOUS);
 	if (IS_ERR(opendata))
 		return PTR_ERR(opendata);
-	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
-	opendata->o_arg.fh = NFS_FH(state->inode);
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
 	if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
@@ -1306,6 +1363,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
 	int err;
 	do {
 		err = _nfs4_do_open_reclaim(ctx, state);
+		if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+			continue;
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -1320,71 +1379,72 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+		return -EAGAIN;
 	ret = nfs4_do_open_reclaim(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
-static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
 {
-	struct nfs4_opendata *opendata;
-	int ret;
-
-	opendata = nfs4_open_recoverdata_alloc(ctx, state);
-	if (IS_ERR(opendata))
-		return PTR_ERR(opendata);
-	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-	nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
-	ret = nfs4_open_recover(opendata, state);
-	nfs4_opendata_put(opendata);
-	return ret;
+	switch (err) {
+	default:
+		printk(KERN_ERR "NFS: %s: unhandled error "
+			"%d.\n", __func__, err);
+	case 0:
+	case -ENOENT:
+	case -ESTALE:
+		break;
+	case -NFS4ERR_BADSESSION:
+	case -NFS4ERR_BADSLOT:
+	case -NFS4ERR_BAD_HIGH_SLOT:
+	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+	case -NFS4ERR_DEADSESSION:
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+		nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+		return -EAGAIN;
+	case -NFS4ERR_STALE_CLIENTID:
+	case -NFS4ERR_STALE_STATEID:
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+	case -NFS4ERR_EXPIRED:
+		/* Don't recall a delegation if it was lost */
+		nfs4_schedule_lease_recovery(server->nfs_client);
+		return -EAGAIN;
+	case -NFS4ERR_DELEG_REVOKED:
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_BAD_STATEID:
+	case -NFS4ERR_OPENMODE:
+		nfs_inode_find_state_and_recover(state->inode,
+				stateid);
+		nfs4_schedule_stateid_recovery(server, state);
+		return 0;
+	case -NFS4ERR_DELAY:
+	case -NFS4ERR_GRACE:
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+		ssleep(1);
+		return -EAGAIN;
+	case -ENOMEM:
+	case -NFS4ERR_DENIED:
+		/* kill_proc(fl->fl_pid, SIGLOST, 1); */
+		return 0;
+	}
+	return err;
 }
 
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
-	struct nfs4_exception exception = { };
 	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_opendata *opendata;
 	int err;
-	do {
-		err = _nfs4_open_delegation_recall(ctx, state, stateid);
-		switch (err) {
-			case 0:
-			case -ENOENT:
-			case -ESTALE:
-				goto out;
-			case -NFS4ERR_BADSESSION:
-			case -NFS4ERR_BADSLOT:
-			case -NFS4ERR_BAD_HIGH_SLOT:
-			case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
-			case -NFS4ERR_DEADSESSION:
-				set_bit(NFS_DELEGATED_STATE, &state->flags);
-				nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
-				err = -EAGAIN;
-				goto out;
-			case -NFS4ERR_STALE_CLIENTID:
-			case -NFS4ERR_STALE_STATEID:
-				set_bit(NFS_DELEGATED_STATE, &state->flags);
-			case -NFS4ERR_EXPIRED:
-				/* Don't recall a delegation if it was lost */
-				nfs4_schedule_lease_recovery(server->nfs_client);
-				err = -EAGAIN;
-				goto out;
-			case -NFS4ERR_DELEG_REVOKED:
-			case -NFS4ERR_ADMIN_REVOKED:
-			case -NFS4ERR_BAD_STATEID:
-				nfs_inode_find_state_and_recover(state->inode,
-						stateid);
-				nfs4_schedule_stateid_recovery(server, state);
-			case -ENOMEM:
-				err = 0;
-				goto out;
-		}
-		set_bit(NFS_DELEGATED_STATE, &state->flags);
-		err = nfs4_handle_exception(server, err, &exception);
-	} while (exception.retry);
-out:
-	return err;
+
+	opendata = nfs4_open_recoverdata_alloc(ctx, state,
+			NFS4_OPEN_CLAIM_DELEG_CUR_FH);
+	if (IS_ERR(opendata))
+		return PTR_ERR(opendata);
+	nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
+	err = nfs4_open_recover(opendata, state);
+	nfs4_opendata_put(opendata);
+	return nfs4_handle_delegation_recall_error(server, state, stateid, err);
 }
 
 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
@@ -1467,6 +1527,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
 	struct nfs4_state_owner *sp = data->owner;
+	struct nfs_client *clp = sp->so_server->nfs_client;
 
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		goto out_wait;
@@ -1487,7 +1548,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 		rcu_read_unlock();
 	}
 	/* Update client id. */
-	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+	data->o_arg.clientid = clp->cl_clientid;
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 		data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
@@ -1499,6 +1560,16 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 				&data->o_res.seq_res,
 				task) != 0)
 		nfs_release_seqid(data->o_arg.seqid);
+
+	/* Set the create mode (note dependency on the session type) */
+	data->o_arg.createmode = NFS4_CREATE_UNCHECKED;
+	if (data->o_arg.open_flags & O_EXCL) {
+		data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE;
+		if (nfs4_has_persistent_session(clp))
+			data->o_arg.createmode = NFS4_CREATE_GUARDED;
+		else if (clp->cl_mvops->minor_version > 0)
+			data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1;
+	}
 	return;
 unlock_no_action:
 	rcu_read_unlock();
@@ -1720,7 +1791,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
 	struct nfs4_opendata *opendata;
 	int ret;
 
-	opendata = nfs4_open_recoverdata_alloc(ctx, state);
+	opendata = nfs4_open_recoverdata_alloc(ctx, state,
+			NFS4_OPEN_CLAIM_FH);
 	if (IS_ERR(opendata))
 		return PTR_ERR(opendata);
 	ret = nfs4_open_recover(opendata, state);
@@ -1738,6 +1810,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
 
 	do {
 		err = _nfs4_open_expired(ctx, state);
+		if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+			continue;
 		switch (err) {
 		default:
 			goto out;
@@ -1758,7 +1832,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+		return -EAGAIN;
 	ret = nfs4_do_open_expired(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
@@ -1820,6 +1894,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
 		clear_bit(NFS_O_RDONLY_STATE, &state->flags);
 		clear_bit(NFS_O_WRONLY_STATE, &state->flags);
 		clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		clear_bit(NFS_OPEN_STATE, &state->flags);
 	}
 	return status;
 }
@@ -1880,10 +1955,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 	if (ret != 0)
 		goto out;
 
-	if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+	if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
 		nfs4_schedule_stateid_recovery(server, state);
-		nfs4_wait_clnt_recover(server->nfs_client);
-	}
 	*res = state;
 out:
 	return ret;
@@ -1905,6 +1978,7 @@ static int _nfs4_do_open(struct inode *dir,
 	struct nfs4_state *state = NULL;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_opendata *opendata;
+	enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
 	int status;
 
 	/* Protect against reboot recovery conflicts */
@@ -1920,7 +1994,10 @@ static int _nfs4_do_open(struct inode *dir,
1920 if (dentry->d_inode != NULL) 1994 if (dentry->d_inode != NULL)
1921 nfs4_return_incompatible_delegation(dentry->d_inode, fmode); 1995 nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
1922 status = -ENOMEM; 1996 status = -ENOMEM;
1923 opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); 1997 if (dentry->d_inode)
1998 claim = NFS4_OPEN_CLAIM_FH;
1999 opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
2000 claim, GFP_KERNEL);
1924 if (opendata == NULL) 2001 if (opendata == NULL)
1925 goto err_put_state_owner; 2002 goto err_put_state_owner;
1926 2003
@@ -1937,7 +2014,8 @@ static int _nfs4_do_open(struct inode *dir,
1937 if (status != 0) 2014 if (status != 0)
1938 goto err_opendata_put; 2015 goto err_opendata_put;
1939 2016
1940 if (opendata->o_arg.open_flags & O_EXCL) { 2017 if ((opendata->o_arg.open_flags & O_EXCL) &&
2018 (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
1941 nfs4_exclusive_attrset(opendata, sattr); 2019 nfs4_exclusive_attrset(opendata, sattr);
1942 2020
1943 nfs_fattr_init(opendata->o_res.f_attr); 2021 nfs_fattr_init(opendata->o_res.f_attr);
@@ -1978,6 +2056,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
1978 struct rpc_cred *cred, 2056 struct rpc_cred *cred,
1979 struct nfs4_threshold **ctx_th) 2057 struct nfs4_threshold **ctx_th)
1980{ 2058{
2059 struct nfs_server *server = NFS_SERVER(dir);
1981 struct nfs4_exception exception = { }; 2060 struct nfs4_exception exception = { };
1982 struct nfs4_state *res; 2061 struct nfs4_state *res;
1983 int status; 2062 int status;
@@ -2021,7 +2100,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
2021 exception.retry = 1; 2100 exception.retry = 1;
2022 continue; 2101 continue;
2023 } 2102 }
2024 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), 2103 if (nfs4_clear_cap_atomic_open_v1(server, status, &exception))
2104 continue;
2105 res = ERR_PTR(nfs4_handle_exception(server,
2025 status, &exception)); 2106 status, &exception));
2026 } while (exception.retry); 2107 } while (exception.retry);
2027 return res; 2108 return res;
@@ -2053,7 +2134,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2053 2134
2054 nfs_fattr_init(fattr); 2135 nfs_fattr_init(fattr);
2055 2136
2056 if (state != NULL) { 2137 if (state != NULL && nfs4_valid_open_stateid(state)) {
2057 struct nfs_lockowner lockowner = { 2138 struct nfs_lockowner lockowner = {
2058 .l_owner = current->files, 2139 .l_owner = current->files,
2059 .l_pid = current->tgid, 2140 .l_pid = current->tgid,
@@ -2129,11 +2210,19 @@ static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
2129 fmode_t fmode) 2210 fmode_t fmode)
2130{ 2211{
2131 spin_lock(&state->owner->so_lock); 2212 spin_lock(&state->owner->so_lock);
2132 if (!(fmode & FMODE_READ)) 2213 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2214 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
2215 case FMODE_WRITE:
2133 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 2216 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2134 if (!(fmode & FMODE_WRITE)) 2217 break;
2218 case FMODE_READ:
2135 clear_bit(NFS_O_WRONLY_STATE, &state->flags); 2219 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2136 clear_bit(NFS_O_RDWR_STATE, &state->flags); 2220 break;
2221 case 0:
2222 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2223 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2224 clear_bit(NFS_OPEN_STATE, &state->flags);
2225 }
2137 spin_unlock(&state->owner->so_lock); 2226 spin_unlock(&state->owner->so_lock);
2138} 2227}
2139 2228
@@ -2201,6 +2290,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2201 calldata->arg.fmode &= ~FMODE_WRITE; 2290 calldata->arg.fmode &= ~FMODE_WRITE;
2202 } 2291 }
2203 } 2292 }
2293 if (!nfs4_valid_open_stateid(state))
2294 call_close = 0;
2204 spin_unlock(&state->owner->so_lock); 2295 spin_unlock(&state->owner->so_lock);
2205 2296
2206 if (!call_close) { 2297 if (!call_close) {
@@ -2211,8 +2302,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2211 if (calldata->arg.fmode == 0) { 2302 if (calldata->arg.fmode == 0) {
2212 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2303 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2213 if (calldata->roc && 2304 if (calldata->roc &&
2214 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2305 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) {
2306 nfs_release_seqid(calldata->arg.seqid);
2215 goto out_wait; 2307 goto out_wait;
2308 }
2216 } 2309 }
2217 2310
2218 nfs_fattr_init(calldata->res.fattr); 2311 nfs_fattr_init(calldata->res.fattr);
@@ -2632,7 +2725,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2632 int status; 2725 int status;
2633 2726
2634 if (pnfs_ld_layoutret_on_setattr(inode)) 2727 if (pnfs_ld_layoutret_on_setattr(inode))
2635 pnfs_return_layout(inode); 2728 pnfs_commit_and_return_layout(inode);
2636 2729
2637 nfs_fattr_init(fattr); 2730 nfs_fattr_init(fattr);
2638 2731
@@ -3445,6 +3538,46 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3445 return err; 3538 return err;
3446} 3539}
3447 3540
3541int nfs4_set_rw_stateid(nfs4_stateid *stateid,
3542 const struct nfs_open_context *ctx,
3543 const struct nfs_lock_context *l_ctx,
3544 fmode_t fmode)
3545{
3546 const struct nfs_lockowner *lockowner = NULL;
3547
3548 if (l_ctx != NULL)
3549 lockowner = &l_ctx->lockowner;
3550 return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner);
3551}
3552EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid);
3553
3554static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
3555 const struct nfs_open_context *ctx,
3556 const struct nfs_lock_context *l_ctx,
3557 fmode_t fmode)
3558{
3559 nfs4_stateid current_stateid;
3560
3561 if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
3562 return false;
3563 return nfs4_stateid_match(stateid, &current_stateid);
3564}
3565
3566static bool nfs4_error_stateid_expired(int err)
3567{
3568 switch (err) {
3569 case -NFS4ERR_DELEG_REVOKED:
3570 case -NFS4ERR_ADMIN_REVOKED:
3571 case -NFS4ERR_BAD_STATEID:
3572 case -NFS4ERR_STALE_STATEID:
3573 case -NFS4ERR_OLD_STATEID:
3574 case -NFS4ERR_OPENMODE:
3575 case -NFS4ERR_EXPIRED:
3576 return true;
3577 }
3578 return false;
3579}
3580
3448void __nfs4_read_done_cb(struct nfs_read_data *data) 3581void __nfs4_read_done_cb(struct nfs_read_data *data)
3449{ 3582{
3450 nfs_invalidate_atime(data->header->inode); 3583 nfs_invalidate_atime(data->header->inode);
@@ -3465,6 +3598,20 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3465 return 0; 3598 return 0;
3466} 3599}
3467 3600
3601static bool nfs4_read_stateid_changed(struct rpc_task *task,
3602 struct nfs_readargs *args)
3603{
3604
3605 if (!nfs4_error_stateid_expired(task->tk_status) ||
3606 nfs4_stateid_is_current(&args->stateid,
3607 args->context,
3608 args->lock_context,
3609 FMODE_READ))
3610 return false;
3611 rpc_restart_call_prepare(task);
3612 return true;
3613}
3614
3468static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3615static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3469{ 3616{
3470 3617
@@ -3472,7 +3619,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3472 3619
3473 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3620 if (!nfs4_sequence_done(task, &data->res.seq_res))
3474 return -EAGAIN; 3621 return -EAGAIN;
3475 3622 if (nfs4_read_stateid_changed(task, &data->args))
3623 return -EAGAIN;
3476 return data->read_done_cb ? data->read_done_cb(task, data) : 3624 return data->read_done_cb ? data->read_done_cb(task, data) :
3477 nfs4_read_done_cb(task, data); 3625 nfs4_read_done_cb(task, data);
3478} 3626}
@@ -3487,10 +3635,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3487 3635
3488static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 3636static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3489{ 3637{
3490 nfs4_setup_sequence(NFS_SERVER(data->header->inode), 3638 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3491 &data->args.seq_args, 3639 &data->args.seq_args,
3492 &data->res.seq_res, 3640 &data->res.seq_res,
3493 task); 3641 task))
3642 return;
3643 nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
3644 data->args.lock_context, FMODE_READ);
3494} 3645}
3495 3646
3496static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3647static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3508,10 +3659,26 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3508 return 0; 3659 return 0;
3509} 3660}
3510 3661
3662static bool nfs4_write_stateid_changed(struct rpc_task *task,
3663 struct nfs_writeargs *args)
3664{
3665
3666 if (!nfs4_error_stateid_expired(task->tk_status) ||
3667 nfs4_stateid_is_current(&args->stateid,
3668 args->context,
3669 args->lock_context,
3670 FMODE_WRITE))
3671 return false;
3672 rpc_restart_call_prepare(task);
3673 return true;
3674}
3675
3511static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3676static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3512{ 3677{
3513 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3678 if (!nfs4_sequence_done(task, &data->res.seq_res))
3514 return -EAGAIN; 3679 return -EAGAIN;
3680 if (nfs4_write_stateid_changed(task, &data->args))
3681 return -EAGAIN;
3515 return data->write_done_cb ? data->write_done_cb(task, data) : 3682 return data->write_done_cb ? data->write_done_cb(task, data) :
3516 nfs4_write_done_cb(task, data); 3683 nfs4_write_done_cb(task, data);
3517} 3684}
@@ -3551,10 +3718,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3551 3718
3552static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) 3719static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3553{ 3720{
3554 nfs4_setup_sequence(NFS_SERVER(data->header->inode), 3721 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3555 &data->args.seq_args, 3722 &data->args.seq_args,
3556 &data->res.seq_res, 3723 &data->res.seq_res,
3557 task); 3724 task))
3725 return;
3726 nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
3727 data->args.lock_context, FMODE_WRITE);
3558} 3728}
3559 3729
3560static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 3730static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -3656,7 +3826,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
3656 return -ENOMEM; 3826 return -ENOMEM;
3657 data->client = clp; 3827 data->client = clp;
3658 data->timestamp = jiffies; 3828 data->timestamp = jiffies;
3659 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 3829 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
3660 &nfs4_renew_ops, data); 3830 &nfs4_renew_ops, data);
3661} 3831}
3662 3832
@@ -3670,7 +3840,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
3670 unsigned long now = jiffies; 3840 unsigned long now = jiffies;
3671 int status; 3841 int status;
3672 3842
3673 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3843 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3674 if (status < 0) 3844 if (status < 0)
3675 return status; 3845 return status;
3676 do_renew_lease(clp, now); 3846 do_renew_lease(clp, now);
@@ -3980,11 +4150,14 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3980 case -NFS4ERR_OPENMODE: 4150 case -NFS4ERR_OPENMODE:
3981 if (state == NULL) 4151 if (state == NULL)
3982 break; 4152 break;
3983 nfs4_schedule_stateid_recovery(server, state); 4153 if (nfs4_schedule_stateid_recovery(server, state) < 0)
4154 goto stateid_invalid;
3984 goto wait_on_recovery; 4155 goto wait_on_recovery;
3985 case -NFS4ERR_EXPIRED: 4156 case -NFS4ERR_EXPIRED:
3986 if (state != NULL) 4157 if (state != NULL) {
3987 nfs4_schedule_stateid_recovery(server, state); 4158 if (nfs4_schedule_stateid_recovery(server, state) < 0)
4159 goto stateid_invalid;
4160 }
3988 case -NFS4ERR_STALE_STATEID: 4161 case -NFS4ERR_STALE_STATEID:
3989 case -NFS4ERR_STALE_CLIENTID: 4162 case -NFS4ERR_STALE_CLIENTID:
3990 nfs4_schedule_lease_recovery(clp); 4163 nfs4_schedule_lease_recovery(clp);
@@ -4016,6 +4189,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
4016 } 4189 }
4017 task->tk_status = nfs4_map_errors(task->tk_status); 4190 task->tk_status = nfs4_map_errors(task->tk_status);
4018 return 0; 4191 return 0;
4192stateid_invalid:
4193 task->tk_status = -EIO;
4194 return 0;
4019wait_on_recovery: 4195wait_on_recovery:
4020 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 4196 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
4021 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 4197 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
@@ -4627,17 +4803,23 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4627 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4803 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
4628 goto out_release_lock_seqid; 4804 goto out_release_lock_seqid;
4629 } 4805 }
4630 data->arg.open_stateid = &state->stateid; 4806 data->arg.open_stateid = &state->open_stateid;
4631 data->arg.new_lock_owner = 1; 4807 data->arg.new_lock_owner = 1;
4632 data->res.open_seqid = data->arg.open_seqid; 4808 data->res.open_seqid = data->arg.open_seqid;
4633 } else 4809 } else
4634 data->arg.new_lock_owner = 0; 4810 data->arg.new_lock_owner = 0;
4811 if (!nfs4_valid_open_stateid(state)) {
4812 data->rpc_status = -EBADF;
4813 task->tk_action = NULL;
4814 goto out_release_open_seqid;
4815 }
4635 data->timestamp = jiffies; 4816 data->timestamp = jiffies;
4636 if (nfs4_setup_sequence(data->server, 4817 if (nfs4_setup_sequence(data->server,
4637 &data->arg.seq_args, 4818 &data->arg.seq_args,
4638 &data->res.seq_res, 4819 &data->res.seq_res,
4639 task) == 0) 4820 task) == 0)
4640 return; 4821 return;
4822out_release_open_seqid:
4641 nfs_release_seqid(data->arg.open_seqid); 4823 nfs_release_seqid(data->arg.open_seqid);
4642out_release_lock_seqid: 4824out_release_lock_seqid:
4643 nfs_release_seqid(data->arg.lock_seqid); 4825 nfs_release_seqid(data->arg.lock_seqid);
@@ -4983,58 +5165,16 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
4983 return status; 5165 return status;
4984} 5166}
4985 5167
4986int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) 5168int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid)
4987{ 5169{
4988 struct nfs_server *server = NFS_SERVER(state->inode); 5170 struct nfs_server *server = NFS_SERVER(state->inode);
4989 struct nfs4_exception exception = { };
4990 int err; 5171 int err;
4991 5172
4992 err = nfs4_set_lock_state(state, fl); 5173 err = nfs4_set_lock_state(state, fl);
4993 if (err != 0) 5174 if (err != 0)
4994 goto out; 5175 return err;
4995 do { 5176 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4996 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); 5177 return nfs4_handle_delegation_recall_error(server, state, stateid, err);
4997 switch (err) {
4998 default:
4999 printk(KERN_ERR "NFS: %s: unhandled error "
5000 "%d.\n", __func__, err);
5001 case 0:
5002 case -ESTALE:
5003 goto out;
5004 case -NFS4ERR_STALE_CLIENTID:
5005 case -NFS4ERR_STALE_STATEID:
5006 set_bit(NFS_DELEGATED_STATE, &state->flags);
5007 case -NFS4ERR_EXPIRED:
5008 nfs4_schedule_lease_recovery(server->nfs_client);
5009 err = -EAGAIN;
5010 goto out;
5011 case -NFS4ERR_BADSESSION:
5012 case -NFS4ERR_BADSLOT:
5013 case -NFS4ERR_BAD_HIGH_SLOT:
5014 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
5015 case -NFS4ERR_DEADSESSION:
5016 set_bit(NFS_DELEGATED_STATE, &state->flags);
5017 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5018 err = -EAGAIN;
5019 goto out;
5020 case -NFS4ERR_DELEG_REVOKED:
5021 case -NFS4ERR_ADMIN_REVOKED:
5022 case -NFS4ERR_BAD_STATEID:
5023 case -NFS4ERR_OPENMODE:
5024 nfs4_schedule_stateid_recovery(server, state);
5025 err = 0;
5026 goto out;
5027 case -ENOMEM:
5028 case -NFS4ERR_DENIED:
5029 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
5030 err = 0;
5031 goto out;
5032 }
5033 set_bit(NFS_DELEGATED_STATE, &state->flags);
5034 err = nfs4_handle_exception(server, err, &exception);
5035 } while (exception.retry);
5036out:
5037 return err;
5038} 5178}
5039 5179
5040struct nfs_release_lockowner_data { 5180struct nfs_release_lockowner_data {
@@ -5848,7 +5988,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
5848 .rpc_client = clp->cl_rpcclient, 5988 .rpc_client = clp->cl_rpcclient,
5849 .rpc_message = &msg, 5989 .rpc_message = &msg,
5850 .callback_ops = &nfs41_sequence_ops, 5990 .callback_ops = &nfs41_sequence_ops,
5851 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, 5991 .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
5852 }; 5992 };
5853 5993
5854 if (!atomic_inc_not_zero(&clp->cl_count)) 5994 if (!atomic_inc_not_zero(&clp->cl_count))
@@ -6416,22 +6556,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
6416static void nfs4_layoutcommit_release(void *calldata) 6556static void nfs4_layoutcommit_release(void *calldata)
6417{ 6557{
6418 struct nfs4_layoutcommit_data *data = calldata; 6558 struct nfs4_layoutcommit_data *data = calldata;
6419 struct pnfs_layout_segment *lseg, *tmp;
6420 unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
6421 6559
6422 pnfs_cleanup_layoutcommit(data); 6560 pnfs_cleanup_layoutcommit(data);
6423 /* Matched by references in pnfs_set_layoutcommit */
6424 list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
6425 list_del_init(&lseg->pls_lc_list);
6426 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
6427 &lseg->pls_flags))
6428 pnfs_put_lseg(lseg);
6429 }
6430
6431 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
6432 smp_mb__after_clear_bit();
6433 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
6434
6435 put_rpccred(data->cred); 6561 put_rpccred(data->cred);
6436 kfree(data); 6562 kfree(data);
6437} 6563}
@@ -6739,6 +6865,10 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
6739 6865
6740static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { 6866static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6741 .minor_version = 0, 6867 .minor_version = 0,
6868 .init_caps = NFS_CAP_READDIRPLUS
6869 | NFS_CAP_ATOMIC_OPEN
6870 | NFS_CAP_CHANGE_ATTR
6871 | NFS_CAP_POSIX_LOCK,
6742 .call_sync = _nfs4_call_sync, 6872 .call_sync = _nfs4_call_sync,
6743 .match_stateid = nfs4_match_stateid, 6873 .match_stateid = nfs4_match_stateid,
6744 .find_root_sec = nfs4_find_root_sec, 6874 .find_root_sec = nfs4_find_root_sec,
@@ -6750,6 +6880,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
6750#if defined(CONFIG_NFS_V4_1) 6880#if defined(CONFIG_NFS_V4_1)
6751static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { 6881static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
6752 .minor_version = 1, 6882 .minor_version = 1,
6883 .init_caps = NFS_CAP_READDIRPLUS
6884 | NFS_CAP_ATOMIC_OPEN
6885 | NFS_CAP_CHANGE_ATTR
6886 | NFS_CAP_POSIX_LOCK
6887 | NFS_CAP_STATEID_NFSV41
6888 | NFS_CAP_ATOMIC_OPEN_V1,
6753 .call_sync = nfs4_call_sync_sequence, 6889 .call_sync = nfs4_call_sync_sequence,
6754 .match_stateid = nfs41_match_stateid, 6890 .match_stateid = nfs41_match_stateid,
6755 .find_root_sec = nfs41_find_root_sec, 6891 .find_root_sec = nfs41_find_root_sec,
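
The nfs4_open_prepare() hunk near the top of this file now picks the OPEN create mode per RPC from the open flags and the session type ("note dependency on the session type"). A standalone sketch of that decision table follows; the enum values and the two boolean inputs are simplified stand-ins for the kernel's NFS4_CREATE_* constants, nfs4_has_persistent_session() and cl_mvops->minor_version, not the kernel code itself.

#include <stdbool.h>
#include <stdio.h>

enum createmode {		/* stand-ins for the NFS4_CREATE_* values */
	CREATE_UNCHECKED,
	CREATE_GUARDED,
	CREATE_EXCLUSIVE,
	CREATE_EXCLUSIVE4_1,
};

/* Mirrors the selection performed in the nfs4_open_prepare() hunk above. */
static enum createmode select_createmode(bool o_excl,
					 bool persistent_session,
					 int minor_version)
{
	enum createmode mode = CREATE_UNCHECKED;

	if (o_excl) {
		mode = CREATE_EXCLUSIVE;		/* NFSv4.0 exclusive create */
		if (persistent_session)
			mode = CREATE_GUARDED;		/* no verifier needed */
		else if (minor_version > 0)
			mode = CREATE_EXCLUSIVE4_1;	/* verifier plus attrs */
	}
	return mode;
}

int main(void)
{
	/* v4.1 without a persistent session, O_EXCL set */
	printf("createmode = %d\n", select_createmode(true, false, 1));
	return 0;
}

Computing the mode up front is what allows encode_createmode() in the nfs4xdr.c hunks further down to switch directly on arg->createmode instead of re-deriving it from open_flags and the session state at encode time.
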
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6ace365c6334..b7796950eceb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -699,6 +699,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
699 list_for_each_entry(state, &nfsi->open_states, inode_states) { 699 list_for_each_entry(state, &nfsi->open_states, inode_states) {
700 if (state->owner != owner) 700 if (state->owner != owner)
701 continue; 701 continue;
702 if (!nfs4_valid_open_stateid(state))
703 continue;
702 if (atomic_inc_not_zero(&state->count)) 704 if (atomic_inc_not_zero(&state->count))
703 return state; 705 return state;
704 } 706 }
@@ -987,13 +989,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
987 return 0; 989 return 0;
988} 990}
989 991
990static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, 992static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
993 struct nfs4_state *state,
991 const struct nfs_lockowner *lockowner) 994 const struct nfs_lockowner *lockowner)
992{ 995{
993 struct nfs4_lock_state *lsp; 996 struct nfs4_lock_state *lsp;
994 fl_owner_t fl_owner; 997 fl_owner_t fl_owner;
995 pid_t fl_pid; 998 pid_t fl_pid;
996 bool ret = false; 999 int ret = -ENOENT;
997 1000
998 1001
999 if (lockowner == NULL) 1002 if (lockowner == NULL)
@@ -1008,7 +1011,10 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
1008 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 1011 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
1009 if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { 1012 if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
1010 nfs4_stateid_copy(dst, &lsp->ls_stateid); 1013 nfs4_stateid_copy(dst, &lsp->ls_stateid);
1011 ret = true; 1014 ret = 0;
1015 smp_rmb();
1016 if (!list_empty(&lsp->ls_seqid.list))
1017 ret = -EWOULDBLOCK;
1012 } 1018 }
1013 spin_unlock(&state->state_lock); 1019 spin_unlock(&state->state_lock);
1014 nfs4_put_lock_state(lsp); 1020 nfs4_put_lock_state(lsp);
@@ -1016,28 +1022,44 @@ out:
1016 return ret; 1022 return ret;
1017} 1023}
1018 1024
1019static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) 1025static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
1020{ 1026{
1027 const nfs4_stateid *src;
1028 int ret;
1021 int seq; 1029 int seq;
1022 1030
1023 do { 1031 do {
1032 src = &zero_stateid;
1024 seq = read_seqbegin(&state->seqlock); 1033 seq = read_seqbegin(&state->seqlock);
1025 nfs4_stateid_copy(dst, &state->stateid); 1034 if (test_bit(NFS_OPEN_STATE, &state->flags))
1035 src = &state->open_stateid;
1036 nfs4_stateid_copy(dst, src);
1037 ret = 0;
1038 smp_rmb();
1039 if (!list_empty(&state->owner->so_seqid.list))
1040 ret = -EWOULDBLOCK;
1026 } while (read_seqretry(&state->seqlock, seq)); 1041 } while (read_seqretry(&state->seqlock, seq));
1042 return ret;
1027} 1043}
1028 1044
1029/* 1045/*
1030 * Byte-range lock aware utility to initialize the stateid of read/write 1046 * Byte-range lock aware utility to initialize the stateid of read/write
1031 * requests. 1047 * requests.
1032 */ 1048 */
1033void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, 1049int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
1034 fmode_t fmode, const struct nfs_lockowner *lockowner) 1050 fmode_t fmode, const struct nfs_lockowner *lockowner)
1035{ 1051{
1052 int ret = 0;
1036 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) 1053 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
1037 return; 1054 goto out;
1038 if (nfs4_copy_lock_stateid(dst, state, lockowner)) 1055 ret = nfs4_copy_lock_stateid(dst, state, lockowner);
1039 return; 1056 if (ret != -ENOENT)
1040 nfs4_copy_open_stateid(dst, state); 1057 goto out;
1058 ret = nfs4_copy_open_stateid(dst, state);
1059out:
1060 if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
1061 dst->seqid = 0;
1062 return ret;
1041} 1063}
1042 1064
1043struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) 1065struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
@@ -1286,14 +1308,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s
1286 return 1; 1308 return 1;
1287} 1309}
1288 1310
1289void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) 1311int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1290{ 1312{
1291 struct nfs_client *clp = server->nfs_client; 1313 struct nfs_client *clp = server->nfs_client;
1292 1314
1315 if (!nfs4_valid_open_stateid(state))
1316 return -EBADF;
1293 nfs4_state_mark_reclaim_nograce(clp, state); 1317 nfs4_state_mark_reclaim_nograce(clp, state);
1294 dprintk("%s: scheduling stateid recovery for server %s\n", __func__, 1318 dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1295 clp->cl_hostname); 1319 clp->cl_hostname);
1296 nfs4_schedule_state_manager(clp); 1320 nfs4_schedule_state_manager(clp);
1321 return 0;
1297} 1322}
1298EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); 1323EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1299 1324
@@ -1323,6 +1348,27 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
1323 nfs4_schedule_state_manager(clp); 1348 nfs4_schedule_state_manager(clp);
1324} 1349}
1325 1350
1351static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
1352{
1353 struct inode *inode = state->inode;
1354 struct nfs_inode *nfsi = NFS_I(inode);
1355 struct nfs_open_context *ctx;
1356
1357 spin_lock(&inode->i_lock);
1358 list_for_each_entry(ctx, &nfsi->open_files, list) {
1359 if (ctx->state != state)
1360 continue;
1361 set_bit(NFS_CONTEXT_BAD, &ctx->flags);
1362 }
1363 spin_unlock(&inode->i_lock);
1364}
1365
1366static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
1367{
1368 set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
1369 nfs4_state_mark_open_context_bad(state);
1370}
1371
1326 1372
1327static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1373static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1328{ 1374{
@@ -1398,6 +1444,8 @@ restart:
1398 list_for_each_entry(state, &sp->so_states, open_states) { 1444 list_for_each_entry(state, &sp->so_states, open_states) {
1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1445 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1400 continue; 1446 continue;
1447 if (!nfs4_valid_open_stateid(state))
1448 continue;
1401 if (state->state == 0) 1449 if (state->state == 0)
1402 continue; 1450 continue;
1403 atomic_inc(&state->count); 1451 atomic_inc(&state->count);
@@ -1430,11 +1478,10 @@ restart:
1430 * Open state on this file cannot be recovered 1478 * Open state on this file cannot be recovered
1431 * All we can do is revert to using the zero stateid. 1479 * All we can do is revert to using the zero stateid.
1432 */ 1480 */
1433 memset(&state->stateid, 0, 1481 nfs4_state_mark_recovery_failed(state, status);
1434 sizeof(state->stateid));
1435 /* Mark the file as being 'closed' */
1436 state->state = 0;
1437 break; 1482 break;
1483 case -EAGAIN:
1484 ssleep(1);
1438 case -NFS4ERR_ADMIN_REVOKED: 1485 case -NFS4ERR_ADMIN_REVOKED:
1439 case -NFS4ERR_STALE_STATEID: 1486 case -NFS4ERR_STALE_STATEID:
1440 case -NFS4ERR_BAD_STATEID: 1487 case -NFS4ERR_BAD_STATEID:
@@ -1696,6 +1743,10 @@ static int nfs4_check_lease(struct nfs_client *clp)
1696 } 1743 }
1697 status = ops->renew_lease(clp, cred); 1744 status = ops->renew_lease(clp, cred);
1698 put_rpccred(cred); 1745 put_rpccred(cred);
1746 if (status == -ETIMEDOUT) {
1747 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1748 return 0;
1749 }
1699out: 1750out:
1700 return nfs4_recovery_handle_error(clp, status); 1751 return nfs4_recovery_handle_error(clp, status);
1701} 1752}
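
The nfs4state.c changes above convert the stateid copy helpers into int-returning functions: -ENOENT means "no stateid from this source", and -EWOULDBLOCK signals that an OPEN or LOCK seqid operation is still outstanding, so the error is reported to the caller instead of being swallowed. A rough userspace sketch of the resulting fallback order in nfs4_select_rw_stateid() is below; every helper in it is an illustrative stub, not the kernel implementation.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

typedef struct { unsigned char data[16]; } stateid_t;

/* Illustrative stubs: no delegation held, no matching lock stateid. */
static bool copy_delegation_stateid(stateid_t *dst) { (void)dst; return false; }
static int copy_lock_stateid(stateid_t *dst) { (void)dst; return -ENOENT; }
static int copy_open_stateid(stateid_t *dst)
{
	memset(dst, 0, sizeof(*dst));	/* open stateid (or zero stateid) */
	return 0;
}

/* -ENOENT: try the next source; any other error is passed straight back. */
static int select_rw_stateid(stateid_t *dst)
{
	int ret;

	if (copy_delegation_stateid(dst))
		return 0;
	ret = copy_lock_stateid(dst);
	if (ret != -ENOENT)
		return ret;
	return copy_open_stateid(dst);
}

int main(void)
{
	stateid_t sid;

	printf("select_rw_stateid() = %d\n", select_rw_stateid(&sid));
	return 0;
}
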
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e3edda554ac7..c2cbf0d90a31 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1058,8 +1058,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1058 if (iap->ia_valid & ATTR_ATIME_SET) { 1058 if (iap->ia_valid & ATTR_ATIME_SET) {
1059 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 1059 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
1060 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1060 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
1061 *p++ = cpu_to_be32(0); 1061 p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
1062 *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
1063 *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); 1062 *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
1064 } 1063 }
1065 else if (iap->ia_valid & ATTR_ATIME) { 1064 else if (iap->ia_valid & ATTR_ATIME) {
@@ -1069,8 +1068,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1069 if (iap->ia_valid & ATTR_MTIME_SET) { 1068 if (iap->ia_valid & ATTR_MTIME_SET) {
1070 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 1069 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
1071 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1070 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
1072 *p++ = cpu_to_be32(0); 1071 p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
1073 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
1074 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); 1072 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
1075 } 1073 }
1076 else if (iap->ia_valid & ATTR_MTIME) { 1074 else if (iap->ia_valid & ATTR_MTIME) {
@@ -1366,33 +1364,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1366 1364
1367static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1365static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1368{ 1366{
1367 struct iattr dummy;
1369 __be32 *p; 1368 __be32 *p;
1370 struct nfs_client *clp;
1371 1369
1372 p = reserve_space(xdr, 4); 1370 p = reserve_space(xdr, 4);
1373 switch(arg->open_flags & O_EXCL) { 1371 switch(arg->createmode) {
1374 case 0: 1372 case NFS4_CREATE_UNCHECKED:
1375 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); 1373 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1376 encode_attrs(xdr, arg->u.attrs, arg->server); 1374 encode_attrs(xdr, arg->u.attrs, arg->server);
1377 break; 1375 break;
1378 default: 1376 case NFS4_CREATE_GUARDED:
1379 clp = arg->server->nfs_client; 1377 *p = cpu_to_be32(NFS4_CREATE_GUARDED);
1380 if (clp->cl_mvops->minor_version > 0) { 1378 encode_attrs(xdr, arg->u.attrs, arg->server);
1381 if (nfs4_has_persistent_session(clp)) { 1379 break;
1382 *p = cpu_to_be32(NFS4_CREATE_GUARDED); 1380 case NFS4_CREATE_EXCLUSIVE:
1383 encode_attrs(xdr, arg->u.attrs, arg->server); 1381 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1384 } else { 1382 encode_nfs4_verifier(xdr, &arg->u.verifier);
1385 struct iattr dummy; 1383 break;
1386 1384 case NFS4_CREATE_EXCLUSIVE4_1:
1387 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); 1385 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
1388 encode_nfs4_verifier(xdr, &arg->u.verifier); 1386 encode_nfs4_verifier(xdr, &arg->u.verifier);
1389 dummy.ia_valid = 0; 1387 dummy.ia_valid = 0;
1390 encode_attrs(xdr, &dummy, arg->server); 1388 encode_attrs(xdr, &dummy, arg->server);
1391 }
1392 } else {
1393 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1394 encode_nfs4_verifier(xdr, &arg->u.verifier);
1395 }
1396 } 1389 }
1397} 1390}
1398 1391
@@ -1459,6 +1452,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1459 encode_string(xdr, name->len, name->name); 1452 encode_string(xdr, name->len, name->name);
1460} 1453}
1461 1454
1455static inline void encode_claim_fh(struct xdr_stream *xdr)
1456{
1457 __be32 *p;
1458
1459 p = reserve_space(xdr, 4);
1460 *p = cpu_to_be32(NFS4_OPEN_CLAIM_FH);
1461}
1462
1463static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid)
1464{
1465 __be32 *p;
1466
1467 p = reserve_space(xdr, 4);
1468 *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH);
1469 encode_nfs4_stateid(xdr, stateid);
1470}
1471
1462static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) 1472static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
1463{ 1473{
1464 encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); 1474 encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
@@ -1474,6 +1484,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
1474 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 1484 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1475 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); 1485 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
1476 break; 1486 break;
1487 case NFS4_OPEN_CLAIM_FH:
1488 encode_claim_fh(xdr);
1489 break;
1490 case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1491 encode_claim_delegate_cur_fh(xdr, &arg->u.delegation);
1492 break;
1477 default: 1493 default:
1478 BUG(); 1494 BUG();
1479 } 1495 }
@@ -1506,35 +1522,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1506 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); 1522 encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
1507} 1523}
1508 1524
1509static void encode_open_stateid(struct xdr_stream *xdr,
1510 const struct nfs_open_context *ctx,
1511 const struct nfs_lock_context *l_ctx,
1512 fmode_t fmode,
1513 int zero_seqid)
1514{
1515 nfs4_stateid stateid;
1516
1517 if (ctx->state != NULL) {
1518 const struct nfs_lockowner *lockowner = NULL;
1519
1520 if (l_ctx != NULL)
1521 lockowner = &l_ctx->lockowner;
1522 nfs4_select_rw_stateid(&stateid, ctx->state,
1523 fmode, lockowner);
1524 if (zero_seqid)
1525 stateid.seqid = 0;
1526 encode_nfs4_stateid(xdr, &stateid);
1527 } else
1528 encode_nfs4_stateid(xdr, &zero_stateid);
1529}
1530
1531static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1525static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1532{ 1526{
1533 __be32 *p; 1527 __be32 *p;
1534 1528
1535 encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); 1529 encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
1536 encode_open_stateid(xdr, args->context, args->lock_context, 1530 encode_nfs4_stateid(xdr, &args->stateid);
1537 FMODE_READ, hdr->minorversion);
1538 1531
1539 p = reserve_space(xdr, 12); 1532 p = reserve_space(xdr, 12);
1540 p = xdr_encode_hyper(p, args->offset); 1533 p = xdr_encode_hyper(p, args->offset);
@@ -1670,8 +1663,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1670 __be32 *p; 1663 __be32 *p;
1671 1664
1672 encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); 1665 encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
1673 encode_open_stateid(xdr, args->context, args->lock_context, 1666 encode_nfs4_stateid(xdr, &args->stateid);
1674 FMODE_WRITE, hdr->minorversion);
1675 1667
1676 p = reserve_space(xdr, 16); 1668 p = reserve_space(xdr, 16);
1677 p = xdr_encode_hyper(p, args->offset); 1669 p = xdr_encode_hyper(p, args->offset);
@@ -3497,8 +3489,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3497 if (n == 0) 3489 if (n == 0)
3498 goto root_path; 3490 goto root_path;
3499 dprintk("pathname4: "); 3491 dprintk("pathname4: ");
3500 path->ncomponents = 0; 3492 if (n > NFS4_PATHNAME_MAXCOMPONENTS) {
3501 while (path->ncomponents < n) { 3493 dprintk("cannot parse %d components in path\n", n);
3494 goto out_eio;
3495 }
3496 for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) {
3502 struct nfs4_string *component = &path->components[path->ncomponents]; 3497 struct nfs4_string *component = &path->components[path->ncomponents];
3503 status = decode_opaque_inline(xdr, &component->len, &component->data); 3498 status = decode_opaque_inline(xdr, &component->len, &component->data);
3504 if (unlikely(status != 0)) 3499 if (unlikely(status != 0))
@@ -3507,12 +3502,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
3507 pr_cont("%s%.*s ", 3502 pr_cont("%s%.*s ",
3508 (path->ncomponents != n ? "/ " : ""), 3503 (path->ncomponents != n ? "/ " : ""),
3509 component->len, component->data); 3504 component->len, component->data);
3510 if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
3511 path->ncomponents++;
3512 else {
3513 dprintk("cannot parse %d components in path\n", n);
3514 goto out_eio;
3515 }
3516 } 3505 }
3517out: 3506out:
3518 return status; 3507 return status;
@@ -3557,27 +3546,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3557 n = be32_to_cpup(p); 3546 n = be32_to_cpup(p);
3558 if (n <= 0) 3547 if (n <= 0)
3559 goto out_eio; 3548 goto out_eio;
3560 res->nlocations = 0; 3549 for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
3561 while (res->nlocations < n) {
3562 u32 m; 3550 u32 m;
3563 struct nfs4_fs_location *loc = &res->locations[res->nlocations]; 3551 struct nfs4_fs_location *loc;
3564 3552
3553 if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES)
3554 break;
3555 loc = &res->locations[res->nlocations];
3565 p = xdr_inline_decode(xdr, 4); 3556 p = xdr_inline_decode(xdr, 4);
3566 if (unlikely(!p)) 3557 if (unlikely(!p))
3567 goto out_overflow; 3558 goto out_overflow;
3568 m = be32_to_cpup(p); 3559 m = be32_to_cpup(p);
3569 3560
3570 loc->nservers = 0;
3571 dprintk("%s: servers:\n", __func__); 3561 dprintk("%s: servers:\n", __func__);
3572 while (loc->nservers < m) { 3562 for (loc->nservers = 0; loc->nservers < m; loc->nservers++) {
3573 struct nfs4_string *server = &loc->servers[loc->nservers]; 3563 struct nfs4_string *server;
3574 status = decode_opaque_inline(xdr, &server->len, &server->data); 3564
3575 if (unlikely(status != 0)) 3565 if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) {
3576 goto out_eio;
3577 dprintk("%s ", server->data);
3578 if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
3579 loc->nservers++;
3580 else {
3581 unsigned int i; 3566 unsigned int i;
3582 dprintk("%s: using first %u of %u servers " 3567 dprintk("%s: using first %u of %u servers "
3583 "returned for location %u\n", 3568 "returned for location %u\n",
@@ -3591,13 +3576,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
3591 if (unlikely(status != 0)) 3576 if (unlikely(status != 0))
3592 goto out_eio; 3577 goto out_eio;
3593 } 3578 }
3579 break;
3594 } 3580 }
3581 server = &loc->servers[loc->nservers];
3582 status = decode_opaque_inline(xdr, &server->len, &server->data);
3583 if (unlikely(status != 0))
3584 goto out_eio;
3585 dprintk("%s ", server->data);
3595 } 3586 }
3596 status = decode_pathname(xdr, &loc->rootpath); 3587 status = decode_pathname(xdr, &loc->rootpath);
3597 if (unlikely(status != 0)) 3588 if (unlikely(status != 0))
3598 goto out_eio; 3589 goto out_eio;
3599 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
3600 res->nlocations++;
3601 } 3590 }
3602 if (res->nlocations != 0) 3591 if (res->nlocations != 0)
3603 status = NFS_ATTR_FATTR_V4_LOCATIONS; 3592 status = NFS_ATTR_FATTR_V4_LOCATIONS;
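
The decode_pathname() and decode_attr_fs_locations() hunks above move the array-bounds check ahead of the loop and reject an oversized on-the-wire count before touching the destination arrays, rather than checking on every iteration. A minimal sketch of that decode style, using a plain byte buffer in place of the kernel's xdr_stream and an illustrative limit, is:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAXCOMPONENTS 4		/* stand-in for NFS4_PATHNAME_MAXCOMPONENTS */

static int decode_u32(const uint8_t **p, size_t *left, uint32_t *out)
{
	uint32_t raw;

	if (*left < 4)
		return -1;
	memcpy(&raw, *p, 4);
	*out = ntohl(raw);
	*p += 4;
	*left -= 4;
	return 0;
}

static int decode_counted_items(const uint8_t *buf, size_t len)
{
	uint32_t n, v, i;

	if (decode_u32(&buf, &len, &n) < 0)
		return -1;
	if (n > MAXCOMPONENTS)		/* reject oversized counts up front */
		return -1;
	for (i = 0; i < n; i++) {
		if (decode_u32(&buf, &len, &v) < 0)
			return -1;
		printf("component %u = %u\n", i, v);
	}
	return 0;
}

int main(void)
{
	const uint8_t buf[] = { 0,0,0,2, 0,0,0,7, 0,0,0,9 };

	return decode_counted_items(buf, sizeof(buf)) ? 1 : 0;
}
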
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e56e846e9d2d..29cfb7ade121 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -84,6 +84,55 @@ nfs_page_free(struct nfs_page *p)
84 kmem_cache_free(nfs_page_cachep, p); 84 kmem_cache_free(nfs_page_cachep, p);
85} 85}
86 86
87static void
88nfs_iocounter_inc(struct nfs_io_counter *c)
89{
90 atomic_inc(&c->io_count);
91}
92
93static void
94nfs_iocounter_dec(struct nfs_io_counter *c)
95{
96 if (atomic_dec_and_test(&c->io_count)) {
97 clear_bit(NFS_IO_INPROGRESS, &c->flags);
98 smp_mb__after_clear_bit();
99 wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
100 }
101}
102
103static int
104__nfs_iocounter_wait(struct nfs_io_counter *c)
105{
106 wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
107 DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
108 int ret = 0;
109
110 do {
111 prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
112 set_bit(NFS_IO_INPROGRESS, &c->flags);
113 if (atomic_read(&c->io_count) == 0)
114 break;
115 ret = nfs_wait_bit_killable(&c->flags);
116 } while (atomic_read(&c->io_count) != 0);
117 finish_wait(wq, &q.wait);
118 return ret;
119}
120
121/**
122 * nfs_iocounter_wait - wait for i/o to complete
123 * @c: nfs_io_counter to use
124 *
125 * returns -ERESTARTSYS if interrupted by a fatal signal.
126 * Otherwise returns 0 once the io_count hits 0.
127 */
128int
129nfs_iocounter_wait(struct nfs_io_counter *c)
130{
131 if (atomic_read(&c->io_count) == 0)
132 return 0;
133 return __nfs_iocounter_wait(c);
134}
135
87/** 136/**
88 * nfs_create_request - Create an NFS read/write request. 137 * nfs_create_request - Create an NFS read/write request.
89 * @ctx: open context to use 138 * @ctx: open context to use
@@ -104,6 +153,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
104 struct nfs_page *req; 153 struct nfs_page *req;
105 struct nfs_lock_context *l_ctx; 154 struct nfs_lock_context *l_ctx;
106 155
156 if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
157 return ERR_PTR(-EBADF);
107 /* try to allocate the request struct */ 158 /* try to allocate the request struct */
108 req = nfs_page_alloc(); 159 req = nfs_page_alloc();
109 if (req == NULL) 160 if (req == NULL)
@@ -116,6 +167,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
116 return ERR_CAST(l_ctx); 167 return ERR_CAST(l_ctx);
117 } 168 }
118 req->wb_lock_context = l_ctx; 169 req->wb_lock_context = l_ctx;
170 nfs_iocounter_inc(&l_ctx->io_count);
119 171
120 /* Initialize the request struct. Initially, we assume a 172 /* Initialize the request struct. Initially, we assume a
121 * long write-back delay. This will be adjusted in 173 * long write-back delay. This will be adjusted in
@@ -175,6 +227,7 @@ static void nfs_clear_request(struct nfs_page *req)
175 req->wb_page = NULL; 227 req->wb_page = NULL;
176 } 228 }
177 if (l_ctx != NULL) { 229 if (l_ctx != NULL) {
230 nfs_iocounter_dec(&l_ctx->io_count);
178 nfs_put_lock_context(l_ctx); 231 nfs_put_lock_context(l_ctx);
179 req->wb_lock_context = NULL; 232 req->wb_lock_context = NULL;
180 } 233 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aad6258..c5bd758e5637 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
417 lo_seg_intersecting(lseg_range, recall_range); 417 lo_seg_intersecting(lseg_range, recall_range);
418} 418}
419 419
420static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
421 struct list_head *tmp_list)
422{
423 if (!atomic_dec_and_test(&lseg->pls_refcount))
424 return false;
425 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
426 list_add(&lseg->pls_list, tmp_list);
427 return true;
428}
429
420/* Returns 1 if lseg is removed from list, 0 otherwise */ 430/* Returns 1 if lseg is removed from list, 0 otherwise */
421static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 431static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
422 struct list_head *tmp_list) 432 struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
430 */ 440 */
431 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 441 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
432 atomic_read(&lseg->pls_refcount)); 442 atomic_read(&lseg->pls_refcount));
433 if (atomic_dec_and_test(&lseg->pls_refcount)) { 443 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
434 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
435 list_add(&lseg->pls_list, tmp_list);
436 rv = 1; 444 rv = 1;
437 }
438 } 445 }
439 return rv; 446 return rv;
440} 447}
@@ -711,6 +718,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
711 spin_lock(&lo->plh_inode->i_lock); 718 spin_lock(&lo->plh_inode->i_lock);
712 if (pnfs_layoutgets_blocked(lo, 1)) { 719 if (pnfs_layoutgets_blocked(lo, 1)) {
713 status = -EAGAIN; 720 status = -EAGAIN;
721 } else if (!nfs4_valid_open_stateid(open_state)) {
722 status = -EBADF;
714 } else if (list_empty(&lo->plh_segs)) { 723 } else if (list_empty(&lo->plh_segs)) {
715 int seq; 724 int seq;
716 725
@@ -777,6 +786,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
777 return lseg; 786 return lseg;
778} 787}
779 788
789static void pnfs_clear_layoutcommit(struct inode *inode,
790 struct list_head *head)
791{
792 struct nfs_inode *nfsi = NFS_I(inode);
793 struct pnfs_layout_segment *lseg, *tmp;
794
795 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
796 return;
797 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
798 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
799 continue;
800 pnfs_lseg_dec_and_remove_zero(lseg, head);
801 }
802}
803
780/* 804/*
781 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 805 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
782 * when the layout segment list is empty. 806 * when the layout segment list is empty.
@@ -808,6 +832,7 @@ _pnfs_return_layout(struct inode *ino)
808 /* Reference matched in nfs4_layoutreturn_release */ 832 /* Reference matched in nfs4_layoutreturn_release */
809 pnfs_get_layout_hdr(lo); 833 pnfs_get_layout_hdr(lo);
810 empty = list_empty(&lo->plh_segs); 834 empty = list_empty(&lo->plh_segs);
835 pnfs_clear_layoutcommit(ino, &tmp_list);
811 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 836 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
812 /* Don't send a LAYOUTRETURN if list was initially empty */ 837 /* Don't send a LAYOUTRETURN if list was initially empty */
813 if (empty) { 838 if (empty) {
@@ -820,8 +845,6 @@ _pnfs_return_layout(struct inode *ino)
820 spin_unlock(&ino->i_lock); 845 spin_unlock(&ino->i_lock);
821 pnfs_free_lseg_list(&tmp_list); 846 pnfs_free_lseg_list(&tmp_list);
822 847
823 WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
824
825 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 848 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
826 if (unlikely(lrp == NULL)) { 849 if (unlikely(lrp == NULL)) {
827 status = -ENOMEM; 850 status = -ENOMEM;
@@ -845,6 +868,33 @@ out:
845} 868}
846EXPORT_SYMBOL_GPL(_pnfs_return_layout); 869EXPORT_SYMBOL_GPL(_pnfs_return_layout);
847 870
871int
872pnfs_commit_and_return_layout(struct inode *inode)
873{
874 struct pnfs_layout_hdr *lo;
875 int ret;
876
877 spin_lock(&inode->i_lock);
878 lo = NFS_I(inode)->layout;
879 if (lo == NULL) {
880 spin_unlock(&inode->i_lock);
881 return 0;
882 }
883 pnfs_get_layout_hdr(lo);
884 /* Block new layoutgets and read/write to ds */
885 lo->plh_block_lgets++;
886 spin_unlock(&inode->i_lock);
887 filemap_fdatawait(inode->i_mapping);
888 ret = pnfs_layoutcommit_inode(inode, true);
889 if (ret == 0)
890 ret = _pnfs_return_layout(inode);
891 spin_lock(&inode->i_lock);
892 lo->plh_block_lgets--;
893 spin_unlock(&inode->i_lock);
894 pnfs_put_layout_hdr(lo);
895 return ret;
896}
897
848bool pnfs_roc(struct inode *ino) 898bool pnfs_roc(struct inode *ino)
849{ 899{
850 struct pnfs_layout_hdr *lo; 900 struct pnfs_layout_hdr *lo;
@@ -1458,7 +1508,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1458 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1508 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1459 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1509 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1460 PNFS_LAYOUTRET_ON_ERROR) { 1510 PNFS_LAYOUTRET_ON_ERROR) {
1461 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1462 pnfs_return_layout(hdr->inode); 1511 pnfs_return_layout(hdr->inode);
1463 } 1512 }
1464 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1513 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1662,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1613 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1662 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1614 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1663 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1615 PNFS_LAYOUTRET_ON_ERROR) { 1664 PNFS_LAYOUTRET_ON_ERROR) {
1616 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1617 pnfs_return_layout(hdr->inode); 1665 pnfs_return_layout(hdr->inode);
1618 } 1666 }
1619 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1667 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1794,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1746 1794
1747 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 1795 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1748 if (lseg->pls_range.iomode == IOMODE_RW && 1796 if (lseg->pls_range.iomode == IOMODE_RW &&
1749 test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 1797 test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1750 list_add(&lseg->pls_lc_list, listp); 1798 list_add(&lseg->pls_lc_list, listp);
1751 } 1799 }
1752} 1800}
1753 1801
1802static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
1803{
1804 struct pnfs_layout_segment *lseg, *tmp;
1805 unsigned long *bitlock = &NFS_I(inode)->flags;
1806
1807 /* Matched by references in pnfs_set_layoutcommit */
1808 list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
1809 list_del_init(&lseg->pls_lc_list);
1810 pnfs_put_lseg(lseg);
1811 }
1812
1813 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
1814 smp_mb__after_clear_bit();
1815 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
1816}
1817
1754void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 1818void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1755{ 1819{
1756 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 1820 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1859,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
1795 1859
1796 if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 1860 if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
1797 nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 1861 nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
1862 pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
1798} 1863}
1799 1864
1800/* 1865/*
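
pnfs_lseg_dec_and_remove_zero(), factored out at the top of the pnfs.c changes, lets mark_lseg_invalid() and the new pnfs_clear_layoutcommit() share one "drop a reference and unlink on zero" step, with the unlinked segment queued on a temporary list so it can be freed after the inode lock is released. A standalone sketch of that shape (singly linked list, C11 atomics; all names here are illustrative) looks like:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct lseg {
	atomic_int refcount;
	struct lseg *next;	/* linkage on the layout's segment list */
};

/* Drop one reference; only when it was the last, unlink the segment from
 * *list and queue it on *tmp_list for the caller to free later. */
static bool lseg_dec_and_remove_zero(struct lseg **list, struct lseg *seg,
				     struct lseg **tmp_list)
{
	struct lseg **pp;

	if (atomic_fetch_sub(&seg->refcount, 1) != 1)
		return false;
	for (pp = list; *pp != NULL; pp = &(*pp)->next) {
		if (*pp == seg) {
			*pp = seg->next;
			break;
		}
	}
	seg->next = *tmp_list;
	*tmp_list = seg;
	return true;
}

int main(void)
{
	struct lseg a;
	struct lseg *list = &a, *tmp = NULL;

	a.next = NULL;
	atomic_init(&a.refcount, 1);
	printf("removed = %d\n", lseg_dec_and_remove_zero(&list, &a, &tmp));
	return 0;
}
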
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba80417748..f5f8a470a647 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
220int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 220int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
221int _pnfs_return_layout(struct inode *); 221int _pnfs_return_layout(struct inode *);
222int pnfs_commit_and_return_layout(struct inode *);
222void pnfs_ld_write_done(struct nfs_write_data *); 223void pnfs_ld_write_done(struct nfs_write_data *);
223void pnfs_ld_read_done(struct nfs_read_data *); 224void pnfs_ld_read_done(struct nfs_read_data *);
224struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 225struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
407 return 0; 408 return 0;
408} 409}
409 410
411static inline int pnfs_commit_and_return_layout(struct inode *inode)
412{
413 return 0;
414}
415
410static inline bool 416static inline bool
411pnfs_ld_layoutret_on_setattr(struct inode *inode) 417pnfs_ld_layoutret_on_setattr(struct inode *inode)
412{ 418{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a5e5d9899d56..70a26c651f09 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -514,6 +514,8 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
514{ 514{
515 struct nfs_read_data *data = calldata; 515 struct nfs_read_data *data = calldata;
516 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); 516 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
517 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
518 rpc_exit(task, -EIO);
517} 519}
518 520
519static const struct rpc_call_ops nfs_read_common_ops = { 521static const struct rpc_call_ops nfs_read_common_ops = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c483cc50b82e..a2c7c28049d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1251,6 +1251,8 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata)
1251{ 1251{
1252 struct nfs_write_data *data = calldata; 1252 struct nfs_write_data *data = calldata;
1253 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); 1253 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1254 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
1255 rpc_exit(task, -EIO);
1254} 1256}
1255 1257
1256void nfs_commit_prepare(struct rpc_task *task, void *calldata) 1258void nfs_commit_prepare(struct rpc_task *task, void *calldata)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1cc25682b20b..fc01d5cb4cf1 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -59,11 +59,18 @@ struct nfs_lockowner {
59 pid_t l_pid; 59 pid_t l_pid;
60}; 60};
61 61
62#define NFS_IO_INPROGRESS 0
63struct nfs_io_counter {
64 unsigned long flags;
65 atomic_t io_count;
66};
67
62struct nfs_lock_context { 68struct nfs_lock_context {
63 atomic_t count; 69 atomic_t count;
64 struct list_head list; 70 struct list_head list;
65 struct nfs_open_context *open_context; 71 struct nfs_open_context *open_context;
66 struct nfs_lockowner lockowner; 72 struct nfs_lockowner lockowner;
73 struct nfs_io_counter io_count;
67}; 74};
68 75
69struct nfs4_state; 76struct nfs4_state;
@@ -77,6 +84,7 @@ struct nfs_open_context {
77 unsigned long flags; 84 unsigned long flags;
78#define NFS_CONTEXT_ERROR_WRITE (0) 85#define NFS_CONTEXT_ERROR_WRITE (0)
79#define NFS_CONTEXT_RESEND_WRITES (1) 86#define NFS_CONTEXT_RESEND_WRITES (1)
87#define NFS_CONTEXT_BAD (2)
80 int error; 88 int error;
81 89
82 struct list_head list; 90 struct list_head list;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6c6ed153a9b4..3b7fa2abecca 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -40,6 +40,7 @@ struct nfs_client {
40#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ 40#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
41#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ 41#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
42#define NFS_CS_MIGRATION 2 /* - transparent state migr */ 42#define NFS_CS_MIGRATION 2 /* - transparent state migr */
43#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
43 struct sockaddr_storage cl_addr; /* server identifier */ 44 struct sockaddr_storage cl_addr; /* server identifier */
44 size_t cl_addrlen; 45 size_t cl_addrlen;
45 char * cl_hostname; /* hostname of server */ 46 char * cl_hostname; /* hostname of server */
@@ -197,5 +198,7 @@ struct nfs_server {
197#define NFS_CAP_MTIME (1U << 13) 198#define NFS_CAP_MTIME (1U << 13)
198#define NFS_CAP_POSIX_LOCK (1U << 14) 199#define NFS_CAP_POSIX_LOCK (1U << 14)
199#define NFS_CAP_UIDGID_NOMAP (1U << 15) 200#define NFS_CAP_UIDGID_NOMAP (1U << 15)
201#define NFS_CAP_STATEID_NFSV41 (1U << 16)
202#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17)
200 203
201#endif 204#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4b993d358dad..bdc100f66dfb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -349,6 +349,7 @@ struct nfs_openargs {
349 const u32 * bitmask; 349 const u32 * bitmask;
350 const u32 * open_bitmap; 350 const u32 * open_bitmap;
351 __u32 claim; 351 __u32 claim;
352 enum createmode4 createmode;
352}; 353};
353 354
354struct nfs_openres { 355struct nfs_openres {
@@ -486,6 +487,7 @@ struct nfs_readargs {
486 struct nfs_fh * fh; 487 struct nfs_fh * fh;
487 struct nfs_open_context *context; 488 struct nfs_open_context *context;
488 struct nfs_lock_context *lock_context; 489 struct nfs_lock_context *lock_context;
490 nfs4_stateid stateid;
489 __u64 offset; 491 __u64 offset;
490 __u32 count; 492 __u32 count;
491 unsigned int pgbase; 493 unsigned int pgbase;
@@ -507,6 +509,7 @@ struct nfs_writeargs {
507 struct nfs_fh * fh; 509 struct nfs_fh * fh;
508 struct nfs_open_context *context; 510 struct nfs_open_context *context;
509 struct nfs_lock_context *lock_context; 511 struct nfs_lock_context *lock_context;
512 nfs4_stateid stateid;
510 __u64 offset; 513 __u64 offset;
511 __u32 count; 514 __u32 count;
512 enum nfs3_stable_how stable; 515 enum nfs3_stable_how stable;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 2cf4ffaa3cd4..e7d492ce7c18 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -124,6 +124,7 @@ struct rpc_create_args {
124#define RPC_CLNT_CREATE_NOPING (1UL << 4) 124#define RPC_CLNT_CREATE_NOPING (1UL << 4)
125#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) 125#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5)
126#define RPC_CLNT_CREATE_QUIET (1UL << 6) 126#define RPC_CLNT_CREATE_QUIET (1UL << 6)
127#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
127 128
128struct rpc_clnt *rpc_create(struct rpc_create_args *args); 129struct rpc_clnt *rpc_create(struct rpc_create_args *args);
129struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, 130struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
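
The RPC_CLNT_CREATE_INFINITE_SLOTS flag defined above is consumed by rpc_create() (see the net/sunrpc/clnt.c hunk below), which translates it into XPRT_CREATE_INFINITE_SLOTS so that xs_setup_tcp() sizes the TCP slot table to RPC_MAX_SLOT_TABLE_LIMIT instead of the sysctl-limited xprt_max_tcp_slot_table_entries. A hedged sketch of a caller opting in; everything other than the flag itself (the address, server name, program and version) is a placeholder for the example:

	struct rpc_create_args args = {
		.net		= &init_net,
		.protocol	= XPRT_TRANSPORT_TCP,
		.address	= (struct sockaddr *)&server_addr,
		.addrsize	= sizeof(server_addr),
		.servername	= "server.example.com",
		.program	= &example_rpc_program,
		.version	= 1,
		.authflavor	= RPC_AUTH_UNIX,
		.flags		= RPC_CLNT_CREATE_INFINITE_SLOTS,
	};
	struct rpc_clnt *clnt = rpc_create(&args);

	if (IS_ERR(clnt))
		return PTR_ERR(clnt);
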
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 30834be03011..ff5392421cb2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -255,6 +255,8 @@ static inline int bc_prealloc(struct rpc_rqst *req)
255} 255}
256#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 256#endif /* CONFIG_SUNRPC_BACKCHANNEL */
257 257
258#define XPRT_CREATE_INFINITE_SLOTS (1U)
259
258struct xprt_create { 260struct xprt_create {
259 int ident; /* XPRT_TRANSPORT identifier */ 261 int ident; /* XPRT_TRANSPORT identifier */
260 struct net * net; 262 struct net * net;
@@ -263,6 +265,7 @@ struct xprt_create {
263 size_t addrlen; 265 size_t addrlen;
264 const char *servername; 266 const char *servername;
265 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 267 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
268 unsigned int flags;
266}; 269};
267 270
268struct xprt_class { 271struct xprt_class {
@@ -279,6 +282,7 @@ struct xprt_class {
279struct rpc_xprt *xprt_create_transport(struct xprt_create *args); 282struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
280void xprt_connect(struct rpc_task *task); 283void xprt_connect(struct rpc_task *task);
281void xprt_reserve(struct rpc_task *task); 284void xprt_reserve(struct rpc_task *task);
285void xprt_retry_reserve(struct rpc_task *task);
282int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); 286int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
283int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); 287int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
284void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); 288void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
@@ -334,6 +338,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable);
334#define XPRT_CLOSING (6) 338#define XPRT_CLOSING (6)
335#define XPRT_CONNECTION_ABORT (7) 339#define XPRT_CONNECTION_ABORT (7)
336#define XPRT_CONNECTION_CLOSE (8) 340#define XPRT_CONNECTION_CLOSE (8)
341#define XPRT_CONGESTED (9)
337 342
338static inline void xprt_set_connected(struct rpc_xprt *xprt) 343static inline void xprt_set_connected(struct rpc_xprt *xprt)
339{ 344{
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 516fe2caac2c..262caf03bd5f 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -24,7 +24,6 @@ config SUNRPC_XPRT_RDMA
24config SUNRPC_SWAP 24config SUNRPC_SWAP
25 bool 25 bool
26 depends on SUNRPC 26 depends on SUNRPC
27 select NETVM
28 27
29config RPCSEC_GSS_KRB5 28config RPCSEC_GSS_KRB5
30 tristate "Secure RPC: Kerberos V mechanism" 29 tristate "Secure RPC: Kerberos V mechanism"
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index dcc446e7fbf6..651245aa829a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -414,6 +414,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
414 }; 414 };
415 char servername[48]; 415 char servername[48];
416 416
417 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
418 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
417 /* 419 /*
418 * If the caller chooses not to specify a hostname, whip 420 * If the caller chooses not to specify a hostname, whip
419 * up a string representation of the passed-in address. 421 * up a string representation of the passed-in address.
@@ -1306,6 +1308,8 @@ call_reserve(struct rpc_task *task)
1306 xprt_reserve(task); 1308 xprt_reserve(task);
1307} 1309}
1308 1310
1311static void call_retry_reserve(struct rpc_task *task);
1312
1309/* 1313/*
1310 * 1b. Grok the result of xprt_reserve() 1314 * 1b. Grok the result of xprt_reserve()
1311 */ 1315 */
@@ -1347,7 +1351,7 @@ call_reserveresult(struct rpc_task *task)
1347 case -ENOMEM: 1351 case -ENOMEM:
1348 rpc_delay(task, HZ >> 2); 1352 rpc_delay(task, HZ >> 2);
1349 case -EAGAIN: /* woken up; retry */ 1353 case -EAGAIN: /* woken up; retry */
1350 task->tk_action = call_reserve; 1354 task->tk_action = call_retry_reserve;
1351 return; 1355 return;
1352 case -EIO: /* probably a shutdown */ 1356 case -EIO: /* probably a shutdown */
1353 break; 1357 break;
@@ -1360,6 +1364,19 @@ call_reserveresult(struct rpc_task *task)
1360} 1364}
1361 1365
1362/* 1366/*
1367 * 1c. Retry reserving an RPC call slot
1368 */
1369static void
1370call_retry_reserve(struct rpc_task *task)
1371{
1372 dprint_status(task);
1373
1374 task->tk_status = 0;
1375 task->tk_action = call_reserveresult;
1376 xprt_retry_reserve(task);
1377}
1378
1379/*
1363 * 2. Bind and/or refresh the credentials 1380 * 2. Bind and/or refresh the credentials
1364 */ 1381 */
1365static void 1382static void
@@ -1644,22 +1661,26 @@ call_connect_status(struct rpc_task *task)
1644 1661
1645 dprint_status(task); 1662 dprint_status(task);
1646 1663
1647 task->tk_status = 0;
1648 if (status >= 0 || status == -EAGAIN) {
1649 clnt->cl_stats->netreconn++;
1650 task->tk_action = call_transmit;
1651 return;
1652 }
1653
1654 trace_rpc_connect_status(task, status); 1664 trace_rpc_connect_status(task, status);
1655 switch (status) { 1665 switch (status) {
1656 /* if soft mounted, test if we've timed out */ 1666 /* if soft mounted, test if we've timed out */
1657 case -ETIMEDOUT: 1667 case -ETIMEDOUT:
1658 task->tk_action = call_timeout; 1668 task->tk_action = call_timeout;
1659 break; 1669 return;
1660 default: 1670 case -ECONNREFUSED:
1661 rpc_exit(task, -EIO); 1671 case -ECONNRESET:
1672 case -ENETUNREACH:
1673 if (RPC_IS_SOFTCONN(task))
1674 break;
1675 /* retry with existing socket, after a delay */
1676 case 0:
1677 case -EAGAIN:
1678 task->tk_status = 0;
1679 clnt->cl_stats->netreconn++;
1680 task->tk_action = call_transmit;
1681 return;
1662 } 1682 }
1683 rpc_exit(task, status);
1663} 1684}
1664 1685
1665/* 1686/*
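
Two behavioural points fall out of the call_connect_status() rework above: a soft-connect task that hits ECONNREFUSED, ECONNRESET or ENETUNREACH now fails with the real error via rpc_exit(task, status) rather than a blanket -EIO, while an ordinary task falls through to call_transmit and retries over the existing transport. The matching hunk in net/sunrpc/xprtsock.c at the end of this diff regroups the same three errors in xs_tcp_setup_socket(), so the connect worker passes the error back to the waiting RPC tasks rather than looping on the retry itself.
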
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index fb20f25ddec9..f8529fc8e542 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
180 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); 180 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
181 task->tk_waitqueue = queue; 181 task->tk_waitqueue = queue;
182 queue->qlen++; 182 queue->qlen++;
183 /* barrier matches the read in rpc_wake_up_task_queue_locked() */
184 smp_wmb();
183 rpc_set_queued(task); 185 rpc_set_queued(task);
184 186
185 dprintk("RPC: %5u added to queue %p \"%s\"\n", 187 dprintk("RPC: %5u added to queue %p \"%s\"\n",
@@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
430 */ 432 */
431static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) 433static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
432{ 434{
433 if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) 435 if (RPC_IS_QUEUED(task)) {
434 __rpc_do_wake_up_task(queue, task); 436 smp_rmb();
437 if (task->tk_waitqueue == queue)
438 __rpc_do_wake_up_task(queue, task);
439 }
435} 440}
436 441
437/* 442/*
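
The barriers added above are a matched pair: __rpc_add_wait_queue() stores task->tk_waitqueue and then issues smp_wmb() before rpc_set_queued() sets the queued bit, while rpc_wake_up_task_queue_locked() first tests RPC_IS_QUEUED() and only then, after smp_rmb(), trusts the tk_waitqueue it reads. A generic sketch of that publish/consume pattern (the writer/reader/payload/published names are illustrative, not SUNRPC code):

	static int payload;
	static unsigned long published;

	static void writer(void)
	{
		payload = 42;			/* cf. task->tk_waitqueue = queue    */
		smp_wmb();			/* order payload before the flag bit */
		set_bit(0, &published);		/* cf. rpc_set_queued(task)          */
	}

	static void reader(void)
	{
		if (test_bit(0, &published)) {	/* cf. RPC_IS_QUEUED(task)           */
			smp_rmb();		/* pairs with smp_wmb() in writer()  */
			WARN_ON(payload != 42);	/* cf. task->tk_waitqueue == queue   */
		}
	}
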
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b7478d5e7ffd..745fca3cfd36 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task)
948 spin_unlock_bh(&xprt->transport_lock); 948 spin_unlock_bh(&xprt->transport_lock);
949} 949}
950 950
951static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
952{
953 set_bit(XPRT_CONGESTED, &xprt->state);
954 rpc_sleep_on(&xprt->backlog, task, NULL);
955}
956
957static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
958{
959 if (rpc_wake_up_next(&xprt->backlog) == NULL)
960 clear_bit(XPRT_CONGESTED, &xprt->state);
961}
962
963static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
964{
965 bool ret = false;
966
967 if (!test_bit(XPRT_CONGESTED, &xprt->state))
968 goto out;
969 spin_lock(&xprt->reserve_lock);
970 if (test_bit(XPRT_CONGESTED, &xprt->state)) {
971 rpc_sleep_on(&xprt->backlog, task, NULL);
972 ret = true;
973 }
974 spin_unlock(&xprt->reserve_lock);
975out:
976 return ret;
977}
978
951static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) 979static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
952{ 980{
953 struct rpc_rqst *req = ERR_PTR(-EAGAIN); 981 struct rpc_rqst *req = ERR_PTR(-EAGAIN);
@@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
992 task->tk_status = -ENOMEM; 1020 task->tk_status = -ENOMEM;
993 break; 1021 break;
994 case -EAGAIN: 1022 case -EAGAIN:
995 rpc_sleep_on(&xprt->backlog, task, NULL); 1023 xprt_add_backlog(xprt, task);
996 dprintk("RPC: waiting for request slot\n"); 1024 dprintk("RPC: waiting for request slot\n");
997 default: 1025 default:
998 task->tk_status = -EAGAIN; 1026 task->tk_status = -EAGAIN;
@@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
1028 memset(req, 0, sizeof(*req)); /* mark unused */ 1056 memset(req, 0, sizeof(*req)); /* mark unused */
1029 list_add(&req->rq_list, &xprt->free); 1057 list_add(&req->rq_list, &xprt->free);
1030 } 1058 }
1031 rpc_wake_up_next(&xprt->backlog); 1059 xprt_wake_up_backlog(xprt);
1032 spin_unlock(&xprt->reserve_lock); 1060 spin_unlock(&xprt->reserve_lock);
1033} 1061}
1034 1062
@@ -1092,7 +1120,8 @@ EXPORT_SYMBOL_GPL(xprt_free);
1092 * xprt_reserve - allocate an RPC request slot 1120 * xprt_reserve - allocate an RPC request slot
1093 * @task: RPC task requesting a slot allocation 1121 * @task: RPC task requesting a slot allocation
1094 * 1122 *
1095 * If no more slots are available, place the task on the transport's 1123 * If the transport is marked as being congested, or if no more
1124 * slots are available, place the task on the transport's
1096 * backlog queue. 1125 * backlog queue.
1097 */ 1126 */
1098void xprt_reserve(struct rpc_task *task) 1127void xprt_reserve(struct rpc_task *task)
@@ -1107,6 +1136,32 @@ void xprt_reserve(struct rpc_task *task)
1107 task->tk_status = -EAGAIN; 1136 task->tk_status = -EAGAIN;
1108 rcu_read_lock(); 1137 rcu_read_lock();
1109 xprt = rcu_dereference(task->tk_client->cl_xprt); 1138 xprt = rcu_dereference(task->tk_client->cl_xprt);
1139 if (!xprt_throttle_congested(xprt, task))
1140 xprt->ops->alloc_slot(xprt, task);
1141 rcu_read_unlock();
1142}
1143
1144/**
1145 * xprt_retry_reserve - allocate an RPC request slot
1146 * @task: RPC task requesting a slot allocation
1147 *
1148 * If no more slots are available, place the task on the transport's
1149 * backlog queue.
1150 * Note that the only difference with xprt_reserve is that we now
1151 * ignore the value of the XPRT_CONGESTED flag.
1152 */
1153void xprt_retry_reserve(struct rpc_task *task)
1154{
1155 struct rpc_xprt *xprt;
1156
1157 task->tk_status = 0;
1158 if (task->tk_rqstp != NULL)
1159 return;
1160
1161 task->tk_timeout = 0;
1162 task->tk_status = -EAGAIN;
1163 rcu_read_lock();
1164 xprt = rcu_dereference(task->tk_client->cl_xprt);
1110 xprt->ops->alloc_slot(xprt, task); 1165 xprt->ops->alloc_slot(xprt, task);
1111 rcu_read_unlock(); 1166 rcu_read_unlock();
1112} 1167}
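
Taken together, the xprt.c hunks above implement backlog throttling: xprt_add_backlog() sets XPRT_CONGESTED when a task has to sleep waiting for a slot, xprt_throttle_congested() then makes every later xprt_reserve() caller queue behind the existing waiters instead of racing them for the next freed slot, and xprt_wake_up_backlog() clears the bit only once the backlog is empty. A task woken from the backlog retries through call_retry_reserve()/xprt_retry_reserve(), which deliberately skips the congestion test so it cannot be parked again behind newer tasks. Condensed flow, using only names from the hunks above:

	first attempt : call_reserve()       -> xprt_reserve()        /* honours XPRT_CONGESTED */
	after wake-up : call_retry_reserve() -> xprt_retry_reserve()  /* ignores XPRT_CONGESTED */
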
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d02130828da..9c2825827dec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2207,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2207 */ 2207 */
2208 xs_tcp_force_close(xprt); 2208 xs_tcp_force_close(xprt);
2209 break; 2209 break;
2210 case -ECONNREFUSED:
2211 case -ECONNRESET:
2212 case -ENETUNREACH:
2213 /* retry with existing socket, after a delay */
2214 case 0: 2210 case 0:
2215 case -EINPROGRESS: 2211 case -EINPROGRESS:
2216 case -EALREADY: 2212 case -EALREADY:
@@ -2221,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2221 /* Happens, for instance, if the user specified a link 2217 /* Happens, for instance, if the user specified a link
2222 * local IPv6 address without a scope-id. 2218 * local IPv6 address without a scope-id.
2223 */ 2219 */
2220 case -ECONNREFUSED:
2221 case -ECONNRESET:
2222 case -ENETUNREACH:
2223 /* retry with existing socket, after a delay */
2224 goto out; 2224 goto out;
2225 } 2225 }
2226out_eagain: 2226out_eagain:
@@ -2767,9 +2767,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2767 struct rpc_xprt *xprt; 2767 struct rpc_xprt *xprt;
2768 struct sock_xprt *transport; 2768 struct sock_xprt *transport;
2769 struct rpc_xprt *ret; 2769 struct rpc_xprt *ret;
2770 unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
2771
2772 if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
2773 max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
2770 2774
2771 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 2775 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
2772 xprt_max_tcp_slot_table_entries); 2776 max_slot_table_size);
2773 if (IS_ERR(xprt)) 2777 if (IS_ERR(xprt))
2774 return xprt; 2778 return xprt;
2775 transport = container_of(xprt, struct sock_xprt, xprt); 2779 transport = container_of(xprt, struct sock_xprt, xprt);