author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-18 14:22:04 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-18 14:22:04 -0500
commit     4dd3c2e5a4225e3df85afc6033e62ce8b09f0ed2 (patch)
tree       3d1dac5206550994b161eaab8ac73828f410228a
parent     07c455ee222f3ad219c2835d05a175a326a138fb (diff)
parent     22700f3c6df55387cec2ee27c533a7b23c76dc51 (diff)
Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
 "Lots of good bugfixes, including:

   - fix a number of races in the NFSv4+ state code

   - fix some shutdown crashes in multiple-network-namespace cases

   - relax our 4.1 session limits; if you've an artificially low limit
     to the number of 4.1 clients that can mount simultaneously, try
     upgrading"

* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
  SUNRPC: Improve ordering of transport processing
  nfsd: deal with revoked delegations appropriately
  svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
  nfsd: use nfs->ns.inum as net ID
  rpc: remove some BUG()s
  svcrdma: Preserve CB send buffer across retransmits
  nfds: avoid gettimeofday for nfssvc_boot time
  fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
  fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
  fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
  lockd: double unregister of inetaddr notifiers
  nfsd4: catch some false session retries
  nfsd4: fix cached replies to solo SEQUENCE compounds
  sunrcp: make function _svc_create_xprt static
  SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
  nfsd: use ARRAY_SIZE
  nfsd: give out fewer session slots as limit approaches
  nfsd: increase DRC cache limit
  nfsd: remove unnecessary nofilehandle checks
  nfs_common: convert int to bool
  ...
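Aside: three commits in this series convert nfsd reference counts from atomic_t to refcount_t. A minimal userspace model of the semantics that conversion buys (illustrative only; the real kernel type in <linux/refcount.h> is atomic and warns through the refcount API, whereas this sketch is single-threaded):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace model of a saturating refcount: unlike a plain atomic_t,
 * it refuses to wrap past its maximum, turning a potential
 * use-after-free into a leak plus a warning. Not thread-safe. */
struct refcount { unsigned int val; };

static void refcount_inc(struct refcount *r)
{
	if (r->val == UINT_MAX) {	/* saturated: warn, don't wrap */
		fprintf(stderr, "refcount saturated\n");
		return;
	}
	r->val++;
}

static bool refcount_dec_and_test(struct refcount *r)
{
	return --r->val == 0;	/* caller frees the object on true */
}

int main(void)
{
	struct refcount ref = { 1 };	/* initial reference */

	refcount_inc(&ref);				/* second user */
	printf("drop 1: free=%d\n", refcount_dec_and_test(&ref));
	printf("drop 2: free=%d\n", refcount_dec_and_test(&ref));
	return 0;
}
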
-rw-r--r--  fs/lockd/svc.c                               20
-rw-r--r--  fs/nfs_common/grace.c                        24
-rw-r--r--  fs/nfsd/fault_inject.c                        5
-rw-r--r--  fs/nfsd/netns.h                               2
-rw-r--r--  fs/nfsd/nfs3xdr.c                            10
-rw-r--r--  fs/nfsd/nfs4layouts.c                         4
-rw-r--r--  fs/nfsd/nfs4proc.c                           19
-rw-r--r--  fs/nfsd/nfs4state.c                         127
-rw-r--r--  fs/nfsd/nfssvc.c                              4
-rw-r--r--  fs/nfsd/state.h                              11
-rw-r--r--  fs/nfsd/xdr4.h                               13
-rw-r--r--  include/linux/fs.h                            4
-rw-r--r--  include/linux/sunrpc/svc.h                    1
-rw-r--r--  include/trace/events/sunrpc.h                17
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c            14
-rw-r--r--  net/sunrpc/svc_xprt.c                       106
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c    6
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c     11
18 files changed, 225 insertions(+), 173 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b837fb7e290a..a8e3777c94dc 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv)
 		printk(KERN_WARNING
 			"lockd_up: svc_rqst allocation failed, error=%d\n",
 			error);
+		lockd_unregister_notifiers();
 		goto out_rqst;
 	}
 
@@ -459,13 +460,16 @@ int lockd_up(struct net *net)
 	}
 
 	error = lockd_up_net(serv, net);
-	if (error < 0)
-		goto err_net;
+	if (error < 0) {
+		lockd_unregister_notifiers();
+		goto err_put;
+	}
 
 	error = lockd_start_svc(serv);
-	if (error < 0)
-		goto err_start;
-
+	if (error < 0) {
+		lockd_down_net(serv, net);
+		goto err_put;
+	}
 	nlmsvc_users++;
 	/*
 	 * Note: svc_serv structures have an initial use count of 1,
@@ -476,12 +480,6 @@ err_put:
 err_create:
 	mutex_unlock(&nlmsvc_mutex);
 	return error;
-
-err_start:
-	lockd_down_net(serv, net);
-err_net:
-	lockd_unregister_notifiers();
-	goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
 
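Aside: the lockd_up() change above fixes a double unregister of the inetaddr notifiers by doing cleanup at each failure site rather than through a chain of shared error labels. A standalone sketch of that unwind style; setup_a/setup_b/undo_a are hypothetical stand-ins for the notifier and per-net setup:

#include <stdio.h>

static int setup_a(void) { return 0; }		/* succeeds */
static int setup_b(void) { return -1; }		/* simulated failure */
static void undo_a(void) { puts("undo_a"); }

static int bring_up(void)
{
	int error;

	error = setup_a();
	if (error < 0)
		goto err;			/* nothing to undo yet */

	error = setup_b();
	if (error < 0) {
		undo_a();	/* cleanup owned by this failure site */
		goto err;
	}
	return 0;
err:
	fprintf(stderr, "bring_up failed: %d\n", error);
	return error;
}

int main(void)
{
	return bring_up() ? 1 : 0;
}

Because each site undoes only what has already succeeded, no cleanup helper can run twice, which is exactly the bug the shared err_net label made possible.
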
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 420d3a0ab258..897b299db55e 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm)
 }
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
-int
+static bool
 __state_in_grace(struct net *net, bool open)
 {
 	struct list_head *grace_list = net_generic(net, grace_net_id);
@@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open)
 	return false;
 }
 
-int locks_in_grace(struct net *net)
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
+bool locks_in_grace(struct net *net)
 {
-	return __state_in_grace(net, 0);
+	return __state_in_grace(net, false);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
-int opens_in_grace(struct net *net)
+bool opens_in_grace(struct net *net)
 {
-	return __state_in_grace(net, 1);
+	return __state_in_grace(net, true);
 }
 EXPORT_SYMBOL_GPL(opens_in_grace);
 
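Aside: locks_in_grace() and opens_in_grace() now return bool rather than int. A minimal userspace model of the call pattern; the per-net grace-list walk is replaced by a stand-in flag:

#include <stdbool.h>
#include <stdio.h>

static bool __state_in_grace(bool net_in_grace, bool open)
{
	(void)open;		/* selects open-grace vs. lock-grace list */
	return net_in_grace;	/* stand-in for the per-net list walk */
}

static bool locks_in_grace(bool net_in_grace)
{
	return __state_in_grace(net_in_grace, false);
}

static bool opens_in_grace(bool net_in_grace)
{
	return __state_in_grace(net_in_grace, true);
}

int main(void)
{
	if (locks_in_grace(true))
		puts("grace period: accept only lock reclaims");
	if (!opens_in_grace(false))
		puts("normal operation: answer ordinary requests");
	return 0;
}
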
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 6dfede6d172a..84831253203d 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -12,6 +12,7 @@
 #include <linux/nsproxy.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/uaccess.h>
+#include <linux/kernel.h>
 
 #include "state.h"
 #include "netns.h"
@@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
 	},
 };
 
-#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
-
 int nfsd_fault_inject_init(void)
 {
 	unsigned int i;
@@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void)
 	if (!debug_dir)
 		goto fail;
 
-	for (i = 0; i < NUM_INJECT_OPS; i++) {
+	for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
 		op = &inject_ops[i];
 		if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
 			goto fail;
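Aside: ARRAY_SIZE() replaces the open-coded NUM_INJECT_OPS macro, so the table can grow without a separate count to keep in sync. The idiom in standalone form (the op names here are placeholders):

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct op { const char *file; };

static struct op inject_ops[] = {
	{ "forget_clients" },
	{ "forget_locks" },
	{ "forget_openowners" },
};

int main(void)
{
	/* element count = total size / size of one element */
	for (size_t i = 0; i < ARRAY_SIZE(inject_ops); i++)
		printf("op %zu: %s\n", i, inject_ops[i].file);
	return 0;
}
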
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231a9d0f..1c91391f4805 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -107,7 +107,7 @@ struct nfsd_net {
 	bool lockd_up;
 
 	/* Time of server startup */
-	struct timeval nfssvc_boot;
+	struct timespec64 nfssvc_boot;
 
 	/*
 	 * Max number of connections this nfsd container will allow. Defaults
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f38acd905441..2758480555fa 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
-		*p++ = htonl(nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_usec);
+		/* unique identifier, y2038 overflow can be ignored */
+		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
@@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	/* Write verifier */
 	if (resp->status == 0) {
-		*p++ = htonl(nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_usec);
+		/* unique identifier, y2038 overflow can be ignored */
+		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
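Aside: the write verifier encoded above only has to change across server reboots, so truncating the 64-bit boot seconds to 32 bits stays correct even after 2038. A runnable illustration with made-up timestamps:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t boot_sec = 4294967296LL + 12345;  /* past the u32 wrap */
	long boot_nsec = 678901234;
	uint32_t verf[2];

	verf[0] = htonl((uint32_t)boot_sec);	/* truncated, still unique */
	verf[1] = htonl((uint32_t)boot_nsec);
	printf("verifier: %08x%08x\n",
	       (unsigned)ntohl(verf[0]), (unsigned)ntohl(verf[1]));
	return 0;
}
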
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ea45d954e8d7..7d888369f85a 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
 
 	trace_layout_recall(&ls->ls_stid.sc_stateid);
 
-	atomic_inc(&ls->ls_stid.sc_count);
+	refcount_inc(&ls->ls_stid.sc_count);
 	nfsd4_run_cb(&ls->ls_recall);
 
 out_unlock:
@@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
 		goto done;
 	}
 
-	atomic_inc(&ls->ls_stid.sc_count);
+	refcount_inc(&ls->ls_stid.sc_count);
 	list_add_tail(&new->lo_perstate, &ls->ls_layouts);
 	new = NULL;
 done:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8487486ec496..008ea0b627d0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -485,9 +485,6 @@ static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
 {
-	if (!cstate->current_fh.fh_dentry)
-		return nfserr_nofilehandle;
-
 	u->getfh = &cstate->current_fh;
 	return nfs_ok;
 }
@@ -535,9 +532,6 @@ static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
-	if (!cstate->current_fh.fh_dentry)
-		return nfserr_nofilehandle;
-
 	fh_dup2(&cstate->save_fh, &cstate->current_fh);
 	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
 		memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
@@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 	/*
 	 * This is opaque to client, so no need to byte-swap. Use
-	 * __force to keep sparse happy
+	 * __force to keep sparse happy. y2038 time_t overflow is
+	 * irrelevant in this usage.
 	 */
 	verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-	verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
+	verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
 	memcpy(verifier->data, verf, sizeof(verifier->data));
 }
 
@@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   union nfsd4_op_u *u)
 {
 	struct nfsd4_link *link = &u->link;
-	__be32 status = nfserr_nofilehandle;
+	__be32 status;
 
-	if (!cstate->save_fh.fh_dentry)
-		return status;
 	status = nfsd_link(rqstp, &cstate->current_fh,
 			   link->li_name, link->li_namelen, &cstate->save_fh);
 	if (!status)
@@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
 	struct nfsd4_rename *rename = &u->rename;
-	__be32 status = nfserr_nofilehandle;
+	__be32 status;
 
-	if (!cstate->save_fh.fh_dentry)
-		return status;
 	if (opens_in_grace(SVC_NET(rqstp)) &&
 	    !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
 		return nfserr_grace;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b..b82817767b9d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi)
 {
 	might_lock(&state_lock);
 
-	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
+	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
 		hlist_del_rcu(&fi->fi_hash);
 		spin_unlock(&state_lock);
 		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
@@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp)
 	co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
 	if (co) {
 		co->co_client = clp;
-		atomic_set(&co->co_odcount, 1);
+		refcount_set(&co->co_odcount, 1);
 	}
 	return co;
 }
@@ -586,7 +586,7 @@ static inline void
 get_clnt_odstate(struct nfs4_clnt_odstate *co)
 {
 	if (co)
-		atomic_inc(&co->co_odcount);
+		refcount_inc(&co->co_odcount);
 }
 
 static void
@@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co)
 		return;
 
 	fp = co->co_file;
-	if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+	if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
 		list_del(&co->co_perfile);
 		spin_unlock(&fp->fi_lock);
 
@@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	stid->sc_stateid.si_opaque.so_id = new_id;
 	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
 	/* Will be incremented before return to client: */
-	atomic_set(&stid->sc_count, 1);
+	refcount_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
 
 	/*
@@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s)
 
 	might_lock(&clp->cl_lock);
 
-	if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
+	if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
 		wake_up_all(&close_wq);
 		return;
 	}
@@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
 	if (status)
 		return status;
 	++fp->fi_delegees;
-	atomic_inc(&dp->dl_stid.sc_count);
+	refcount_inc(&dp->dl_stid.sc_count);
 	dp->dl_stid.sc_type = NFS4_DELEG_STID;
 	list_add(&dp->dl_perfile, &fp->fi_delegations);
 	list_add(&dp->dl_perclnt, &clp->cl_delegations);
@@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 
 	WARN_ON_ONCE(!list_empty(&stp->st_locks));
 
-	if (!atomic_dec_and_test(&s->sc_count)) {
+	if (!refcount_dec_and_test(&s->sc_count)) {
 		wake_up_all(&close_wq);
 		return;
 	}
@@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+		free_svc_cred(&ses->se_slots[i]->sl_cred);
 		kfree(ses->se_slots[i]);
+	}
 }
 
 /*
@@ -1472,6 +1474,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
 	spin_lock(&nfsd_drc_lock);
 	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
 		    nfsd_drc_max_mem - nfsd_drc_mem_used);
+	/*
+	 * Never use more than a third of the remaining memory,
+	 * unless it's the only way to give this client a slot:
+	 */
+	avail = clamp_t(int, avail, slotsize, avail/3);
 	num = min_t(int, num, avail / slotsize);
 	nfsd_drc_mem_used += num * slotsize;
 	spin_unlock(&nfsd_drc_lock);
@@ -2072,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
 	s = find_stateid_locked(cl, t);
 	if (s != NULL) {
 		if (typemask & s->sc_type)
-			atomic_inc(&s->sc_count);
+			refcount_inc(&s->sc_count);
 		else
 			s = NULL;
 	}
@@ -2287,14 +2294,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 
 	dprintk("--> %s slot %p\n", __func__, slot);
 
+	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
 	slot->sl_opcnt = resp->opcnt;
 	slot->sl_status = resp->cstate.status;
+	free_svc_cred(&slot->sl_cred);
+	copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
 
-	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
-	if (nfsd4_not_cached(resp)) {
-		slot->sl_datalen = 0;
+	if (!nfsd4_cache_this(resp)) {
+		slot->sl_flags &= ~NFSD4_SLOT_CACHED;
 		return;
 	}
+	slot->sl_flags |= NFSD4_SLOT_CACHED;
+
 	base = resp->cstate.data_offset;
 	slot->sl_datalen = buf->len - base;
 	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
@@ -2321,8 +2332,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
 	op = &args->ops[resp->opcnt - 1];
 	nfsd4_encode_operation(resp, op);
 
-	/* Return nfserr_retry_uncached_rep in next operation. */
-	if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
+	if (slot->sl_flags & NFSD4_SLOT_CACHED)
+		return op->status;
+	if (args->opcnt == 1) {
+		/*
+		 * The original operation wasn't a solo sequence--we
+		 * always cache those--so this retry must not match the
+		 * original:
+		 */
+		op->status = nfserr_seq_false_retry;
+	} else {
 		op = &args->ops[resp->opcnt++];
 		op->status = nfserr_retry_uncached_rep;
 		nfsd4_encode_operation(resp, op);
@@ -2986,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
 	return xb->len > session->se_fchannel.maxreq_sz;
 }
 
+static bool replay_matches_cache(struct svc_rqst *rqstp,
+		 struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
+{
+	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+	if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
+	    (bool)seq->cachethis)
+		return false;
+	/*
+	 * If there's an error then the reply can have fewer ops than
+	 * the call.  But if we cached a reply with *more* ops than the
+	 * call you're sending us now, then this new call is clearly not
+	 * really a replay of the old one:
+	 */
+	if (slot->sl_opcnt < argp->opcnt)
+		return false;
+	/* This is the only check explicitly called by spec: */
+	if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
+		return false;
+	/*
+	 * There may be more comparisons we could actually do, but the
+	 * spec doesn't require us to catch every case where the calls
+	 * don't match (that would require caching the call as well as
+	 * the reply), so we don't bother.
+	 */
+	return true;
+}
+
 __be32
 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	       union nfsd4_op_u *u)
@@ -3045,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfserr_seq_misordered;
 	if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
 		goto out_put_session;
+	status = nfserr_seq_false_retry;
+	if (!replay_matches_cache(rqstp, seq, slot))
+		goto out_put_session;
 	cstate->slot = slot;
 	cstate->session = session;
 	cstate->clp = clp;
@@ -3351,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3351{ 3401{
3352 lockdep_assert_held(&state_lock); 3402 lockdep_assert_held(&state_lock);
3353 3403
3354 atomic_set(&fp->fi_ref, 1); 3404 refcount_set(&fp->fi_ref, 1);
3355 spin_lock_init(&fp->fi_lock); 3405 spin_lock_init(&fp->fi_lock);
3356 INIT_LIST_HEAD(&fp->fi_stateids); 3406 INIT_LIST_HEAD(&fp->fi_stateids);
3357 INIT_LIST_HEAD(&fp->fi_delegations); 3407 INIT_LIST_HEAD(&fp->fi_delegations);
@@ -3514,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 			continue;
 		if (local->st_stateowner == &oo->oo_owner) {
 			ret = local;
-			atomic_inc(&ret->st_stid.sc_count);
+			refcount_inc(&ret->st_stid.sc_count);
 			break;
 		}
 	}
@@ -3573,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
 		goto out_unlock;
 
 	open->op_stp = NULL;
-	atomic_inc(&stp->st_stid.sc_count);
+	refcount_inc(&stp->st_stid.sc_count);
 	stp->st_stid.sc_type = NFS4_OPEN_STID;
 	INIT_LIST_HEAD(&stp->st_locks);
 	stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
@@ -3621,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 	 * there should be no danger of the refcount going back up again at
 	 * this point.
 	 */
-	wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
+	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
 
 	release_all_access(s);
 	if (s->st_stid.sc_file) {
@@ -3647,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
 
 	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
 		if (fh_match(&fp->fi_fhandle, fh)) {
-			if (atomic_inc_not_zero(&fp->fi_ref))
+			if (refcount_inc_not_zero(&fp->fi_ref))
 				return fp;
 		}
 	}
@@ -3783,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 	 * lock) we know the server hasn't removed the lease yet, we know
 	 * it's safe to take a reference.
 	 */
-	atomic_inc(&dp->dl_stid.sc_count);
+	refcount_inc(&dp->dl_stid.sc_count);
 	nfsd4_run_cb(&dp->dl_recall);
 }
 
@@ -3966,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
 {
 	struct nfs4_stid *ret;
 
-	ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+	ret = find_stateid_by_type(cl, s,
+			NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
 	if (!ret)
 		return NULL;
 	return delegstateid(ret);
@@ -3989,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
 	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
 	if (deleg == NULL)
 		goto out;
+	if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+		nfs4_put_stid(&deleg->dl_stid);
+		if (cl->cl_minorversion)
+			status = nfserr_deleg_revoked;
+		goto out;
+	}
 	flags = share_access_to_flags(open->op_share_access);
 	status = nfs4_check_delegmode(deleg, flags);
 	if (status) {
@@ -4858,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     struct nfs4_stid **s, struct nfsd_net *nn)
 {
 	__be32 status;
+	bool return_revoked = false;
+
+	/*
+	 * only return revoked delegations if explicitly asked.
+	 * otherwise we report revoked or bad_stateid status.
+	 */
+	if (typemask & NFS4_REVOKED_DELEG_STID)
+		return_revoked = true;
+	else if (typemask & NFS4_DELEG_STID)
+		typemask |= NFS4_REVOKED_DELEG_STID;
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
 		return nfserr_bad_stateid;
@@ -4872,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	*s = find_stateid_by_type(cstate->clp, stateid, typemask);
 	if (!*s)
 		return nfserr_bad_stateid;
+	if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+		nfs4_put_stid(*s);
+		if (cstate->minorversion)
+			return nfserr_deleg_revoked;
+		return nfserr_bad_stateid;
+	}
 	return nfs_ok;
 }
 
@@ -5071,7 +5144,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		ret = nfserr_locks_held;
 		break;
 	case NFS4_LOCK_STID:
-		atomic_inc(&s->sc_count);
+		refcount_inc(&s->sc_count);
 		spin_unlock(&cl->cl_lock);
 		ret = nfsd4_free_lock_stateid(stateid, s);
 		goto out;
@@ -5578,7 +5651,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 
 	lockdep_assert_held(&clp->cl_lock);
 
-	atomic_inc(&stp->st_stid.sc_count);
+	refcount_inc(&stp->st_stid.sc_count);
 	stp->st_stid.sc_type = NFS4_LOCK_STID;
 	stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
 	get_nfs4_file(fp);
@@ -5604,7 +5677,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
 
 	list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
 		if (lst->st_stid.sc_file == fp) {
-			atomic_inc(&lst->st_stid.sc_count);
+			refcount_inc(&lst->st_stid.sc_count);
 			return lst;
 		}
 	}
@@ -7006,8 +7079,8 @@ nfs4_state_start_net(struct net *net)
 	nn->nfsd4_manager.block_opens = true;
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nfsd4_client_tracking_init(net);
-	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
-	       nn->nfsd4_grace, net);
+	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+	       nn->nfsd4_grace, net->ns.inum);
 	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
 	return 0;
 }
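Aside: the new slot arithmetic in nfsd4_get_drc_mem() caps each session at roughly a third of the remaining DRC memory, while the slotsize lower bound is meant to still guarantee one slot. A standalone sketch with made-up sizes; the clamp macro here applies the lower bound first, matching the comment's intent, and is not the kernel's clamp_t:

#include <stdio.h>

#define clamp(val, lo, hi) \
	((val) < (lo) ? (lo) : (val) > (hi) ? (hi) : (val))

int main(void)
{
	long slotsize = 2048;		/* bytes per cached slot (assumed) */
	long remaining = 12 * 1024;	/* unused DRC memory (assumed) */
	long num = 32;			/* slots the client requested */

	/* never more than a third of what's left, but at least one slot */
	long avail = clamp(remaining, slotsize, remaining / 3);
	if (num > avail / slotsize)
		num = avail / slotsize;

	printf("avail=%ld bytes -> %ld slots granted\n", avail, num);
	return 0;
}

With 12 KB remaining and 2 KB slots, avail clamps to 4 KB and this client gets 2 of its 32 requested slots, leaving headroom for later clients.
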
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e02bd2783124..33117d4ffce0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -447,7 +447,7 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-	#define NFSD_DRC_SIZE_SHIFT	10
+	#define NFSD_DRC_SIZE_SHIFT	7
 	nfsd_drc_max_mem = (nr_free_buffer_pages()
 					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
 	nfsd_drc_mem_used = 0;
@@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net)
 		register_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
 	}
-	do_gettimeofday(&nn->nfssvc_boot);		/* record boot time */
+	ktime_get_real_ts64(&nn->nfssvc_boot);		/* record boot time */
 	return 0;
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911b34ac..f3772ea8ba0d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -36,6 +36,7 @@
 #define _NFSD4_STATE_H
 
 #include <linux/idr.h>
+#include <linux/refcount.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include "nfsfh.h"
 
@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
  * fields that are of general use to any stateid.
  */
 struct nfs4_stid {
-	atomic_t		sc_count;
+	refcount_t		sc_count;
 #define NFS4_OPEN_STID 1
 #define NFS4_LOCK_STID 2
 #define NFS4_DELEG_STID 4
@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 struct nfsd4_slot {
 	u32	sl_seqid;
 	__be32	sl_status;
+	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
+#define NFSD4_SLOT_CACHED	(1 << 3)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -465,7 +468,7 @@ struct nfs4_clnt_odstate {
 	struct nfs4_client	*co_client;
 	struct nfs4_file	*co_file;
 	struct list_head	co_perfile;
-	atomic_t		co_odcount;
+	refcount_t		co_odcount;
 };
 
 /*
@@ -481,7 +484,7 @@ struct nfs4_clnt_odstate {
  * the global state_lock spinlock.
  */
 struct nfs4_file {
-	atomic_t		fi_ref;
+	refcount_t		fi_ref;
 	spinlock_t		fi_lock;
 	struct hlist_node	fi_hash;	/* hash on fi_fhandle */
 	struct list_head	fi_stateids;
@@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
 void put_nfs4_file(struct nfs4_file *fi);
 static inline void get_nfs4_file(struct nfs4_file *fi)
 {
-	atomic_inc(&fi->fi_ref);
+	refcount_inc(&fi->fi_ref);
 }
 struct file *find_any_file(struct nfs4_file *f);
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1e4edbf70052..bc29511b6405 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
 	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
 }
 
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to.  But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
 {
-	return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+	return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
 		|| nfsd4_is_solo_sequence(resp);
 }
 
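Aside: a runnable truth table for the nfsd4_cache_this() decision above; a reply is cached when the client asked for it (CACHETHIS) or when the compound is a solo SEQUENCE:

#include <stdbool.h>
#include <stdio.h>

static bool cache_this(bool cachethis, bool solo_sequence)
{
	return cachethis || solo_sequence;
}

int main(void)
{
	for (int c = 0; c <= 1; c++)
		for (int s = 0; s <= 1; s++)
			printf("cachethis=%d solo_seq=%d -> cached=%d\n",
			       c, s, cache_this(c, s));
	return 0;
}
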
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e9379e258d64..2995a271ec46 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -971,8 +971,8 @@ struct lock_manager {
 struct net;
 void locks_start_grace(struct net *, struct lock_manager *);
 void locks_end_grace(struct lock_manager *);
-int locks_in_grace(struct net *);
-int opens_in_grace(struct net *);
+bool locks_in_grace(struct net *);
+bool opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b9f0d1dbb80..786ae2255f05 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -47,6 +47,7 @@ struct svc_pool {
 	struct svc_pool_stats	sp_stats;	/* statistics on pool operation */
 #define	SP_TASK_PENDING		(0)		/* still work to do even if no
 						 * xprt is queued. */
+#define SP_CONGESTED		(1)
 	unsigned long		sp_flags;
 } ____cacheline_aligned_in_smp;
 
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index ecbdbfe86eb6..8c153f68509e 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -486,20 +486,22 @@ TRACE_EVENT(svc_recv,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(u32, xid)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+		       &rqst->rq_addr, rqst->rq_addrlen);
 	),
 
-	TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr,
+	TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s",
+		  (struct sockaddr *)__get_dynamic_array(addr),
 		  __entry->xid, __entry->status,
 		  show_rqstp_flags(__entry->flags))
 );
@@ -544,22 +546,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(u32, xid)
-		__field(int, dropme)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+		       &rqst->rq_addr, rqst->rq_addrlen);
 	),
 
 	TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
-		  __entry->addr, __entry->xid,
+		  (struct sockaddr *)__get_dynamic_array(addr),
+		  __entry->xid,
 		  __entry->status, show_rqstp_flags(__entry->flags))
 );
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 		return stat;
 	if (integ_len > buf->len)
 		return stat;
-	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+		WARN_ON_ONCE(1);
+		return stat;
+	}
 	/* copy out mic... */
 	if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
-		BUG();
+		return stat;
 	if (mic.len > RPC_MAX_AUTH_SIZE)
 		return stat;
 	mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 	BUG_ON(integ_len % 4);
 	*p++ = htonl(integ_len);
 	*p++ = htonl(gc->gc_seq);
-	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+		WARN_ON_ONCE(1);
+		goto out_err;
+	}
 	if (resbuf->tail[0].iov_base == NULL) {
 		if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
 			goto out_err;
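Aside: the BUG() removals above follow the fail-soft rule for servers: warn once and reject the bad request instead of halting the whole machine. A userspace stand-in for WARN_ON_ONCE() (GCC/Clang statement expressions assumed; the lengths are illustrative):

#include <stdio.h>

#define WARN_ON_ONCE(cond) ({					\
	static int __warned;					\
	if ((cond) && !__warned) {				\
		__warned = 1;					\
		fprintf(stderr, "warning: %s\n", #cond);	\
	}							\
	(cond);							\
})

static int handle(int len, int buflen)
{
	if (WARN_ON_ONCE(len > buflen))
		return -1;	/* reject this request, keep serving */
	return 0;
}

int main(void)
{
	printf("ok request: %d\n", handle(4, 8));
	printf("bad request: %d\n", handle(16, 8));
	printf("bad again (no second warning): %d\n", handle(16, 8));
	return 0;
}
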
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 71de77bd4423..e8e0831229cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 	svc_xprt_received(new);
 }
 
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
-		     struct net *net, const int family,
-		     const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+			    struct net *net, const int family,
+			    const unsigned short port, int flags)
 {
 	struct svc_xprt_class *xcl;
 
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
 	int cpu;
-	bool queued = false;
 
 	if (!svc_xprt_has_something_to_do(xprt))
 		goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 
 	atomic_long_inc(&pool->sp_stats.packets);
 
-redo_search:
+	dprintk("svc: transport %p put into queue\n", xprt);
+	spin_lock_bh(&pool->sp_lock);
+	list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+	pool->sp_stats.sockets_queued++;
+	spin_unlock_bh(&pool->sp_lock);
+
 	/* find a thread for this xprt */
 	rcu_read_lock();
 	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-		/* Do a lockless check first */
-		if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
 			continue;
-
-		/*
-		 * Once the xprt has been queued, it can only be dequeued by
-		 * the task that intends to service it. All we can do at that
-		 * point is to try to wake this thread back up so that it can
-		 * do so.
-		 */
-		if (!queued) {
-			spin_lock_bh(&rqstp->rq_lock);
-			if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
-				/* already busy, move on... */
-				spin_unlock_bh(&rqstp->rq_lock);
-				continue;
-			}
-
-			/* this one will do */
-			rqstp->rq_xprt = xprt;
-			svc_xprt_get(xprt);
-			spin_unlock_bh(&rqstp->rq_lock);
-		}
-		rcu_read_unlock();
-
 		atomic_long_inc(&pool->sp_stats.threads_woken);
 		wake_up_process(rqstp->rq_task);
-		put_cpu();
-		goto out;
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We didn't find an idle thread to use, so we need to queue the xprt.
-	 * Do so and then search again. If we find one, we can't hook this one
-	 * up to it directly but we can wake the thread up in the hopes that it
-	 * will pick it up once it searches for a xprt to service.
-	 */
-	if (!queued) {
-		queued = true;
-		dprintk("svc: transport %p put into queue\n", xprt);
-		spin_lock_bh(&pool->sp_lock);
-		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-		pool->sp_stats.sockets_queued++;
-		spin_unlock_bh(&pool->sp_lock);
-		goto redo_search;
+		goto out_unlock;
 	}
+	set_bit(SP_CONGESTED, &pool->sp_flags);
 	rqstp = NULL;
+out_unlock:
+	rcu_read_unlock();
 	put_cpu();
 out:
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 
 static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
-	struct svc_xprt *xprt;
 	struct svc_pool		*pool = rqstp->rq_pool;
 	long			time_left = 0;
 
 	/* rq_xprt should be clear on entry */
 	WARN_ON_ONCE(rqstp->rq_xprt);
 
-	/* Normally we will wait up to 5 seconds for any required
-	 * cache information to be provided.
-	 */
-	rqstp->rq_chandle.thread_wait = 5*HZ;
-
-	xprt = svc_xprt_dequeue(pool);
-	if (xprt) {
-		rqstp->rq_xprt = xprt;
-
-		/* As there is a shortage of threads and this request
-		 * had to be queued, don't allow the thread to wait so
-		 * long for cache updates.
-		 */
-		rqstp->rq_chandle.thread_wait = 1*HZ;
-		clear_bit(SP_TASK_PENDING, &pool->sp_flags);
-		return xprt;
-	}
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	/*
 	 * We have to be able to interrupt this wait
 	 * to bring down the daemons ...
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
+	smp_mb__before_atomic();
+	clear_bit(SP_CONGESTED, &pool->sp_flags);
 	clear_bit(RQ_BUSY, &rqstp->rq_flags);
-	smp_mb();
+	smp_mb__after_atomic();
 
 	if (likely(rqst_should_sleep(rqstp)))
 		time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 
 	try_to_freeze();
 
-	spin_lock_bh(&rqstp->rq_lock);
 	set_bit(RQ_BUSY, &rqstp->rq_flags);
-	spin_unlock_bh(&rqstp->rq_lock);
-
-	xprt = rqstp->rq_xprt;
-	if (xprt != NULL)
-		return xprt;
+	smp_mb__after_atomic();
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	if (!time_left)
 		atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	if (signalled() || kthread_should_stop())
 		return ERR_PTR(-EINTR);
 	return ERR_PTR(-EAGAIN);
+out_found:
+	/* Normally we will wait up to 5 seconds for any required
+	 * cache information to be provided.
+	 */
+	if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+		rqstp->rq_chandle.thread_wait = 5*HZ;
+	else
+		rqstp->rq_chandle.thread_wait = 1*HZ;
+	return rqstp->rq_xprt;
 }
 
 static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
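Aside: the reworked svc_xprt_do_enqueue() above always queues the transport first, then tries to claim one idle worker with an atomic test-and-set, and marks the pool SP_CONGESTED when every worker is busy (svc_get_next_xprt() uses that bit to shorten its cache wait). A single-threaded C11 model of the protocol; the names and sizes are illustrative, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_WORKERS 3

static atomic_flag worker_busy[NR_WORKERS] = {
	ATOMIC_FLAG_INIT, ATOMIC_FLAG_INIT, ATOMIC_FLAG_INIT
};
static atomic_bool pool_congested;

static void enqueue_transport(int xprt)
{
	/* 1. Queue unconditionally; whichever worker wakes dequeues it. */
	printf("xprt %d queued\n", xprt);

	/* 2. Claim the first idle worker: test_and_set returns the old
	 *    value, so 'false' means this caller claimed the worker. */
	for (int i = 0; i < NR_WORKERS; i++) {
		if (!atomic_flag_test_and_set(&worker_busy[i])) {
			printf("woke worker %d for xprt %d\n", i, xprt);
			return;
		}
	}

	/* 3. No idle worker: only record that the pool is congested. */
	atomic_store(&pool_congested, true);
	printf("pool congested, xprt %d waits in queue\n", xprt);
}

int main(void)
{
	for (int xprt = 0; xprt < 5; xprt++)
		enqueue_transport(xprt);
	return 0;
}

Queuing before the wake-up means a worker that loses the race still finds the transport on the pool list, which is what lets the old redo_search loop disappear.
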
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 992594b7cc6b..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	if (ret)
 		goto out_err;
 
+	/* Bump page refcnt so Send completion doesn't release
+	 * the rq_buffer before all retransmits are complete.
+	 */
+	get_page(virt_to_page(rqst->rq_buffer));
 	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 	if (ret)
 		goto out_unmap;
@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
 		return -EINVAL;
 	}
 
-	/* svc_rdma_sendto releases this page */
 	page = alloc_page(RPCRDMA_DEF_GFP);
 	if (!page)
 		return -ENOMEM;
@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
 {
 	struct rpc_rqst *rqst = task->tk_rqstp;
 
+	put_page(virt_to_page(rqst->rq_buffer));
 	kfree(rqst->rq_rbuffer);
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5caf8e722a11..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
 			ib_event_msg(event->event), event->event,
 			event->element.qp);
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		svc_xprt_enqueue(xprt);
 		break;
 	}
 }
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
 		goto out;
-	svc_xprt_enqueue(&xprt->sc_xprt);
-	goto out;
+	goto out_enqueue;
 
 flushed:
 	if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ flushed:
 	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
 	svc_rdma_put_context(ctxt, 1);
 
+out_enqueue:
+	svc_xprt_enqueue(&xprt->sc_xprt);
 out:
 	svc_xprt_put(&xprt->sc_xprt);
 }
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&xprt->sc_xprt);
 		if (wc->status != IB_WC_WR_FLUSH_ERR)
 			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
 			       ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
 			xprt, cma_id);
-		if (xprt)
+		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
 		break;
 
 	default: