aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-02 19:35:23 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-02 19:35:23 -0500
commite6b92572808467f35fd159d47c45b650de29e722 (patch)
tree5fbd2e6279539c4f3eeeca0d6a69779bdbd0d6a4 /net
parente45428a436765fcd154d461a2739b5640916dc00 (diff)
parent260f71eff493a844531629854c0935fa8de4fa2c (diff)
Merge tag 'nfs-for-4.21-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "Stable bugfixes: - xprtrdma: Yet another double DMA-unmap # v4.20 Features: - Allow some /proc/sys/sunrpc entries without CONFIG_SUNRPC_DEBUG - Per-xprt rdma receive workqueues - Drop support for FMR memory registration - Make port= mount option optional for RDMA mounts Other bugfixes and cleanups: - Remove unused nfs4_xdev_fs_type declaration - Fix comments for behavior that has changed - Remove generic RPC credentials by switching to 'struct cred' - Fix crossing mountpoints with different auth flavors - Various xprtrdma fixes from testing and auditing the close code - Fixes for disconnect issues when using xprtrdma with krb5 - Clean up and improve xprtrdma trace points - Fix NFS v4.2 async copy reboot recovery" * tag 'nfs-for-4.21-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (63 commits) sunrpc: convert to DEFINE_SHOW_ATTRIBUTE sunrpc: Add xprt after nfs4_test_session_trunk() sunrpc: convert unnecessary GFP_ATOMIC to GFP_NOFS sunrpc: handle ENOMEM in rpcb_getport_async NFS: remove unnecessary test for IS_ERR(cred) xprtrdma: Prevent leak of rpcrdma_rep objects NFSv4.2 fix async copy reboot recovery xprtrdma: Don't leak freed MRs xprtrdma: Add documenting comment for rpcrdma_buffer_destroy xprtrdma: Replace outdated comment for rpcrdma_ep_post xprtrdma: Update comments in frwr_op_send SUNRPC: Fix some kernel doc complaints SUNRPC: Simplify defining common RPC trace events NFS: Fix NFSv4 symbolic trace point output xprtrdma: Trace mapping, alloc, and dereg failures xprtrdma: Add trace points for calls to transport switch methods xprtrdma: Relocate the xprtrdma_mr_map trace points xprtrdma: Clean up of xprtrdma chunk trace points xprtrdma: Remove unused fields from rpcrdma_ia xprtrdma: Cull dprintk() call sites ...
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/auth.c116
-rw-r--r--net/sunrpc/auth_generic.c293
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c47
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c2
-rw-r--r--net/sunrpc/auth_null.c4
-rw-r--r--net/sunrpc/auth_unix.c110
-rw-r--r--net/sunrpc/backchannel_rqst.c2
-rw-r--r--net/sunrpc/clnt.c29
-rw-r--r--net/sunrpc/rpc_pipe.c19
-rw-r--r--net/sunrpc/rpcb_clnt.c12
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/xprtmultipath.c4
-rw-r--r--net/sunrpc/xprtrdma/Makefile3
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c39
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c337
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c209
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c78
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c8
-rw-r--r--net/sunrpc/xprtrdma/transport.c91
-rw-r--r--net/sunrpc/xprtrdma/verbs.c255
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h80
-rw-r--r--net/sunrpc/xprtsock.c10
23 files changed, 487 insertions, 1268 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 090658c3da12..9488600451e8 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
9obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ 9obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
10 10
11sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ 11sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
12 auth.o auth_null.o auth_unix.o auth_generic.o \ 12 auth.o auth_null.o auth_unix.o \
13 svc.o svcsock.o svcauth.o svcauth_unix.o \ 13 svc.o svcsock.o svcauth.o svcauth_unix.o \
14 addr.o rpcb_clnt.o timer.o xdr.o \ 14 addr.o rpcb_clnt.o timer.o xdr.o \
15 sunrpc_syms.o cache.o rpc_pipe.o \ 15 sunrpc_syms.o cache.o rpc_pipe.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ad8ead738981..1ff9768f5456 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -39,6 +39,20 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
39static LIST_HEAD(cred_unused); 39static LIST_HEAD(cred_unused);
40static unsigned long number_cred_unused; 40static unsigned long number_cred_unused;
41 41
42static struct cred machine_cred = {
43 .usage = ATOMIC_INIT(1),
44};
45
46/*
47 * Return the machine_cred pointer to be used whenever
48 * a generic machine credential is needed.
49 */
50const struct cred *rpc_machine_cred(void)
51{
52 return &machine_cred;
53}
54EXPORT_SYMBOL_GPL(rpc_machine_cred);
55
42#define MAX_HASHTABLE_BITS (14) 56#define MAX_HASHTABLE_BITS (14)
43static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) 57static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
44{ 58{
@@ -346,29 +360,6 @@ out_nocache:
346} 360}
347EXPORT_SYMBOL_GPL(rpcauth_init_credcache); 361EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
348 362
349/*
350 * Setup a credential key lifetime timeout notification
351 */
352int
353rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
354{
355 if (!cred->cr_auth->au_ops->key_timeout)
356 return 0;
357 return cred->cr_auth->au_ops->key_timeout(auth, cred);
358}
359EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
360
361bool
362rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred)
363{
364 if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
365 return false;
366 if (!cred->cr_ops->crkey_to_expire)
367 return false;
368 return cred->cr_ops->crkey_to_expire(cred);
369}
370EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
371
372char * 363char *
373rpcauth_stringify_acceptor(struct rpc_cred *cred) 364rpcauth_stringify_acceptor(struct rpc_cred *cred)
374{ 365{
@@ -587,13 +578,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
587 hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { 578 hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
588 if (!entry->cr_ops->crmatch(acred, entry, flags)) 579 if (!entry->cr_ops->crmatch(acred, entry, flags))
589 continue; 580 continue;
590 if (flags & RPCAUTH_LOOKUP_RCU) {
591 if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
592 refcount_read(&entry->cr_count) == 0)
593 continue;
594 cred = entry;
595 break;
596 }
597 cred = get_rpccred(entry); 581 cred = get_rpccred(entry);
598 if (cred) 582 if (cred)
599 break; 583 break;
@@ -603,9 +587,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
603 if (cred != NULL) 587 if (cred != NULL)
604 goto found; 588 goto found;
605 589
606 if (flags & RPCAUTH_LOOKUP_RCU)
607 return ERR_PTR(-ECHILD);
608
609 new = auth->au_ops->crcreate(auth, acred, flags, gfp); 590 new = auth->au_ops->crcreate(auth, acred, flags, gfp);
610 if (IS_ERR(new)) { 591 if (IS_ERR(new)) {
611 cred = new; 592 cred = new;
@@ -656,9 +637,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
656 auth->au_ops->au_name); 637 auth->au_ops->au_name);
657 638
658 memset(&acred, 0, sizeof(acred)); 639 memset(&acred, 0, sizeof(acred));
659 acred.uid = cred->fsuid; 640 acred.cred = cred;
660 acred.gid = cred->fsgid;
661 acred.group_info = cred->group_info;
662 ret = auth->au_ops->lookup_cred(auth, &acred, flags); 641 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
663 return ret; 642 return ret;
664} 643}
@@ -672,31 +651,41 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
672 INIT_LIST_HEAD(&cred->cr_lru); 651 INIT_LIST_HEAD(&cred->cr_lru);
673 refcount_set(&cred->cr_count, 1); 652 refcount_set(&cred->cr_count, 1);
674 cred->cr_auth = auth; 653 cred->cr_auth = auth;
654 cred->cr_flags = 0;
675 cred->cr_ops = ops; 655 cred->cr_ops = ops;
676 cred->cr_expire = jiffies; 656 cred->cr_expire = jiffies;
677 cred->cr_uid = acred->uid; 657 cred->cr_cred = get_cred(acred->cred);
678} 658}
679EXPORT_SYMBOL_GPL(rpcauth_init_cred); 659EXPORT_SYMBOL_GPL(rpcauth_init_cred);
680 660
681struct rpc_cred * 661static struct rpc_cred *
682rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) 662rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
683{ 663{
684 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, 664 struct rpc_auth *auth = task->tk_client->cl_auth;
685 cred->cr_auth->au_ops->au_name, cred); 665 struct auth_cred acred = {
686 return get_rpccred(cred); 666 .cred = get_task_cred(&init_task),
667 };
668 struct rpc_cred *ret;
669
670 dprintk("RPC: %5u looking up %s cred\n",
671 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
672 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
673 put_cred(acred.cred);
674 return ret;
687} 675}
688EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
689 676
690static struct rpc_cred * 677static struct rpc_cred *
691rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) 678rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
692{ 679{
693 struct rpc_auth *auth = task->tk_client->cl_auth; 680 struct rpc_auth *auth = task->tk_client->cl_auth;
694 struct auth_cred acred = { 681 struct auth_cred acred = {
695 .uid = GLOBAL_ROOT_UID, 682 .principal = task->tk_client->cl_principal,
696 .gid = GLOBAL_ROOT_GID, 683 .cred = init_task.cred,
697 }; 684 };
698 685
699 dprintk("RPC: %5u looking up %s cred\n", 686 if (!acred.principal)
687 return NULL;
688 dprintk("RPC: %5u looking up %s machine cred\n",
700 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); 689 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
701 return auth->au_ops->lookup_cred(auth, &acred, lookupflags); 690 return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
702} 691}
@@ -712,18 +701,33 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
712} 701}
713 702
714static int 703static int
715rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) 704rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
716{ 705{
717 struct rpc_rqst *req = task->tk_rqstp; 706 struct rpc_rqst *req = task->tk_rqstp;
718 struct rpc_cred *new; 707 struct rpc_cred *new = NULL;
719 int lookupflags = 0; 708 int lookupflags = 0;
709 struct rpc_auth *auth = task->tk_client->cl_auth;
710 struct auth_cred acred = {
711 .cred = cred,
712 };
720 713
721 if (flags & RPC_TASK_ASYNC) 714 if (flags & RPC_TASK_ASYNC)
722 lookupflags |= RPCAUTH_LOOKUP_NEW; 715 lookupflags |= RPCAUTH_LOOKUP_NEW;
723 if (cred != NULL) 716 if (task->tk_op_cred)
724 new = cred->cr_ops->crbind(task, cred, lookupflags); 717 /* Task must use exactly this rpc_cred */
725 else if (flags & RPC_TASK_ROOTCREDS) 718 new = get_rpccred(task->tk_op_cred);
719 else if (cred != NULL && cred != &machine_cred)
720 new = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
721 else if (cred == &machine_cred)
722 new = rpcauth_bind_machine_cred(task, lookupflags);
723
724 /* If machine cred couldn't be bound, try a root cred */
725 if (new)
726 ;
727 else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
726 new = rpcauth_bind_root_cred(task, lookupflags); 728 new = rpcauth_bind_root_cred(task, lookupflags);
729 else if (flags & RPC_TASK_NULLCREDS)
730 new = authnull_ops.lookup_cred(NULL, NULL, 0);
727 else 731 else
728 new = rpcauth_bind_new_cred(task, lookupflags); 732 new = rpcauth_bind_new_cred(task, lookupflags);
729 if (IS_ERR(new)) 733 if (IS_ERR(new))
@@ -901,15 +905,10 @@ int __init rpcauth_init_module(void)
901 err = rpc_init_authunix(); 905 err = rpc_init_authunix();
902 if (err < 0) 906 if (err < 0)
903 goto out1; 907 goto out1;
904 err = rpc_init_generic_auth();
905 if (err < 0)
906 goto out2;
907 err = register_shrinker(&rpc_cred_shrinker); 908 err = register_shrinker(&rpc_cred_shrinker);
908 if (err < 0) 909 if (err < 0)
909 goto out3; 910 goto out2;
910 return 0; 911 return 0;
911out3:
912 rpc_destroy_generic_auth();
913out2: 912out2:
914 rpc_destroy_authunix(); 913 rpc_destroy_authunix();
915out1: 914out1:
@@ -919,6 +918,5 @@ out1:
919void rpcauth_remove_module(void) 918void rpcauth_remove_module(void)
920{ 919{
921 rpc_destroy_authunix(); 920 rpc_destroy_authunix();
922 rpc_destroy_generic_auth();
923 unregister_shrinker(&rpc_cred_shrinker); 921 unregister_shrinker(&rpc_cred_shrinker);
924} 922}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
deleted file mode 100644
index ab4a3be1542a..000000000000
--- a/net/sunrpc/auth_generic.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2 * Generic RPC credential
3 *
4 * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
5 */
6
7#include <linux/err.h>
8#include <linux/slab.h>
9#include <linux/types.h>
10#include <linux/module.h>
11#include <linux/sched.h>
12#include <linux/sunrpc/auth.h>
13#include <linux/sunrpc/clnt.h>
14#include <linux/sunrpc/debug.h>
15#include <linux/sunrpc/sched.h>
16
17#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
18# define RPCDBG_FACILITY RPCDBG_AUTH
19#endif
20
21#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID
22#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID
23
24struct generic_cred {
25 struct rpc_cred gc_base;
26 struct auth_cred acred;
27};
28
29static struct rpc_auth generic_auth;
30static const struct rpc_credops generic_credops;
31
32/*
33 * Public call interface
34 */
35struct rpc_cred *rpc_lookup_cred(void)
36{
37 return rpcauth_lookupcred(&generic_auth, 0);
38}
39EXPORT_SYMBOL_GPL(rpc_lookup_cred);
40
41struct rpc_cred *
42rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
43{
44 return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
45}
46EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
47
48struct rpc_cred *rpc_lookup_cred_nonblock(void)
49{
50 return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
51}
52EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
53
54/*
55 * Public call interface for looking up machine creds.
56 */
57struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
58{
59 struct auth_cred acred = {
60 .uid = RPC_MACHINE_CRED_USERID,
61 .gid = RPC_MACHINE_CRED_GROUPID,
62 .principal = service_name,
63 .machine_cred = 1,
64 };
65
66 dprintk("RPC: looking up machine cred for service %s\n",
67 service_name);
68 return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
69}
70EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
71
72static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
73 struct rpc_cred *cred, int lookupflags)
74{
75 struct rpc_auth *auth = task->tk_client->cl_auth;
76 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
77
78 return auth->au_ops->lookup_cred(auth, acred, lookupflags);
79}
80
81static int
82generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
83{
84 return hash_64(from_kgid(&init_user_ns, acred->gid) |
85 ((u64)from_kuid(&init_user_ns, acred->uid) <<
86 (sizeof(gid_t) * 8)), hashbits);
87}
88
89/*
90 * Lookup generic creds for current process
91 */
92static struct rpc_cred *
93generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
94{
95 return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
96}
97
98static struct rpc_cred *
99generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
100{
101 struct generic_cred *gcred;
102
103 gcred = kmalloc(sizeof(*gcred), gfp);
104 if (gcred == NULL)
105 return ERR_PTR(-ENOMEM);
106
107 rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
108 gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
109
110 gcred->acred.uid = acred->uid;
111 gcred->acred.gid = acred->gid;
112 gcred->acred.group_info = acred->group_info;
113 gcred->acred.ac_flags = 0;
114 if (gcred->acred.group_info != NULL)
115 get_group_info(gcred->acred.group_info);
116 gcred->acred.machine_cred = acred->machine_cred;
117 gcred->acred.principal = acred->principal;
118
119 dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
120 gcred->acred.machine_cred ? "machine" : "generic",
121 gcred,
122 from_kuid(&init_user_ns, acred->uid),
123 from_kgid(&init_user_ns, acred->gid));
124 return &gcred->gc_base;
125}
126
127static void
128generic_free_cred(struct rpc_cred *cred)
129{
130 struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
131
132 dprintk("RPC: generic_free_cred %p\n", gcred);
133 if (gcred->acred.group_info != NULL)
134 put_group_info(gcred->acred.group_info);
135 kfree(gcred);
136}
137
138static void
139generic_free_cred_callback(struct rcu_head *head)
140{
141 struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
142 generic_free_cred(cred);
143}
144
145static void
146generic_destroy_cred(struct rpc_cred *cred)
147{
148 call_rcu(&cred->cr_rcu, generic_free_cred_callback);
149}
150
151static int
152machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
153{
154 if (!gcred->acred.machine_cred ||
155 gcred->acred.principal != acred->principal ||
156 !uid_eq(gcred->acred.uid, acred->uid) ||
157 !gid_eq(gcred->acred.gid, acred->gid))
158 return 0;
159 return 1;
160}
161
162/*
163 * Match credentials against current process creds.
164 */
165static int
166generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
167{
168 struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
169 int i;
170
171 if (acred->machine_cred)
172 return machine_cred_match(acred, gcred, flags);
173
174 if (!uid_eq(gcred->acred.uid, acred->uid) ||
175 !gid_eq(gcred->acred.gid, acred->gid) ||
176 gcred->acred.machine_cred != 0)
177 goto out_nomatch;
178
179 /* Optimisation in the case where pointers are identical... */
180 if (gcred->acred.group_info == acred->group_info)
181 goto out_match;
182
183 /* Slow path... */
184 if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
185 goto out_nomatch;
186 for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
187 if (!gid_eq(gcred->acred.group_info->gid[i],
188 acred->group_info->gid[i]))
189 goto out_nomatch;
190 }
191out_match:
192 return 1;
193out_nomatch:
194 return 0;
195}
196
197int __init rpc_init_generic_auth(void)
198{
199 return rpcauth_init_credcache(&generic_auth);
200}
201
202void rpc_destroy_generic_auth(void)
203{
204 rpcauth_destroy_credcache(&generic_auth);
205}
206
207/*
208 * Test the current time (now) against the underlying credential key expiry
209 * minus a timeout and setup notification.
210 *
211 * The normal case:
212 * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
213 * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
214 * rpc_credops crmatch routine to notify this generic cred when its key
215 * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
216 *
217 * The error case:
218 * If the underlying cred lookup fails, return -EACCES.
219 *
220 * The 'almost' error case:
221 * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
222 * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_KEY_EXPIRE_SOON bit
223 * on the acred ac_flags and return 0.
224 */
225static int
226generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
227{
228 struct auth_cred *acred = &container_of(cred, struct generic_cred,
229 gc_base)->acred;
230 struct rpc_cred *tcred;
231 int ret = 0;
232
233
234 /* Fast track for non crkey_timeout (no key) underlying credentials */
235 if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
236 return 0;
237
238 /* Fast track for the normal case */
239 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
240 return 0;
241
242 /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
243 tcred = auth->au_ops->lookup_cred(auth, acred, 0);
244 if (IS_ERR(tcred))
245 return -EACCES;
246
247 /* Test for the almost error case */
248 ret = tcred->cr_ops->crkey_timeout(tcred);
249 if (ret != 0) {
250 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
251 ret = 0;
252 } else {
253 /* In case underlying cred key has been reset */
254 if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
255 &acred->ac_flags))
256 dprintk("RPC: UID %d Credential key reset\n",
257 from_kuid(&init_user_ns, tcred->cr_uid));
258 /* set up fasttrack for the normal case */
259 set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
260 }
261
262 put_rpccred(tcred);
263 return ret;
264}
265
266static const struct rpc_authops generic_auth_ops = {
267 .owner = THIS_MODULE,
268 .au_name = "Generic",
269 .hash_cred = generic_hash_cred,
270 .lookup_cred = generic_lookup_cred,
271 .crcreate = generic_create_cred,
272 .key_timeout = generic_key_timeout,
273};
274
275static struct rpc_auth generic_auth = {
276 .au_ops = &generic_auth_ops,
277 .au_count = REFCOUNT_INIT(1),
278};
279
280static bool generic_key_to_expire(struct rpc_cred *cred)
281{
282 struct auth_cred *acred = &container_of(cred, struct generic_cred,
283 gc_base)->acred;
284 return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
285}
286
287static const struct rpc_credops generic_credops = {
288 .cr_name = "Generic cred",
289 .crdestroy = generic_destroy_cred,
290 .crbind = generic_bind_cred,
291 .crmatch = generic_match,
292 .crkey_to_expire = generic_key_to_expire,
293};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ba765473d1f0..dc86713b32b6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -565,7 +565,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
565 struct gss_cred *gss_cred = container_of(cred, 565 struct gss_cred *gss_cred = container_of(cred,
566 struct gss_cred, gc_base); 566 struct gss_cred, gc_base);
567 struct gss_upcall_msg *gss_new, *gss_msg; 567 struct gss_upcall_msg *gss_new, *gss_msg;
568 kuid_t uid = cred->cr_uid; 568 kuid_t uid = cred->cr_cred->fsuid;
569 569
570 gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); 570 gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
571 if (IS_ERR(gss_new)) 571 if (IS_ERR(gss_new))
@@ -604,7 +604,7 @@ gss_refresh_upcall(struct rpc_task *task)
604 int err = 0; 604 int err = 0;
605 605
606 dprintk("RPC: %5u %s for uid %u\n", 606 dprintk("RPC: %5u %s for uid %u\n",
607 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); 607 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
608 gss_msg = gss_setup_upcall(gss_auth, cred); 608 gss_msg = gss_setup_upcall(gss_auth, cred);
609 if (PTR_ERR(gss_msg) == -EAGAIN) { 609 if (PTR_ERR(gss_msg) == -EAGAIN) {
610 /* XXX: warning on the first, under the assumption we 610 /* XXX: warning on the first, under the assumption we
@@ -637,7 +637,7 @@ gss_refresh_upcall(struct rpc_task *task)
637out: 637out:
638 dprintk("RPC: %5u %s for uid %u result %d\n", 638 dprintk("RPC: %5u %s for uid %u result %d\n",
639 task->tk_pid, __func__, 639 task->tk_pid, __func__,
640 from_kuid(&init_user_ns, cred->cr_uid), err); 640 from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
641 return err; 641 return err;
642} 642}
643 643
@@ -653,7 +653,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
653 int err; 653 int err;
654 654
655 dprintk("RPC: %s for uid %u\n", 655 dprintk("RPC: %s for uid %u\n",
656 __func__, from_kuid(&init_user_ns, cred->cr_uid)); 656 __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
657retry: 657retry:
658 err = 0; 658 err = 0;
659 /* if gssd is down, just skip upcalling altogether */ 659 /* if gssd is down, just skip upcalling altogether */
@@ -701,7 +701,7 @@ out_intr:
701 gss_release_msg(gss_msg); 701 gss_release_msg(gss_msg);
702out: 702out:
703 dprintk("RPC: %s for uid %u result %d\n", 703 dprintk("RPC: %s for uid %u result %d\n",
704 __func__, from_kuid(&init_user_ns, cred->cr_uid), err); 704 __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
705 return err; 705 return err;
706} 706}
707 707
@@ -1248,7 +1248,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
1248 new = kzalloc(sizeof(*gss_cred), GFP_NOIO); 1248 new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
1249 if (new) { 1249 if (new) {
1250 struct auth_cred acred = { 1250 struct auth_cred acred = {
1251 .uid = gss_cred->gc_base.cr_uid, 1251 .cred = gss_cred->gc_base.cr_cred,
1252 }; 1252 };
1253 struct gss_cl_ctx *ctx = 1253 struct gss_cl_ctx *ctx =
1254 rcu_dereference_protected(gss_cred->gc_ctx, 1); 1254 rcu_dereference_protected(gss_cred->gc_ctx, 1);
@@ -1343,6 +1343,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
1343 struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); 1343 struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
1344 1344
1345 RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); 1345 RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
1346 put_cred(cred->cr_cred);
1346 call_rcu(&cred->cr_rcu, gss_free_cred_callback); 1347 call_rcu(&cred->cr_rcu, gss_free_cred_callback);
1347 if (ctx) 1348 if (ctx)
1348 gss_put_ctx(ctx); 1349 gss_put_ctx(ctx);
@@ -1361,7 +1362,7 @@ gss_destroy_cred(struct rpc_cred *cred)
1361static int 1362static int
1362gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) 1363gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
1363{ 1364{
1364 return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits); 1365 return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits);
1365} 1366}
1366 1367
1367/* 1368/*
@@ -1381,7 +1382,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
1381 int err = -ENOMEM; 1382 int err = -ENOMEM;
1382 1383
1383 dprintk("RPC: %s for uid %d, flavor %d\n", 1384 dprintk("RPC: %s for uid %d, flavor %d\n",
1384 __func__, from_kuid(&init_user_ns, acred->uid), 1385 __func__, from_kuid(&init_user_ns, acred->cred->fsuid),
1385 auth->au_flavor); 1386 auth->au_flavor);
1386 1387
1387 if (!(cred = kzalloc(sizeof(*cred), gfp))) 1388 if (!(cred = kzalloc(sizeof(*cred), gfp)))
@@ -1394,9 +1395,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
1394 */ 1395 */
1395 cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; 1396 cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
1396 cred->gc_service = gss_auth->service; 1397 cred->gc_service = gss_auth->service;
1397 cred->gc_principal = NULL; 1398 cred->gc_principal = acred->principal;
1398 if (acred->machine_cred)
1399 cred->gc_principal = acred->principal;
1400 kref_get(&gss_auth->kref); 1399 kref_get(&gss_auth->kref);
1401 return &cred->gc_base; 1400 return &cred->gc_base;
1402 1401
@@ -1518,23 +1517,10 @@ out:
1518 if (gss_cred->gc_principal == NULL) 1517 if (gss_cred->gc_principal == NULL)
1519 return 0; 1518 return 0;
1520 ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; 1519 ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
1521 goto check_expire; 1520 } else {
1522 } 1521 if (gss_cred->gc_principal != NULL)
1523 if (gss_cred->gc_principal != NULL) 1522 return 0;
1524 return 0; 1523 ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid);
1525 ret = uid_eq(rc->cr_uid, acred->uid);
1526
1527check_expire:
1528 if (ret == 0)
1529 return ret;
1530
1531 /* Notify acred users of GSS context expiration timeout */
1532 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
1533 (gss_key_timeout(rc) != 0)) {
1534 /* test will now be done from generic cred */
1535 test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
1536 /* tell NFS layer that key will expire soon */
1537 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
1538 } 1524 }
1539 return ret; 1525 return ret;
1540} 1526}
@@ -1607,9 +1593,8 @@ static int gss_renew_cred(struct rpc_task *task)
1607 gc_base); 1593 gc_base);
1608 struct rpc_auth *auth = oldcred->cr_auth; 1594 struct rpc_auth *auth = oldcred->cr_auth;
1609 struct auth_cred acred = { 1595 struct auth_cred acred = {
1610 .uid = oldcred->cr_uid, 1596 .cred = oldcred->cr_cred,
1611 .principal = gss_cred->gc_principal, 1597 .principal = gss_cred->gc_principal,
1612 .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
1613 }; 1598 };
1614 struct rpc_cred *new; 1599 struct rpc_cred *new;
1615 1600
@@ -2110,7 +2095,6 @@ static const struct rpc_credops gss_credops = {
2110 .cr_name = "AUTH_GSS", 2095 .cr_name = "AUTH_GSS",
2111 .crdestroy = gss_destroy_cred, 2096 .crdestroy = gss_destroy_cred,
2112 .cr_init = gss_cred_init, 2097 .cr_init = gss_cred_init,
2113 .crbind = rpcauth_generic_bind_cred,
2114 .crmatch = gss_match, 2098 .crmatch = gss_match,
2115 .crmarshal = gss_marshal, 2099 .crmarshal = gss_marshal,
2116 .crrefresh = gss_refresh, 2100 .crrefresh = gss_refresh,
@@ -2125,7 +2109,6 @@ static const struct rpc_credops gss_credops = {
2125static const struct rpc_credops gss_nullops = { 2109static const struct rpc_credops gss_nullops = {
2126 .cr_name = "AUTH_GSS", 2110 .cr_name = "AUTH_GSS",
2127 .crdestroy = gss_destroy_nullcred, 2111 .crdestroy = gss_destroy_nullcred,
2128 .crbind = rpcauth_generic_bind_cred,
2129 .crmatch = gss_match, 2112 .crmatch = gss_match,
2130 .crmarshal = gss_marshal, 2113 .crmarshal = gss_marshal,
2131 .crrefresh = gss_refresh_null, 2114 .crrefresh = gss_refresh_null,
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 16ac0f4cb7d8..379318dff534 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -244,7 +244,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
244 244
245/** 245/**
246 * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors 246 * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
247 * @array: array to fill in 247 * @array_ptr: array to fill in
248 * @size: size of "array" 248 * @size: size of "array"
249 * 249 *
250 * Returns the number of array items filled in, or a negative errno. 250 * Returns the number of array items filled in, or a negative errno.
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 2694a1bc026b..d0ceac57c06e 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -36,8 +36,6 @@ nul_destroy(struct rpc_auth *auth)
36static struct rpc_cred * 36static struct rpc_cred *
37nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 37nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
38{ 38{
39 if (flags & RPCAUTH_LOOKUP_RCU)
40 return &null_cred;
41 return get_rpccred(&null_cred); 39 return get_rpccred(&null_cred);
42} 40}
43 41
@@ -116,7 +114,6 @@ static
116struct rpc_auth null_auth = { 114struct rpc_auth null_auth = {
117 .au_cslack = NUL_CALLSLACK, 115 .au_cslack = NUL_CALLSLACK,
118 .au_rslack = NUL_REPLYSLACK, 116 .au_rslack = NUL_REPLYSLACK,
119 .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
120 .au_ops = &authnull_ops, 117 .au_ops = &authnull_ops,
121 .au_flavor = RPC_AUTH_NULL, 118 .au_flavor = RPC_AUTH_NULL,
122 .au_count = REFCOUNT_INIT(1), 119 .au_count = REFCOUNT_INIT(1),
@@ -126,7 +123,6 @@ static
126const struct rpc_credops null_credops = { 123const struct rpc_credops null_credops = {
127 .cr_name = "AUTH_NULL", 124 .cr_name = "AUTH_NULL",
128 .crdestroy = nul_destroy_cred, 125 .crdestroy = nul_destroy_cred,
129 .crbind = rpcauth_generic_bind_cred,
130 .crmatch = nul_match, 126 .crmatch = nul_match,
131 .crmarshal = nul_marshal, 127 .crmarshal = nul_marshal,
132 .crrefresh = nul_refresh, 128 .crrefresh = nul_refresh,
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 4c1c7e56288f..387f6b3ffbea 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -11,16 +11,11 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/mempool.h>
14#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/auth.h> 16#include <linux/sunrpc/auth.h>
16#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
17 18
18struct unx_cred {
19 struct rpc_cred uc_base;
20 kgid_t uc_gid;
21 kgid_t uc_gids[UNX_NGROUPS];
22};
23#define uc_uid uc_base.cr_uid
24 19
25#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 20#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
26# define RPCDBG_FACILITY RPCDBG_AUTH 21# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -28,6 +23,7 @@ struct unx_cred {
28 23
29static struct rpc_auth unix_auth; 24static struct rpc_auth unix_auth;
30static const struct rpc_credops unix_credops; 25static const struct rpc_credops unix_credops;
26static mempool_t *unix_pool;
31 27
32static struct rpc_auth * 28static struct rpc_auth *
33unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) 29unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
@@ -42,15 +38,6 @@ static void
42unx_destroy(struct rpc_auth *auth) 38unx_destroy(struct rpc_auth *auth)
43{ 39{
44 dprintk("RPC: destroying UNIX authenticator %p\n", auth); 40 dprintk("RPC: destroying UNIX authenticator %p\n", auth);
45 rpcauth_clear_credcache(auth->au_credcache);
46}
47
48static int
49unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
50{
51 return hash_64(from_kgid(&init_user_ns, acred->gid) |
52 ((u64)from_kuid(&init_user_ns, acred->uid) <<
53 (sizeof(gid_t) * 8)), hashbits);
54} 41}
55 42
56/* 43/*
@@ -59,52 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
59static struct rpc_cred * 46static struct rpc_cred *
60unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 47unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
61{ 48{
62 return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); 49 struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
63}
64
65static struct rpc_cred *
66unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
67{
68 struct unx_cred *cred;
69 unsigned int groups = 0;
70 unsigned int i;
71 50
72 dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", 51 dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
73 from_kuid(&init_user_ns, acred->uid), 52 from_kuid(&init_user_ns, acred->cred->fsuid),
74 from_kgid(&init_user_ns, acred->gid)); 53 from_kgid(&init_user_ns, acred->cred->fsgid));
75
76 if (!(cred = kmalloc(sizeof(*cred), gfp)))
77 return ERR_PTR(-ENOMEM);
78 54
79 rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); 55 rpcauth_init_cred(ret, acred, auth, &unix_credops);
80 cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; 56 ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
81 57 return ret;
82 if (acred->group_info != NULL)
83 groups = acred->group_info->ngroups;
84 if (groups > UNX_NGROUPS)
85 groups = UNX_NGROUPS;
86
87 cred->uc_gid = acred->gid;
88 for (i = 0; i < groups; i++)
89 cred->uc_gids[i] = acred->group_info->gid[i];
90 if (i < UNX_NGROUPS)
91 cred->uc_gids[i] = INVALID_GID;
92
93 return &cred->uc_base;
94}
95
96static void
97unx_free_cred(struct unx_cred *unx_cred)
98{
99 dprintk("RPC: unx_free_cred %p\n", unx_cred);
100 kfree(unx_cred);
101} 58}
102 59
103static void 60static void
104unx_free_cred_callback(struct rcu_head *head) 61unx_free_cred_callback(struct rcu_head *head)
105{ 62{
106 struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); 63 struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
107 unx_free_cred(unx_cred); 64 dprintk("RPC: unx_free_cred %p\n", rpc_cred);
65 put_cred(rpc_cred->cr_cred);
66 mempool_free(rpc_cred, unix_pool);
108} 67}
109 68
110static void 69static void
@@ -114,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred)
114} 73}
115 74
116/* 75/*
117 * Match credentials against current process creds. 76 * Match credentials against current the auth_cred.
118 * The root_override argument takes care of cases where the caller may
119 * request root creds (e.g. for NFS swapping).
120 */ 77 */
121static int 78static int
122unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) 79unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
123{ 80{
124 struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
125 unsigned int groups = 0; 81 unsigned int groups = 0;
126 unsigned int i; 82 unsigned int i;
127 83
84 if (cred->cr_cred == acred->cred)
85 return 1;
128 86
129 if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) 87 if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
130 return 0; 88 return 0;
131 89
132 if (acred->group_info != NULL) 90 if (acred->cred && acred->cred->group_info != NULL)
133 groups = acred->group_info->ngroups; 91 groups = acred->cred->group_info->ngroups;
134 if (groups > UNX_NGROUPS) 92 if (groups > UNX_NGROUPS)
135 groups = UNX_NGROUPS; 93 groups = UNX_NGROUPS;
94 if (cred->cr_cred->group_info == NULL)
95 return groups == 0;
96 if (groups != cred->cr_cred->group_info->ngroups)
97 return 0;
98
136 for (i = 0; i < groups ; i++) 99 for (i = 0; i < groups ; i++)
137 if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) 100 if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i]))
138 return 0; 101 return 0;
139 if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
140 return 0;
141 return 1; 102 return 1;
142} 103}
143 104
@@ -149,9 +110,10 @@ static __be32 *
149unx_marshal(struct rpc_task *task, __be32 *p) 110unx_marshal(struct rpc_task *task, __be32 *p)
150{ 111{
151 struct rpc_clnt *clnt = task->tk_client; 112 struct rpc_clnt *clnt = task->tk_client;
152 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); 113 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
153 __be32 *base, *hold; 114 __be32 *base, *hold;
154 int i; 115 int i;
116 struct group_info *gi = cred->cr_cred->group_info;
155 117
156 *p++ = htonl(RPC_AUTH_UNIX); 118 *p++ = htonl(RPC_AUTH_UNIX);
157 base = p++; 119 base = p++;
@@ -162,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p)
162 */ 124 */
163 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); 125 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
164 126
165 *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); 127 *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
166 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); 128 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
167 hold = p++; 129 hold = p++;
168 for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) 130 if (gi)
169 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); 131 for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
132 *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
170 *hold = htonl(p - hold - 1); /* gid array length */ 133 *hold = htonl(p - hold - 1); /* gid array length */
171 *base = htonl((p - base - 1) << 2); /* cred length */ 134 *base = htonl((p - base - 1) << 2); /* cred length */
172 135
@@ -213,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
213 176
214int __init rpc_init_authunix(void) 177int __init rpc_init_authunix(void)
215{ 178{
216 return rpcauth_init_credcache(&unix_auth); 179 unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred));
180 return unix_pool ? 0 : -ENOMEM;
217} 181}
218 182
219void rpc_destroy_authunix(void) 183void rpc_destroy_authunix(void)
220{ 184{
221 rpcauth_destroy_credcache(&unix_auth); 185 mempool_destroy(unix_pool);
222} 186}
223 187
224const struct rpc_authops authunix_ops = { 188const struct rpc_authops authunix_ops = {
@@ -227,16 +191,13 @@ const struct rpc_authops authunix_ops = {
227 .au_name = "UNIX", 191 .au_name = "UNIX",
228 .create = unx_create, 192 .create = unx_create,
229 .destroy = unx_destroy, 193 .destroy = unx_destroy,
230 .hash_cred = unx_hash_cred,
231 .lookup_cred = unx_lookup_cred, 194 .lookup_cred = unx_lookup_cred,
232 .crcreate = unx_create_cred,
233}; 195};
234 196
235static 197static
236struct rpc_auth unix_auth = { 198struct rpc_auth unix_auth = {
237 .au_cslack = UNX_CALLSLACK, 199 .au_cslack = UNX_CALLSLACK,
238 .au_rslack = NUL_REPLYSLACK, 200 .au_rslack = NUL_REPLYSLACK,
239 .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
240 .au_ops = &authunix_ops, 201 .au_ops = &authunix_ops,
241 .au_flavor = RPC_AUTH_UNIX, 202 .au_flavor = RPC_AUTH_UNIX,
242 .au_count = REFCOUNT_INIT(1), 203 .au_count = REFCOUNT_INIT(1),
@@ -246,7 +207,6 @@ static
246const struct rpc_credops unix_credops = { 207const struct rpc_credops unix_credops = {
247 .cr_name = "AUTH_UNIX", 208 .cr_name = "AUTH_UNIX",
248 .crdestroy = unx_destroy_cred, 209 .crdestroy = unx_destroy_cred,
249 .crbind = rpcauth_generic_bind_cred,
250 .crmatch = unx_match, 210 .crmatch = unx_match,
251 .crmarshal = unx_marshal, 211 .crmarshal = unx_marshal,
252 .crrefresh = unx_refresh, 212 .crrefresh = unx_refresh,
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index fa5ba6ed3197..ec451b8114b0 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -197,7 +197,7 @@ out_free:
197/** 197/**
198 * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. 198 * xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
199 * @xprt: the transport holding the preallocated strucures 199 * @xprt: the transport holding the preallocated strucures
200 * @max_reqs the maximum number of preallocated structures to destroy 200 * @max_reqs: the maximum number of preallocated structures to destroy
201 * 201 *
202 * Since these structures may have been allocated by multiple calls 202 * Since these structures may have been allocated by multiple calls
203 * to xprt_setup_backchannel, we only destroy up to the maximum number 203 * to xprt_setup_backchannel, we only destroy up to the maximum number
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 24cbddc44c88..71d9599b5816 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -627,6 +627,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
627 new->cl_noretranstimeo = clnt->cl_noretranstimeo; 627 new->cl_noretranstimeo = clnt->cl_noretranstimeo;
628 new->cl_discrtry = clnt->cl_discrtry; 628 new->cl_discrtry = clnt->cl_discrtry;
629 new->cl_chatty = clnt->cl_chatty; 629 new->cl_chatty = clnt->cl_chatty;
630 new->cl_principal = clnt->cl_principal;
630 return new; 631 return new;
631 632
632out_err: 633out_err:
@@ -1029,7 +1030,7 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
1029 task->tk_msg.rpc_argp = msg->rpc_argp; 1030 task->tk_msg.rpc_argp = msg->rpc_argp;
1030 task->tk_msg.rpc_resp = msg->rpc_resp; 1031 task->tk_msg.rpc_resp = msg->rpc_resp;
1031 if (msg->rpc_cred != NULL) 1032 if (msg->rpc_cred != NULL)
1032 task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); 1033 task->tk_msg.rpc_cred = get_cred(msg->rpc_cred);
1033 } 1034 }
1034} 1035}
1035 1036
@@ -2521,9 +2522,8 @@ static int rpc_ping(struct rpc_clnt *clnt)
2521 .rpc_proc = &rpcproc_null, 2522 .rpc_proc = &rpcproc_null,
2522 }; 2523 };
2523 int err; 2524 int err;
2524 msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); 2525 err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
2525 err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); 2526 RPC_TASK_NULLCREDS);
2526 put_rpccred(msg.rpc_cred);
2527 return err; 2527 return err;
2528} 2528}
2529 2529
@@ -2534,15 +2534,15 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
2534{ 2534{
2535 struct rpc_message msg = { 2535 struct rpc_message msg = {
2536 .rpc_proc = &rpcproc_null, 2536 .rpc_proc = &rpcproc_null,
2537 .rpc_cred = cred,
2538 }; 2537 };
2539 struct rpc_task_setup task_setup_data = { 2538 struct rpc_task_setup task_setup_data = {
2540 .rpc_client = clnt, 2539 .rpc_client = clnt,
2541 .rpc_xprt = xprt, 2540 .rpc_xprt = xprt,
2542 .rpc_message = &msg, 2541 .rpc_message = &msg,
2542 .rpc_op_cred = cred,
2543 .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, 2543 .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
2544 .callback_data = data, 2544 .callback_data = data,
2545 .flags = flags, 2545 .flags = flags | RPC_TASK_NULLCREDS,
2546 }; 2546 };
2547 2547
2548 return rpc_run_task(&task_setup_data); 2548 return rpc_run_task(&task_setup_data);
@@ -2593,7 +2593,6 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
2593 void *dummy) 2593 void *dummy)
2594{ 2594{
2595 struct rpc_cb_add_xprt_calldata *data; 2595 struct rpc_cb_add_xprt_calldata *data;
2596 struct rpc_cred *cred;
2597 struct rpc_task *task; 2596 struct rpc_task *task;
2598 2597
2599 data = kmalloc(sizeof(*data), GFP_NOFS); 2598 data = kmalloc(sizeof(*data), GFP_NOFS);
@@ -2602,11 +2601,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
2602 data->xps = xprt_switch_get(xps); 2601 data->xps = xprt_switch_get(xps);
2603 data->xprt = xprt_get(xprt); 2602 data->xprt = xprt_get(xprt);
2604 2603
2605 cred = authnull_ops.lookup_cred(NULL, NULL, 0); 2604 task = rpc_call_null_helper(clnt, xprt, NULL,
2606 task = rpc_call_null_helper(clnt, xprt, cred, 2605 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
2607 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
2608 &rpc_cb_add_xprt_call_ops, data); 2606 &rpc_cb_add_xprt_call_ops, data);
2609 put_rpccred(cred);
2610 if (IS_ERR(task)) 2607 if (IS_ERR(task))
2611 return PTR_ERR(task); 2608 return PTR_ERR(task);
2612 rpc_put_task(task); 2609 rpc_put_task(task);
@@ -2637,7 +2634,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
2637 struct rpc_xprt *xprt, 2634 struct rpc_xprt *xprt,
2638 void *data) 2635 void *data)
2639{ 2636{
2640 struct rpc_cred *cred;
2641 struct rpc_task *task; 2637 struct rpc_task *task;
2642 struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; 2638 struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
2643 int status = -EADDRINUSE; 2639 int status = -EADDRINUSE;
@@ -2649,11 +2645,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
2649 goto out_err; 2645 goto out_err;
2650 2646
2651 /* Test the connection */ 2647 /* Test the connection */
2652 cred = authnull_ops.lookup_cred(NULL, NULL, 0); 2648 task = rpc_call_null_helper(clnt, xprt, NULL,
2653 task = rpc_call_null_helper(clnt, xprt, cred, 2649 RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
2654 RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
2655 NULL, NULL); 2650 NULL, NULL);
2656 put_rpccred(cred);
2657 if (IS_ERR(task)) { 2651 if (IS_ERR(task)) {
2658 status = PTR_ERR(task); 2652 status = PTR_ERR(task);
2659 goto out_err; 2653 goto out_err;
@@ -2667,6 +2661,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
2667 /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ 2661 /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
2668 xtest->add_xprt_test(clnt, xprt, xtest->data); 2662 xtest->add_xprt_test(clnt, xprt, xtest->data);
2669 2663
2664 xprt_put(xprt);
2665 xprt_switch_put(xps);
2666
2670 /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ 2667 /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
2671 return 1; 2668 return 1;
2672out_err: 2669out_err:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 4fda18d47e2c..69663681bf9d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = {
1266 * that this file will be there and have a certain format. 1266 * that this file will be there and have a certain format.
1267 */ 1267 */
1268static int 1268static int
1269rpc_show_dummy_info(struct seq_file *m, void *v) 1269rpc_dummy_info_show(struct seq_file *m, void *v)
1270{ 1270{
1271 seq_printf(m, "RPC server: %s\n", utsname()->nodename); 1271 seq_printf(m, "RPC server: %s\n", utsname()->nodename);
1272 seq_printf(m, "service: foo (1) version 0\n"); 1272 seq_printf(m, "service: foo (1) version 0\n");
@@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v)
1275 seq_printf(m, "port: 0\n"); 1275 seq_printf(m, "port: 0\n");
1276 return 0; 1276 return 0;
1277} 1277}
1278 1278DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
1279static int
1280rpc_dummy_info_open(struct inode *inode, struct file *file)
1281{
1282 return single_open(file, rpc_show_dummy_info, NULL);
1283}
1284
1285static const struct file_operations rpc_dummy_info_operations = {
1286 .owner = THIS_MODULE,
1287 .open = rpc_dummy_info_open,
1288 .read = seq_read,
1289 .llseek = seq_lseek,
1290 .release = single_release,
1291};
1292 1279
1293static const struct rpc_filelist gssd_dummy_info_file[] = { 1280static const struct rpc_filelist gssd_dummy_info_file[] = {
1294 [0] = { 1281 [0] = {
1295 .name = "info", 1282 .name = "info",
1296 .i_fop = &rpc_dummy_info_operations, 1283 .i_fop = &rpc_dummy_info_fops,
1297 .mode = S_IFREG | 0400, 1284 .mode = S_IFREG | 0400,
1298 }, 1285 },
1299}; 1286};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c7872bc13860..41a971ac1c63 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task)
752 goto bailout_nofree; 752 goto bailout_nofree;
753 } 753 }
754 754
755 map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); 755 map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
756 if (!map) { 756 if (!map) {
757 status = -ENOMEM; 757 status = -ENOMEM;
758 dprintk("RPC: %5u %s: no memory available\n", 758 dprintk("RPC: %5u %s: no memory available\n",
@@ -770,7 +770,13 @@ void rpcb_getport_async(struct rpc_task *task)
770 case RPCBVERS_4: 770 case RPCBVERS_4:
771 case RPCBVERS_3: 771 case RPCBVERS_3:
772 map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; 772 map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
773 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); 773 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
774 if (!map->r_addr) {
775 status = -ENOMEM;
776 dprintk("RPC: %5u %s: no memory available\n",
777 task->tk_pid, __func__);
778 goto bailout_free_args;
779 }
774 map->r_owner = ""; 780 map->r_owner = "";
775 break; 781 break;
776 case RPCBVERS_2: 782 case RPCBVERS_2:
@@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task)
793 rpc_put_task(child); 799 rpc_put_task(child);
794 return; 800 return;
795 801
802bailout_free_args:
803 kfree(map);
796bailout_release_client: 804bailout_release_client:
797 rpc_release_client(rpcb_clnt); 805 rpc_release_client(rpcb_clnt);
798bailout_nofree: 806bailout_nofree:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 57ca5bead1cb..adc3c40cc733 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -997,6 +997,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
997 997
998 task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); 998 task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
999 999
1000 task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
1001
1000 if (task->tk_ops->rpc_call_prepare != NULL) 1002 if (task->tk_ops->rpc_call_prepare != NULL)
1001 task->tk_action = rpc_prepare_task; 1003 task->tk_action = rpc_prepare_task;
1002 1004
@@ -1054,6 +1056,7 @@ static void rpc_free_task(struct rpc_task *task)
1054{ 1056{
1055 unsigned short tk_flags = task->tk_flags; 1057 unsigned short tk_flags = task->tk_flags;
1056 1058
1059 put_rpccred(task->tk_op_cred);
1057 rpc_release_calldata(task->tk_ops, task->tk_calldata); 1060 rpc_release_calldata(task->tk_ops, task->tk_calldata);
1058 1061
1059 if (tk_flags & RPC_TASK_DYNAMIC) { 1062 if (tk_flags & RPC_TASK_DYNAMIC) {
@@ -1071,7 +1074,7 @@ static void rpc_release_resources_task(struct rpc_task *task)
1071{ 1074{
1072 xprt_release(task); 1075 xprt_release(task);
1073 if (task->tk_msg.rpc_cred) { 1076 if (task->tk_msg.rpc_cred) {
1074 put_rpccred(task->tk_msg.rpc_cred); 1077 put_cred(task->tk_msg.rpc_cred);
1075 task->tk_msg.rpc_cred = NULL; 1078 task->tk_msg.rpc_cred = NULL;
1076 } 1079 }
1077 rpc_task_release_client(task); 1080 rpc_task_release_client(task);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index e2d64c7138c3..8394124126f8 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -383,7 +383,7 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
383/** 383/**
384 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch 384 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
385 * @xpi: pointer to rpc_xprt_iter 385 * @xpi: pointer to rpc_xprt_iter
386 * @xps: pointer to a new rpc_xprt_switch or NULL 386 * @newswitch: pointer to a new rpc_xprt_switch or NULL
387 * 387 *
388 * Swaps out the existing xpi->xpi_xpswitch with a new value. 388 * Swaps out the existing xpi->xpi_xpswitch with a new value.
389 */ 389 */
@@ -401,7 +401,7 @@ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
401 401
402/** 402/**
403 * xprt_iter_destroy - Destroys the xprt iterator 403 * xprt_iter_destroy - Destroys the xprt iterator
404 * @xpi pointer to rpc_xprt_iter 404 * @xpi: pointer to rpc_xprt_iter
405 */ 405 */
406void xprt_iter_destroy(struct rpc_xprt_iter *xpi) 406void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
407{ 407{
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 8bf19e142b6b..8ed0377d7a18 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,8 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o 2obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
3 3
4rpcrdma-y := transport.o rpc_rdma.o verbs.o \ 4rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
5 fmr_ops.o frwr_ops.o \
6 svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ 5 svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
7 svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ 6 svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
8 module.o 7 module.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index edba0d35776b..0de9b3e63770 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -5,7 +5,6 @@
5 * Support for backward direction RPCs on RPC/RDMA. 5 * Support for backward direction RPCs on RPC/RDMA.
6 */ 6 */
7 7
8#include <linux/module.h>
9#include <linux/sunrpc/xprt.h> 8#include <linux/sunrpc/xprt.h>
10#include <linux/sunrpc/svc.h> 9#include <linux/sunrpc/svc.h>
11#include <linux/sunrpc/svc_xprt.h> 10#include <linux/sunrpc/svc_xprt.h>
@@ -20,29 +19,16 @@
20 19
21#undef RPCRDMA_BACKCHANNEL_DEBUG 20#undef RPCRDMA_BACKCHANNEL_DEBUG
22 21
23static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
24 struct rpc_rqst *rqst)
25{
26 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
27 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
28
29 spin_lock(&buf->rb_reqslock);
30 list_del(&req->rl_all);
31 spin_unlock(&buf->rb_reqslock);
32
33 rpcrdma_destroy_req(req);
34}
35
36static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, 22static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
37 unsigned int count) 23 unsigned int count)
38{ 24{
39 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 25 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
26 struct rpcrdma_req *req;
40 struct rpc_rqst *rqst; 27 struct rpc_rqst *rqst;
41 unsigned int i; 28 unsigned int i;
42 29
43 for (i = 0; i < (count << 1); i++) { 30 for (i = 0; i < (count << 1); i++) {
44 struct rpcrdma_regbuf *rb; 31 struct rpcrdma_regbuf *rb;
45 struct rpcrdma_req *req;
46 size_t size; 32 size_t size;
47 33
48 req = rpcrdma_create_req(r_xprt); 34 req = rpcrdma_create_req(r_xprt);
@@ -68,7 +54,7 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
68 return 0; 54 return 0;
69 55
70out_fail: 56out_fail:
71 rpcrdma_bc_free_rqst(r_xprt, rqst); 57 rpcrdma_req_destroy(req);
72 return -ENOMEM; 58 return -ENOMEM;
73} 59}
74 60
@@ -101,7 +87,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
101 goto out_free; 87 goto out_free;
102 88
103 r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; 89 r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
104 request_module("svcrdma");
105 trace_xprtrdma_cb_setup(r_xprt, reqs); 90 trace_xprtrdma_cb_setup(r_xprt, reqs);
106 return 0; 91 return 0;
107 92
@@ -173,21 +158,21 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
173 */ 158 */
174int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) 159int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
175{ 160{
176 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 161 struct rpc_xprt *xprt = rqst->rq_xprt;
162 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
177 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 163 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
178 int rc; 164 int rc;
179 165
180 if (!xprt_connected(rqst->rq_xprt)) 166 if (!xprt_connected(xprt))
181 goto drop_connection; 167 return -ENOTCONN;
182 168
183 if (!xprt_request_get_cong(rqst->rq_xprt, rqst)) 169 if (!xprt_request_get_cong(xprt, rqst))
184 return -EBADSLT; 170 return -EBADSLT;
185 171
186 rc = rpcrdma_bc_marshal_reply(rqst); 172 rc = rpcrdma_bc_marshal_reply(rqst);
187 if (rc < 0) 173 if (rc < 0)
188 goto failed_marshal; 174 goto failed_marshal;
189 175
190 rpcrdma_post_recvs(r_xprt, true);
191 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 176 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
192 goto drop_connection; 177 goto drop_connection;
193 return 0; 178 return 0;
@@ -196,7 +181,7 @@ failed_marshal:
196 if (rc != -ENOTCONN) 181 if (rc != -ENOTCONN)
197 return rc; 182 return rc;
198drop_connection: 183drop_connection:
199 xprt_disconnect_done(rqst->rq_xprt); 184 xprt_rdma_close(xprt);
200 return -ENOTCONN; 185 return -ENOTCONN;
201} 186}
202 187
@@ -207,7 +192,6 @@ drop_connection:
207 */ 192 */
208void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) 193void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
209{ 194{
210 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
211 struct rpc_rqst *rqst, *tmp; 195 struct rpc_rqst *rqst, *tmp;
212 196
213 spin_lock(&xprt->bc_pa_lock); 197 spin_lock(&xprt->bc_pa_lock);
@@ -215,7 +199,7 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
215 list_del(&rqst->rq_bc_pa_list); 199 list_del(&rqst->rq_bc_pa_list);
216 spin_unlock(&xprt->bc_pa_lock); 200 spin_unlock(&xprt->bc_pa_lock);
217 201
218 rpcrdma_bc_free_rqst(r_xprt, rqst); 202 rpcrdma_req_destroy(rpcr_to_rdmar(rqst));
219 203
220 spin_lock(&xprt->bc_pa_lock); 204 spin_lock(&xprt->bc_pa_lock);
221 } 205 }
@@ -231,9 +215,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
231 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 215 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
232 struct rpc_xprt *xprt = rqst->rq_xprt; 216 struct rpc_xprt *xprt = rqst->rq_xprt;
233 217
234 dprintk("RPC: %s: freeing rqst %p (req %p)\n",
235 __func__, rqst, req);
236
237 rpcrdma_recv_buffer_put(req->rl_reply); 218 rpcrdma_recv_buffer_put(req->rl_reply);
238 req->rl_reply = NULL; 219 req->rl_reply = NULL;
239 220
@@ -319,7 +300,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
319 300
320out_overflow: 301out_overflow:
321 pr_warn("RPC/RDMA backchannel overflow\n"); 302 pr_warn("RPC/RDMA backchannel overflow\n");
322 xprt_disconnect_done(xprt); 303 xprt_force_disconnect(xprt);
323 /* This receive buffer gets reposted automatically 304 /* This receive buffer gets reposted automatically
324 * when the connection is re-established. 305 * when the connection is re-established.
325 */ 306 */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
deleted file mode 100644
index fd8fea59fe92..000000000000
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ /dev/null
@@ -1,337 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
5 */
6
7/* Lightweight memory registration using Fast Memory Regions (FMR).
8 * Referred to sometimes as MTHCAFMR mode.
9 *
10 * FMR uses synchronous memory registration and deregistration.
11 * FMR registration is known to be fast, but FMR deregistration
12 * can take tens of usecs to complete.
13 */
14
15/* Normal operation
16 *
17 * A Memory Region is prepared for RDMA READ or WRITE using the
18 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
19 * finished, the Memory Region is unmapped using the ib_unmap_fmr
20 * verb (fmr_op_unmap).
21 */
22
23#include <linux/sunrpc/svc_rdma.h>
24
25#include "xprt_rdma.h"
26#include <trace/events/rpcrdma.h>
27
28#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
29# define RPCDBG_FACILITY RPCDBG_TRANS
30#endif
31
32/* Maximum scatter/gather per FMR */
33#define RPCRDMA_MAX_FMR_SGES (64)
34
35/* Access mode of externally registered pages */
36enum {
37 RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE |
38 IB_ACCESS_REMOTE_READ,
39};
40
41bool
42fmr_is_supported(struct rpcrdma_ia *ia)
43{
44 if (!ia->ri_device->ops.alloc_fmr) {
45 pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
46 ia->ri_device->name);
47 return false;
48 }
49 return true;
50}
51
52static void
53__fmr_unmap(struct rpcrdma_mr *mr)
54{
55 LIST_HEAD(l);
56 int rc;
57
58 list_add(&mr->fmr.fm_mr->list, &l);
59 rc = ib_unmap_fmr(&l);
60 list_del(&mr->fmr.fm_mr->list);
61 if (rc)
62 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
63 mr, rc);
64}
65
66/* Release an MR.
67 */
68static void
69fmr_op_release_mr(struct rpcrdma_mr *mr)
70{
71 int rc;
72
73 kfree(mr->fmr.fm_physaddrs);
74 kfree(mr->mr_sg);
75
76 /* In case this one was left mapped, try to unmap it
77 * to prevent dealloc_fmr from failing with EBUSY
78 */
79 __fmr_unmap(mr);
80
81 rc = ib_dealloc_fmr(mr->fmr.fm_mr);
82 if (rc)
83 pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
84 mr, rc);
85
86 kfree(mr);
87}
88
89/* MRs are dynamically allocated, so simply clean up and release the MR.
90 * A replacement MR will subsequently be allocated on demand.
91 */
92static void
93fmr_mr_recycle_worker(struct work_struct *work)
94{
95 struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
96 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
97
98 trace_xprtrdma_mr_recycle(mr);
99
100 trace_xprtrdma_mr_unmap(mr);
101 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
102 mr->mr_sg, mr->mr_nents, mr->mr_dir);
103
104 spin_lock(&r_xprt->rx_buf.rb_mrlock);
105 list_del(&mr->mr_all);
106 r_xprt->rx_stats.mrs_recycled++;
107 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
108 fmr_op_release_mr(mr);
109}
110
111static int
112fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
113{
114 static struct ib_fmr_attr fmr_attr = {
115 .max_pages = RPCRDMA_MAX_FMR_SGES,
116 .max_maps = 1,
117 .page_shift = PAGE_SHIFT
118 };
119
120 mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
121 sizeof(u64), GFP_KERNEL);
122 if (!mr->fmr.fm_physaddrs)
123 goto out_free;
124
125 mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
126 sizeof(*mr->mr_sg), GFP_KERNEL);
127 if (!mr->mr_sg)
128 goto out_free;
129
130 sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
131
132 mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
133 &fmr_attr);
134 if (IS_ERR(mr->fmr.fm_mr))
135 goto out_fmr_err;
136
137 INIT_LIST_HEAD(&mr->mr_list);
138 INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
139 return 0;
140
141out_fmr_err:
142 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
143 PTR_ERR(mr->fmr.fm_mr));
144
145out_free:
146 kfree(mr->mr_sg);
147 kfree(mr->fmr.fm_physaddrs);
148 return -ENOMEM;
149}
150
151/* On success, sets:
152 * ep->rep_attr.cap.max_send_wr
153 * ep->rep_attr.cap.max_recv_wr
154 * cdata->max_requests
155 * ia->ri_max_segs
156 */
157static int
158fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
159 struct rpcrdma_create_data_internal *cdata)
160{
161 int max_qp_wr;
162
163 max_qp_wr = ia->ri_device->attrs.max_qp_wr;
164 max_qp_wr -= RPCRDMA_BACKWARD_WRS;
165 max_qp_wr -= 1;
166 if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
167 return -ENOMEM;
168 if (cdata->max_requests > max_qp_wr)
169 cdata->max_requests = max_qp_wr;
170 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
171 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
172 ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
173 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
174 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
175 ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
176
177 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
178 RPCRDMA_MAX_FMR_SGES);
179 ia->ri_max_segs += 2; /* segments for head and tail buffers */
180 return 0;
181}
182
183/* FMR mode conveys up to 64 pages of payload per chunk segment.
184 */
185static size_t
186fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
187{
188 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
189 RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
190}
191
192/* Use the ib_map_phys_fmr() verb to register a memory region
193 * for remote access via RDMA READ or RDMA WRITE.
194 */
195static struct rpcrdma_mr_seg *
196fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
197 int nsegs, bool writing, struct rpcrdma_mr **out)
198{
199 struct rpcrdma_mr_seg *seg1 = seg;
200 int len, pageoff, i, rc;
201 struct rpcrdma_mr *mr;
202 u64 *dma_pages;
203
204 mr = rpcrdma_mr_get(r_xprt);
205 if (!mr)
206 return ERR_PTR(-EAGAIN);
207
208 pageoff = offset_in_page(seg1->mr_offset);
209 seg1->mr_offset -= pageoff; /* start of page */
210 seg1->mr_len += pageoff;
211 len = -pageoff;
212 if (nsegs > RPCRDMA_MAX_FMR_SGES)
213 nsegs = RPCRDMA_MAX_FMR_SGES;
214 for (i = 0; i < nsegs;) {
215 if (seg->mr_page)
216 sg_set_page(&mr->mr_sg[i],
217 seg->mr_page,
218 seg->mr_len,
219 offset_in_page(seg->mr_offset));
220 else
221 sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
222 seg->mr_len);
223 len += seg->mr_len;
224 ++seg;
225 ++i;
226 /* Check for holes */
227 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
228 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
229 break;
230 }
231 mr->mr_dir = rpcrdma_data_dir(writing);
232
233 mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
234 mr->mr_sg, i, mr->mr_dir);
235 if (!mr->mr_nents)
236 goto out_dmamap_err;
237 trace_xprtrdma_mr_map(mr);
238
239 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
240 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
241 rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
242 dma_pages[0]);
243 if (rc)
244 goto out_maperr;
245
246 mr->mr_handle = mr->fmr.fm_mr->rkey;
247 mr->mr_length = len;
248 mr->mr_offset = dma_pages[0] + pageoff;
249
250 *out = mr;
251 return seg;
252
253out_dmamap_err:
254 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
255 mr->mr_sg, i);
256 rpcrdma_mr_put(mr);
257 return ERR_PTR(-EIO);
258
259out_maperr:
260 pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
261 len, (unsigned long long)dma_pages[0],
262 pageoff, mr->mr_nents, rc);
263 rpcrdma_mr_unmap_and_put(mr);
264 return ERR_PTR(-EIO);
265}
266
267/* Post Send WR containing the RPC Call message.
268 */
269static int
270fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
271{
272 return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
273}
274
275/* Invalidate all memory regions that were registered for "req".
276 *
277 * Sleeps until it is safe for the host CPU to access the
278 * previously mapped memory regions.
279 *
280 * Caller ensures that @mrs is not empty before the call. This
281 * function empties the list.
282 */
283static void
284fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
285{
286 struct rpcrdma_mr *mr;
287 LIST_HEAD(unmap_list);
288 int rc;
289
290 /* ORDER: Invalidate all of the req's MRs first
291 *
292 * ib_unmap_fmr() is slow, so use a single call instead
293 * of one call per mapped FMR.
294 */
295 list_for_each_entry(mr, mrs, mr_list) {
296 dprintk("RPC: %s: unmapping fmr %p\n",
297 __func__, &mr->fmr);
298 trace_xprtrdma_mr_localinv(mr);
299 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
300 }
301 r_xprt->rx_stats.local_inv_needed++;
302 rc = ib_unmap_fmr(&unmap_list);
303 if (rc)
304 goto out_release;
305
306 /* ORDER: Now DMA unmap all of the req's MRs, and return
307 * them to the free MW list.
308 */
309 while (!list_empty(mrs)) {
310 mr = rpcrdma_mr_pop(mrs);
311 list_del(&mr->fmr.fm_mr->list);
312 rpcrdma_mr_unmap_and_put(mr);
313 }
314
315 return;
316
317out_release:
318 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
319
320 while (!list_empty(mrs)) {
321 mr = rpcrdma_mr_pop(mrs);
322 list_del(&mr->fmr.fm_mr->list);
323 rpcrdma_mr_recycle(mr);
324 }
325}
326
327const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
328 .ro_map = fmr_op_map,
329 .ro_send = fmr_op_send,
330 .ro_unmap_sync = fmr_op_unmap_sync,
331 .ro_open = fmr_op_open,
332 .ro_maxpages = fmr_op_maxpages,
333 .ro_init_mr = fmr_op_init_mr,
334 .ro_release_mr = fmr_op_release_mr,
335 .ro_displayname = "fmr",
336 .ro_send_w_inv_ok = 0,
337};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index fc6378cc0c1c..6a561056b538 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -15,21 +15,21 @@
15/* Normal operation 15/* Normal operation
16 * 16 *
17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG 17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
18 * Work Request (frwr_op_map). When the RDMA operation is finished, this 18 * Work Request (frwr_map). When the RDMA operation is finished, this
19 * Memory Region is invalidated using a LOCAL_INV Work Request 19 * Memory Region is invalidated using a LOCAL_INV Work Request
20 * (frwr_op_unmap_sync). 20 * (frwr_unmap_sync).
21 * 21 *
22 * Typically these Work Requests are not signaled, and neither are RDMA 22 * Typically these Work Requests are not signaled, and neither are RDMA
23 * SEND Work Requests (with the exception of signaling occasionally to 23 * SEND Work Requests (with the exception of signaling occasionally to
24 * prevent provider work queue overflows). This greatly reduces HCA 24 * prevent provider work queue overflows). This greatly reduces HCA
25 * interrupt workload. 25 * interrupt workload.
26 * 26 *
27 * As an optimization, frwr_op_unmap marks MRs INVALID before the 27 * As an optimization, frwr_unmap marks MRs INVALID before the
28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on 28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
29 * rb_mrs immediately so that no work (like managing a linked list 29 * rb_mrs immediately so that no work (like managing a linked list
30 * under a spinlock) is needed in the completion upcall. 30 * under a spinlock) is needed in the completion upcall.
31 * 31 *
32 * But this means that frwr_op_map() can occasionally encounter an MR 32 * But this means that frwr_map() can occasionally encounter an MR
33 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue 33 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
34 * ordering prevents a subsequent FAST_REG WR from executing against 34 * ordering prevents a subsequent FAST_REG WR from executing against
35 * that MR while it is still being invalidated. 35 * that MR while it is still being invalidated.
@@ -57,14 +57,14 @@
57 * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR 57 * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
58 * state, and the pending WR was flushed. 58 * state, and the pending WR was flushed.
59 * 59 *
60 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered 60 * When frwr_map encounters FLUSHED and VALID MRs, they are recovered
61 * with ib_dereg_mr and then are re-initialized. Because MR recovery 61 * with ib_dereg_mr and then are re-initialized. Because MR recovery
62 * allocates fresh resources, it is deferred to a workqueue, and the 62 * allocates fresh resources, it is deferred to a workqueue, and the
63 * recovered MRs are placed back on the rb_mrs list when recovery is 63 * recovered MRs are placed back on the rb_mrs list when recovery is
64 * complete. frwr_op_map allocates another MR for the current RPC while 64 * complete. frwr_map allocates another MR for the current RPC while
65 * the broken MR is reset. 65 * the broken MR is reset.
66 * 66 *
67 * To ensure that frwr_op_map doesn't encounter an MR that is marked 67 * To ensure that frwr_map doesn't encounter an MR that is marked
68 * INVALID but that is about to be flushed due to a previous transport 68 * INVALID but that is about to be flushed due to a previous transport
69 * disconnect, the transport connect worker attempts to drain all 69 * disconnect, the transport connect worker attempts to drain all
70 * pending send queue WRs before the transport is reconnected. 70 * pending send queue WRs before the transport is reconnected.
@@ -80,8 +80,13 @@
80# define RPCDBG_FACILITY RPCDBG_TRANS 80# define RPCDBG_FACILITY RPCDBG_TRANS
81#endif 81#endif
82 82
83bool 83/**
84frwr_is_supported(struct rpcrdma_ia *ia) 84 * frwr_is_supported - Check if device supports FRWR
85 * @ia: interface adapter to check
86 *
87 * Returns true if device supports FRWR, otherwise false
88 */
89bool frwr_is_supported(struct rpcrdma_ia *ia)
85{ 90{
86 struct ib_device_attr *attrs = &ia->ri_device->attrs; 91 struct ib_device_attr *attrs = &ia->ri_device->attrs;
87 92
@@ -97,15 +102,18 @@ out_not_supported:
97 return false; 102 return false;
98} 103}
99 104
100static void 105/**
101frwr_op_release_mr(struct rpcrdma_mr *mr) 106 * frwr_release_mr - Destroy one MR
107 * @mr: MR allocated by frwr_init_mr
108 *
109 */
110void frwr_release_mr(struct rpcrdma_mr *mr)
102{ 111{
103 int rc; 112 int rc;
104 113
105 rc = ib_dereg_mr(mr->frwr.fr_mr); 114 rc = ib_dereg_mr(mr->frwr.fr_mr);
106 if (rc) 115 if (rc)
107 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", 116 trace_xprtrdma_frwr_dereg(mr, rc);
108 mr, rc);
109 kfree(mr->mr_sg); 117 kfree(mr->mr_sg);
110 kfree(mr); 118 kfree(mr);
111} 119}
@@ -117,60 +125,78 @@ static void
117frwr_mr_recycle_worker(struct work_struct *work) 125frwr_mr_recycle_worker(struct work_struct *work)
118{ 126{
119 struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); 127 struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
120 enum rpcrdma_frwr_state state = mr->frwr.fr_state;
121 struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 128 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
122 129
123 trace_xprtrdma_mr_recycle(mr); 130 trace_xprtrdma_mr_recycle(mr);
124 131
125 if (state != FRWR_FLUSHED_LI) { 132 if (mr->mr_dir != DMA_NONE) {
126 trace_xprtrdma_mr_unmap(mr); 133 trace_xprtrdma_mr_unmap(mr);
127 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 134 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
128 mr->mr_sg, mr->mr_nents, mr->mr_dir); 135 mr->mr_sg, mr->mr_nents, mr->mr_dir);
136 mr->mr_dir = DMA_NONE;
129 } 137 }
130 138
131 spin_lock(&r_xprt->rx_buf.rb_mrlock); 139 spin_lock(&r_xprt->rx_buf.rb_mrlock);
132 list_del(&mr->mr_all); 140 list_del(&mr->mr_all);
133 r_xprt->rx_stats.mrs_recycled++; 141 r_xprt->rx_stats.mrs_recycled++;
134 spin_unlock(&r_xprt->rx_buf.rb_mrlock); 142 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
135 frwr_op_release_mr(mr); 143
144 frwr_release_mr(mr);
136} 145}
137 146
138static int 147/**
139frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) 148 * frwr_init_mr - Initialize one MR
149 * @ia: interface adapter
150 * @mr: generic MR to prepare for FRWR
151 *
152 * Returns zero if successful. Otherwise a negative errno
153 * is returned.
154 */
155int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
140{ 156{
141 unsigned int depth = ia->ri_max_frwr_depth; 157 unsigned int depth = ia->ri_max_frwr_depth;
142 struct rpcrdma_frwr *frwr = &mr->frwr; 158 struct scatterlist *sg;
159 struct ib_mr *frmr;
143 int rc; 160 int rc;
144 161
145 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); 162 frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
146 if (IS_ERR(frwr->fr_mr)) 163 if (IS_ERR(frmr))
147 goto out_mr_err; 164 goto out_mr_err;
148 165
149 mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); 166 sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
150 if (!mr->mr_sg) 167 if (!sg)
151 goto out_list_err; 168 goto out_list_err;
152 169
170 mr->frwr.fr_mr = frmr;
171 mr->frwr.fr_state = FRWR_IS_INVALID;
172 mr->mr_dir = DMA_NONE;
153 INIT_LIST_HEAD(&mr->mr_list); 173 INIT_LIST_HEAD(&mr->mr_list);
154 INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); 174 INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
155 sg_init_table(mr->mr_sg, depth); 175 init_completion(&mr->frwr.fr_linv_done);
156 init_completion(&frwr->fr_linv_done); 176
177 sg_init_table(sg, depth);
178 mr->mr_sg = sg;
157 return 0; 179 return 0;
158 180
159out_mr_err: 181out_mr_err:
160 rc = PTR_ERR(frwr->fr_mr); 182 rc = PTR_ERR(frmr);
161 dprintk("RPC: %s: ib_alloc_mr status %i\n", 183 trace_xprtrdma_frwr_alloc(mr, rc);
162 __func__, rc);
163 return rc; 184 return rc;
164 185
165out_list_err: 186out_list_err:
166 rc = -ENOMEM;
167 dprintk("RPC: %s: sg allocation failure\n", 187 dprintk("RPC: %s: sg allocation failure\n",
168 __func__); 188 __func__);
169 ib_dereg_mr(frwr->fr_mr); 189 ib_dereg_mr(frmr);
170 return rc; 190 return -ENOMEM;
171} 191}
172 192
173/* On success, sets: 193/**
194 * frwr_open - Prepare an endpoint for use with FRWR
195 * @ia: interface adapter this endpoint will use
196 * @ep: endpoint to prepare
197 * @cdata: transport parameters
198 *
199 * On success, sets:
174 * ep->rep_attr.cap.max_send_wr 200 * ep->rep_attr.cap.max_send_wr
175 * ep->rep_attr.cap.max_recv_wr 201 * ep->rep_attr.cap.max_recv_wr
176 * cdata->max_requests 202 * cdata->max_requests
@@ -179,10 +205,11 @@ out_list_err:
179 * And these FRWR-related fields: 205 * And these FRWR-related fields:
180 * ia->ri_max_frwr_depth 206 * ia->ri_max_frwr_depth
181 * ia->ri_mrtype 207 * ia->ri_mrtype
208 *
209 * On failure, a negative errno is returned.
182 */ 210 */
183static int 211int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
184frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 212 struct rpcrdma_create_data_internal *cdata)
185 struct rpcrdma_create_data_internal *cdata)
186{ 213{
187 struct ib_device_attr *attrs = &ia->ri_device->attrs; 214 struct ib_device_attr *attrs = &ia->ri_device->attrs;
188 int max_qp_wr, depth, delta; 215 int max_qp_wr, depth, delta;
@@ -191,10 +218,17 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
191 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 218 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
192 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; 219 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
193 220
194 ia->ri_max_frwr_depth = 221 /* Quirk: Some devices advertise a large max_fast_reg_page_list_len
195 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 222 * capability, but perform optimally when the MRs are not larger
196 attrs->max_fast_reg_page_list_len); 223 * than a page.
197 dprintk("RPC: %s: device's max FR page list len = %u\n", 224 */
225 if (attrs->max_sge_rd > 1)
226 ia->ri_max_frwr_depth = attrs->max_sge_rd;
227 else
228 ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
229 if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
230 ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
231 dprintk("RPC: %s: max FR page list depth = %u\n",
198 __func__, ia->ri_max_frwr_depth); 232 __func__, ia->ri_max_frwr_depth);
199 233
200 /* Add room for frwr register and invalidate WRs. 234 /* Add room for frwr register and invalidate WRs.
@@ -242,20 +276,28 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
242 276
243 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 277 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
244 ia->ri_max_frwr_depth); 278 ia->ri_max_frwr_depth);
245 ia->ri_max_segs += 2; /* segments for head and tail buffers */ 279 /* Reply chunks require segments for head and tail buffers */
280 ia->ri_max_segs += 2;
281 if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
282 ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
246 return 0; 283 return 0;
247} 284}
248 285
249/* FRWR mode conveys a list of pages per chunk segment. The 286/**
287 * frwr_maxpages - Compute size of largest payload
288 * @r_xprt: transport
289 *
290 * Returns maximum size of an RPC message, in pages.
291 *
292 * FRWR mode conveys a list of pages per chunk segment. The
250 * maximum length of that list is the FRWR page list depth. 293 * maximum length of that list is the FRWR page list depth.
251 */ 294 */
252static size_t 295size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
253frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
254{ 296{
255 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 297 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
256 298
257 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 299 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
258 RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); 300 (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
259} 301}
260 302
261static void 303static void
@@ -332,12 +374,25 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
332 trace_xprtrdma_wc_li_wake(wc, frwr); 374 trace_xprtrdma_wc_li_wake(wc, frwr);
333} 375}
334 376
335/* Post a REG_MR Work Request to register a memory region 377/**
378 * frwr_map - Register a memory region
379 * @r_xprt: controlling transport
380 * @seg: memory region co-ordinates
381 * @nsegs: number of segments remaining
382 * @writing: true when RDMA Write will be used
383 * @xid: XID of RPC using the registered memory
384 * @out: initialized MR
385 *
386 * Prepare a REG_MR Work Request to register a memory region
336 * for remote access via RDMA READ or RDMA WRITE. 387 * for remote access via RDMA READ or RDMA WRITE.
388 *
389 * Returns the next segment or a negative errno pointer.
390 * On success, the prepared MR is planted in @out.
337 */ 391 */
338static struct rpcrdma_mr_seg * 392struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
339frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 393 struct rpcrdma_mr_seg *seg,
340 int nsegs, bool writing, struct rpcrdma_mr **out) 394 int nsegs, bool writing, u32 xid,
395 struct rpcrdma_mr **out)
341{ 396{
342 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 397 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
343 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; 398 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
@@ -384,13 +439,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
384 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); 439 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
385 if (!mr->mr_nents) 440 if (!mr->mr_nents)
386 goto out_dmamap_err; 441 goto out_dmamap_err;
387 trace_xprtrdma_mr_map(mr);
388 442
389 ibmr = frwr->fr_mr; 443 ibmr = frwr->fr_mr;
390 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); 444 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
391 if (unlikely(n != mr->mr_nents)) 445 if (unlikely(n != mr->mr_nents))
392 goto out_mapmr_err; 446 goto out_mapmr_err;
393 447
448 ibmr->iova &= 0x00000000ffffffff;
449 ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
394 key = (u8)(ibmr->rkey & 0x000000FF); 450 key = (u8)(ibmr->rkey & 0x000000FF);
395 ib_update_fast_reg_key(ibmr, ++key); 451 ib_update_fast_reg_key(ibmr, ++key);
396 452
@@ -404,32 +460,35 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
404 mr->mr_handle = ibmr->rkey; 460 mr->mr_handle = ibmr->rkey;
405 mr->mr_length = ibmr->length; 461 mr->mr_length = ibmr->length;
406 mr->mr_offset = ibmr->iova; 462 mr->mr_offset = ibmr->iova;
463 trace_xprtrdma_mr_map(mr);
407 464
408 *out = mr; 465 *out = mr;
409 return seg; 466 return seg;
410 467
411out_dmamap_err: 468out_dmamap_err:
412 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
413 mr->mr_sg, i);
414 frwr->fr_state = FRWR_IS_INVALID; 469 frwr->fr_state = FRWR_IS_INVALID;
470 trace_xprtrdma_frwr_sgerr(mr, i);
415 rpcrdma_mr_put(mr); 471 rpcrdma_mr_put(mr);
416 return ERR_PTR(-EIO); 472 return ERR_PTR(-EIO);
417 473
418out_mapmr_err: 474out_mapmr_err:
419 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", 475 trace_xprtrdma_frwr_maperr(mr, n);
420 frwr->fr_mr, n, mr->mr_nents);
421 rpcrdma_mr_recycle(mr); 476 rpcrdma_mr_recycle(mr);
422 return ERR_PTR(-EIO); 477 return ERR_PTR(-EIO);
423} 478}
424 479
425/* Post Send WR containing the RPC Call message. 480/**
481 * frwr_send - post Send WR containing the RPC Call message
482 * @ia: interface adapter
483 * @req: Prepared RPC Call
426 * 484 *
427 * For FRMR, chain any FastReg WRs to the Send WR. Only a 485 * For FRWR, chain any FastReg WRs to the Send WR. Only a
428 * single ib_post_send call is needed to register memory 486 * single ib_post_send call is needed to register memory
429 * and then post the Send WR. 487 * and then post the Send WR.
488 *
489 * Returns the result of ib_post_send.
430 */ 490 */
431static int 491int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
432frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
433{ 492{
434 struct ib_send_wr *post_wr; 493 struct ib_send_wr *post_wr;
435 struct rpcrdma_mr *mr; 494 struct rpcrdma_mr *mr;
@@ -451,15 +510,18 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
451 } 510 }
452 511
453 /* If ib_post_send fails, the next ->send_request for 512 /* If ib_post_send fails, the next ->send_request for
454 * @req will queue these MWs for recovery. 513 * @req will queue these MRs for recovery.
455 */ 514 */
456 return ib_post_send(ia->ri_id->qp, post_wr, NULL); 515 return ib_post_send(ia->ri_id->qp, post_wr, NULL);
457} 516}
458 517
459/* Handle a remotely invalidated mr on the @mrs list 518/**
519 * frwr_reminv - handle a remotely invalidated mr on the @mrs list
520 * @rep: Received reply
521 * @mrs: list of MRs to check
522 *
460 */ 523 */
461static void 524void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
462frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
463{ 525{
464 struct rpcrdma_mr *mr; 526 struct rpcrdma_mr *mr;
465 527
@@ -473,7 +535,10 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
473 } 535 }
474} 536}
475 537
476/* Invalidate all memory regions that were registered for "req". 538/**
539 * frwr_unmap_sync - invalidate memory regions that were registered for @req
540 * @r_xprt: controlling transport
541 * @mrs: list of MRs to process
477 * 542 *
478 * Sleeps until it is safe for the host CPU to access the 543 * Sleeps until it is safe for the host CPU to access the
479 * previously mapped memory regions. 544 * previously mapped memory regions.
@@ -481,8 +546,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
481 * Caller ensures that @mrs is not empty before the call. This 546 * Caller ensures that @mrs is not empty before the call. This
482 * function empties the list. 547 * function empties the list.
483 */ 548 */
484static void 549void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
485frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
486{ 550{
487 struct ib_send_wr *first, **prev, *last; 551 struct ib_send_wr *first, **prev, *last;
488 const struct ib_send_wr *bad_wr; 552 const struct ib_send_wr *bad_wr;
@@ -561,20 +625,7 @@ out_release:
561 mr = container_of(frwr, struct rpcrdma_mr, frwr); 625 mr = container_of(frwr, struct rpcrdma_mr, frwr);
562 bad_wr = bad_wr->next; 626 bad_wr = bad_wr->next;
563 627
564 list_del(&mr->mr_list); 628 list_del_init(&mr->mr_list);
565 frwr_op_release_mr(mr); 629 rpcrdma_mr_recycle(mr);
566 } 630 }
567} 631}
568
569const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
570 .ro_map = frwr_op_map,
571 .ro_send = frwr_op_send,
572 .ro_reminv = frwr_op_reminv,
573 .ro_unmap_sync = frwr_op_unmap_sync,
574 .ro_open = frwr_op_open,
575 .ro_maxpages = frwr_op_maxpages,
576 .ro_init_mr = frwr_op_init_mr,
577 .ro_release_mr = frwr_op_release_mr,
578 .ro_displayname = "frwr",
579 .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
580};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 9f53e0240035..d18614e02b4e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -218,11 +218,12 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
218 ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); 218 ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
219 page_base = offset_in_page(xdrbuf->page_base); 219 page_base = offset_in_page(xdrbuf->page_base);
220 while (len) { 220 while (len) {
221 if (unlikely(!*ppages)) { 221 /* ACL likes to be lazy in allocating pages - ACLs
222 /* XXX: Certain upper layer operations do 222 * are small by default but can get huge.
223 * not provide receive buffer pages. 223 */
224 */ 224 if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
225 *ppages = alloc_page(GFP_ATOMIC); 225 if (!*ppages)
226 *ppages = alloc_page(GFP_ATOMIC);
226 if (!*ppages) 227 if (!*ppages)
227 return -ENOBUFS; 228 return -ENOBUFS;
228 } 229 }
@@ -356,8 +357,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
356 return nsegs; 357 return nsegs;
357 358
358 do { 359 do {
359 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 360 seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
360 false, &mr);
361 if (IS_ERR(seg)) 361 if (IS_ERR(seg))
362 return PTR_ERR(seg); 362 return PTR_ERR(seg);
363 rpcrdma_mr_push(mr, &req->rl_registered); 363 rpcrdma_mr_push(mr, &req->rl_registered);
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
365 if (encode_read_segment(xdr, mr, pos) < 0) 365 if (encode_read_segment(xdr, mr, pos) < 0)
366 return -EMSGSIZE; 366 return -EMSGSIZE;
367 367
368 trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); 368 trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
369 r_xprt->rx_stats.read_chunk_count++; 369 r_xprt->rx_stats.read_chunk_count++;
370 nsegs -= mr->mr_nents; 370 nsegs -= mr->mr_nents;
371 } while (nsegs); 371 } while (nsegs);
@@ -414,8 +414,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
414 414
415 nchunks = 0; 415 nchunks = 0;
416 do { 416 do {
417 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 417 seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
418 true, &mr);
419 if (IS_ERR(seg)) 418 if (IS_ERR(seg))
420 return PTR_ERR(seg); 419 return PTR_ERR(seg);
421 rpcrdma_mr_push(mr, &req->rl_registered); 420 rpcrdma_mr_push(mr, &req->rl_registered);
@@ -423,7 +422,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
423 if (encode_rdma_segment(xdr, mr) < 0) 422 if (encode_rdma_segment(xdr, mr) < 0)
424 return -EMSGSIZE; 423 return -EMSGSIZE;
425 424
426 trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); 425 trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
427 r_xprt->rx_stats.write_chunk_count++; 426 r_xprt->rx_stats.write_chunk_count++;
428 r_xprt->rx_stats.total_rdma_request += mr->mr_length; 427 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
429 nchunks++; 428 nchunks++;
@@ -472,8 +471,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
472 471
473 nchunks = 0; 472 nchunks = 0;
474 do { 473 do {
475 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 474 seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
476 true, &mr);
477 if (IS_ERR(seg)) 475 if (IS_ERR(seg))
478 return PTR_ERR(seg); 476 return PTR_ERR(seg);
479 rpcrdma_mr_push(mr, &req->rl_registered); 477 rpcrdma_mr_push(mr, &req->rl_registered);
@@ -481,7 +479,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
481 if (encode_rdma_segment(xdr, mr) < 0) 479 if (encode_rdma_segment(xdr, mr) < 0)
482 return -EMSGSIZE; 480 return -EMSGSIZE;
483 481
484 trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); 482 trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
485 r_xprt->rx_stats.reply_chunk_count++; 483 r_xprt->rx_stats.reply_chunk_count++;
486 r_xprt->rx_stats.total_rdma_request += mr->mr_length; 484 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
487 nchunks++; 485 nchunks++;
@@ -667,7 +665,7 @@ out_mapping_overflow:
667 665
668out_mapping_err: 666out_mapping_err:
669 rpcrdma_unmap_sendctx(sc); 667 rpcrdma_unmap_sendctx(sc);
670 pr_err("rpcrdma: Send mapping error\n"); 668 trace_xprtrdma_dma_maperr(sge[sge_no].addr);
671 return false; 669 return false;
672} 670}
673 671
@@ -1188,17 +1186,20 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
1188 p = xdr_inline_decode(xdr, 2 * sizeof(*p)); 1186 p = xdr_inline_decode(xdr, 2 * sizeof(*p));
1189 if (!p) 1187 if (!p)
1190 break; 1188 break;
1191 dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n", 1189 dprintk("RPC: %s: server reports "
1192 rqst->rq_task->tk_pid, __func__, 1190 "version error (%u-%u), xid %08x\n", __func__,
1193 be32_to_cpup(p), be32_to_cpu(*(p + 1))); 1191 be32_to_cpup(p), be32_to_cpu(*(p + 1)),
1192 be32_to_cpu(rep->rr_xid));
1194 break; 1193 break;
1195 case err_chunk: 1194 case err_chunk:
1196 dprintk("RPC: %5u: %s: server reports header decoding error\n", 1195 dprintk("RPC: %s: server reports "
1197 rqst->rq_task->tk_pid, __func__); 1196 "header decoding error, xid %08x\n", __func__,
1197 be32_to_cpu(rep->rr_xid));
1198 break; 1198 break;
1199 default: 1199 default:
1200 dprintk("RPC: %5u: %s: server reports unrecognized error %d\n", 1200 dprintk("RPC: %s: server reports "
1201 rqst->rq_task->tk_pid, __func__, be32_to_cpup(p)); 1201 "unrecognized error %d, xid %08x\n", __func__,
1202 be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
1202 } 1203 }
1203 1204
1204 r_xprt->rx_stats.bad_reply_count++; 1205 r_xprt->rx_stats.bad_reply_count++;
@@ -1248,7 +1249,6 @@ out:
1248out_badheader: 1249out_badheader:
1249 trace_xprtrdma_reply_hdr(rep); 1250 trace_xprtrdma_reply_hdr(rep);
1250 r_xprt->rx_stats.bad_reply_count++; 1251 r_xprt->rx_stats.bad_reply_count++;
1251 status = -EIO;
1252 goto out; 1252 goto out;
1253} 1253}
1254 1254
@@ -1262,8 +1262,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
1262 * RPC has relinquished all its Send Queue entries. 1262 * RPC has relinquished all its Send Queue entries.
1263 */ 1263 */
1264 if (!list_empty(&req->rl_registered)) 1264 if (!list_empty(&req->rl_registered))
1265 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, 1265 frwr_unmap_sync(r_xprt, &req->rl_registered);
1266 &req->rl_registered);
1267 1266
1268 /* Ensure that any DMA mapped pages associated with 1267 /* Ensure that any DMA mapped pages associated with
1269 * the Send of the RPC Call have been unmapped before 1268 * the Send of the RPC Call have been unmapped before
@@ -1292,7 +1291,7 @@ void rpcrdma_deferred_completion(struct work_struct *work)
1292 1291
1293 trace_xprtrdma_defer_cmp(rep); 1292 trace_xprtrdma_defer_cmp(rep);
1294 if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) 1293 if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1295 r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); 1294 frwr_reminv(rep, &req->rl_registered);
1296 rpcrdma_release_rqst(r_xprt, req); 1295 rpcrdma_release_rqst(r_xprt, req);
1297 rpcrdma_complete_rqst(rep); 1296 rpcrdma_complete_rqst(rep);
1298} 1297}
@@ -1312,11 +1311,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1312 u32 credits; 1311 u32 credits;
1313 __be32 *p; 1312 __be32 *p;
1314 1313
1315 --buf->rb_posted_receives;
1316
1317 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1318 goto out_badstatus;
1319
1320 /* Fixed transport header fields */ 1314 /* Fixed transport header fields */
1321 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, 1315 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
1322 rep->rr_hdrbuf.head[0].iov_base); 1316 rep->rr_hdrbuf.head[0].iov_base);
@@ -1356,36 +1350,30 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1356 } 1350 }
1357 1351
1358 req = rpcr_to_rdmar(rqst); 1352 req = rpcr_to_rdmar(rqst);
1353 if (req->rl_reply) {
1354 trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
1355 rpcrdma_recv_buffer_put(req->rl_reply);
1356 }
1359 req->rl_reply = rep; 1357 req->rl_reply = rep;
1360 rep->rr_rqst = rqst; 1358 rep->rr_rqst = rqst;
1361 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 1359 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1362 1360
1363 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1361 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1364 1362 queue_work(buf->rb_completion_wq, &rep->rr_work);
1365 rpcrdma_post_recvs(r_xprt, false);
1366 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1367 return; 1363 return;
1368 1364
1369out_badversion: 1365out_badversion:
1370 trace_xprtrdma_reply_vers(rep); 1366 trace_xprtrdma_reply_vers(rep);
1371 goto repost; 1367 goto out;
1372 1368
1373/* The RPC transaction has already been terminated, or the header
1374 * is corrupt.
1375 */
1376out_norqst: 1369out_norqst:
1377 spin_unlock(&xprt->queue_lock); 1370 spin_unlock(&xprt->queue_lock);
1378 trace_xprtrdma_reply_rqst(rep); 1371 trace_xprtrdma_reply_rqst(rep);
1379 goto repost; 1372 goto out;
1380 1373
1381out_shortreply: 1374out_shortreply:
1382 trace_xprtrdma_reply_short(rep); 1375 trace_xprtrdma_reply_short(rep);
1383 1376
1384/* If no pending RPC transaction was matched, post a replacement 1377out:
1385 * receive buffer before returning.
1386 */
1387repost:
1388 rpcrdma_post_recvs(r_xprt, false);
1389out_badstatus:
1390 rpcrdma_recv_buffer_put(rep); 1378 rpcrdma_recv_buffer_put(rep);
1391} 1379}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index f3c147d70286..b908f2ca08fd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
200 svc_rdma_send_ctxt_put(rdma, ctxt); 200 svc_rdma_send_ctxt_put(rdma, ctxt);
201 goto drop_connection; 201 goto drop_connection;
202 } 202 }
203 return rc; 203 return 0;
204 204
205drop_connection: 205drop_connection:
206 dprintk("svcrdma: failed to send bc call\n"); 206 dprintk("svcrdma: failed to send bc call\n");
207 xprt_disconnect_done(xprt);
208 return -ENOTCONN; 207 return -ENOTCONN;
209} 208}
210 209
@@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
225 224
226 ret = -ENOTCONN; 225 ret = -ENOTCONN;
227 rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); 226 rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
228 if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) 227 if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
229 ret = rpcrdma_bc_send_request(rdma, rqst); 228 ret = rpcrdma_bc_send_request(rdma, rqst);
229 if (ret == -ENOTCONN)
230 svc_close_xprt(sxprt);
231 }
230 232
231 mutex_unlock(&sxprt->xpt_mutex); 233 mutex_unlock(&sxprt->xpt_mutex);
232 234
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9141068693fa..fbc171ebfe91 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -268,7 +268,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
268{ 268{
269 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 269 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
270 270
271 trace_xprtrdma_inject_dsc(r_xprt); 271 trace_xprtrdma_op_inject_dsc(r_xprt);
272 rdma_disconnect(r_xprt->rx_ia.ri_id); 272 rdma_disconnect(r_xprt->rx_ia.ri_id);
273} 273}
274 274
@@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
284{ 284{
285 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 285 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
286 286
287 trace_xprtrdma_destroy(r_xprt); 287 trace_xprtrdma_op_destroy(r_xprt);
288 288
289 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 289 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
290 290
@@ -318,17 +318,12 @@ xprt_setup_rdma(struct xprt_create *args)
318 struct sockaddr *sap; 318 struct sockaddr *sap;
319 int rc; 319 int rc;
320 320
321 if (args->addrlen > sizeof(xprt->addr)) { 321 if (args->addrlen > sizeof(xprt->addr))
322 dprintk("RPC: %s: address too large\n", __func__);
323 return ERR_PTR(-EBADF); 322 return ERR_PTR(-EBADF);
324 }
325 323
326 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); 324 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
327 if (xprt == NULL) { 325 if (!xprt)
328 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
329 __func__);
330 return ERR_PTR(-ENOMEM); 326 return ERR_PTR(-ENOMEM);
331 }
332 327
333 /* 60 second timeout, no retries */ 328 /* 60 second timeout, no retries */
334 xprt->timeout = &xprt_rdma_default_timeout; 329 xprt->timeout = &xprt_rdma_default_timeout;
@@ -399,7 +394,7 @@ xprt_setup_rdma(struct xprt_create *args)
399 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, 394 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
400 xprt_rdma_connect_worker); 395 xprt_rdma_connect_worker);
401 396
402 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); 397 xprt->max_payload = frwr_maxpages(new_xprt);
403 if (xprt->max_payload == 0) 398 if (xprt->max_payload == 0)
404 goto out4; 399 goto out4;
405 xprt->max_payload <<= PAGE_SHIFT; 400 xprt->max_payload <<= PAGE_SHIFT;
@@ -423,7 +418,7 @@ out3:
423out2: 418out2:
424 rpcrdma_ia_close(&new_xprt->rx_ia); 419 rpcrdma_ia_close(&new_xprt->rx_ia);
425out1: 420out1:
426 trace_xprtrdma_destroy(new_xprt); 421 trace_xprtrdma_op_destroy(new_xprt);
427 xprt_rdma_free_addresses(xprt); 422 xprt_rdma_free_addresses(xprt);
428 xprt_free(xprt); 423 xprt_free(xprt);
429 return ERR_PTR(rc); 424 return ERR_PTR(rc);
@@ -433,29 +428,33 @@ out1:
433 * xprt_rdma_close - close a transport connection 428 * xprt_rdma_close - close a transport connection
434 * @xprt: transport context 429 * @xprt: transport context
435 * 430 *
436 * Called during transport shutdown, reconnect, or device removal. 431 * Called during autoclose or device removal.
432 *
437 * Caller holds @xprt's send lock to prevent activity on this 433 * Caller holds @xprt's send lock to prevent activity on this
438 * transport while the connection is torn down. 434 * transport while the connection is torn down.
439 */ 435 */
440static void 436void xprt_rdma_close(struct rpc_xprt *xprt)
441xprt_rdma_close(struct rpc_xprt *xprt)
442{ 437{
443 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 438 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
444 struct rpcrdma_ep *ep = &r_xprt->rx_ep; 439 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
445 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 440 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
446 441
447 dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); 442 might_sleep();
443
444 trace_xprtrdma_op_close(r_xprt);
445
446 /* Prevent marshaling and sending of new requests */
447 xprt_clear_connected(xprt);
448 448
449 if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { 449 if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
450 xprt_clear_connected(xprt);
451 rpcrdma_ia_remove(ia); 450 rpcrdma_ia_remove(ia);
452 return; 451 goto out;
453 } 452 }
453
454 if (ep->rep_connected == -ENODEV) 454 if (ep->rep_connected == -ENODEV)
455 return; 455 return;
456 if (ep->rep_connected > 0) 456 if (ep->rep_connected > 0)
457 xprt->reestablish_timeout = 0; 457 xprt->reestablish_timeout = 0;
458 xprt_disconnect_done(xprt);
459 rpcrdma_ep_disconnect(ep, ia); 458 rpcrdma_ep_disconnect(ep, ia);
460 459
461 /* Prepare @xprt for the next connection by reinitializing 460 /* Prepare @xprt for the next connection by reinitializing
@@ -463,6 +462,10 @@ xprt_rdma_close(struct rpc_xprt *xprt)
463 */ 462 */
464 r_xprt->rx_buf.rb_credits = 1; 463 r_xprt->rx_buf.rb_credits = 1;
465 xprt->cwnd = RPC_CWNDSHIFT; 464 xprt->cwnd = RPC_CWNDSHIFT;
465
466out:
467 ++xprt->connect_cookie;
468 xprt_disconnect_done(xprt);
466} 469}
467 470
468/** 471/**
@@ -525,6 +528,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
525{ 528{
526 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 529 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
527 530
531 trace_xprtrdma_op_connect(r_xprt);
528 if (r_xprt->rx_ep.rep_connected != 0) { 532 if (r_xprt->rx_ep.rep_connected != 0) {
529 /* Reconnect */ 533 /* Reconnect */
530 schedule_delayed_work(&r_xprt->rx_connect_worker, 534 schedule_delayed_work(&r_xprt->rx_connect_worker,
@@ -659,11 +663,11 @@ xprt_rdma_allocate(struct rpc_task *task)
659 663
660 rqst->rq_buffer = req->rl_sendbuf->rg_base; 664 rqst->rq_buffer = req->rl_sendbuf->rg_base;
661 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 665 rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
662 trace_xprtrdma_allocate(task, req); 666 trace_xprtrdma_op_allocate(task, req);
663 return 0; 667 return 0;
664 668
665out_fail: 669out_fail:
666 trace_xprtrdma_allocate(task, NULL); 670 trace_xprtrdma_op_allocate(task, NULL);
667 return -ENOMEM; 671 return -ENOMEM;
668} 672}
669 673
@@ -682,7 +686,7 @@ xprt_rdma_free(struct rpc_task *task)
682 686
683 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 687 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
684 rpcrdma_release_rqst(r_xprt, req); 688 rpcrdma_release_rqst(r_xprt, req);
685 trace_xprtrdma_rpc_done(task, req); 689 trace_xprtrdma_op_free(task, req);
686} 690}
687 691
688/** 692/**
@@ -696,8 +700,10 @@ xprt_rdma_free(struct rpc_task *task)
696 * %-ENOTCONN if the caller should reconnect and call again 700 * %-ENOTCONN if the caller should reconnect and call again
697 * %-EAGAIN if the caller should call again 701 * %-EAGAIN if the caller should call again
698 * %-ENOBUFS if the caller should call again after a delay 702 * %-ENOBUFS if the caller should call again after a delay
699 * %-EIO if a permanent error occurred and the request was not 703 * %-EMSGSIZE if encoding ran out of buffer space. The request
700 * sent. Do not try to send this message again. 704 * was not sent. Do not try to send this message again.
705 * %-EIO if an I/O error occurred. The request was not sent.
706 * Do not try to send this message again.
701 */ 707 */
702static int 708static int
703xprt_rdma_send_request(struct rpc_rqst *rqst) 709xprt_rdma_send_request(struct rpc_rqst *rqst)
@@ -713,7 +719,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
713#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 719#endif /* CONFIG_SUNRPC_BACKCHANNEL */
714 720
715 if (!xprt_connected(xprt)) 721 if (!xprt_connected(xprt))
716 goto drop_connection; 722 return -ENOTCONN;
717 723
718 if (!xprt_request_get_cong(xprt, rqst)) 724 if (!xprt_request_get_cong(xprt, rqst))
719 return -EBADSLT; 725 return -EBADSLT;
@@ -745,8 +751,8 @@ failed_marshal:
745 if (rc != -ENOTCONN) 751 if (rc != -ENOTCONN)
746 return rc; 752 return rc;
747drop_connection: 753drop_connection:
748 xprt_disconnect_done(xprt); 754 xprt_rdma_close(xprt);
749 return -ENOTCONN; /* implies disconnect */ 755 return -ENOTCONN;
750} 756}
751 757
752void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 758void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -843,58 +849,31 @@ static struct xprt_class xprt_rdma = {
843 849
844void xprt_rdma_cleanup(void) 850void xprt_rdma_cleanup(void)
845{ 851{
846 int rc;
847
848 dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
849#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 852#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
850 if (sunrpc_table_header) { 853 if (sunrpc_table_header) {
851 unregister_sysctl_table(sunrpc_table_header); 854 unregister_sysctl_table(sunrpc_table_header);
852 sunrpc_table_header = NULL; 855 sunrpc_table_header = NULL;
853 } 856 }
854#endif 857#endif
855 rc = xprt_unregister_transport(&xprt_rdma);
856 if (rc)
857 dprintk("RPC: %s: xprt_unregister returned %i\n",
858 __func__, rc);
859
860 rpcrdma_destroy_wq();
861 858
862 rc = xprt_unregister_transport(&xprt_rdma_bc); 859 xprt_unregister_transport(&xprt_rdma);
863 if (rc) 860 xprt_unregister_transport(&xprt_rdma_bc);
864 dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
865 __func__, rc);
866} 861}
867 862
868int xprt_rdma_init(void) 863int xprt_rdma_init(void)
869{ 864{
870 int rc; 865 int rc;
871 866
872 rc = rpcrdma_alloc_wq();
873 if (rc)
874 return rc;
875
876 rc = xprt_register_transport(&xprt_rdma); 867 rc = xprt_register_transport(&xprt_rdma);
877 if (rc) { 868 if (rc)
878 rpcrdma_destroy_wq();
879 return rc; 869 return rc;
880 }
881 870
882 rc = xprt_register_transport(&xprt_rdma_bc); 871 rc = xprt_register_transport(&xprt_rdma_bc);
883 if (rc) { 872 if (rc) {
884 xprt_unregister_transport(&xprt_rdma); 873 xprt_unregister_transport(&xprt_rdma);
885 rpcrdma_destroy_wq();
886 return rc; 874 return rc;
887 } 875 }
888 876
889 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
890
891 dprintk("Defaults:\n");
892 dprintk("\tSlots %d\n"
893 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
894 xprt_rdma_slot_table_entries,
895 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
896 dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
897
898#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 877#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
899 if (!sunrpc_table_header) 878 if (!sunrpc_table_header)
900 sunrpc_table_header = register_sysctl_table(sunrpc_table); 879 sunrpc_table_header = register_sysctl_table(sunrpc_table);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3ddba94c939f..7749a2bf6887 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -78,53 +78,25 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
78static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 78static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
79static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); 79static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
80static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 80static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
81static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
81 82
82struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 83/* Wait for outstanding transport work to finish.
83
84int
85rpcrdma_alloc_wq(void)
86{
87 struct workqueue_struct *recv_wq;
88
89 recv_wq = alloc_workqueue("xprtrdma_receive",
90 WQ_MEM_RECLAIM | WQ_HIGHPRI,
91 0);
92 if (!recv_wq)
93 return -ENOMEM;
94
95 rpcrdma_receive_wq = recv_wq;
96 return 0;
97}
98
99void
100rpcrdma_destroy_wq(void)
101{
102 struct workqueue_struct *wq;
103
104 if (rpcrdma_receive_wq) {
105 wq = rpcrdma_receive_wq;
106 rpcrdma_receive_wq = NULL;
107 destroy_workqueue(wq);
108 }
109}
110
111/**
112 * rpcrdma_disconnect_worker - Force a disconnect
113 * @work: endpoint to be disconnected
114 *
115 * Provider callbacks can possibly run in an IRQ context. This function
116 * is invoked in a worker thread to guarantee that disconnect wake-up
117 * calls are always done in process context.
118 */ 84 */
119static void 85static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
120rpcrdma_disconnect_worker(struct work_struct *work)
121{ 86{
122 struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, 87 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
123 rep_disconnect_worker.work); 88 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
124 struct rpcrdma_xprt *r_xprt =
125 container_of(ep, struct rpcrdma_xprt, rx_ep);
126 89
127 xprt_force_disconnect(&r_xprt->rx_xprt); 90 /* Flush Receives, then wait for deferred Reply work
91 * to complete.
92 */
93 ib_drain_qp(ia->ri_id->qp);
94 drain_workqueue(buf->rb_completion_wq);
95
96 /* Deferred Reply processing might have scheduled
97 * local invalidations.
98 */
99 ib_drain_sq(ia->ri_id->qp);
128} 100}
129 101
130/** 102/**
@@ -143,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context)
143 rx_ep); 115 rx_ep);
144 116
145 trace_xprtrdma_qp_event(r_xprt, event); 117 trace_xprtrdma_qp_event(r_xprt, event);
146 pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
147 ib_event_msg(event->event), event->device->name,
148 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
149
150 if (ep->rep_connected == 1) {
151 ep->rep_connected = -EIO;
152 schedule_delayed_work(&ep->rep_disconnect_worker, 0);
153 wake_up_all(&ep->rep_connect_wait);
154 }
155} 118}
156 119
157/** 120/**
@@ -189,11 +152,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
189 struct ib_cqe *cqe = wc->wr_cqe; 152 struct ib_cqe *cqe = wc->wr_cqe;
190 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, 153 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
191 rr_cqe); 154 rr_cqe);
155 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
192 156
193 /* WARNING: Only wr_id and status are reliable at this point */ 157 /* WARNING: Only wr_cqe and status are reliable at this point */
194 trace_xprtrdma_wc_receive(wc); 158 trace_xprtrdma_wc_receive(wc);
159 --r_xprt->rx_ep.rep_receive_count;
195 if (wc->status != IB_WC_SUCCESS) 160 if (wc->status != IB_WC_SUCCESS)
196 goto out_fail; 161 goto out_flushed;
197 162
198 /* status == SUCCESS means all fields in wc are trustworthy */ 163 /* status == SUCCESS means all fields in wc are trustworthy */
199 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 164 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
@@ -204,17 +169,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
204 rdmab_addr(rep->rr_rdmabuf), 169 rdmab_addr(rep->rr_rdmabuf),
205 wc->byte_len, DMA_FROM_DEVICE); 170 wc->byte_len, DMA_FROM_DEVICE);
206 171
207out_schedule: 172 rpcrdma_post_recvs(r_xprt, false);
208 rpcrdma_reply_handler(rep); 173 rpcrdma_reply_handler(rep);
209 return; 174 return;
210 175
211out_fail: 176out_flushed:
212 if (wc->status != IB_WC_WR_FLUSH_ERR) 177 if (wc->status != IB_WC_WR_FLUSH_ERR)
213 pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", 178 pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
214 ib_wc_status_msg(wc->status), 179 ib_wc_status_msg(wc->status),
215 wc->status, wc->vendor_err); 180 wc->status, wc->vendor_err);
216 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); 181 rpcrdma_recv_buffer_put(rep);
217 goto out_schedule;
218} 182}
219 183
220static void 184static void
@@ -316,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
316 ep->rep_connected = -EAGAIN; 280 ep->rep_connected = -EAGAIN;
317 goto disconnected; 281 goto disconnected;
318 case RDMA_CM_EVENT_DISCONNECTED: 282 case RDMA_CM_EVENT_DISCONNECTED:
319 ++xprt->connect_cookie;
320 ep->rep_connected = -ECONNABORTED; 283 ep->rep_connected = -ECONNABORTED;
321disconnected: 284disconnected:
322 xprt_force_disconnect(xprt); 285 xprt_force_disconnect(xprt);
@@ -326,10 +289,9 @@ disconnected:
326 break; 289 break;
327 } 290 }
328 291
329 dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__, 292 dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
330 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), 293 rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
331 ia->ri_device->name, ia->ri_ops->ro_displayname, 294 ia->ri_device->name, rdma_event_msg(event->event));
332 rdma_event_msg(event->event));
333 return 0; 295 return 0;
334} 296}
335 297
@@ -347,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
347 309
348 id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, 310 id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
349 xprt, RDMA_PS_TCP, IB_QPT_RC); 311 xprt, RDMA_PS_TCP, IB_QPT_RC);
350 if (IS_ERR(id)) { 312 if (IS_ERR(id))
351 rc = PTR_ERR(id);
352 dprintk("RPC: %s: rdma_create_id() failed %i\n",
353 __func__, rc);
354 return id; 313 return id;
355 }
356 314
357 ia->ri_async_rc = -ETIMEDOUT; 315 ia->ri_async_rc = -ETIMEDOUT;
358 rc = rdma_resolve_addr(id, NULL, 316 rc = rdma_resolve_addr(id, NULL,
359 (struct sockaddr *)&xprt->rx_xprt.addr, 317 (struct sockaddr *)&xprt->rx_xprt.addr,
360 RDMA_RESOLVE_TIMEOUT); 318 RDMA_RESOLVE_TIMEOUT);
361 if (rc) { 319 if (rc)
362 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
363 __func__, rc);
364 goto out; 320 goto out;
365 }
366 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 321 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
367 if (rc < 0) { 322 if (rc < 0) {
368 trace_xprtrdma_conn_tout(xprt); 323 trace_xprtrdma_conn_tout(xprt);
@@ -375,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
375 330
376 ia->ri_async_rc = -ETIMEDOUT; 331 ia->ri_async_rc = -ETIMEDOUT;
377 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 332 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
378 if (rc) { 333 if (rc)
379 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
380 __func__, rc);
381 goto out; 334 goto out;
382 }
383 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 335 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
384 if (rc < 0) { 336 if (rc < 0) {
385 trace_xprtrdma_conn_tout(xprt); 337 trace_xprtrdma_conn_tout(xprt);
@@ -429,16 +381,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
429 381
430 switch (xprt_rdma_memreg_strategy) { 382 switch (xprt_rdma_memreg_strategy) {
431 case RPCRDMA_FRWR: 383 case RPCRDMA_FRWR:
432 if (frwr_is_supported(ia)) { 384 if (frwr_is_supported(ia))
433 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
434 break;
435 }
436 /*FALLTHROUGH*/
437 case RPCRDMA_MTHCAFMR:
438 if (fmr_is_supported(ia)) {
439 ia->ri_ops = &rpcrdma_fmr_memreg_ops;
440 break; 385 break;
441 }
442 /*FALLTHROUGH*/ 386 /*FALLTHROUGH*/
443 default: 387 default:
444 pr_err("rpcrdma: Device %s does not support memreg mode %d\n", 388 pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
@@ -481,7 +425,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
481 * connection is already gone. 425 * connection is already gone.
482 */ 426 */
483 if (ia->ri_id->qp) { 427 if (ia->ri_id->qp) {
484 ib_drain_qp(ia->ri_id->qp); 428 rpcrdma_xprt_drain(r_xprt);
485 rdma_destroy_qp(ia->ri_id); 429 rdma_destroy_qp(ia->ri_id);
486 ia->ri_id->qp = NULL; 430 ia->ri_id->qp = NULL;
487 } 431 }
@@ -552,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
552 } 496 }
553 ia->ri_max_send_sges = max_sge; 497 ia->ri_max_send_sges = max_sge;
554 498
555 rc = ia->ri_ops->ro_open(ia, ep, cdata); 499 rc = frwr_open(ia, ep, cdata);
556 if (rc) 500 if (rc)
557 return rc; 501 return rc;
558 502
@@ -579,16 +523,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
579 cdata->max_requests >> 2); 523 cdata->max_requests >> 2);
580 ep->rep_send_count = ep->rep_send_batch; 524 ep->rep_send_count = ep->rep_send_batch;
581 init_waitqueue_head(&ep->rep_connect_wait); 525 init_waitqueue_head(&ep->rep_connect_wait);
582 INIT_DELAYED_WORK(&ep->rep_disconnect_worker, 526 ep->rep_receive_count = 0;
583 rpcrdma_disconnect_worker);
584 527
585 sendcq = ib_alloc_cq(ia->ri_device, NULL, 528 sendcq = ib_alloc_cq(ia->ri_device, NULL,
586 ep->rep_attr.cap.max_send_wr + 1, 529 ep->rep_attr.cap.max_send_wr + 1,
587 1, IB_POLL_WORKQUEUE); 530 1, IB_POLL_WORKQUEUE);
588 if (IS_ERR(sendcq)) { 531 if (IS_ERR(sendcq)) {
589 rc = PTR_ERR(sendcq); 532 rc = PTR_ERR(sendcq);
590 dprintk("RPC: %s: failed to create send CQ: %i\n",
591 __func__, rc);
592 goto out1; 533 goto out1;
593 } 534 }
594 535
@@ -597,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
597 0, IB_POLL_WORKQUEUE); 538 0, IB_POLL_WORKQUEUE);
598 if (IS_ERR(recvcq)) { 539 if (IS_ERR(recvcq)) {
599 rc = PTR_ERR(recvcq); 540 rc = PTR_ERR(recvcq);
600 dprintk("RPC: %s: failed to create recv CQ: %i\n",
601 __func__, rc);
602 goto out2; 541 goto out2;
603 } 542 }
604 543
@@ -611,7 +550,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
611 /* Prepare RDMA-CM private message */ 550 /* Prepare RDMA-CM private message */
612 pmsg->cp_magic = rpcrdma_cmp_magic; 551 pmsg->cp_magic = rpcrdma_cmp_magic;
613 pmsg->cp_version = RPCRDMA_CMP_VERSION; 552 pmsg->cp_version = RPCRDMA_CMP_VERSION;
614 pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; 553 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
615 pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); 554 pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
616 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); 555 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
617 ep->rep_remote_cma.private_data = pmsg; 556 ep->rep_remote_cma.private_data = pmsg;
@@ -653,8 +592,6 @@ out1:
653void 592void
654rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 593rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
655{ 594{
656 cancel_delayed_work_sync(&ep->rep_disconnect_worker);
657
658 if (ia->ri_id && ia->ri_id->qp) { 595 if (ia->ri_id && ia->ri_id->qp) {
659 rpcrdma_ep_disconnect(ep, ia); 596 rpcrdma_ep_disconnect(ep, ia);
660 rdma_destroy_qp(ia->ri_id); 597 rdma_destroy_qp(ia->ri_id);
@@ -740,11 +677,8 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
740 } 677 }
741 678
742 err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); 679 err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
743 if (err) { 680 if (err)
744 dprintk("RPC: %s: rdma_create_qp returned %d\n",
745 __func__, err);
746 goto out_destroy; 681 goto out_destroy;
747 }
748 682
749 /* Atomically replace the transport's ID and QP. */ 683 /* Atomically replace the transport's ID and QP. */
750 rc = 0; 684 rc = 0;
@@ -775,8 +709,6 @@ retry:
775 dprintk("RPC: %s: connecting...\n", __func__); 709 dprintk("RPC: %s: connecting...\n", __func__);
776 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); 710 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
777 if (rc) { 711 if (rc) {
778 dprintk("RPC: %s: rdma_create_qp failed %i\n",
779 __func__, rc);
780 rc = -ENETUNREACH; 712 rc = -ENETUNREACH;
781 goto out_noupdate; 713 goto out_noupdate;
782 } 714 }
@@ -798,11 +730,8 @@ retry:
798 rpcrdma_post_recvs(r_xprt, true); 730 rpcrdma_post_recvs(r_xprt, true);
799 731
800 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); 732 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
801 if (rc) { 733 if (rc)
802 dprintk("RPC: %s: rdma_connect() failed with %i\n",
803 __func__, rc);
804 goto out; 734 goto out;
805 }
806 735
807 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); 736 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
808 if (ep->rep_connected <= 0) { 737 if (ep->rep_connected <= 0) {
@@ -822,8 +751,10 @@ out_noupdate:
822 return rc; 751 return rc;
823} 752}
824 753
825/* 754/**
826 * rpcrdma_ep_disconnect 755 * rpcrdma_ep_disconnect - Disconnect underlying transport
756 * @ep: endpoint to disconnect
757 * @ia: associated interface adapter
827 * 758 *
828 * This is separate from destroy to facilitate the ability 759 * This is separate from destroy to facilitate the ability
829 * to reconnect without recreating the endpoint. 760 * to reconnect without recreating the endpoint.
@@ -834,19 +765,20 @@ out_noupdate:
834void 765void
835rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 766rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
836{ 767{
768 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
769 rx_ep);
837 int rc; 770 int rc;
838 771
772 /* returns without wait if ID is not connected */
839 rc = rdma_disconnect(ia->ri_id); 773 rc = rdma_disconnect(ia->ri_id);
840 if (!rc) 774 if (!rc)
841 /* returns without wait if not connected */
842 wait_event_interruptible(ep->rep_connect_wait, 775 wait_event_interruptible(ep->rep_connect_wait,
843 ep->rep_connected != 1); 776 ep->rep_connected != 1);
844 else 777 else
845 ep->rep_connected = rc; 778 ep->rep_connected = rc;
846 trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, 779 trace_xprtrdma_disconnect(r_xprt, rc);
847 rx_ep), rc);
848 780
849 ib_drain_qp(ia->ri_id->qp); 781 rpcrdma_xprt_drain(r_xprt);
850} 782}
851 783
852/* Fixed-size circular FIFO queue. This implementation is wait-free and 784/* Fixed-size circular FIFO queue. This implementation is wait-free and
@@ -1034,7 +966,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1034 if (!mr) 966 if (!mr)
1035 break; 967 break;
1036 968
1037 rc = ia->ri_ops->ro_init_mr(ia, mr); 969 rc = frwr_init_mr(ia, mr);
1038 if (rc) { 970 if (rc) {
1039 kfree(mr); 971 kfree(mr);
1040 break; 972 break;
@@ -1089,9 +1021,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1089 req->rl_buffer = buffer; 1021 req->rl_buffer = buffer;
1090 INIT_LIST_HEAD(&req->rl_registered); 1022 INIT_LIST_HEAD(&req->rl_registered);
1091 1023
1092 spin_lock(&buffer->rb_reqslock); 1024 spin_lock(&buffer->rb_lock);
1093 list_add(&req->rl_all, &buffer->rb_allreqs); 1025 list_add(&req->rl_all, &buffer->rb_allreqs);
1094 spin_unlock(&buffer->rb_reqslock); 1026 spin_unlock(&buffer->rb_lock);
1095 return req; 1027 return req;
1096} 1028}
1097 1029
@@ -1134,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
1134out_free: 1066out_free:
1135 kfree(rep); 1067 kfree(rep);
1136out: 1068out:
1137 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1138 __func__, rc);
1139 return rc; 1069 return rc;
1140} 1070}
1141 1071
@@ -1159,7 +1089,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1159 1089
1160 INIT_LIST_HEAD(&buf->rb_send_bufs); 1090 INIT_LIST_HEAD(&buf->rb_send_bufs);
1161 INIT_LIST_HEAD(&buf->rb_allreqs); 1091 INIT_LIST_HEAD(&buf->rb_allreqs);
1162 spin_lock_init(&buf->rb_reqslock);
1163 for (i = 0; i < buf->rb_max_requests; i++) { 1092 for (i = 0; i < buf->rb_max_requests; i++) {
1164 struct rpcrdma_req *req; 1093 struct rpcrdma_req *req;
1165 1094
@@ -1174,13 +1103,19 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1174 } 1103 }
1175 1104
1176 buf->rb_credits = 1; 1105 buf->rb_credits = 1;
1177 buf->rb_posted_receives = 0;
1178 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1106 INIT_LIST_HEAD(&buf->rb_recv_bufs);
1179 1107
1180 rc = rpcrdma_sendctxs_create(r_xprt); 1108 rc = rpcrdma_sendctxs_create(r_xprt);
1181 if (rc) 1109 if (rc)
1182 goto out; 1110 goto out;
1183 1111
1112 buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
1113 WQ_MEM_RECLAIM | WQ_HIGHPRI,
1114 0,
1115 r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
1116 if (!buf->rb_completion_wq)
1117 goto out;
1118
1184 return 0; 1119 return 0;
1185out: 1120out:
1186 rpcrdma_buffer_destroy(buf); 1121 rpcrdma_buffer_destroy(buf);
@@ -1194,9 +1129,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
1194 kfree(rep); 1129 kfree(rep);
1195} 1130}
1196 1131
1132/**
1133 * rpcrdma_req_destroy - Destroy an rpcrdma_req object
1134 * @req: unused object to be destroyed
1135 *
1136 * This function assumes that the caller prevents concurrent device
1137 * unload and transport tear-down.
1138 */
1197void 1139void
1198rpcrdma_destroy_req(struct rpcrdma_req *req) 1140rpcrdma_req_destroy(struct rpcrdma_req *req)
1199{ 1141{
1142 list_del(&req->rl_all);
1143
1200 rpcrdma_free_regbuf(req->rl_recvbuf); 1144 rpcrdma_free_regbuf(req->rl_recvbuf);
1201 rpcrdma_free_regbuf(req->rl_sendbuf); 1145 rpcrdma_free_regbuf(req->rl_sendbuf);
1202 rpcrdma_free_regbuf(req->rl_rdmabuf); 1146 rpcrdma_free_regbuf(req->rl_rdmabuf);
@@ -1208,7 +1152,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1208{ 1152{
1209 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1153 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1210 rx_buf); 1154 rx_buf);
1211 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1212 struct rpcrdma_mr *mr; 1155 struct rpcrdma_mr *mr;
1213 unsigned int count; 1156 unsigned int count;
1214 1157
@@ -1224,7 +1167,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1224 if (!list_empty(&mr->mr_list)) 1167 if (!list_empty(&mr->mr_list))
1225 list_del(&mr->mr_list); 1168 list_del(&mr->mr_list);
1226 1169
1227 ia->ri_ops->ro_release_mr(mr); 1170 frwr_release_mr(mr);
1228 count++; 1171 count++;
1229 spin_lock(&buf->rb_mrlock); 1172 spin_lock(&buf->rb_mrlock);
1230 } 1173 }
@@ -1234,11 +1177,24 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1234 dprintk("RPC: %s: released %u MRs\n", __func__, count); 1177 dprintk("RPC: %s: released %u MRs\n", __func__, count);
1235} 1178}
1236 1179
1180/**
1181 * rpcrdma_buffer_destroy - Release all hw resources
1182 * @buf: root control block for resources
1183 *
1184 * ORDERING: relies on a prior ib_drain_qp :
1185 * - No more Send or Receive completions can occur
1186 * - All MRs, reps, and reqs are returned to their free lists
1187 */
1237void 1188void
1238rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1189rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1239{ 1190{
1240 cancel_delayed_work_sync(&buf->rb_refresh_worker); 1191 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1241 1192
1193 if (buf->rb_completion_wq) {
1194 destroy_workqueue(buf->rb_completion_wq);
1195 buf->rb_completion_wq = NULL;
1196 }
1197
1242 rpcrdma_sendctxs_destroy(buf); 1198 rpcrdma_sendctxs_destroy(buf);
1243 1199
1244 while (!list_empty(&buf->rb_recv_bufs)) { 1200 while (!list_empty(&buf->rb_recv_bufs)) {
@@ -1250,19 +1206,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1250 rpcrdma_destroy_rep(rep); 1206 rpcrdma_destroy_rep(rep);
1251 } 1207 }
1252 1208
1253 spin_lock(&buf->rb_reqslock); 1209 while (!list_empty(&buf->rb_send_bufs)) {
1254 while (!list_empty(&buf->rb_allreqs)) {
1255 struct rpcrdma_req *req; 1210 struct rpcrdma_req *req;
1256 1211
1257 req = list_first_entry(&buf->rb_allreqs, 1212 req = list_first_entry(&buf->rb_send_bufs,
1258 struct rpcrdma_req, rl_all); 1213 struct rpcrdma_req, rl_list);
1259 list_del(&req->rl_all); 1214 list_del(&req->rl_list);
1260 1215 rpcrdma_req_destroy(req);
1261 spin_unlock(&buf->rb_reqslock);
1262 rpcrdma_destroy_req(req);
1263 spin_lock(&buf->rb_reqslock);
1264 } 1216 }
1265 spin_unlock(&buf->rb_reqslock);
1266 1217
1267 rpcrdma_mrs_destroy(buf); 1218 rpcrdma_mrs_destroy(buf);
1268} 1219}
@@ -1329,9 +1280,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1329{ 1280{
1330 struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 1281 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1331 1282
1332 trace_xprtrdma_mr_unmap(mr); 1283 if (mr->mr_dir != DMA_NONE) {
1333 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 1284 trace_xprtrdma_mr_unmap(mr);
1334 mr->mr_sg, mr->mr_nents, mr->mr_dir); 1285 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
1286 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1287 mr->mr_dir = DMA_NONE;
1288 }
1335 __rpcrdma_mr_put(&r_xprt->rx_buf, mr); 1289 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
1336} 1290}
1337 1291
@@ -1410,7 +1364,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1410 * 1364 *
1411 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1365 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
1412 * receiving the payload of RDMA RECV operations. During Long Calls 1366 * receiving the payload of RDMA RECV operations. During Long Calls
1413 * or Replies they may be registered externally via ro_map. 1367 * or Replies they may be registered externally via frwr_map.
1414 */ 1368 */
1415struct rpcrdma_regbuf * 1369struct rpcrdma_regbuf *
1416rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, 1370rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
@@ -1446,8 +1400,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1446 (void *)rb->rg_base, 1400 (void *)rb->rg_base,
1447 rdmab_length(rb), 1401 rdmab_length(rb),
1448 rb->rg_direction); 1402 rb->rg_direction);
1449 if (ib_dma_mapping_error(device, rdmab_addr(rb))) 1403 if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
1404 trace_xprtrdma_dma_maperr(rdmab_addr(rb));
1450 return false; 1405 return false;
1406 }
1451 1407
1452 rb->rg_device = device; 1408 rb->rg_device = device;
1453 rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; 1409 rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
@@ -1479,10 +1435,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
1479 kfree(rb); 1435 kfree(rb);
1480} 1436}
1481 1437
1482/* 1438/**
1483 * Prepost any receive buffer, then post send. 1439 * rpcrdma_ep_post - Post WRs to a transport's Send Queue
1440 * @ia: transport's device information
1441 * @ep: transport's RDMA endpoint information
1442 * @req: rpcrdma_req containing the Send WR to post
1484 * 1443 *
1485 * Receive buffer is donated to hardware, reclaimed upon recv completion. 1444 * Returns 0 if the post was successful, otherwise -ENOTCONN
1445 * is returned.
1486 */ 1446 */
1487int 1447int
1488rpcrdma_ep_post(struct rpcrdma_ia *ia, 1448rpcrdma_ep_post(struct rpcrdma_ia *ia,
@@ -1501,32 +1461,27 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1501 --ep->rep_send_count; 1461 --ep->rep_send_count;
1502 } 1462 }
1503 1463
1504 rc = ia->ri_ops->ro_send(ia, req); 1464 rc = frwr_send(ia, req);
1505 trace_xprtrdma_post_send(req, rc); 1465 trace_xprtrdma_post_send(req, rc);
1506 if (rc) 1466 if (rc)
1507 return -ENOTCONN; 1467 return -ENOTCONN;
1508 return 0; 1468 return 0;
1509} 1469}
1510 1470
1511/** 1471static void
1512 * rpcrdma_post_recvs - Maybe post some Receive buffers
1513 * @r_xprt: controlling transport
1514 * @temp: when true, allocate temp rpcrdma_rep objects
1515 *
1516 */
1517void
1518rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1472rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1519{ 1473{
1520 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1474 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1475 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
1521 struct ib_recv_wr *wr, *bad_wr; 1476 struct ib_recv_wr *wr, *bad_wr;
1522 int needed, count, rc; 1477 int needed, count, rc;
1523 1478
1524 rc = 0; 1479 rc = 0;
1525 count = 0; 1480 count = 0;
1526 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); 1481 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
1527 if (buf->rb_posted_receives > needed) 1482 if (ep->rep_receive_count > needed)
1528 goto out; 1483 goto out;
1529 needed -= buf->rb_posted_receives; 1484 needed -= ep->rep_receive_count;
1530 1485
1531 count = 0; 1486 count = 0;
1532 wr = NULL; 1487 wr = NULL;
@@ -1574,7 +1529,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1574 --count; 1529 --count;
1575 } 1530 }
1576 } 1531 }
1577 buf->rb_posted_receives += count; 1532 ep->rep_receive_count += count;
1578out: 1533out:
1579 trace_xprtrdma_post_recvs(r_xprt, count, rc); 1534 trace_xprtrdma_post_recvs(r_xprt, count, rc);
1580} 1535}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 9218dbebedce..5a18472f2c9c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,7 +66,6 @@
66 * Interface Adapter -- one per transport instance 66 * Interface Adapter -- one per transport instance
67 */ 67 */
68struct rpcrdma_ia { 68struct rpcrdma_ia {
69 const struct rpcrdma_memreg_ops *ri_ops;
70 struct ib_device *ri_device; 69 struct ib_device *ri_device;
71 struct rdma_cm_id *ri_id; 70 struct rdma_cm_id *ri_id;
72 struct ib_pd *ri_pd; 71 struct ib_pd *ri_pd;
@@ -81,8 +80,6 @@ struct rpcrdma_ia {
81 bool ri_implicit_roundup; 80 bool ri_implicit_roundup;
82 enum ib_mr_type ri_mrtype; 81 enum ib_mr_type ri_mrtype;
83 unsigned long ri_flags; 82 unsigned long ri_flags;
84 struct ib_qp_attr ri_qp_attr;
85 struct ib_qp_init_attr ri_qp_init_attr;
86}; 83};
87 84
88enum { 85enum {
@@ -101,7 +98,7 @@ struct rpcrdma_ep {
101 wait_queue_head_t rep_connect_wait; 98 wait_queue_head_t rep_connect_wait;
102 struct rpcrdma_connect_private rep_cm_private; 99 struct rpcrdma_connect_private rep_cm_private;
103 struct rdma_conn_param rep_remote_cma; 100 struct rdma_conn_param rep_remote_cma;
104 struct delayed_work rep_disconnect_worker; 101 int rep_receive_count;
105}; 102};
106 103
107/* Pre-allocate extra Work Requests for handling backward receives 104/* Pre-allocate extra Work Requests for handling backward receives
@@ -262,20 +259,12 @@ struct rpcrdma_frwr {
262 }; 259 };
263}; 260};
264 261
265struct rpcrdma_fmr {
266 struct ib_fmr *fm_mr;
267 u64 *fm_physaddrs;
268};
269
270struct rpcrdma_mr { 262struct rpcrdma_mr {
271 struct list_head mr_list; 263 struct list_head mr_list;
272 struct scatterlist *mr_sg; 264 struct scatterlist *mr_sg;
273 int mr_nents; 265 int mr_nents;
274 enum dma_data_direction mr_dir; 266 enum dma_data_direction mr_dir;
275 union { 267 struct rpcrdma_frwr frwr;
276 struct rpcrdma_fmr fmr;
277 struct rpcrdma_frwr frwr;
278 };
279 struct rpcrdma_xprt *mr_xprt; 268 struct rpcrdma_xprt *mr_xprt;
280 u32 mr_handle; 269 u32 mr_handle;
281 u32 mr_length; 270 u32 mr_length;
@@ -401,20 +390,18 @@ struct rpcrdma_buffer {
401 spinlock_t rb_lock; /* protect buf lists */ 390 spinlock_t rb_lock; /* protect buf lists */
402 struct list_head rb_send_bufs; 391 struct list_head rb_send_bufs;
403 struct list_head rb_recv_bufs; 392 struct list_head rb_recv_bufs;
393 struct list_head rb_allreqs;
394
404 unsigned long rb_flags; 395 unsigned long rb_flags;
405 u32 rb_max_requests; 396 u32 rb_max_requests;
406 u32 rb_credits; /* most recent credit grant */ 397 u32 rb_credits; /* most recent credit grant */
407 int rb_posted_receives;
408 398
409 u32 rb_bc_srv_max_requests; 399 u32 rb_bc_srv_max_requests;
410 spinlock_t rb_reqslock; /* protect rb_allreqs */
411 struct list_head rb_allreqs;
412
413 u32 rb_bc_max_requests; 400 u32 rb_bc_max_requests;
414 401
402 struct workqueue_struct *rb_completion_wq;
415 struct delayed_work rb_refresh_worker; 403 struct delayed_work rb_refresh_worker;
416}; 404};
417#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
418 405
419/* rb_flags */ 406/* rb_flags */
420enum { 407enum {
@@ -465,35 +452,6 @@ struct rpcrdma_stats {
465}; 452};
466 453
467/* 454/*
468 * Per-registration mode operations
469 */
470struct rpcrdma_xprt;
471struct rpcrdma_memreg_ops {
472 struct rpcrdma_mr_seg *
473 (*ro_map)(struct rpcrdma_xprt *,
474 struct rpcrdma_mr_seg *, int, bool,
475 struct rpcrdma_mr **);
476 int (*ro_send)(struct rpcrdma_ia *ia,
477 struct rpcrdma_req *req);
478 void (*ro_reminv)(struct rpcrdma_rep *rep,
479 struct list_head *mrs);
480 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
481 struct list_head *);
482 int (*ro_open)(struct rpcrdma_ia *,
483 struct rpcrdma_ep *,
484 struct rpcrdma_create_data_internal *);
485 size_t (*ro_maxpages)(struct rpcrdma_xprt *);
486 int (*ro_init_mr)(struct rpcrdma_ia *,
487 struct rpcrdma_mr *);
488 void (*ro_release_mr)(struct rpcrdma_mr *mr);
489 const char *ro_displayname;
490 const int ro_send_w_inv_ok;
491};
492
493extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
494extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
495
496/*
497 * RPCRDMA transport -- encapsulates the structures above for 455 * RPCRDMA transport -- encapsulates the structures above for
498 * integration with RPC. 456 * integration with RPC.
499 * 457 *
@@ -544,10 +502,6 @@ extern unsigned int xprt_rdma_memreg_strategy;
544int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); 502int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
545void rpcrdma_ia_remove(struct rpcrdma_ia *ia); 503void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
546void rpcrdma_ia_close(struct rpcrdma_ia *); 504void rpcrdma_ia_close(struct rpcrdma_ia *);
547bool frwr_is_supported(struct rpcrdma_ia *);
548bool fmr_is_supported(struct rpcrdma_ia *);
549
550extern struct workqueue_struct *rpcrdma_receive_wq;
551 505
552/* 506/*
553 * Endpoint calls - xprtrdma/verbs.c 507 * Endpoint calls - xprtrdma/verbs.c
@@ -560,13 +514,12 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
560 514
561int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 515int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
562 struct rpcrdma_req *); 516 struct rpcrdma_req *);
563void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
564 517
565/* 518/*
566 * Buffer calls - xprtrdma/verbs.c 519 * Buffer calls - xprtrdma/verbs.c
567 */ 520 */
568struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 521struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
569void rpcrdma_destroy_req(struct rpcrdma_req *); 522void rpcrdma_req_destroy(struct rpcrdma_req *req);
570int rpcrdma_buffer_create(struct rpcrdma_xprt *); 523int rpcrdma_buffer_create(struct rpcrdma_xprt *);
571void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 524void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
572struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 525struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
@@ -604,9 +557,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
604 return __rpcrdma_dma_map_regbuf(ia, rb); 557 return __rpcrdma_dma_map_regbuf(ia, rb);
605} 558}
606 559
607int rpcrdma_alloc_wq(void);
608void rpcrdma_destroy_wq(void);
609
610/* 560/*
611 * Wrappers for chunk registration, shared by read/write chunk code. 561 * Wrappers for chunk registration, shared by read/write chunk code.
612 */ 562 */
@@ -617,6 +567,23 @@ rpcrdma_data_dir(bool writing)
617 return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 567 return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
618} 568}
619 569
570/* Memory registration calls xprtrdma/frwr_ops.c
571 */
572bool frwr_is_supported(struct rpcrdma_ia *);
573int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
574 struct rpcrdma_create_data_internal *cdata);
575int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
576void frwr_release_mr(struct rpcrdma_mr *mr);
577size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
578struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
579 struct rpcrdma_mr_seg *seg,
580 int nsegs, bool writing, u32 xid,
581 struct rpcrdma_mr **mr);
582int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
583void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
584void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
585 struct list_head *mrs);
586
620/* 587/*
621 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c 588 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
622 */ 589 */
@@ -653,6 +620,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
653extern unsigned int xprt_rdma_max_inline_read; 620extern unsigned int xprt_rdma_max_inline_read;
654void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); 621void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
655void xprt_rdma_free_addresses(struct rpc_xprt *xprt); 622void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
623void xprt_rdma_close(struct rpc_xprt *xprt);
656void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); 624void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
657int xprt_rdma_init(void); 625int xprt_rdma_init(void);
658void xprt_rdma_cleanup(void); 626void xprt_rdma_cleanup(void);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 44467caf3cd8..13559e6a460b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -68,8 +68,6 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
68static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 68static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
69static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 69static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
70 70
71#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
72
73#define XS_TCP_LINGER_TO (15U * HZ) 71#define XS_TCP_LINGER_TO (15U * HZ)
74static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; 72static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
75 73
@@ -159,8 +157,6 @@ static struct ctl_table sunrpc_table[] = {
159 { }, 157 { },
160}; 158};
161 159
162#endif
163
164/* 160/*
165 * Wait duration for a reply from the RPC portmapper. 161 * Wait duration for a reply from the RPC portmapper.
166 */ 162 */
@@ -1589,6 +1585,7 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
1589 1585
1590/** 1586/**
1591 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport 1587 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
1588 * @xprt: controlling transport
1592 * @task: task that timed out 1589 * @task: task that timed out
1593 * 1590 *
1594 * Adjust the congestion window after a retransmit timeout has occurred. 1591 * Adjust the congestion window after a retransmit timeout has occurred.
@@ -2246,6 +2243,7 @@ out:
2246 2243
2247/** 2244/**
2248 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint 2245 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
2246 * @work: queued work item
2249 * 2247 *
2250 * Invoked by a work queue tasklet. 2248 * Invoked by a work queue tasklet.
2251 */ 2249 */
@@ -3095,10 +3093,8 @@ static struct xprt_class xs_bc_tcp_transport = {
3095 */ 3093 */
3096int init_socket_xprt(void) 3094int init_socket_xprt(void)
3097{ 3095{
3098#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
3099 if (!sunrpc_table_header) 3096 if (!sunrpc_table_header)
3100 sunrpc_table_header = register_sysctl_table(sunrpc_table); 3097 sunrpc_table_header = register_sysctl_table(sunrpc_table);
3101#endif
3102 3098
3103 xprt_register_transport(&xs_local_transport); 3099 xprt_register_transport(&xs_local_transport);
3104 xprt_register_transport(&xs_udp_transport); 3100 xprt_register_transport(&xs_udp_transport);
@@ -3114,12 +3110,10 @@ int init_socket_xprt(void)
3114 */ 3110 */
3115void cleanup_socket_xprt(void) 3111void cleanup_socket_xprt(void)
3116{ 3112{
3117#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
3118 if (sunrpc_table_header) { 3113 if (sunrpc_table_header) {
3119 unregister_sysctl_table(sunrpc_table_header); 3114 unregister_sysctl_table(sunrpc_table_header);
3120 sunrpc_table_header = NULL; 3115 sunrpc_table_header = NULL;
3121 } 3116 }
3122#endif
3123 3117
3124 xprt_unregister_transport(&xs_local_transport); 3118 xprt_unregister_transport(&xs_local_transport);
3125 xprt_unregister_transport(&xs_udp_transport); 3119 xprt_unregister_transport(&xs_udp_transport);