author    Trond Myklebust <trondmy@gmail.com>  2018-01-23 14:55:50 -0500
committer Trond Myklebust <trondmy@gmail.com>  2018-01-23 14:55:50 -0500
commit    8f39fce84a6765b62cbc30fd9c3fdaa5bdf46ded (patch)
tree      9cd8d4725586a3225e57c54b076cc077e6bbc410
parent    0be283f676a1e7b208db0c992283197ef8b52158 (diff)
parent    21ead9ff3dc72604d89499a1da5a18cc193ec4ff (diff)
Merge tag 'nfs-rdma-for-4.16-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
NFS-over-RDMA client updates for Linux 4.16

New features:
- xprtrdma tracepoints

Bugfixes and cleanups:
- Fix memory leak if rpcrdma_buffer_create() fails
- Fix allocating extra rpcrdma_reps for the backchannel
- Remove various unused and redundant variables and lock cycles
- Fix IPv6 support in xprt_rdma_set_port()
- Fix memory leak by calling buf_free for callback replies
- Fix "bytes registered" accounting
- Fix kernel-doc comments
- SUNRPC tracepoint cleanups for consistent information
- Optimizations for __rpc_execute()
-rw-r--r--  include/linux/sunrpc/xprtrdma.h    |   2
-rw-r--r--  include/trace/events/rdma.h        | 129
-rw-r--r--  include/trace/events/rpcrdma.h     | 890
-rw-r--r--  include/trace/events/sunrpc.h      |   8
-rw-r--r--  net/sunrpc/sched.c                 |  26
-rw-r--r--  net/sunrpc/xprt.c                  |   2
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c  |  78
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c      | 157
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c     | 329
-rw-r--r--  net/sunrpc/xprtrdma/module.c       |  12
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c     | 162
-rw-r--r--  net/sunrpc/xprtrdma/transport.c    | 128
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c        | 280
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h    | 116
14 files changed, 1662 insertions, 657 deletions
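
The headline feature in this pull is the set of xprtrdma tracepoints added below. As a rough illustration of how they might be consumed, here is a minimal user-space sketch that enables the new trace events and reads the trace pipe. It assumes tracefs is mounted at /sys/kernel/tracing (older systems use /sys/kernel/debug/tracing) and that the event group is named after the TRACE_SYSTEM "rpcrdma" declared in the new header; it is not part of the patch set.

/* Minimal sketch: enable the new rpcrdma trace events and read the
 * trace pipe.  Assumes tracefs is mounted at /sys/kernel/tracing
 * (adjust the path if your system still uses
 * /sys/kernel/debug/tracing) and that the kernel provides the
 * rpcrdma event group.  Error handling is kept to a minimum.
 */
#include <stdio.h>
#include <stdlib.h>

static const char *tracefs = "/sys/kernel/tracing";

static void write_file(const char *relpath, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", tracefs, relpath);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	char line[1024];
	char path[256];
	FILE *pipe;

	/* "rpcrdma" matches the TRACE_SYSTEM declared in
	 * include/trace/events/rpcrdma.h below.
	 */
	write_file("events/rpcrdma/enable", "1");
	write_file("tracing_on", "1");

	snprintf(path, sizeof(path), "%s/trace_pipe", tracefs);
	pipe = fopen(path, "r");
	if (!pipe) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), pipe))
		fputs(line, stdout);	/* e.g. xprtrdma_marshal, xprtrdma_wc_send */
	fclose(pipe);
	return 0;
}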
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 221b7a2e5406..5859563e3c1f 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
 	RPCRDMA_MEMWINDOWS,
 	RPCRDMA_MEMWINDOWS_ASYNC,
 	RPCRDMA_MTHCAFMR,
-	RPCRDMA_FRMR,
+	RPCRDMA_FRWR,
 	RPCRDMA_ALLPHYSICAL,
 	RPCRDMA_LAST
 };
diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h
new file mode 100644
index 000000000000..aa19afc73a4e
--- /dev/null
+++ b/include/trace/events/rdma.h
@@ -0,0 +1,129 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (c) 2017 Oracle. All rights reserved.
4 */
5
6/*
7 * enum ib_event_type, from include/rdma/ib_verbs.h
8 */
9
10#define IB_EVENT_LIST \
11 ib_event(CQ_ERR) \
12 ib_event(QP_FATAL) \
13 ib_event(QP_REQ_ERR) \
14 ib_event(QP_ACCESS_ERR) \
15 ib_event(COMM_EST) \
16 ib_event(SQ_DRAINED) \
17 ib_event(PATH_MIG) \
18 ib_event(PATH_MIG_ERR) \
19 ib_event(DEVICE_FATAL) \
20 ib_event(PORT_ACTIVE) \
21 ib_event(PORT_ERR) \
22 ib_event(LID_CHANGE) \
23 ib_event(PKEY_CHANGE) \
24 ib_event(SM_CHANGE) \
25 ib_event(SRQ_ERR) \
26 ib_event(SRQ_LIMIT_REACHED) \
27 ib_event(QP_LAST_WQE_REACHED) \
28 ib_event(CLIENT_REREGISTER) \
29 ib_event(GID_CHANGE) \
30 ib_event_end(WQ_FATAL)
31
32#undef ib_event
33#undef ib_event_end
34
35#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
36#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
37
38IB_EVENT_LIST
39
40#undef ib_event
41#undef ib_event_end
42
43#define ib_event(x) { IB_EVENT_##x, #x },
44#define ib_event_end(x) { IB_EVENT_##x, #x }
45
46#define rdma_show_ib_event(x) \
47 __print_symbolic(x, IB_EVENT_LIST)
48
49/*
50 * enum ib_wc_status type, from include/rdma/ib_verbs.h
51 */
52#define IB_WC_STATUS_LIST \
53 ib_wc_status(SUCCESS) \
54 ib_wc_status(LOC_LEN_ERR) \
55 ib_wc_status(LOC_QP_OP_ERR) \
56 ib_wc_status(LOC_EEC_OP_ERR) \
57 ib_wc_status(LOC_PROT_ERR) \
58 ib_wc_status(WR_FLUSH_ERR) \
59 ib_wc_status(MW_BIND_ERR) \
60 ib_wc_status(BAD_RESP_ERR) \
61 ib_wc_status(LOC_ACCESS_ERR) \
62 ib_wc_status(REM_INV_REQ_ERR) \
63 ib_wc_status(REM_ACCESS_ERR) \
64 ib_wc_status(REM_OP_ERR) \
65 ib_wc_status(RETRY_EXC_ERR) \
66 ib_wc_status(RNR_RETRY_EXC_ERR) \
67 ib_wc_status(LOC_RDD_VIOL_ERR) \
68 ib_wc_status(REM_INV_RD_REQ_ERR) \
69 ib_wc_status(REM_ABORT_ERR) \
70 ib_wc_status(INV_EECN_ERR) \
71 ib_wc_status(INV_EEC_STATE_ERR) \
72 ib_wc_status(FATAL_ERR) \
73 ib_wc_status(RESP_TIMEOUT_ERR) \
74 ib_wc_status_end(GENERAL_ERR)
75
76#undef ib_wc_status
77#undef ib_wc_status_end
78
79#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
80#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
81
82IB_WC_STATUS_LIST
83
84#undef ib_wc_status
85#undef ib_wc_status_end
86
87#define ib_wc_status(x) { IB_WC_##x, #x },
88#define ib_wc_status_end(x) { IB_WC_##x, #x }
89
90#define rdma_show_wc_status(x) \
91 __print_symbolic(x, IB_WC_STATUS_LIST)
92
93/*
94 * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
95 */
96#define RDMA_CM_EVENT_LIST \
97 rdma_cm_event(ADDR_RESOLVED) \
98 rdma_cm_event(ADDR_ERROR) \
99 rdma_cm_event(ROUTE_RESOLVED) \
100 rdma_cm_event(ROUTE_ERROR) \
101 rdma_cm_event(CONNECT_REQUEST) \
102 rdma_cm_event(CONNECT_RESPONSE) \
103 rdma_cm_event(CONNECT_ERROR) \
104 rdma_cm_event(UNREACHABLE) \
105 rdma_cm_event(REJECTED) \
106 rdma_cm_event(ESTABLISHED) \
107 rdma_cm_event(DISCONNECTED) \
108 rdma_cm_event(DEVICE_REMOVAL) \
109 rdma_cm_event(MULTICAST_JOIN) \
110 rdma_cm_event(MULTICAST_ERROR) \
111 rdma_cm_event(ADDR_CHANGE) \
112 rdma_cm_event_end(TIMEWAIT_EXIT)
113
114#undef rdma_cm_event
115#undef rdma_cm_event_end
116
117#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
118#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
119
120RDMA_CM_EVENT_LIST
121
122#undef rdma_cm_event
123#undef rdma_cm_event_end
124
125#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
126#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
127
128#define rdma_show_cm_event(x) \
129 __print_symbolic(x, RDMA_CM_EVENT_LIST)
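
The header above relies on an X-macro list: IB_EVENT_LIST, IB_WC_STATUS_LIST and RDMA_CM_EVENT_LIST are each expanded twice, once to register the enum values with the trace infrastructure (TRACE_DEFINE_ENUM) and once to build the value-to-name table consumed by __print_symbolic(). The stand-alone sketch below shows the same pattern in plain user-space C with a hypothetical SAMPLE_EVENT_LIST; it is illustrative only and does not use the ftrace macros.

/* Stand-alone sketch of the X-macro pattern used above: one list
 * macro is expanded twice, once to declare enum values and once to
 * build a value-to-name table.  SAMPLE_EVENT_LIST and its entries
 * are hypothetical; the kernel header does the same thing with
 * TRACE_DEFINE_ENUM() and __print_symbolic().
 */
#include <stdio.h>
#include <stddef.h>

#define SAMPLE_EVENT_LIST \
	sample_event(CQ_ERR) \
	sample_event(QP_FATAL) \
	sample_event_end(PORT_ACTIVE)

/* First expansion: enum members */
#define sample_event(x)     SAMPLE_EVENT_##x,
#define sample_event_end(x) SAMPLE_EVENT_##x
enum sample_event { SAMPLE_EVENT_LIST };
#undef sample_event
#undef sample_event_end

/* Second expansion: { value, "name" } pairs for symbolic printing */
#define sample_event(x)     { SAMPLE_EVENT_##x, #x },
#define sample_event_end(x) { SAMPLE_EVENT_##x, #x }
static const struct { int value; const char *name; } sample_event_names[] = {
	SAMPLE_EVENT_LIST
};
#undef sample_event
#undef sample_event_end

static const char *show_sample_event(int value)
{
	for (size_t i = 0; i < sizeof(sample_event_names) / sizeof(sample_event_names[0]); i++)
		if (sample_event_names[i].value == value)
			return sample_event_names[i].name;
	return "UNKNOWN";
}

int main(void)
{
	printf("%d -> %s\n", SAMPLE_EVENT_QP_FATAL,
	       show_sample_event(SAMPLE_EVENT_QP_FATAL));
	return 0;
}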
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
new file mode 100644
index 000000000000..50ed3f8bf534
--- /dev/null
+++ b/include/trace/events/rpcrdma.h
@@ -0,0 +1,890 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (c) 2017 Oracle. All rights reserved.
4 */
5#undef TRACE_SYSTEM
6#define TRACE_SYSTEM rpcrdma
7
8#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
9#define _TRACE_RPCRDMA_H
10
11#include <linux/tracepoint.h>
12#include <trace/events/rdma.h>
13
14/**
15 ** Event classes
16 **/
17
18DECLARE_EVENT_CLASS(xprtrdma_reply_event,
19 TP_PROTO(
20 const struct rpcrdma_rep *rep
21 ),
22
23 TP_ARGS(rep),
24
25 TP_STRUCT__entry(
26 __field(const void *, rep)
27 __field(const void *, r_xprt)
28 __field(u32, xid)
29 __field(u32, version)
30 __field(u32, proc)
31 ),
32
33 TP_fast_assign(
34 __entry->rep = rep;
35 __entry->r_xprt = rep->rr_rxprt;
36 __entry->xid = be32_to_cpu(rep->rr_xid);
37 __entry->version = be32_to_cpu(rep->rr_vers);
38 __entry->proc = be32_to_cpu(rep->rr_proc);
39 ),
40
41 TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u",
42 __entry->r_xprt, __entry->xid, __entry->rep,
43 __entry->version, __entry->proc
44 )
45);
46
47#define DEFINE_REPLY_EVENT(name) \
48 DEFINE_EVENT(xprtrdma_reply_event, name, \
49 TP_PROTO( \
50 const struct rpcrdma_rep *rep \
51 ), \
52 TP_ARGS(rep))
53
54DECLARE_EVENT_CLASS(xprtrdma_rxprt,
55 TP_PROTO(
56 const struct rpcrdma_xprt *r_xprt
57 ),
58
59 TP_ARGS(r_xprt),
60
61 TP_STRUCT__entry(
62 __field(const void *, r_xprt)
63 __string(addr, rpcrdma_addrstr(r_xprt))
64 __string(port, rpcrdma_portstr(r_xprt))
65 ),
66
67 TP_fast_assign(
68 __entry->r_xprt = r_xprt;
69 __assign_str(addr, rpcrdma_addrstr(r_xprt));
70 __assign_str(port, rpcrdma_portstr(r_xprt));
71 ),
72
73 TP_printk("peer=[%s]:%s r_xprt=%p",
74 __get_str(addr), __get_str(port), __entry->r_xprt
75 )
76);
77
78#define DEFINE_RXPRT_EVENT(name) \
79 DEFINE_EVENT(xprtrdma_rxprt, name, \
80 TP_PROTO( \
81 const struct rpcrdma_xprt *r_xprt \
82 ), \
83 TP_ARGS(r_xprt))
84
85DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
86 TP_PROTO(
87 const struct rpc_task *task,
88 unsigned int pos,
89 struct rpcrdma_mr *mr,
90 int nsegs
91 ),
92
93 TP_ARGS(task, pos, mr, nsegs),
94
95 TP_STRUCT__entry(
96 __field(unsigned int, task_id)
97 __field(unsigned int, client_id)
98 __field(const void *, mr)
99 __field(unsigned int, pos)
100 __field(int, nents)
101 __field(u32, handle)
102 __field(u32, length)
103 __field(u64, offset)
104 __field(int, nsegs)
105 ),
106
107 TP_fast_assign(
108 __entry->task_id = task->tk_pid;
109 __entry->client_id = task->tk_client->cl_clid;
110 __entry->mr = mr;
111 __entry->pos = pos;
112 __entry->nents = mr->mr_nents;
113 __entry->handle = mr->mr_handle;
114 __entry->length = mr->mr_length;
115 __entry->offset = mr->mr_offset;
116 __entry->nsegs = nsegs;
117 ),
118
119 TP_printk("task:%u@%u mr=%p pos=%u %u@0x%016llx:0x%08x (%s)",
120 __entry->task_id, __entry->client_id, __entry->mr,
121 __entry->pos, __entry->length,
122 (unsigned long long)__entry->offset, __entry->handle,
123 __entry->nents < __entry->nsegs ? "more" : "last"
124 )
125);
126
127#define DEFINE_RDCH_EVENT(name) \
128 DEFINE_EVENT(xprtrdma_rdch_event, name, \
129 TP_PROTO( \
130 const struct rpc_task *task, \
131 unsigned int pos, \
132 struct rpcrdma_mr *mr, \
133 int nsegs \
134 ), \
135 TP_ARGS(task, pos, mr, nsegs))
136
137DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
138 TP_PROTO(
139 const struct rpc_task *task,
140 struct rpcrdma_mr *mr,
141 int nsegs
142 ),
143
144 TP_ARGS(task, mr, nsegs),
145
146 TP_STRUCT__entry(
147 __field(unsigned int, task_id)
148 __field(unsigned int, client_id)
149 __field(const void *, mr)
150 __field(int, nents)
151 __field(u32, handle)
152 __field(u32, length)
153 __field(u64, offset)
154 __field(int, nsegs)
155 ),
156
157 TP_fast_assign(
158 __entry->task_id = task->tk_pid;
159 __entry->client_id = task->tk_client->cl_clid;
160 __entry->mr = mr;
161 __entry->nents = mr->mr_nents;
162 __entry->handle = mr->mr_handle;
163 __entry->length = mr->mr_length;
164 __entry->offset = mr->mr_offset;
165 __entry->nsegs = nsegs;
166 ),
167
168 TP_printk("task:%u@%u mr=%p %u@0x%016llx:0x%08x (%s)",
169 __entry->task_id, __entry->client_id, __entry->mr,
170 __entry->length, (unsigned long long)__entry->offset,
171 __entry->handle,
172 __entry->nents < __entry->nsegs ? "more" : "last"
173 )
174);
175
176#define DEFINE_WRCH_EVENT(name) \
177 DEFINE_EVENT(xprtrdma_wrch_event, name, \
178 TP_PROTO( \
179 const struct rpc_task *task, \
180 struct rpcrdma_mr *mr, \
181 int nsegs \
182 ), \
183 TP_ARGS(task, mr, nsegs))
184
185TRACE_DEFINE_ENUM(FRWR_IS_INVALID);
186TRACE_DEFINE_ENUM(FRWR_IS_VALID);
187TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR);
188TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI);
189
190#define xprtrdma_show_frwr_state(x) \
191 __print_symbolic(x, \
192 { FRWR_IS_INVALID, "INVALID" }, \
193 { FRWR_IS_VALID, "VALID" }, \
194 { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \
195 { FRWR_FLUSHED_LI, "FLUSHED_LI" })
196
197DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
198 TP_PROTO(
199 const struct ib_wc *wc,
200 const struct rpcrdma_frwr *frwr
201 ),
202
203 TP_ARGS(wc, frwr),
204
205 TP_STRUCT__entry(
206 __field(const void *, mr)
207 __field(unsigned int, state)
208 __field(unsigned int, status)
209 __field(unsigned int, vendor_err)
210 ),
211
212 TP_fast_assign(
213 __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
214 __entry->state = frwr->fr_state;
215 __entry->status = wc->status;
216 __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
217 ),
218
219 TP_printk(
220 "mr=%p state=%s: %s (%u/0x%x)",
221 __entry->mr, xprtrdma_show_frwr_state(__entry->state),
222 rdma_show_wc_status(__entry->status),
223 __entry->status, __entry->vendor_err
224 )
225);
226
227#define DEFINE_FRWR_DONE_EVENT(name) \
228 DEFINE_EVENT(xprtrdma_frwr_done, name, \
229 TP_PROTO( \
230 const struct ib_wc *wc, \
231 const struct rpcrdma_frwr *frwr \
232 ), \
233 TP_ARGS(wc, frwr))
234
235DECLARE_EVENT_CLASS(xprtrdma_mr,
236 TP_PROTO(
237 const struct rpcrdma_mr *mr
238 ),
239
240 TP_ARGS(mr),
241
242 TP_STRUCT__entry(
243 __field(const void *, mr)
244 __field(u32, handle)
245 __field(u32, length)
246 __field(u64, offset)
247 ),
248
249 TP_fast_assign(
250 __entry->mr = mr;
251 __entry->handle = mr->mr_handle;
252 __entry->length = mr->mr_length;
253 __entry->offset = mr->mr_offset;
254 ),
255
256 TP_printk("mr=%p %u@0x%016llx:0x%08x",
257 __entry->mr, __entry->length,
258 (unsigned long long)__entry->offset,
259 __entry->handle
260 )
261);
262
263#define DEFINE_MR_EVENT(name) \
264 DEFINE_EVENT(xprtrdma_mr, name, \
265 TP_PROTO( \
266 const struct rpcrdma_mr *mr \
267 ), \
268 TP_ARGS(mr))
269
270DECLARE_EVENT_CLASS(xprtrdma_cb_event,
271 TP_PROTO(
272 const struct rpc_rqst *rqst
273 ),
274
275 TP_ARGS(rqst),
276
277 TP_STRUCT__entry(
278 __field(const void *, rqst)
279 __field(const void *, rep)
280 __field(const void *, req)
281 __field(u32, xid)
282 ),
283
284 TP_fast_assign(
285 __entry->rqst = rqst;
286 __entry->req = rpcr_to_rdmar(rqst);
287 __entry->rep = rpcr_to_rdmar(rqst)->rl_reply;
288 __entry->xid = be32_to_cpu(rqst->rq_xid);
289 ),
290
291 TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p",
292 __entry->xid, __entry->rqst, __entry->req, __entry->rep
293 )
294);
295
296#define DEFINE_CB_EVENT(name) \
297 DEFINE_EVENT(xprtrdma_cb_event, name, \
298 TP_PROTO( \
299 const struct rpc_rqst *rqst \
300 ), \
301 TP_ARGS(rqst))
302
303/**
304 ** Connection events
305 **/
306
307TRACE_EVENT(xprtrdma_conn_upcall,
308 TP_PROTO(
309 const struct rpcrdma_xprt *r_xprt,
310 struct rdma_cm_event *event
311 ),
312
313 TP_ARGS(r_xprt, event),
314
315 TP_STRUCT__entry(
316 __field(const void *, r_xprt)
317 __field(unsigned int, event)
318 __field(int, status)
319 __string(addr, rpcrdma_addrstr(r_xprt))
320 __string(port, rpcrdma_portstr(r_xprt))
321 ),
322
323 TP_fast_assign(
324 __entry->r_xprt = r_xprt;
325 __entry->event = event->event;
326 __entry->status = event->status;
327 __assign_str(addr, rpcrdma_addrstr(r_xprt));
328 __assign_str(port, rpcrdma_portstr(r_xprt));
329 ),
330
331 TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
332 __get_str(addr), __get_str(port),
333 __entry->r_xprt, rdma_show_cm_event(__entry->event),
334 __entry->event, __entry->status
335 )
336);
337
338TRACE_EVENT(xprtrdma_disconnect,
339 TP_PROTO(
340 const struct rpcrdma_xprt *r_xprt,
341 int status
342 ),
343
344 TP_ARGS(r_xprt, status),
345
346 TP_STRUCT__entry(
347 __field(const void *, r_xprt)
348 __field(int, status)
349 __field(int, connected)
350 __string(addr, rpcrdma_addrstr(r_xprt))
351 __string(port, rpcrdma_portstr(r_xprt))
352 ),
353
354 TP_fast_assign(
355 __entry->r_xprt = r_xprt;
356 __entry->status = status;
357 __entry->connected = r_xprt->rx_ep.rep_connected;
358 __assign_str(addr, rpcrdma_addrstr(r_xprt));
359 __assign_str(port, rpcrdma_portstr(r_xprt));
360 ),
361
362 TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected",
363 __get_str(addr), __get_str(port),
364 __entry->r_xprt, __entry->status,
365 __entry->connected == 1 ? "still " : "dis"
366 )
367);
368
369DEFINE_RXPRT_EVENT(xprtrdma_conn_start);
370DEFINE_RXPRT_EVENT(xprtrdma_conn_tout);
371DEFINE_RXPRT_EVENT(xprtrdma_create);
372DEFINE_RXPRT_EVENT(xprtrdma_destroy);
373DEFINE_RXPRT_EVENT(xprtrdma_remove);
374DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
375DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
376DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
377
378TRACE_EVENT(xprtrdma_qp_error,
379 TP_PROTO(
380 const struct rpcrdma_xprt *r_xprt,
381 const struct ib_event *event
382 ),
383
384 TP_ARGS(r_xprt, event),
385
386 TP_STRUCT__entry(
387 __field(const void *, r_xprt)
388 __field(unsigned int, event)
389 __string(name, event->device->name)
390 __string(addr, rpcrdma_addrstr(r_xprt))
391 __string(port, rpcrdma_portstr(r_xprt))
392 ),
393
394 TP_fast_assign(
395 __entry->r_xprt = r_xprt;
396 __entry->event = event->event;
397 __assign_str(name, event->device->name);
398 __assign_str(addr, rpcrdma_addrstr(r_xprt));
399 __assign_str(port, rpcrdma_portstr(r_xprt));
400 ),
401
402 TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)",
403 __get_str(addr), __get_str(port), __entry->r_xprt,
404 __get_str(name), rdma_show_ib_event(__entry->event),
405 __entry->event
406 )
407);
408
409/**
410 ** Call events
411 **/
412
413TRACE_EVENT(xprtrdma_createmrs,
414 TP_PROTO(
415 const struct rpcrdma_xprt *r_xprt,
416 unsigned int count
417 ),
418
419 TP_ARGS(r_xprt, count),
420
421 TP_STRUCT__entry(
422 __field(const void *, r_xprt)
423 __field(unsigned int, count)
424 ),
425
426 TP_fast_assign(
427 __entry->r_xprt = r_xprt;
428 __entry->count = count;
429 ),
430
431 TP_printk("r_xprt=%p: created %u MRs",
432 __entry->r_xprt, __entry->count
433 )
434);
435
436DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
437
438DEFINE_RDCH_EVENT(xprtrdma_read_chunk);
439DEFINE_WRCH_EVENT(xprtrdma_write_chunk);
440DEFINE_WRCH_EVENT(xprtrdma_reply_chunk);
441
442TRACE_DEFINE_ENUM(rpcrdma_noch);
443TRACE_DEFINE_ENUM(rpcrdma_readch);
444TRACE_DEFINE_ENUM(rpcrdma_areadch);
445TRACE_DEFINE_ENUM(rpcrdma_writech);
446TRACE_DEFINE_ENUM(rpcrdma_replych);
447
448#define xprtrdma_show_chunktype(x) \
449 __print_symbolic(x, \
450 { rpcrdma_noch, "inline" }, \
451 { rpcrdma_readch, "read list" }, \
452 { rpcrdma_areadch, "*read list" }, \
453 { rpcrdma_writech, "write list" }, \
454 { rpcrdma_replych, "reply chunk" })
455
456TRACE_EVENT(xprtrdma_marshal,
457 TP_PROTO(
458 const struct rpc_rqst *rqst,
459 unsigned int hdrlen,
460 unsigned int rtype,
461 unsigned int wtype
462 ),
463
464 TP_ARGS(rqst, hdrlen, rtype, wtype),
465
466 TP_STRUCT__entry(
467 __field(unsigned int, task_id)
468 __field(unsigned int, client_id)
469 __field(u32, xid)
470 __field(unsigned int, hdrlen)
471 __field(unsigned int, headlen)
472 __field(unsigned int, pagelen)
473 __field(unsigned int, taillen)
474 __field(unsigned int, rtype)
475 __field(unsigned int, wtype)
476 ),
477
478 TP_fast_assign(
479 __entry->task_id = rqst->rq_task->tk_pid;
480 __entry->client_id = rqst->rq_task->tk_client->cl_clid;
481 __entry->xid = be32_to_cpu(rqst->rq_xid);
482 __entry->hdrlen = hdrlen;
483 __entry->headlen = rqst->rq_snd_buf.head[0].iov_len;
484 __entry->pagelen = rqst->rq_snd_buf.page_len;
485 __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len;
486 __entry->rtype = rtype;
487 __entry->wtype = wtype;
488 ),
489
490 TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s",
491 __entry->task_id, __entry->client_id, __entry->xid,
492 __entry->hdrlen,
493 __entry->headlen, __entry->pagelen, __entry->taillen,
494 xprtrdma_show_chunktype(__entry->rtype),
495 xprtrdma_show_chunktype(__entry->wtype)
496 )
497);
498
499TRACE_EVENT(xprtrdma_post_send,
500 TP_PROTO(
501 const struct rpcrdma_req *req,
502 int status
503 ),
504
505 TP_ARGS(req, status),
506
507 TP_STRUCT__entry(
508 __field(const void *, req)
509 __field(int, num_sge)
510 __field(bool, signaled)
511 __field(int, status)
512 ),
513
514 TP_fast_assign(
515 __entry->req = req;
516 __entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
517 __entry->signaled = req->rl_sendctx->sc_wr.send_flags &
518 IB_SEND_SIGNALED;
519 __entry->status = status;
520 ),
521
522 TP_printk("req=%p, %d SGEs%s, status=%d",
523 __entry->req, __entry->num_sge,
524 (__entry->signaled ? ", signaled" : ""),
525 __entry->status
526 )
527);
528
529TRACE_EVENT(xprtrdma_post_recv,
530 TP_PROTO(
531 const struct rpcrdma_rep *rep,
532 int status
533 ),
534
535 TP_ARGS(rep, status),
536
537 TP_STRUCT__entry(
538 __field(const void *, rep)
539 __field(int, status)
540 ),
541
542 TP_fast_assign(
543 __entry->rep = rep;
544 __entry->status = status;
545 ),
546
547 TP_printk("rep=%p status=%d",
548 __entry->rep, __entry->status
549 )
550);
551
552/**
553 ** Completion events
554 **/
555
556TRACE_EVENT(xprtrdma_wc_send,
557 TP_PROTO(
558 const struct rpcrdma_sendctx *sc,
559 const struct ib_wc *wc
560 ),
561
562 TP_ARGS(sc, wc),
563
564 TP_STRUCT__entry(
565 __field(const void *, req)
566 __field(unsigned int, unmap_count)
567 __field(unsigned int, status)
568 __field(unsigned int, vendor_err)
569 ),
570
571 TP_fast_assign(
572 __entry->req = sc->sc_req;
573 __entry->unmap_count = sc->sc_unmap_count;
574 __entry->status = wc->status;
575 __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
576 ),
577
578 TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)",
579 __entry->req, __entry->unmap_count,
580 rdma_show_wc_status(__entry->status),
581 __entry->status, __entry->vendor_err
582 )
583);
584
585TRACE_EVENT(xprtrdma_wc_receive,
586 TP_PROTO(
587 const struct rpcrdma_rep *rep,
588 const struct ib_wc *wc
589 ),
590
591 TP_ARGS(rep, wc),
592
593 TP_STRUCT__entry(
594 __field(const void *, rep)
595 __field(unsigned int, byte_len)
596 __field(unsigned int, status)
597 __field(unsigned int, vendor_err)
598 ),
599
600 TP_fast_assign(
601 __entry->rep = rep;
602 __entry->byte_len = wc->byte_len;
603 __entry->status = wc->status;
604 __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
605 ),
606
607 TP_printk("rep=%p, %u bytes: %s (%u/0x%x)",
608 __entry->rep, __entry->byte_len,
609 rdma_show_wc_status(__entry->status),
610 __entry->status, __entry->vendor_err
611 )
612);
613
614DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
615DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
616DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
617
618DEFINE_MR_EVENT(xprtrdma_localinv);
619DEFINE_MR_EVENT(xprtrdma_dma_unmap);
620DEFINE_MR_EVENT(xprtrdma_remoteinv);
621DEFINE_MR_EVENT(xprtrdma_recover_mr);
622
623/**
624 ** Reply events
625 **/
626
627TRACE_EVENT(xprtrdma_reply,
628 TP_PROTO(
629 const struct rpc_task *task,
630 const struct rpcrdma_rep *rep,
631 const struct rpcrdma_req *req,
632 unsigned int credits
633 ),
634
635 TP_ARGS(task, rep, req, credits),
636
637 TP_STRUCT__entry(
638 __field(unsigned int, task_id)
639 __field(unsigned int, client_id)
640 __field(const void *, rep)
641 __field(const void *, req)
642 __field(u32, xid)
643 __field(unsigned int, credits)
644 ),
645
646 TP_fast_assign(
647 __entry->task_id = task->tk_pid;
648 __entry->client_id = task->tk_client->cl_clid;
649 __entry->rep = rep;
650 __entry->req = req;
651 __entry->xid = be32_to_cpu(rep->rr_xid);
652 __entry->credits = credits;
653 ),
654
655 TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p",
656 __entry->task_id, __entry->client_id, __entry->xid,
657 __entry->credits, __entry->rep, __entry->req
658 )
659);
660
661TRACE_EVENT(xprtrdma_defer_cmp,
662 TP_PROTO(
663 const struct rpcrdma_rep *rep
664 ),
665
666 TP_ARGS(rep),
667
668 TP_STRUCT__entry(
669 __field(unsigned int, task_id)
670 __field(unsigned int, client_id)
671 __field(const void *, rep)
672 __field(u32, xid)
673 ),
674
675 TP_fast_assign(
676 __entry->task_id = rep->rr_rqst->rq_task->tk_pid;
677 __entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid;
678 __entry->rep = rep;
679 __entry->xid = be32_to_cpu(rep->rr_xid);
680 ),
681
682 TP_printk("task:%u@%u xid=0x%08x rep=%p",
683 __entry->task_id, __entry->client_id, __entry->xid,
684 __entry->rep
685 )
686);
687
688DEFINE_REPLY_EVENT(xprtrdma_reply_vers);
689DEFINE_REPLY_EVENT(xprtrdma_reply_rqst);
690DEFINE_REPLY_EVENT(xprtrdma_reply_short);
691DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
692
693TRACE_EVENT(xprtrdma_fixup,
694 TP_PROTO(
695 const struct rpc_rqst *rqst,
696 int len,
697 int hdrlen
698 ),
699
700 TP_ARGS(rqst, len, hdrlen),
701
702 TP_STRUCT__entry(
703 __field(unsigned int, task_id)
704 __field(unsigned int, client_id)
705 __field(const void *, base)
706 __field(int, len)
707 __field(int, hdrlen)
708 ),
709
710 TP_fast_assign(
711 __entry->task_id = rqst->rq_task->tk_pid;
712 __entry->client_id = rqst->rq_task->tk_client->cl_clid;
713 __entry->base = rqst->rq_rcv_buf.head[0].iov_base;
714 __entry->len = len;
715 __entry->hdrlen = hdrlen;
716 ),
717
718 TP_printk("task:%u@%u base=%p len=%d hdrlen=%d",
719 __entry->task_id, __entry->client_id,
720 __entry->base, __entry->len, __entry->hdrlen
721 )
722);
723
724TRACE_EVENT(xprtrdma_fixup_pg,
725 TP_PROTO(
726 const struct rpc_rqst *rqst,
727 int pageno,
728 const void *pos,
729 int len,
730 int curlen
731 ),
732
733 TP_ARGS(rqst, pageno, pos, len, curlen),
734
735 TP_STRUCT__entry(
736 __field(unsigned int, task_id)
737 __field(unsigned int, client_id)
738 __field(const void *, pos)
739 __field(int, pageno)
740 __field(int, len)
741 __field(int, curlen)
742 ),
743
744 TP_fast_assign(
745 __entry->task_id = rqst->rq_task->tk_pid;
746 __entry->client_id = rqst->rq_task->tk_client->cl_clid;
747 __entry->pos = pos;
748 __entry->pageno = pageno;
749 __entry->len = len;
750 __entry->curlen = curlen;
751 ),
752
753 TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d",
754 __entry->task_id, __entry->client_id,
755 __entry->pageno, __entry->pos, __entry->len, __entry->curlen
756 )
757);
758
759TRACE_EVENT(xprtrdma_decode_seg,
760 TP_PROTO(
761 u32 handle,
762 u32 length,
763 u64 offset
764 ),
765
766 TP_ARGS(handle, length, offset),
767
768 TP_STRUCT__entry(
769 __field(u32, handle)
770 __field(u32, length)
771 __field(u64, offset)
772 ),
773
774 TP_fast_assign(
775 __entry->handle = handle;
776 __entry->length = length;
777 __entry->offset = offset;
778 ),
779
780 TP_printk("%u@0x%016llx:0x%08x",
781 __entry->length, (unsigned long long)__entry->offset,
782 __entry->handle
783 )
784);
785
786/**
787 ** Allocation/release of rpcrdma_reqs and rpcrdma_reps
788 **/
789
790TRACE_EVENT(xprtrdma_allocate,
791 TP_PROTO(
792 const struct rpc_task *task,
793 const struct rpcrdma_req *req
794 ),
795
796 TP_ARGS(task, req),
797
798 TP_STRUCT__entry(
799 __field(unsigned int, task_id)
800 __field(unsigned int, client_id)
801 __field(const void *, req)
802 __field(const void *, rep)
803 __field(size_t, callsize)
804 __field(size_t, rcvsize)
805 ),
806
807 TP_fast_assign(
808 __entry->task_id = task->tk_pid;
809 __entry->client_id = task->tk_client->cl_clid;
810 __entry->req = req;
811 __entry->rep = req ? req->rl_reply : NULL;
812 __entry->callsize = task->tk_rqstp->rq_callsize;
813 __entry->rcvsize = task->tk_rqstp->rq_rcvsize;
814 ),
815
816 TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
817 __entry->task_id, __entry->client_id,
818 __entry->req, __entry->rep,
819 __entry->callsize, __entry->rcvsize
820 )
821);
822
823TRACE_EVENT(xprtrdma_rpc_done,
824 TP_PROTO(
825 const struct rpc_task *task,
826 const struct rpcrdma_req *req
827 ),
828
829 TP_ARGS(task, req),
830
831 TP_STRUCT__entry(
832 __field(unsigned int, task_id)
833 __field(unsigned int, client_id)
834 __field(const void *, req)
835 __field(const void *, rep)
836 ),
837
838 TP_fast_assign(
839 __entry->task_id = task->tk_pid;
840 __entry->client_id = task->tk_client->cl_clid;
841 __entry->req = req;
842 __entry->rep = req->rl_reply;
843 ),
844
845 TP_printk("task:%u@%u req=%p rep=%p",
846 __entry->task_id, __entry->client_id,
847 __entry->req, __entry->rep
848 )
849);
850
851DEFINE_RXPRT_EVENT(xprtrdma_noreps);
852
853/**
854 ** Callback events
855 **/
856
857TRACE_EVENT(xprtrdma_cb_setup,
858 TP_PROTO(
859 const struct rpcrdma_xprt *r_xprt,
860 unsigned int reqs
861 ),
862
863 TP_ARGS(r_xprt, reqs),
864
865 TP_STRUCT__entry(
866 __field(const void *, r_xprt)
867 __field(unsigned int, reqs)
868 __string(addr, rpcrdma_addrstr(r_xprt))
869 __string(port, rpcrdma_portstr(r_xprt))
870 ),
871
872 TP_fast_assign(
873 __entry->r_xprt = r_xprt;
874 __entry->reqs = reqs;
875 __assign_str(addr, rpcrdma_addrstr(r_xprt));
876 __assign_str(port, rpcrdma_portstr(r_xprt));
877 ),
878
879 TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
880 __get_str(addr), __get_str(port),
881 __entry->r_xprt, __entry->reqs
882 )
883);
884
885DEFINE_CB_EVENT(xprtrdma_cb_call);
886DEFINE_CB_EVENT(xprtrdma_cb_reply);
887
888#endif /* _TRACE_RPCRDMA_H */
889
890#include <trace/define_trace.h>
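
Most tracepoints in this header are stamped out from shared event classes: DECLARE_EVENT_CLASS() defines the record layout and format string once, and each DEFINE_*_EVENT() wrapper only supplies a name (for example the eight DEFINE_RXPRT_EVENT() instances). Below is a rough plain-C sketch of that define-once/instantiate-many idea, using a hypothetical rxprt structure; the real ftrace macros generate considerably more than this.

/* Sketch of the "event class" idea behind DECLARE_EVENT_CLASS /
 * DEFINE_EVENT: the formatting logic is written once and each named
 * event is a thin wrapper generated by a macro.  Purely illustrative
 * user-space code, not the ftrace machinery.
 */
#include <stdio.h>

struct rxprt {
	const char *addr;
	const char *port;
};

/* The "class": one shared body that knows how to print an rxprt. */
static void rxprt_event_class(const char *event, const struct rxprt *r_xprt)
{
	printf("%s: peer=[%s]:%s r_xprt=%p\n",
	       event, r_xprt->addr, r_xprt->port, (const void *)r_xprt);
}

/* The "DEFINE_EVENT" step: each tracepoint is just a named wrapper. */
#define DEFINE_RXPRT_EVENT(name)				\
	static void trace_##name(const struct rxprt *r_xprt)	\
	{							\
		rxprt_event_class(#name, r_xprt);		\
	}

DEFINE_RXPRT_EVENT(xprtrdma_conn_start)
DEFINE_RXPRT_EVENT(xprtrdma_remove)

int main(void)
{
	struct rxprt xprt = { .addr = "192.168.1.10", .port = "20049" };

	trace_xprtrdma_conn_start(&xprt);
	trace_xprtrdma_remove(&xprt);
	return 0;
}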
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 1357ec8973d5..970c91a83173 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
 		__entry->status = task->tk_status;
 	),
 
-	TP_printk("task:%u@%u, status %d",
+	TP_printk("task:%u@%u status=%d",
 		__entry->task_id, __entry->client_id,
 		__entry->status)
 );
@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status,
 		__entry->status = status;
 	),
 
-	TP_printk("task:%u@%u, status %d",
+	TP_printk("task:%u@%u status=%d",
 		__entry->task_id, __entry->client_id,
 		__entry->status)
 );
@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
 		__entry->status)
 );
 
+DEFINE_EVENT(rpc_xprt_event, xprt_timer,
+	TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
+	TP_ARGS(xprt, xid, status));
+
 DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
 	TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
 	TP_ARGS(xprt, xid, status));
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..896691afbb1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
 		void (*do_action)(struct rpc_task *);
 
 		/*
-		 * Execute any pending callback first.
+		 * Perform the next FSM step or a pending callback.
+		 *
+		 * tk_action may be NULL if the task has been killed.
+		 * In particular, note that rpc_killall_tasks may
+		 * do this at any time, so beware when dereferencing.
 		 */
-		do_action = task->tk_callback;
-		task->tk_callback = NULL;
-		if (do_action == NULL) {
-			/*
-			 * Perform the next FSM step.
-			 * tk_action may be NULL if the task has been killed.
-			 * In particular, note that rpc_killall_tasks may
-			 * do this at any time, so beware when dereferencing.
-			 */
-			do_action = task->tk_action;
-			if (do_action == NULL)
-				break;
+		do_action = task->tk_action;
+		if (task->tk_callback) {
+			do_action = task->tk_callback;
+			task->tk_callback = NULL;
 		}
-		trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+		if (!do_action)
+			break;
+		trace_rpc_task_run_action(task->tk_client, task, do_action);
 		do_action(task);
 
 		/*
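
The __rpc_execute() change above folds the callback/FSM dispatch into one selection step: tk_action is the default, a pending tk_callback overrides it for a single iteration, and the loop exits when neither is set. The stand-alone model below mirrors that control flow with a hypothetical toy_task; it is only a sketch of the logic, not the kernel scheduler.

/* Stand-alone model of the reworked dispatch step in __rpc_execute():
 * prefer a pending one-shot callback, otherwise run the next FSM
 * action, and stop when neither is set.  The struct and actions here
 * are hypothetical stand-ins for the kernel's rpc_task state machine.
 */
#include <stdio.h>
#include <stddef.h>

struct toy_task {
	void (*tk_action)(struct toy_task *);
	void (*tk_callback)(struct toy_task *);
};

static void action_done(struct toy_task *task)
{
	printf("final action\n");
	task->tk_action = NULL;	/* nothing left to do */
}

static void action_start(struct toy_task *task)
{
	printf("first action\n");
	task->tk_action = action_done;
}

static void one_shot_callback(struct toy_task *task)
{
	printf("pending callback\n");
}

static void toy_execute(struct toy_task *task)
{
	for (;;) {
		void (*do_action)(struct toy_task *);

		/* Mirror of the new logic: tk_action is the default,
		 * a queued tk_callback overrides it for this pass.
		 */
		do_action = task->tk_action;
		if (task->tk_callback) {
			do_action = task->tk_callback;
			task->tk_callback = NULL;
		}
		if (!do_action)
			break;
		do_action(task);
	}
}

int main(void)
{
	struct toy_task task = {
		.tk_action = action_start,
		.tk_callback = one_shot_callback,
	};

	toy_execute(&task);	/* callback, first action, final action */
	return 0;
}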
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 33b74fd84051..2436fd1125fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
 
 	if (task->tk_status != -ETIMEDOUT)
 		return;
-	dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
 
+	trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
 	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
 			xprt->ops->timer(xprt, task);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 	req = rpcrdma_create_req(r_xprt);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-	__set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
 
 	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
 				  DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
 				 unsigned int count)
 {
-	struct rpcrdma_rep *rep;
 	int rc = 0;
 
 	while (count--) {
-		rep = rpcrdma_create_rep(r_xprt);
-		if (IS_ERR(rep)) {
-			pr_err("RPC: %s: reply buffer alloc failed\n",
-			       __func__);
-			rc = PTR_ERR(rep);
+		rc = rpcrdma_create_rep(r_xprt);
+		if (rc)
 			break;
-		}
-
-		rpcrdma_recv_buffer_put(rep);
 	}
-
 	return rc;
 }
 
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 		rqst->rq_xprt = &r_xprt->rx_xprt;
 		INIT_LIST_HEAD(&rqst->rq_list);
 		INIT_LIST_HEAD(&rqst->rq_bc_list);
+		__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 		if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
 			goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 
 	buffer->rb_bc_srv_max_requests = reqs;
 	request_module("svcrdma");
-
+	trace_xprtrdma_cb_setup(r_xprt, reqs);
 	return 0;
 
 out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 	return maxmsg - RPCRDMA_HDRLEN_MIN;
 }
 
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 {
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 	if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
 				      &rqst->rq_snd_buf, rpcrdma_noch))
 		return -EIO;
+
+	trace_xprtrdma_cb_reply(rqst);
+	return 0;
+}
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ *	%0 if the RPC message has been sent
+ *	%-ENOTCONN if the caller should reconnect and call again
+ *	%-EIO if a permanent error occurred and the request was not
+ *		sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	int rc;
+
+	if (!xprt_connected(rqst->rq_xprt))
+		goto drop_connection;
+
+	rc = rpcrdma_bc_marshal_reply(rqst);
+	if (rc < 0)
+		goto failed_marshal;
+
+	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+		goto drop_connection;
 	return 0;
+
+failed_marshal:
+	if (rc != -ENOTCONN)
+		return rc;
+drop_connection:
+	xprt_disconnect_done(rqst->rq_xprt);
+	return -ENOTCONN;
 }
 
 /**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 	dprintk("RPC: %s: freeing rqst %p (req %p)\n",
 		__func__, rqst, rpcr_to_rdmar(rqst));
 
-	smp_mb__before_atomic();
-	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
-	clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-	smp_mb__after_atomic();
-
 	spin_lock_bh(&xprt->bc_pa_lock);
 	list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
 	spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 
 /**
  * rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
  * @rep: receive buffer containing the call
  *
  * Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 			       struct rpc_rqst, rq_bc_pa_list);
 	list_del(&rqst->rq_bc_pa_list);
 	spin_unlock(&xprt->bc_pa_lock);
-	dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
 
 	/* Prepare rqst */
 	rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	rqst->rq_xid = *p;
 
 	rqst->rq_private_buf.len = size;
-	set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 	buf = &rqst->rq_rcv_buf;
 	memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	 * the Upper Layer is done decoding it.
 	 */
 	req = rpcr_to_rdmar(rqst);
-	dprintk("RPC: %s: attaching rep %p to req %p\n",
-		__func__, rep, req);
 	req->rl_reply = rep;
-
-	/* Defeat the retransmit detection logic in send_request */
-	req->rl_connect_cookie = 0;
+	trace_xprtrdma_cb_call(rqst);
 
 	/* Queue rqst for ULP's callback service */
 	bc_serv = xprt->bc_serv;
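
The new xprt_rdma_bc_send_reply() kernel-doc above defines a three-way return contract (0, -ENOTCONN, -EIO). The stand-alone sketch below illustrates how a caller could dispatch on those codes; send_reply() is a stub standing in for the transport call, and the real caller lives in the sunrpc server code, so this is only the shape of the contract.

/* Stand-alone illustration of the return-code contract documented for
 * xprt_rdma_bc_send_reply() above: 0 means sent, -ENOTCONN means
 * reconnect and retry later, -EIO means drop the reply permanently.
 * send_reply() is a stub, not the kernel function.
 */
#include <errno.h>
#include <stdio.h>

static int send_reply(int simulate_rc)
{
	return simulate_rc;	/* stand-in for xprt_rdma_bc_send_reply() */
}

static void dispatch_reply(int simulate_rc)
{
	int rc = send_reply(simulate_rc);

	switch (rc) {
	case 0:
		printf("reply sent\n");
		break;
	case -ENOTCONN:
		printf("not connected: requeue and retry after reconnect\n");
		break;
	case -EIO:
		printf("permanent failure: discard this reply\n");
		break;
	default:
		printf("unexpected rc=%d\n", rc);
	}
}

int main(void)
{
	dispatch_reply(0);
	dispatch_reply(-ENOTCONN);
	dispatch_reply(-EIO);
	return 0;
}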
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
5 */ 5 */
6 6
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
47} 47}
48 48
49static int 49static int
50fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) 50fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
51{ 51{
52 static struct ib_fmr_attr fmr_attr = { 52 static struct ib_fmr_attr fmr_attr = {
53 .max_pages = RPCRDMA_MAX_FMR_SGES, 53 .max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
55 .page_shift = PAGE_SHIFT 55 .page_shift = PAGE_SHIFT
56 }; 56 };
57 57
58 mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, 58 mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
59 sizeof(u64), GFP_KERNEL); 59 sizeof(u64), GFP_KERNEL);
60 if (!mw->fmr.fm_physaddrs) 60 if (!mr->fmr.fm_physaddrs)
61 goto out_free; 61 goto out_free;
62 62
63 mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, 63 mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
64 sizeof(*mw->mw_sg), GFP_KERNEL); 64 sizeof(*mr->mr_sg), GFP_KERNEL);
65 if (!mw->mw_sg) 65 if (!mr->mr_sg)
66 goto out_free; 66 goto out_free;
67 67
68 sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); 68 sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
69 69
70 mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, 70 mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
71 &fmr_attr); 71 &fmr_attr);
72 if (IS_ERR(mw->fmr.fm_mr)) 72 if (IS_ERR(mr->fmr.fm_mr))
73 goto out_fmr_err; 73 goto out_fmr_err;
74 74
75 return 0; 75 return 0;
76 76
77out_fmr_err: 77out_fmr_err:
78 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, 78 dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
79 PTR_ERR(mw->fmr.fm_mr)); 79 PTR_ERR(mr->fmr.fm_mr));
80 80
81out_free: 81out_free:
82 kfree(mw->mw_sg); 82 kfree(mr->mr_sg);
83 kfree(mw->fmr.fm_physaddrs); 83 kfree(mr->fmr.fm_physaddrs);
84 return -ENOMEM; 84 return -ENOMEM;
85} 85}
86 86
87static int 87static int
88__fmr_unmap(struct rpcrdma_mw *mw) 88__fmr_unmap(struct rpcrdma_mr *mr)
89{ 89{
90 LIST_HEAD(l); 90 LIST_HEAD(l);
91 int rc; 91 int rc;
92 92
93 list_add(&mw->fmr.fm_mr->list, &l); 93 list_add(&mr->fmr.fm_mr->list, &l);
94 rc = ib_unmap_fmr(&l); 94 rc = ib_unmap_fmr(&l);
95 list_del(&mw->fmr.fm_mr->list); 95 list_del(&mr->fmr.fm_mr->list);
96 return rc; 96 return rc;
97} 97}
98 98
99static void 99static void
100fmr_op_release_mr(struct rpcrdma_mw *r) 100fmr_op_release_mr(struct rpcrdma_mr *mr)
101{ 101{
102 LIST_HEAD(unmap_list); 102 LIST_HEAD(unmap_list);
103 int rc; 103 int rc;
104 104
105 /* Ensure MW is not on any rl_registered list */ 105 /* Ensure MW is not on any rl_registered list */
106 if (!list_empty(&r->mw_list)) 106 if (!list_empty(&mr->mr_list))
107 list_del(&r->mw_list); 107 list_del(&mr->mr_list);
108 108
109 kfree(r->fmr.fm_physaddrs); 109 kfree(mr->fmr.fm_physaddrs);
110 kfree(r->mw_sg); 110 kfree(mr->mr_sg);
111 111
112 /* In case this one was left mapped, try to unmap it 112 /* In case this one was left mapped, try to unmap it
113 * to prevent dealloc_fmr from failing with EBUSY 113 * to prevent dealloc_fmr from failing with EBUSY
114 */ 114 */
115 rc = __fmr_unmap(r); 115 rc = __fmr_unmap(mr);
116 if (rc) 116 if (rc)
117 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", 117 pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
118 r, rc); 118 mr, rc);
119 119
120 rc = ib_dealloc_fmr(r->fmr.fm_mr); 120 rc = ib_dealloc_fmr(mr->fmr.fm_mr);
121 if (rc) 121 if (rc)
122 pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", 122 pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
123 r, rc); 123 mr, rc);
124 124
125 kfree(r); 125 kfree(mr);
126} 126}
127 127
128/* Reset of a single FMR. 128/* Reset of a single FMR.
129 */ 129 */
130static void 130static void
131fmr_op_recover_mr(struct rpcrdma_mw *mw) 131fmr_op_recover_mr(struct rpcrdma_mr *mr)
132{ 132{
133 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 133 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
134 int rc; 134 int rc;
135 135
136 /* ORDER: invalidate first */ 136 /* ORDER: invalidate first */
137 rc = __fmr_unmap(mw); 137 rc = __fmr_unmap(mr);
138
139 /* ORDER: then DMA unmap */
140 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
141 mw->mw_sg, mw->mw_nents, mw->mw_dir);
142 if (rc) 138 if (rc)
143 goto out_release; 139 goto out_release;
144 140
145 rpcrdma_put_mw(r_xprt, mw); 141 /* ORDER: then DMA unmap */
142 rpcrdma_mr_unmap_and_put(mr);
143
146 r_xprt->rx_stats.mrs_recovered++; 144 r_xprt->rx_stats.mrs_recovered++;
147 return; 145 return;
148 146
149out_release: 147out_release:
150 pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); 148 pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
151 r_xprt->rx_stats.mrs_orphaned++; 149 r_xprt->rx_stats.mrs_orphaned++;
152 150
153 spin_lock(&r_xprt->rx_buf.rb_mwlock); 151 trace_xprtrdma_dma_unmap(mr);
154 list_del(&mw->mw_all); 152 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
155 spin_unlock(&r_xprt->rx_buf.rb_mwlock); 153 mr->mr_sg, mr->mr_nents, mr->mr_dir);
154
155 spin_lock(&r_xprt->rx_buf.rb_mrlock);
156 list_del(&mr->mr_all);
157 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
156 158
157 fmr_op_release_mr(mw); 159 fmr_op_release_mr(mr);
158} 160}
159 161
160static int 162static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
180 */ 182 */
181static struct rpcrdma_mr_seg * 183static struct rpcrdma_mr_seg *
182fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 184fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
183 int nsegs, bool writing, struct rpcrdma_mw **out) 185 int nsegs, bool writing, struct rpcrdma_mr **out)
184{ 186{
185 struct rpcrdma_mr_seg *seg1 = seg; 187 struct rpcrdma_mr_seg *seg1 = seg;
186 int len, pageoff, i, rc; 188 int len, pageoff, i, rc;
187 struct rpcrdma_mw *mw; 189 struct rpcrdma_mr *mr;
188 u64 *dma_pages; 190 u64 *dma_pages;
189 191
190 mw = rpcrdma_get_mw(r_xprt); 192 mr = rpcrdma_mr_get(r_xprt);
191 if (!mw) 193 if (!mr)
192 return ERR_PTR(-ENOBUFS); 194 return ERR_PTR(-ENOBUFS);
193 195
194 pageoff = offset_in_page(seg1->mr_offset); 196 pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
199 nsegs = RPCRDMA_MAX_FMR_SGES; 201 nsegs = RPCRDMA_MAX_FMR_SGES;
200 for (i = 0; i < nsegs;) { 202 for (i = 0; i < nsegs;) {
201 if (seg->mr_page) 203 if (seg->mr_page)
202 sg_set_page(&mw->mw_sg[i], 204 sg_set_page(&mr->mr_sg[i],
203 seg->mr_page, 205 seg->mr_page,
204 seg->mr_len, 206 seg->mr_len,
205 offset_in_page(seg->mr_offset)); 207 offset_in_page(seg->mr_offset));
206 else 208 else
207 sg_set_buf(&mw->mw_sg[i], seg->mr_offset, 209 sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
208 seg->mr_len); 210 seg->mr_len);
209 len += seg->mr_len; 211 len += seg->mr_len;
210 ++seg; 212 ++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
214 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 216 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
215 break; 217 break;
216 } 218 }
217 mw->mw_dir = rpcrdma_data_dir(writing); 219 mr->mr_dir = rpcrdma_data_dir(writing);
218 220
219 mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, 221 mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
220 mw->mw_sg, i, mw->mw_dir); 222 mr->mr_sg, i, mr->mr_dir);
221 if (!mw->mw_nents) 223 if (!mr->mr_nents)
222 goto out_dmamap_err; 224 goto out_dmamap_err;
223 225
224 for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) 226 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
225 dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); 227 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
226 rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, 228 rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
227 dma_pages[0]); 229 dma_pages[0]);
228 if (rc) 230 if (rc)
229 goto out_maperr; 231 goto out_maperr;
230 232
231 mw->mw_handle = mw->fmr.fm_mr->rkey; 233 mr->mr_handle = mr->fmr.fm_mr->rkey;
232 mw->mw_length = len; 234 mr->mr_length = len;
233 mw->mw_offset = dma_pages[0] + pageoff; 235 mr->mr_offset = dma_pages[0] + pageoff;
234 236
235 *out = mw; 237 *out = mr;
236 return seg; 238 return seg;
237 239
238out_dmamap_err: 240out_dmamap_err:
239 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", 241 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
240 mw->mw_sg, i); 242 mr->mr_sg, i);
241 rpcrdma_put_mw(r_xprt, mw); 243 rpcrdma_mr_put(mr);
242 return ERR_PTR(-EIO); 244 return ERR_PTR(-EIO);
243 245
244out_maperr: 246out_maperr:
245 pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", 247 pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
246 len, (unsigned long long)dma_pages[0], 248 len, (unsigned long long)dma_pages[0],
247 pageoff, mw->mw_nents, rc); 249 pageoff, mr->mr_nents, rc);
248 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, 250 rpcrdma_mr_unmap_and_put(mr);
249 mw->mw_sg, mw->mw_nents, mw->mw_dir);
250 rpcrdma_put_mw(r_xprt, mw);
251 return ERR_PTR(-EIO); 251 return ERR_PTR(-EIO);
252} 252}
253 253
@@ -256,13 +256,13 @@ out_maperr:
256 * Sleeps until it is safe for the host CPU to access the 256 * Sleeps until it is safe for the host CPU to access the
257 * previously mapped memory regions. 257 * previously mapped memory regions.
258 * 258 *
259 * Caller ensures that @mws is not empty before the call. This 259 * Caller ensures that @mrs is not empty before the call. This
260 * function empties the list. 260 * function empties the list.
261 */ 261 */
262static void 262static void
263fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) 263fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
264{ 264{
265 struct rpcrdma_mw *mw; 265 struct rpcrdma_mr *mr;
266 LIST_HEAD(unmap_list); 266 LIST_HEAD(unmap_list);
267 int rc; 267 int rc;
268 268
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
271 * ib_unmap_fmr() is slow, so use a single call instead 271 * ib_unmap_fmr() is slow, so use a single call instead
272 * of one call per mapped FMR. 272 * of one call per mapped FMR.
273 */ 273 */
274 list_for_each_entry(mw, mws, mw_list) { 274 list_for_each_entry(mr, mrs, mr_list) {
275 dprintk("RPC: %s: unmapping fmr %p\n", 275 dprintk("RPC: %s: unmapping fmr %p\n",
276 __func__, &mw->fmr); 276 __func__, &mr->fmr);
277 list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); 277 trace_xprtrdma_localinv(mr);
278 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
278 } 279 }
279 r_xprt->rx_stats.local_inv_needed++; 280 r_xprt->rx_stats.local_inv_needed++;
280 rc = ib_unmap_fmr(&unmap_list); 281 rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
284 /* ORDER: Now DMA unmap all of the req's MRs, and return 285 /* ORDER: Now DMA unmap all of the req's MRs, and return
285 * them to the free MW list. 286 * them to the free MW list.
286 */ 287 */
287 while (!list_empty(mws)) { 288 while (!list_empty(mrs)) {
288 mw = rpcrdma_pop_mw(mws); 289 mr = rpcrdma_mr_pop(mrs);
289 dprintk("RPC: %s: DMA unmapping fmr %p\n", 290 list_del(&mr->fmr.fm_mr->list);
290 __func__, &mw->fmr); 291 rpcrdma_mr_unmap_and_put(mr);
291 list_del(&mw->fmr.fm_mr->list);
292 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
293 mw->mw_sg, mw->mw_nents, mw->mw_dir);
294 rpcrdma_put_mw(r_xprt, mw);
295 } 292 }
296 293
297 return; 294 return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
299out_reset: 296out_reset:
300 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); 297 pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
301 298
302 while (!list_empty(mws)) { 299 while (!list_empty(mrs)) {
303 mw = rpcrdma_pop_mw(mws); 300 mr = rpcrdma_mr_pop(mrs);
304 list_del(&mw->fmr.fm_mr->list); 301 list_del(&mr->fmr.fm_mr->list);
305 fmr_op_recover_mr(mw); 302 fmr_op_recover_mr(mr);
306 } 303 }
307} 304}
308 305
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2015 Oracle. All rights reserved. 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
5 */ 5 */
6 6
7/* Lightweight memory registration using Fast Registration Work 7/* Lightweight memory registration using Fast Registration Work
8 * Requests (FRWR). Also referred to sometimes as FRMR mode. 8 * Requests (FRWR).
9 * 9 *
10 * FRWR features ordered asynchronous registration and deregistration 10 * FRWR features ordered asynchronous registration and deregistration
11 * of arbitrarily sized memory regions. This is the fastest and safest 11 * of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
15/* Normal operation 15/* Normal operation
16 * 16 *
17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG 17 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
18 * Work Request (frmr_op_map). When the RDMA operation is finished, this 18 * Work Request (frwr_op_map). When the RDMA operation is finished, this
19 * Memory Region is invalidated using a LOCAL_INV Work Request 19 * Memory Region is invalidated using a LOCAL_INV Work Request
20 * (frmr_op_unmap). 20 * (frwr_op_unmap_sync).
21 * 21 *
22 * Typically these Work Requests are not signaled, and neither are RDMA 22 * Typically these Work Requests are not signaled, and neither are RDMA
23 * SEND Work Requests (with the exception of signaling occasionally to 23 * SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
26 * 26 *
27 * As an optimization, frwr_op_unmap marks MRs INVALID before the 27 * As an optimization, frwr_op_unmap marks MRs INVALID before the
28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on 28 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
29 * rb_mws immediately so that no work (like managing a linked list 29 * rb_mrs immediately so that no work (like managing a linked list
30 * under a spinlock) is needed in the completion upcall. 30 * under a spinlock) is needed in the completion upcall.
31 * 31 *
32 * But this means that frwr_op_map() can occasionally encounter an MR 32 * But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
60 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered 60 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
61 * with ib_dereg_mr and then are re-initialized. Because MR recovery 61 * with ib_dereg_mr and then are re-initialized. Because MR recovery
62 * allocates fresh resources, it is deferred to a workqueue, and the 62 * allocates fresh resources, it is deferred to a workqueue, and the
63 * recovered MRs are placed back on the rb_mws list when recovery is 63 * recovered MRs are placed back on the rb_mrs list when recovery is
64 * complete. frwr_op_map allocates another MR for the current RPC while 64 * complete. frwr_op_map allocates another MR for the current RPC while
65 * the broken MR is reset. 65 * the broken MR is reset.
66 * 66 *
@@ -96,26 +96,26 @@ out_not_supported:
96} 96}
97 97
98static int 98static int
99frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) 99frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
100{ 100{
101 unsigned int depth = ia->ri_max_frmr_depth; 101 unsigned int depth = ia->ri_max_frwr_depth;
102 struct rpcrdma_frmr *f = &r->frmr; 102 struct rpcrdma_frwr *frwr = &mr->frwr;
103 int rc; 103 int rc;
104 104
105 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); 105 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
106 if (IS_ERR(f->fr_mr)) 106 if (IS_ERR(frwr->fr_mr))
107 goto out_mr_err; 107 goto out_mr_err;
108 108
109 r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); 109 mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
110 if (!r->mw_sg) 110 if (!mr->mr_sg)
111 goto out_list_err; 111 goto out_list_err;
112 112
113 sg_init_table(r->mw_sg, depth); 113 sg_init_table(mr->mr_sg, depth);
114 init_completion(&f->fr_linv_done); 114 init_completion(&frwr->fr_linv_done);
115 return 0; 115 return 0;
116 116
117out_mr_err: 117out_mr_err:
118 rc = PTR_ERR(f->fr_mr); 118 rc = PTR_ERR(frwr->fr_mr);
119 dprintk("RPC: %s: ib_alloc_mr status %i\n", 119 dprintk("RPC: %s: ib_alloc_mr status %i\n",
120 __func__, rc); 120 __func__, rc);
121 return rc; 121 return rc;
@@ -124,83 +124,85 @@ out_list_err:
124 rc = -ENOMEM; 124 rc = -ENOMEM;
125 dprintk("RPC: %s: sg allocation failure\n", 125 dprintk("RPC: %s: sg allocation failure\n",
126 __func__); 126 __func__);
127 ib_dereg_mr(f->fr_mr); 127 ib_dereg_mr(frwr->fr_mr);
128 return rc; 128 return rc;
129} 129}
130 130
131static void 131static void
132frwr_op_release_mr(struct rpcrdma_mw *r) 132frwr_op_release_mr(struct rpcrdma_mr *mr)
133{ 133{
134 int rc; 134 int rc;
135 135
136 /* Ensure MW is not on any rl_registered list */ 136 /* Ensure MR is not on any rl_registered list */
137 if (!list_empty(&r->mw_list)) 137 if (!list_empty(&mr->mr_list))
138 list_del(&r->mw_list); 138 list_del(&mr->mr_list);
139 139
140 rc = ib_dereg_mr(r->frmr.fr_mr); 140 rc = ib_dereg_mr(mr->frwr.fr_mr);
141 if (rc) 141 if (rc)
142 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", 142 pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
143 r, rc); 143 mr, rc);
144 kfree(r->mw_sg); 144 kfree(mr->mr_sg);
145 kfree(r); 145 kfree(mr);
146} 146}
147 147
148static int 148static int
149__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) 149__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
150{ 150{
151 struct rpcrdma_frmr *f = &r->frmr; 151 struct rpcrdma_frwr *frwr = &mr->frwr;
152 int rc; 152 int rc;
153 153
154 rc = ib_dereg_mr(f->fr_mr); 154 rc = ib_dereg_mr(frwr->fr_mr);
155 if (rc) { 155 if (rc) {
156 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", 156 pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
157 rc, r); 157 rc, mr);
158 return rc; 158 return rc;
159 } 159 }
160 160
161 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, 161 frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
162 ia->ri_max_frmr_depth); 162 ia->ri_max_frwr_depth);
163 if (IS_ERR(f->fr_mr)) { 163 if (IS_ERR(frwr->fr_mr)) {
164 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", 164 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
165 PTR_ERR(f->fr_mr), r); 165 PTR_ERR(frwr->fr_mr), mr);
166 return PTR_ERR(f->fr_mr); 166 return PTR_ERR(frwr->fr_mr);
167 } 167 }
168 168
169 dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); 169 dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
170 f->fr_state = FRMR_IS_INVALID; 170 frwr->fr_state = FRWR_IS_INVALID;
171 return 0; 171 return 0;
172} 172}
173 173
174/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. 174/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
175 */ 175 */
176static void 176static void
177frwr_op_recover_mr(struct rpcrdma_mw *mw) 177frwr_op_recover_mr(struct rpcrdma_mr *mr)
178{ 178{
179 enum rpcrdma_frmr_state state = mw->frmr.fr_state; 179 enum rpcrdma_frwr_state state = mr->frwr.fr_state;
180 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 180 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
181 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 181 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
182 int rc; 182 int rc;
183 183
184 rc = __frwr_reset_mr(ia, mw); 184 rc = __frwr_mr_reset(ia, mr);
185 if (state != FRMR_FLUSHED_LI) 185 if (state != FRWR_FLUSHED_LI) {
186 trace_xprtrdma_dma_unmap(mr);
186 ib_dma_unmap_sg(ia->ri_device, 187 ib_dma_unmap_sg(ia->ri_device,
187 mw->mw_sg, mw->mw_nents, mw->mw_dir); 188 mr->mr_sg, mr->mr_nents, mr->mr_dir);
189 }
188 if (rc) 190 if (rc)
189 goto out_release; 191 goto out_release;
190 192
191 rpcrdma_put_mw(r_xprt, mw); 193 rpcrdma_mr_put(mr);
192 r_xprt->rx_stats.mrs_recovered++; 194 r_xprt->rx_stats.mrs_recovered++;
193 return; 195 return;
194 196
195out_release: 197out_release:
196 pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); 198 pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
197 r_xprt->rx_stats.mrs_orphaned++; 199 r_xprt->rx_stats.mrs_orphaned++;
198 200
199 spin_lock(&r_xprt->rx_buf.rb_mwlock); 201 spin_lock(&r_xprt->rx_buf.rb_mrlock);
200 list_del(&mw->mw_all); 202 list_del(&mr->mr_all);
201 spin_unlock(&r_xprt->rx_buf.rb_mwlock); 203 spin_unlock(&r_xprt->rx_buf.rb_mrlock);
202 204
203 frwr_op_release_mr(mw); 205 frwr_op_release_mr(mr);
204} 206}
205 207
206static int 208static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
214 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 216 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
215 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; 217 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
216 218
217 ia->ri_max_frmr_depth = 219 ia->ri_max_frwr_depth =
218 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 220 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
219 attrs->max_fast_reg_page_list_len); 221 attrs->max_fast_reg_page_list_len);
220 dprintk("RPC: %s: device's max FR page list len = %u\n", 222 dprintk("RPC: %s: device's max FR page list len = %u\n",
221 __func__, ia->ri_max_frmr_depth); 223 __func__, ia->ri_max_frwr_depth);
222 224
223 /* Add room for frmr register and invalidate WRs. 225 /* Add room for frwr register and invalidate WRs.
224 * 1. FRMR reg WR for head 226 * 1. FRWR reg WR for head
225 * 2. FRMR invalidate WR for head 227 * 2. FRWR invalidate WR for head
226 * 3. N FRMR reg WRs for pagelist 228 * 3. N FRWR reg WRs for pagelist
227 * 4. N FRMR invalidate WRs for pagelist 229 * 4. N FRWR invalidate WRs for pagelist
228 * 5. FRMR reg WR for tail 230 * 5. FRWR reg WR for tail
229 * 6. FRMR invalidate WR for tail 231 * 6. FRWR invalidate WR for tail
230 * 7. The RDMA_SEND WR 232 * 7. The RDMA_SEND WR
231 */ 233 */
232 depth = 7; 234 depth = 7;
233 235
234 /* Calculate N if the device max FRMR depth is smaller than 236 /* Calculate N if the device max FRWR depth is smaller than
235 * RPCRDMA_MAX_DATA_SEGS. 237 * RPCRDMA_MAX_DATA_SEGS.
236 */ 238 */
237 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { 239 if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
238 delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; 240 delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
239 do { 241 do {
240 depth += 2; /* FRMR reg + invalidate */ 242 depth += 2; /* FRWR reg + invalidate */
241 delta -= ia->ri_max_frmr_depth; 243 delta -= ia->ri_max_frwr_depth;
242 } while (delta > 0); 244 } while (delta > 0);
243 } 245 }
244 246
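The hunk above sizes the send queue: seven base Work Requests, plus one extra FRWR registration and one extra invalidation for each additional chunk needed when the device's fast-register depth is smaller than RPCRDMA_MAX_DATA_SEGS. A standalone C check of that arithmetic; the 64 used here for RPCRDMA_MAX_DATA_SEGS is only an example value, the real constant is defined in xprt_rdma.h.

#include <stdio.h>

/* Example value only; the real constant lives in xprt_rdma.h. */
#define EXAMPLE_MAX_DATA_SEGS 64

/* Mirrors the depth calculation in frwr_op_open(). */
static unsigned int send_queue_depth(unsigned int max_frwr_depth)
{
	unsigned int depth = 7;	/* head/tail/pagelist reg + invalidate, plus the SEND */
	int delta;

	if (max_frwr_depth < EXAMPLE_MAX_DATA_SEGS) {
		delta = EXAMPLE_MAX_DATA_SEGS - max_frwr_depth;
		do {
			depth += 2;	/* one more FRWR reg + invalidate pair */
			delta -= (int)max_frwr_depth;
		} while (delta > 0);
	}
	return depth;
}

int main(void)
{
	unsigned int d;

	for (d = 8; d <= 64; d *= 2)
		printf("max_frwr_depth = %2u  ->  send queue depth = %u\n",
		       d, send_queue_depth(d));
	return 0;
}
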
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
252 } 254 }
253 255
254 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 256 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
255 ia->ri_max_frmr_depth); 257 ia->ri_max_frwr_depth);
256 return 0; 258 return 0;
257} 259}
258 260
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
265 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 267 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
266 268
267 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 269 return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
268 RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); 270 RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
269} 271}
270 272
271static void 273static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
286static void 288static void
287frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) 289frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
288{ 290{
289 struct rpcrdma_frmr *frmr; 291 struct ib_cqe *cqe = wc->wr_cqe;
290 struct ib_cqe *cqe; 292 struct rpcrdma_frwr *frwr =
293 container_of(cqe, struct rpcrdma_frwr, fr_cqe);
291 294
292 /* WARNING: Only wr_cqe and status are reliable at this point */ 295 /* WARNING: Only wr_cqe and status are reliable at this point */
293 if (wc->status != IB_WC_SUCCESS) { 296 if (wc->status != IB_WC_SUCCESS) {
294 cqe = wc->wr_cqe; 297 frwr->fr_state = FRWR_FLUSHED_FR;
295 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
296 frmr->fr_state = FRMR_FLUSHED_FR;
297 __frwr_sendcompletion_flush(wc, "fastreg"); 298 __frwr_sendcompletion_flush(wc, "fastreg");
298 } 299 }
300 trace_xprtrdma_wc_fastreg(wc, frwr);
299} 301}
300 302
301/** 303/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
307static void 309static void
308frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) 310frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
309{ 311{
310 struct rpcrdma_frmr *frmr; 312 struct ib_cqe *cqe = wc->wr_cqe;
311 struct ib_cqe *cqe; 313 struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
314 fr_cqe);
312 315
313 /* WARNING: Only wr_cqe and status are reliable at this point */ 316 /* WARNING: Only wr_cqe and status are reliable at this point */
314 if (wc->status != IB_WC_SUCCESS) { 317 if (wc->status != IB_WC_SUCCESS) {
315 cqe = wc->wr_cqe; 318 frwr->fr_state = FRWR_FLUSHED_LI;
316 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
317 frmr->fr_state = FRMR_FLUSHED_LI;
318 __frwr_sendcompletion_flush(wc, "localinv"); 319 __frwr_sendcompletion_flush(wc, "localinv");
319 } 320 }
321 trace_xprtrdma_wc_li(wc, frwr);
320} 322}
321 323
322/** 324/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
329static void 331static void
330frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) 332frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
331{ 333{
332 struct rpcrdma_frmr *frmr; 334 struct ib_cqe *cqe = wc->wr_cqe;
333 struct ib_cqe *cqe; 335 struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
336 fr_cqe);
334 337
335 /* WARNING: Only wr_cqe and status are reliable at this point */ 338 /* WARNING: Only wr_cqe and status are reliable at this point */
336 cqe = wc->wr_cqe;
337 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
338 if (wc->status != IB_WC_SUCCESS) { 339 if (wc->status != IB_WC_SUCCESS) {
339 frmr->fr_state = FRMR_FLUSHED_LI; 340 frwr->fr_state = FRWR_FLUSHED_LI;
340 __frwr_sendcompletion_flush(wc, "localinv"); 341 __frwr_sendcompletion_flush(wc, "localinv");
341 } 342 }
342 complete(&frmr->fr_linv_done); 343 complete(&frwr->fr_linv_done);
344 trace_xprtrdma_wc_li_wake(wc, frwr);
343} 345}
344 346
345/* Post a REG_MR Work Request to register a memory region 347/* Post a REG_MR Work Request to register a memory region
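The three completion handlers above now recover the rpcrdma_frwr straight from wc->wr_cqe with container_of(), rather than only inside the error branch. The idiom works because fr_cqe is embedded in the frwr structure, so subtracting the member offset from the cqe pointer yields the containing object. A runnable userspace illustration with simplified stand-in types (fake_cqe, fake_frwr):

#include <stdio.h>
#include <stddef.h>

/* Userspace stand-in for the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_cqe {
	void (*done)(struct fake_cqe *cqe);
};

struct fake_frwr {
	int		fr_state;
	struct fake_cqe	fr_cqe;		/* embedded, like frwr->fr_cqe */
};

static void fastreg_done(struct fake_cqe *cqe)
{
	struct fake_frwr *frwr = container_of(cqe, struct fake_frwr, fr_cqe);

	printf("completion for frwr %p, state %d\n",
	       (void *)frwr, frwr->fr_state);
}

int main(void)
{
	struct fake_frwr frwr = { .fr_state = 42 };

	frwr.fr_cqe.done = fastreg_done;
	/* The CQ handler only ever sees the embedded cqe pointer: */
	frwr.fr_cqe.done(&frwr.fr_cqe);
	return 0;
}

The same pattern is used elsewhere in this series wherever a work completion must find its owning context without a lookup table, for example rpcrdma_wc_send() in verbs.c.
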
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
347 */ 349 */
348static struct rpcrdma_mr_seg * 350static struct rpcrdma_mr_seg *
349frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 351frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
350 int nsegs, bool writing, struct rpcrdma_mw **out) 352 int nsegs, bool writing, struct rpcrdma_mr **out)
351{ 353{
352 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 354 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
353 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; 355 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
354 struct rpcrdma_mw *mw; 356 struct rpcrdma_frwr *frwr;
355 struct rpcrdma_frmr *frmr; 357 struct rpcrdma_mr *mr;
356 struct ib_mr *mr; 358 struct ib_mr *ibmr;
357 struct ib_reg_wr *reg_wr; 359 struct ib_reg_wr *reg_wr;
358 struct ib_send_wr *bad_wr; 360 struct ib_send_wr *bad_wr;
359 int rc, i, n; 361 int rc, i, n;
360 u8 key; 362 u8 key;
361 363
362 mw = NULL; 364 mr = NULL;
363 do { 365 do {
364 if (mw) 366 if (mr)
365 rpcrdma_defer_mr_recovery(mw); 367 rpcrdma_mr_defer_recovery(mr);
366 mw = rpcrdma_get_mw(r_xprt); 368 mr = rpcrdma_mr_get(r_xprt);
367 if (!mw) 369 if (!mr)
368 return ERR_PTR(-ENOBUFS); 370 return ERR_PTR(-ENOBUFS);
369 } while (mw->frmr.fr_state != FRMR_IS_INVALID); 371 } while (mr->frwr.fr_state != FRWR_IS_INVALID);
370 frmr = &mw->frmr; 372 frwr = &mr->frwr;
371 frmr->fr_state = FRMR_IS_VALID; 373 frwr->fr_state = FRWR_IS_VALID;
372 mr = frmr->fr_mr; 374
373 reg_wr = &frmr->fr_regwr; 375 if (nsegs > ia->ri_max_frwr_depth)
374 376 nsegs = ia->ri_max_frwr_depth;
375 if (nsegs > ia->ri_max_frmr_depth)
376 nsegs = ia->ri_max_frmr_depth;
377 for (i = 0; i < nsegs;) { 377 for (i = 0; i < nsegs;) {
378 if (seg->mr_page) 378 if (seg->mr_page)
379 sg_set_page(&mw->mw_sg[i], 379 sg_set_page(&mr->mr_sg[i],
380 seg->mr_page, 380 seg->mr_page,
381 seg->mr_len, 381 seg->mr_len,
382 offset_in_page(seg->mr_offset)); 382 offset_in_page(seg->mr_offset));
383 else 383 else
384 sg_set_buf(&mw->mw_sg[i], seg->mr_offset, 384 sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
385 seg->mr_len); 385 seg->mr_len);
386 386
387 ++seg; 387 ++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
392 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 392 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
393 break; 393 break;
394 } 394 }
395 mw->mw_dir = rpcrdma_data_dir(writing); 395 mr->mr_dir = rpcrdma_data_dir(writing);
396 396
397 mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); 397 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
398 if (!mw->mw_nents) 398 if (!mr->mr_nents)
399 goto out_dmamap_err; 399 goto out_dmamap_err;
400 400
401 n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); 401 ibmr = frwr->fr_mr;
402 if (unlikely(n != mw->mw_nents)) 402 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
403 if (unlikely(n != mr->mr_nents))
403 goto out_mapmr_err; 404 goto out_mapmr_err;
404 405
405 dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", 406 key = (u8)(ibmr->rkey & 0x000000FF);
406 __func__, frmr, mw->mw_nents, mr->length); 407 ib_update_fast_reg_key(ibmr, ++key);
407
408 key = (u8)(mr->rkey & 0x000000FF);
409 ib_update_fast_reg_key(mr, ++key);
410 408
409 reg_wr = &frwr->fr_regwr;
411 reg_wr->wr.next = NULL; 410 reg_wr->wr.next = NULL;
412 reg_wr->wr.opcode = IB_WR_REG_MR; 411 reg_wr->wr.opcode = IB_WR_REG_MR;
413 frmr->fr_cqe.done = frwr_wc_fastreg; 412 frwr->fr_cqe.done = frwr_wc_fastreg;
414 reg_wr->wr.wr_cqe = &frmr->fr_cqe; 413 reg_wr->wr.wr_cqe = &frwr->fr_cqe;
415 reg_wr->wr.num_sge = 0; 414 reg_wr->wr.num_sge = 0;
416 reg_wr->wr.send_flags = 0; 415 reg_wr->wr.send_flags = 0;
417 reg_wr->mr = mr; 416 reg_wr->mr = ibmr;
418 reg_wr->key = mr->rkey; 417 reg_wr->key = ibmr->rkey;
419 reg_wr->access = writing ? 418 reg_wr->access = writing ?
420 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 419 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
421 IB_ACCESS_REMOTE_READ; 420 IB_ACCESS_REMOTE_READ;
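The hunk above keeps the rkey-refresh step: ib_update_fast_reg_key() replaces only the low-order byte of the MR's key, so frwr_op_map() extracts that byte, increments it, and hands it back before posting the REG_MR WR. The byte arithmetic in isolation, as a small C demo (the rkey value below is made up):

#include <stdio.h>
#include <stdint.h>

/* Userspace model of what ib_update_fast_reg_key() does to the rkey:
 * keep the upper 24 bits, replace the low-order byte with newkey. */
static uint32_t update_fast_reg_key(uint32_t rkey, uint8_t newkey)
{
	return (rkey & 0xFFFFFF00u) | newkey;
}

int main(void)
{
	uint32_t rkey = 0x12AB34FF;		/* made-up example rkey */
	uint8_t key = (uint8_t)(rkey & 0x000000FF);

	rkey = update_fast_reg_key(rkey, ++key);	/* 0xFF wraps to 0x00 */
	printf("new rkey: 0x%08X\n", rkey);		/* prints 0x12AB3400 */
	return 0;
}
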
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
424 if (rc) 423 if (rc)
425 goto out_senderr; 424 goto out_senderr;
426 425
427 mw->mw_handle = mr->rkey; 426 mr->mr_handle = ibmr->rkey;
428 mw->mw_length = mr->length; 427 mr->mr_length = ibmr->length;
429 mw->mw_offset = mr->iova; 428 mr->mr_offset = ibmr->iova;
430 429
431 *out = mw; 430 *out = mr;
432 return seg; 431 return seg;
433 432
434out_dmamap_err: 433out_dmamap_err:
435 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", 434 pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
436 mw->mw_sg, i); 435 mr->mr_sg, i);
437 frmr->fr_state = FRMR_IS_INVALID; 436 frwr->fr_state = FRWR_IS_INVALID;
438 rpcrdma_put_mw(r_xprt, mw); 437 rpcrdma_mr_put(mr);
439 return ERR_PTR(-EIO); 438 return ERR_PTR(-EIO);
440 439
441out_mapmr_err: 440out_mapmr_err:
442 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", 441 pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
443 frmr->fr_mr, n, mw->mw_nents); 442 frwr->fr_mr, n, mr->mr_nents);
444 rpcrdma_defer_mr_recovery(mw); 443 rpcrdma_mr_defer_recovery(mr);
445 return ERR_PTR(-EIO); 444 return ERR_PTR(-EIO);
446 445
447out_senderr: 446out_senderr:
448 pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); 447 pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
449 rpcrdma_defer_mr_recovery(mw); 448 rpcrdma_mr_defer_recovery(mr);
450 return ERR_PTR(-ENOTCONN); 449 return ERR_PTR(-ENOTCONN);
451} 450}
452 451
452/* Handle a remotely invalidated mr on the @mrs list
453 */
454static void
455frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
456{
457 struct rpcrdma_mr *mr;
458
459 list_for_each_entry(mr, mrs, mr_list)
460 if (mr->mr_handle == rep->rr_inv_rkey) {
461 list_del(&mr->mr_list);
462 trace_xprtrdma_remoteinv(mr);
463 mr->frwr.fr_state = FRWR_IS_INVALID;
464 rpcrdma_mr_unmap_and_put(mr);
465 break; /* only one invalidated MR per RPC */
466 }
467}
468
453/* Invalidate all memory regions that were registered for "req". 469/* Invalidate all memory regions that were registered for "req".
454 * 470 *
455 * Sleeps until it is safe for the host CPU to access the 471 * Sleeps until it is safe for the host CPU to access the
456 * previously mapped memory regions. 472 * previously mapped memory regions.
457 * 473 *
458 * Caller ensures that @mws is not empty before the call. This 474 * Caller ensures that @mrs is not empty before the call. This
459 * function empties the list. 475 * function empties the list.
460 */ 476 */
461static void 477static void
462frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) 478frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
463{ 479{
464 struct ib_send_wr *first, **prev, *last, *bad_wr; 480 struct ib_send_wr *first, **prev, *last, *bad_wr;
465 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 481 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
466 struct rpcrdma_frmr *f; 482 struct rpcrdma_frwr *frwr;
467 struct rpcrdma_mw *mw; 483 struct rpcrdma_mr *mr;
468 int count, rc; 484 int count, rc;
469 485
470 /* ORDER: Invalidate all of the MRs first 486 /* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
472 * Chain the LOCAL_INV Work Requests and post them with 488 * Chain the LOCAL_INV Work Requests and post them with
473 * a single ib_post_send() call. 489 * a single ib_post_send() call.
474 */ 490 */
475 f = NULL; 491 frwr = NULL;
476 count = 0; 492 count = 0;
477 prev = &first; 493 prev = &first;
478 list_for_each_entry(mw, mws, mw_list) { 494 list_for_each_entry(mr, mrs, mr_list) {
479 mw->frmr.fr_state = FRMR_IS_INVALID; 495 mr->frwr.fr_state = FRWR_IS_INVALID;
480 496
481 if (mw->mw_flags & RPCRDMA_MW_F_RI) 497 frwr = &mr->frwr;
482 continue; 498 trace_xprtrdma_localinv(mr);
483 499
484 f = &mw->frmr; 500 frwr->fr_cqe.done = frwr_wc_localinv;
485 dprintk("RPC: %s: invalidating frmr %p\n", 501 last = &frwr->fr_invwr;
486 __func__, f);
487
488 f->fr_cqe.done = frwr_wc_localinv;
489 last = &f->fr_invwr;
490 memset(last, 0, sizeof(*last)); 502 memset(last, 0, sizeof(*last));
491 last->wr_cqe = &f->fr_cqe; 503 last->wr_cqe = &frwr->fr_cqe;
492 last->opcode = IB_WR_LOCAL_INV; 504 last->opcode = IB_WR_LOCAL_INV;
493 last->ex.invalidate_rkey = mw->mw_handle; 505 last->ex.invalidate_rkey = mr->mr_handle;
494 count++; 506 count++;
495 507
496 *prev = last; 508 *prev = last;
497 prev = &last->next; 509 prev = &last->next;
498 } 510 }
499 if (!f) 511 if (!frwr)
500 goto unmap; 512 goto unmap;
501 513
502 /* Strong send queue ordering guarantees that when the 514 /* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
504 * are complete. 516 * are complete.
505 */ 517 */
506 last->send_flags = IB_SEND_SIGNALED; 518 last->send_flags = IB_SEND_SIGNALED;
507 f->fr_cqe.done = frwr_wc_localinv_wake; 519 frwr->fr_cqe.done = frwr_wc_localinv_wake;
508 reinit_completion(&f->fr_linv_done); 520 reinit_completion(&frwr->fr_linv_done);
509 521
510 /* Transport disconnect drains the receive CQ before it 522 /* Transport disconnect drains the receive CQ before it
511 * replaces the QP. The RPC reply handler won't call us 523 * replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
515 bad_wr = NULL; 527 bad_wr = NULL;
516 rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); 528 rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
517 if (bad_wr != first) 529 if (bad_wr != first)
518 wait_for_completion(&f->fr_linv_done); 530 wait_for_completion(&frwr->fr_linv_done);
519 if (rc) 531 if (rc)
520 goto reset_mrs; 532 goto reset_mrs;
521 533
522 /* ORDER: Now DMA unmap all of the MRs, and return 534 /* ORDER: Now DMA unmap all of the MRs, and return
523 * them to the free MW list. 535 * them to the free MR list.
524 */ 536 */
525unmap: 537unmap:
526 while (!list_empty(mws)) { 538 while (!list_empty(mrs)) {
527 mw = rpcrdma_pop_mw(mws); 539 mr = rpcrdma_mr_pop(mrs);
528 dprintk("RPC: %s: DMA unmapping frmr %p\n", 540 rpcrdma_mr_unmap_and_put(mr);
529 __func__, &mw->frmr);
530 ib_dma_unmap_sg(ia->ri_device,
531 mw->mw_sg, mw->mw_nents, mw->mw_dir);
532 rpcrdma_put_mw(r_xprt, mw);
533 } 541 }
534 return; 542 return;
535 543
536reset_mrs: 544reset_mrs:
537 pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); 545 pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
538 546
539 /* Find and reset the MRs in the LOCAL_INV WRs that did not 547 /* Find and reset the MRs in the LOCAL_INV WRs that did not
540 * get posted. 548 * get posted.
541 */ 549 */
542 while (bad_wr) { 550 while (bad_wr) {
543 f = container_of(bad_wr, struct rpcrdma_frmr, 551 frwr = container_of(bad_wr, struct rpcrdma_frwr,
544 fr_invwr); 552 fr_invwr);
545 mw = container_of(f, struct rpcrdma_mw, frmr); 553 mr = container_of(frwr, struct rpcrdma_mr, frwr);
546 554
547 __frwr_reset_mr(ia, mw); 555 __frwr_mr_reset(ia, mr);
548 556
549 bad_wr = bad_wr->next; 557 bad_wr = bad_wr->next;
550 } 558 }
@@ -553,6 +561,7 @@ reset_mrs:
553 561
554const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 562const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
555 .ro_map = frwr_op_map, 563 .ro_map = frwr_op_map,
564 .ro_reminv = frwr_op_reminv,
556 .ro_unmap_sync = frwr_op_unmap_sync, 565 .ro_unmap_sync = frwr_op_unmap_sync,
557 .ro_recover_mr = frwr_op_recover_mr, 566 .ro_recover_mr = frwr_op_recover_mr,
558 .ro_open = frwr_op_open, 567 .ro_open = frwr_op_open,
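frwr_op_unmap_sync() above chains one LOCAL_INV Work Request per MR using the pointer-to-previous-next-field idiom (prev starts at &first and is advanced to &last->next each pass), hands the whole chain to a single ib_post_send(), and signals only the final WR. The chaining idiom on its own, in runnable userspace C with a trivial node type standing in for struct ib_send_wr:

#include <stdio.h>
#include <stddef.h>

/* Trivial stand-in for struct ib_send_wr */
struct wr {
	int		rkey;		/* models ex.invalidate_rkey */
	int		signaled;	/* models IB_SEND_SIGNALED */
	struct wr	*next;
};

int main(void)
{
	struct wr pool[3] = { { .rkey = 101 }, { .rkey = 202 }, { .rkey = 303 } };
	struct wr *first, **prev, *last = NULL;
	int i;

	/* Build the chain the same way frwr_op_unmap_sync() does. */
	prev = &first;
	for (i = 0; i < 3; i++) {
		last = &pool[i];
		last->next = NULL;
		*prev = last;		/* link onto the previous WR (or set first) */
		prev = &last->next;
	}
	last->signaled = 1;		/* only the final WR generates a completion */

	/* "ib_post_send(first)": walk the chain once. */
	for (struct wr *wr = first; wr; wr = wr->next)
		printf("LOCAL_INV rkey=%d signaled=%d\n", wr->rkey, wr->signaled);
	return 0;
}
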
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
1/* 1/*
2 * Copyright (c) 2015 Oracle. All rights reserved. 2 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
3 */ 3 */
4 4
5/* rpcrdma.ko module initialization 5/* rpcrdma.ko module initialization
6 */ 6 */
7 7
8#include <linux/types.h>
9#include <linux/compiler.h>
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/init.h> 11#include <linux/init.h>
10#include <linux/sunrpc/svc_rdma.h> 12#include <linux/sunrpc/svc_rdma.h>
11#include "xprt_rdma.h"
12 13
13#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 14#include <asm/swab.h>
14# define RPCDBG_FACILITY RPCDBG_TRANS 15
15#endif 16#define CREATE_TRACE_POINTS
17#include "xprt_rdma.h"
16 18
17MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); 19MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
18MODULE_DESCRIPTION("RPC/RDMA Transport"); 20MODULE_DESCRIPTION("RPC/RDMA Transport");
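Defining CREATE_TRACE_POINTS before including xprt_rdma.h is the standard Linux pattern for instantiating TRACE_EVENT()-declared tracepoints in exactly one object file; every other user just includes the trace header. A schematic of that general pattern using a made-up "foo" subsystem (this is kernel build machinery, not standalone runnable code, and not the real rpcrdma trace events):

/* include/trace/events/foo.h -- declared once, included by many users */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM foo

#if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FOO_H

#include <linux/tracepoint.h>

TRACE_EVENT(foo_did_work,
	TP_PROTO(unsigned int count),
	TP_ARGS(count),
	TP_STRUCT__entry(
		__field(unsigned int, count)
	),
	TP_fast_assign(
		__entry->count = count;
	),
	TP_printk("count=%u", __entry->count)
);

#endif /* _TRACE_FOO_H */
#include <trace/define_trace.h>

/* foo_main.c -- exactly one .c file instantiates the tracepoints */
#define CREATE_TRACE_POINTS
#include <trace/events/foo.h>

/* every other file just includes the header and calls trace_foo_did_work(n) */
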
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a3f2ab283aeb..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
292} 292}
293 293
294static void 294static void
295xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) 295xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
296{ 296{
297 *iptr++ = cpu_to_be32(mw->mw_handle); 297 *iptr++ = cpu_to_be32(mr->mr_handle);
298 *iptr++ = cpu_to_be32(mw->mw_length); 298 *iptr++ = cpu_to_be32(mr->mr_length);
299 xdr_encode_hyper(iptr, mw->mw_offset); 299 xdr_encode_hyper(iptr, mr->mr_offset);
300} 300}
301 301
302static int 302static int
303encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) 303encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
304{ 304{
305 __be32 *p; 305 __be32 *p;
306 306
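xdr_encode_rdma_segment() above emits a segment as four big-endian 32-bit XDR words: the handle, the length, and the 64-bit offset as an XDR hyper (high word first). The same wire layout reproduced in self-contained C on a plain buffer instead of an xdr_stream (the handle/length/offset values below are arbitrary examples):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* htonl() */

/* Encode handle/length/offset the way the RPC/RDMA header carries a segment:
 * four big-endian 32-bit words, the 64-bit offset split high word first. */
static void encode_rdma_segment(uint32_t *p, uint32_t handle,
				uint32_t length, uint64_t offset)
{
	p[0] = htonl(handle);
	p[1] = htonl(length);
	p[2] = htonl((uint32_t)(offset >> 32));	/* xdr_encode_hyper: high half */
	p[3] = htonl((uint32_t)(offset & 0xFFFFFFFFu));
}

int main(void)
{
	uint32_t seg[4];
	unsigned char *b = (unsigned char *)seg;

	encode_rdma_segment(seg, 0xDEADBEEF, 4096, 0x0000123456789ABCULL);
	for (int i = 0; i < 16; i++)
		printf("%02x%s", b[i], (i % 4 == 3) ? "\n" : " ");
	return 0;
}
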
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
308 if (unlikely(!p)) 308 if (unlikely(!p))
309 return -EMSGSIZE; 309 return -EMSGSIZE;
310 310
311 xdr_encode_rdma_segment(p, mw); 311 xdr_encode_rdma_segment(p, mr);
312 return 0; 312 return 0;
313} 313}
314 314
315static int 315static int
316encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, 316encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
317 u32 position) 317 u32 position)
318{ 318{
319 __be32 *p; 319 __be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
324 324
325 *p++ = xdr_one; /* Item present */ 325 *p++ = xdr_one; /* Item present */
326 *p++ = cpu_to_be32(position); 326 *p++ = cpu_to_be32(position);
327 xdr_encode_rdma_segment(p, mw); 327 xdr_encode_rdma_segment(p, mr);
328 return 0; 328 return 0;
329} 329}
330 330
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
348{ 348{
349 struct xdr_stream *xdr = &req->rl_stream; 349 struct xdr_stream *xdr = &req->rl_stream;
350 struct rpcrdma_mr_seg *seg; 350 struct rpcrdma_mr_seg *seg;
351 struct rpcrdma_mw *mw; 351 struct rpcrdma_mr *mr;
352 unsigned int pos; 352 unsigned int pos;
353 int nsegs; 353 int nsegs;
354 354
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
363 363
364 do { 364 do {
365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
366 false, &mw); 366 false, &mr);
367 if (IS_ERR(seg)) 367 if (IS_ERR(seg))
368 return PTR_ERR(seg); 368 return PTR_ERR(seg);
369 rpcrdma_push_mw(mw, &req->rl_registered); 369 rpcrdma_mr_push(mr, &req->rl_registered);
370 370
371 if (encode_read_segment(xdr, mw, pos) < 0) 371 if (encode_read_segment(xdr, mr, pos) < 0)
372 return -EMSGSIZE; 372 return -EMSGSIZE;
373 373
374 dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", 374 trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
375 rqst->rq_task->tk_pid, __func__, pos,
376 mw->mw_length, (unsigned long long)mw->mw_offset,
377 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
378
379 r_xprt->rx_stats.read_chunk_count++; 375 r_xprt->rx_stats.read_chunk_count++;
380 nsegs -= mw->mw_nents; 376 nsegs -= mr->mr_nents;
381 } while (nsegs); 377 } while (nsegs);
382 378
383 return 0; 379 return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
404{ 400{
405 struct xdr_stream *xdr = &req->rl_stream; 401 struct xdr_stream *xdr = &req->rl_stream;
406 struct rpcrdma_mr_seg *seg; 402 struct rpcrdma_mr_seg *seg;
407 struct rpcrdma_mw *mw; 403 struct rpcrdma_mr *mr;
408 int nsegs, nchunks; 404 int nsegs, nchunks;
409 __be32 *segcount; 405 __be32 *segcount;
410 406
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
425 nchunks = 0; 421 nchunks = 0;
426 do { 422 do {
427 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 423 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
428 true, &mw); 424 true, &mr);
429 if (IS_ERR(seg)) 425 if (IS_ERR(seg))
430 return PTR_ERR(seg); 426 return PTR_ERR(seg);
431 rpcrdma_push_mw(mw, &req->rl_registered); 427 rpcrdma_mr_push(mr, &req->rl_registered);
432 428
433 if (encode_rdma_segment(xdr, mw) < 0) 429 if (encode_rdma_segment(xdr, mr) < 0)
434 return -EMSGSIZE; 430 return -EMSGSIZE;
435 431
436 dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", 432 trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
437 rqst->rq_task->tk_pid, __func__,
438 mw->mw_length, (unsigned long long)mw->mw_offset,
439 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
440
441 r_xprt->rx_stats.write_chunk_count++; 433 r_xprt->rx_stats.write_chunk_count++;
442 r_xprt->rx_stats.total_rdma_request += seg->mr_len; 434 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
443 nchunks++; 435 nchunks++;
444 nsegs -= mw->mw_nents; 436 nsegs -= mr->mr_nents;
445 } while (nsegs); 437 } while (nsegs);
446 438
447 /* Update count of segments in this Write chunk */ 439 /* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
468{ 460{
469 struct xdr_stream *xdr = &req->rl_stream; 461 struct xdr_stream *xdr = &req->rl_stream;
470 struct rpcrdma_mr_seg *seg; 462 struct rpcrdma_mr_seg *seg;
471 struct rpcrdma_mw *mw; 463 struct rpcrdma_mr *mr;
472 int nsegs, nchunks; 464 int nsegs, nchunks;
473 __be32 *segcount; 465 __be32 *segcount;
474 466
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
487 nchunks = 0; 479 nchunks = 0;
488 do { 480 do {
489 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 481 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
490 true, &mw); 482 true, &mr);
491 if (IS_ERR(seg)) 483 if (IS_ERR(seg))
492 return PTR_ERR(seg); 484 return PTR_ERR(seg);
493 rpcrdma_push_mw(mw, &req->rl_registered); 485 rpcrdma_mr_push(mr, &req->rl_registered);
494 486
495 if (encode_rdma_segment(xdr, mw) < 0) 487 if (encode_rdma_segment(xdr, mr) < 0)
496 return -EMSGSIZE; 488 return -EMSGSIZE;
497 489
498 dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", 490 trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
499 rqst->rq_task->tk_pid, __func__,
500 mw->mw_length, (unsigned long long)mw->mw_offset,
501 mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
502
503 r_xprt->rx_stats.reply_chunk_count++; 491 r_xprt->rx_stats.reply_chunk_count++;
504 r_xprt->rx_stats.total_rdma_request += seg->mr_len; 492 r_xprt->rx_stats.total_rdma_request += mr->mr_length;
505 nchunks++; 493 nchunks++;
506 nsegs -= mw->mw_nents; 494 nsegs -= mr->mr_nents;
507 } while (nsegs); 495 } while (nsegs);
508 496
509 /* Update count of segments in the Reply chunk */ 497 /* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
524 struct ib_sge *sge; 512 struct ib_sge *sge;
525 unsigned int count; 513 unsigned int count;
526 514
527 dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
528 __func__, sc->sc_unmap_count, sc);
529
530 /* The first two SGEs contain the transport header and 515 /* The first two SGEs contain the transport header and
531 * the inline buffer. These are always left mapped so 516 * the inline buffer. These are always left mapped so
532 * they can be cheaply re-used. 517 * they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
754 __be32 *p; 739 __be32 *p;
755 int ret; 740 int ret;
756 741
757#if defined(CONFIG_SUNRPC_BACKCHANNEL)
758 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
759 return rpcrdma_bc_marshal_reply(rqst);
760#endif
761
762 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); 742 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
763 xdr_init_encode(xdr, &req->rl_hdrbuf, 743 xdr_init_encode(xdr, &req->rl_hdrbuf,
764 req->rl_rdmabuf->rg_base); 744 req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
821 rtype = rpcrdma_areadch; 801 rtype = rpcrdma_areadch;
822 } 802 }
823 803
804 /* If this is a retransmit, discard previously registered
805 * chunks. Very likely the connection has been replaced,
806 * so these registrations are invalid and unusable.
807 */
808 while (unlikely(!list_empty(&req->rl_registered))) {
809 struct rpcrdma_mr *mr;
810
811 mr = rpcrdma_mr_pop(&req->rl_registered);
812 rpcrdma_mr_defer_recovery(mr);
813 }
814
824 /* This implementation supports the following combinations 815 /* This implementation supports the following combinations
825 * of chunk lists in one RPC-over-RDMA Call message: 816 * of chunk lists in one RPC-over-RDMA Call message:
826 * 817 *
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
868 if (ret) 859 if (ret)
869 goto out_err; 860 goto out_err;
870 861
871 dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", 862 trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
872 rqst->rq_task->tk_pid, __func__,
873 transfertypes[rtype], transfertypes[wtype],
874 xdr_stream_pos(xdr));
875 863
876 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), 864 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
877 &rqst->rq_snd_buf, rtype); 865 &rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
926 curlen = rqst->rq_rcv_buf.head[0].iov_len; 914 curlen = rqst->rq_rcv_buf.head[0].iov_len;
927 if (curlen > copy_len) 915 if (curlen > copy_len)
928 curlen = copy_len; 916 curlen = copy_len;
929 dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", 917 trace_xprtrdma_fixup(rqst, copy_len, curlen);
930 __func__, srcp, copy_len, curlen);
931 srcp += curlen; 918 srcp += curlen;
932 copy_len -= curlen; 919 copy_len -= curlen;
933 920
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
947 if (curlen > pagelist_len) 934 if (curlen > pagelist_len)
948 curlen = pagelist_len; 935 curlen = pagelist_len;
949 936
950 dprintk("RPC: %s: page %d" 937 trace_xprtrdma_fixup_pg(rqst, i, srcp,
951 " srcp 0x%p len %d curlen %d\n", 938 copy_len, curlen);
952 __func__, i, srcp, copy_len, curlen);
953 destp = kmap_atomic(ppages[i]); 939 destp = kmap_atomic(ppages[i]);
954 memcpy(destp + page_base, srcp, curlen); 940 memcpy(destp + page_base, srcp, curlen);
955 flush_dcache_page(ppages[i]); 941 flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
984 return fixup_copy_count; 970 return fixup_copy_count;
985} 971}
986 972
987/* Caller must guarantee @rep remains stable during this call.
988 */
989static void
990rpcrdma_mark_remote_invalidation(struct list_head *mws,
991 struct rpcrdma_rep *rep)
992{
993 struct rpcrdma_mw *mw;
994
995 if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
996 return;
997
998 list_for_each_entry(mw, mws, mw_list)
999 if (mw->mw_handle == rep->rr_inv_rkey) {
1000 mw->mw_flags = RPCRDMA_MW_F_RI;
1001 break; /* only one invalidated MR per RPC */
1002 }
1003}
1004
1005/* By convention, backchannel calls arrive via rdma_msg type 973/* By convention, backchannel calls arrive via rdma_msg type
1006 * messages, and never populate the chunk lists. This makes 974 * messages, and never populate the chunk lists. This makes
1007 * the RPC/RDMA header small and fixed in size, so it is 975 * the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
1058 1026
1059static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) 1027static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1060{ 1028{
1029 u32 handle;
1030 u64 offset;
1061 __be32 *p; 1031 __be32 *p;
1062 1032
1063 p = xdr_inline_decode(xdr, 4 * sizeof(*p)); 1033 p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1064 if (unlikely(!p)) 1034 if (unlikely(!p))
1065 return -EIO; 1035 return -EIO;
1066 1036
1067 ifdebug(FACILITY) { 1037 handle = be32_to_cpup(p++);
1068 u64 offset; 1038 *length = be32_to_cpup(p++);
1069 u32 handle; 1039 xdr_decode_hyper(p, &offset);
1070
1071 handle = be32_to_cpup(p++);
1072 *length = be32_to_cpup(p++);
1073 xdr_decode_hyper(p, &offset);
1074 dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
1075 __func__, *length, (unsigned long long)offset,
1076 handle);
1077 } else {
1078 *length = be32_to_cpup(p + 1);
1079 }
1080 1040
1041 trace_xprtrdma_decode_seg(handle, *length, offset);
1081 return 0; 1042 return 0;
1082} 1043}
1083 1044
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
1098 *length += seglength; 1059 *length += seglength;
1099 } 1060 }
1100 1061
1101 dprintk("RPC: %s: segcount=%u, %u bytes\n",
1102 __func__, be32_to_cpup(p), *length);
1103 return 0; 1062 return 0;
1104} 1063}
1105 1064
@@ -1296,8 +1255,7 @@ out:
1296 * being marshaled. 1255 * being marshaled.
1297 */ 1256 */
1298out_badheader: 1257out_badheader:
1299 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", 1258 trace_xprtrdma_reply_hdr(rep);
1300 rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
1301 r_xprt->rx_stats.bad_reply_count++; 1259 r_xprt->rx_stats.bad_reply_count++;
1302 status = -EIO; 1260 status = -EIO;
1303 goto out; 1261 goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
1339 struct rpcrdma_rep *rep = 1297 struct rpcrdma_rep *rep =
1340 container_of(work, struct rpcrdma_rep, rr_work); 1298 container_of(work, struct rpcrdma_rep, rr_work);
1341 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); 1299 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
1300 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1342 1301
1343 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); 1302 trace_xprtrdma_defer_cmp(rep);
1344 rpcrdma_release_rqst(rep->rr_rxprt, req); 1303 if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
1304 r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
1305 rpcrdma_release_rqst(r_xprt, req);
1345 rpcrdma_complete_rqst(rep); 1306 rpcrdma_complete_rqst(rep);
1346} 1307}
1347 1308
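rpcrdma_deferred_completion() now checks IB_WC_WITH_INVALIDATE and, when the flag is set, has the memreg ops' ->ro_reminv handler (frwr_op_reminv earlier in this patch) scan the request's registered MRs for the one whose handle matches the rkey the server invalidated remotely, so that MR can skip the LOCAL_INV round trip. A small C sketch of just the matching step, using an array and a needs_local_inv flag in place of the kernel list and MR state:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct fake_mr {
	uint32_t	handle;		/* models mr->mr_handle */
	bool		needs_local_inv;
};

/* Mark the one MR whose rkey the server already invalidated. */
static void reminv(struct fake_mr *mrs, int count, uint32_t inv_rkey)
{
	for (int i = 0; i < count; i++) {
		if (mrs[i].handle == inv_rkey) {
			mrs[i].needs_local_inv = false;
			break;			/* only one invalidated MR per RPC */
		}
	}
}

int main(void)
{
	struct fake_mr mrs[] = {
		{ .handle = 0x100, .needs_local_inv = true },
		{ .handle = 0x200, .needs_local_inv = true },
		{ .handle = 0x300, .needs_local_inv = true },
	};

	reminv(mrs, 3, 0x200);		/* rep->rr_inv_rkey from the Receive WC */
	for (int i = 0; i < 3; i++)
		printf("MR 0x%x needs LOCAL_INV: %s\n",
		       mrs[i].handle, mrs[i].needs_local_inv ? "yes" : "no");
	return 0;
}
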
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1360 u32 credits; 1321 u32 credits;
1361 __be32 *p; 1322 __be32 *p;
1362 1323
1363 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
1364
1365 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1324 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1366 goto out_badstatus; 1325 goto out_badstatus;
1367 1326
@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1405 rep->rr_rqst = rqst; 1364 rep->rr_rqst = rqst;
1406 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 1365 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1407 1366
1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1410 1368
1411 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); 1369 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
1412 return; 1370 return;
@@ -1420,8 +1378,7 @@ out_badstatus:
1420 return; 1378 return;
1421 1379
1422out_badversion: 1380out_badversion:
1423 dprintk("RPC: %s: invalid version %d\n", 1381 trace_xprtrdma_reply_vers(rep);
1424 __func__, be32_to_cpu(rep->rr_vers));
1425 goto repost; 1382 goto repost;
1426 1383
1427/* The RPC transaction has already been terminated, or the header 1384/* The RPC transaction has already been terminated, or the header
@@ -1429,12 +1386,11 @@ out_badversion:
1429 */ 1386 */
1430out_norqst: 1387out_norqst:
1431 spin_unlock(&xprt->recv_lock); 1388 spin_unlock(&xprt->recv_lock);
1432 dprintk("RPC: %s: no match for incoming xid 0x%08x\n", 1389 trace_xprtrdma_reply_rqst(rep);
1433 __func__, be32_to_cpu(rep->rr_xid));
1434 goto repost; 1390 goto repost;
1435 1391
1436out_shortreply: 1392out_shortreply:
1437 dprintk("RPC: %s: short/invalid reply\n", __func__); 1393 trace_xprtrdma_reply_short(rep);
1438 1394
1439/* If no pending RPC transaction was matched, post a replacement 1395/* If no pending RPC transaction was matched, post a replacement
1440 * receive buffer before returning. 1396 * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 6ee1ad8978f3..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,8 +67,7 @@
67static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; 67static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
68unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 68unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
69static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 69static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
70static unsigned int xprt_rdma_inline_write_padding; 70unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
71unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
72int xprt_rdma_pad_optimize; 71int xprt_rdma_pad_optimize;
73 72
74#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 73#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -81,6 +80,7 @@ static unsigned int zero;
81static unsigned int max_padding = PAGE_SIZE; 80static unsigned int max_padding = PAGE_SIZE;
82static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; 81static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
83static unsigned int max_memreg = RPCRDMA_LAST - 1; 82static unsigned int max_memreg = RPCRDMA_LAST - 1;
83static unsigned int dummy;
84 84
85static struct ctl_table_header *sunrpc_table_header; 85static struct ctl_table_header *sunrpc_table_header;
86 86
@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
114 }, 114 },
115 { 115 {
116 .procname = "rdma_inline_write_padding", 116 .procname = "rdma_inline_write_padding",
117 .data = &xprt_rdma_inline_write_padding, 117 .data = &dummy,
118 .maxlen = sizeof(unsigned int), 118 .maxlen = sizeof(unsigned int),
119 .mode = 0644, 119 .mode = 0644,
120 .proc_handler = proc_dointvec_minmax, 120 .proc_handler = proc_dointvec_minmax,
@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
259 259
260 xprt_clear_connected(xprt); 260 xprt_clear_connected(xprt);
261 261
262 dprintk("RPC: %s: %sconnect\n", __func__,
263 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
264 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 262 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
265 if (rc) 263 if (rc)
266 xprt_wake_pending_tasks(xprt, rc); 264 xprt_wake_pending_tasks(xprt, rc);
267 265
268 dprintk("RPC: %s: exit\n", __func__);
269 xprt_clear_connecting(xprt); 266 xprt_clear_connecting(xprt);
270} 267}
271 268
@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
275 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, 272 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
276 rx_xprt); 273 rx_xprt);
277 274
278 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); 275 trace_xprtrdma_inject_dsc(r_xprt);
279 rdma_disconnect(r_xprt->rx_ia.ri_id); 276 rdma_disconnect(r_xprt->rx_ia.ri_id);
280} 277}
281 278
@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
295{ 292{
296 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 293 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
297 294
298 dprintk("RPC: %s: called\n", __func__); 295 trace_xprtrdma_destroy(r_xprt);
299 296
300 cancel_delayed_work_sync(&r_xprt->rx_connect_worker); 297 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
301 298
@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
306 rpcrdma_ia_close(&r_xprt->rx_ia); 303 rpcrdma_ia_close(&r_xprt->rx_ia);
307 304
308 xprt_rdma_free_addresses(xprt); 305 xprt_rdma_free_addresses(xprt);
309
310 xprt_free(xprt); 306 xprt_free(xprt);
311 307
312 dprintk("RPC: %s: returning\n", __func__);
313
314 module_put(THIS_MODULE); 308 module_put(THIS_MODULE);
315} 309}
316 310
@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
361 /* 355 /*
362 * Set up RDMA-specific connect data. 356 * Set up RDMA-specific connect data.
363 */ 357 */
364 358 sap = args->dstaddr;
365 sap = (struct sockaddr *)&cdata.addr;
366 memcpy(sap, args->dstaddr, args->addrlen);
367 359
368 /* Ensure xprt->addr holds valid server TCP (not RDMA) 360 /* Ensure xprt->addr holds valid server TCP (not RDMA)
369 * address, for any side protocols which peek at it */ 361 * address, for any side protocols which peek at it */
@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
373 365
374 if (rpc_get_port(sap)) 366 if (rpc_get_port(sap))
375 xprt_set_bound(xprt); 367 xprt_set_bound(xprt);
368 xprt_rdma_format_addresses(xprt, sap);
376 369
377 cdata.max_requests = xprt->max_reqs; 370 cdata.max_requests = xprt->max_reqs;
378 371
@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
387 if (cdata.inline_rsize > cdata.rsize) 380 if (cdata.inline_rsize > cdata.rsize)
388 cdata.inline_rsize = cdata.rsize; 381 cdata.inline_rsize = cdata.rsize;
389 382
390 cdata.padding = xprt_rdma_inline_write_padding;
391
392 /* 383 /*
393 * Create new transport instance, which includes initialized 384 * Create new transport instance, which includes initialized
394 * o ia 385 * o ia
@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
398 389
399 new_xprt = rpcx_to_rdmax(xprt); 390 new_xprt = rpcx_to_rdmax(xprt);
400 391
401 rc = rpcrdma_ia_open(new_xprt, sap); 392 rc = rpcrdma_ia_open(new_xprt);
402 if (rc) 393 if (rc)
403 goto out1; 394 goto out1;
404 395
@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
407 */ 398 */
408 new_xprt->rx_data = cdata; 399 new_xprt->rx_data = cdata;
409 new_ep = &new_xprt->rx_ep; 400 new_ep = &new_xprt->rx_ep;
410 new_ep->rep_remote_addr = cdata.addr;
411 401
412 rc = rpcrdma_ep_create(&new_xprt->rx_ep, 402 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
413 &new_xprt->rx_ia, &new_xprt->rx_data); 403 &new_xprt->rx_ia, &new_xprt->rx_data);
414 if (rc) 404 if (rc)
415 goto out2; 405 goto out2;
416 406
417 /*
418 * Allocate pre-registered send and receive buffers for headers and
419 * any inline data. Also specify any padding which will be provided
420 * from a preregistered zero buffer.
421 */
422 rc = rpcrdma_buffer_create(new_xprt); 407 rc = rpcrdma_buffer_create(new_xprt);
423 if (rc) 408 if (rc)
424 goto out3; 409 goto out3;
425 410
426 /*
427 * Register a callback for connection events. This is necessary because
428 * connection loss notification is async. We also catch connection loss
429 * when reaping receives.
430 */
431 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, 411 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
432 xprt_rdma_connect_worker); 412 xprt_rdma_connect_worker);
433 413
434 xprt_rdma_format_addresses(xprt, sap);
435 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); 414 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
436 if (xprt->max_payload == 0) 415 if (xprt->max_payload == 0)
437 goto out4; 416 goto out4;
@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
445 dprintk("RPC: %s: %s:%s\n", __func__, 424 dprintk("RPC: %s: %s:%s\n", __func__,
446 xprt->address_strings[RPC_DISPLAY_ADDR], 425 xprt->address_strings[RPC_DISPLAY_ADDR],
447 xprt->address_strings[RPC_DISPLAY_PORT]); 426 xprt->address_strings[RPC_DISPLAY_PORT]);
427 trace_xprtrdma_create(new_xprt);
448 return xprt; 428 return xprt;
449 429
450out4: 430out4:
451 xprt_rdma_free_addresses(xprt); 431 rpcrdma_buffer_destroy(&new_xprt->rx_buf);
452 rc = -EINVAL; 432 rc = -ENODEV;
453out3: 433out3:
454 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); 434 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
455out2: 435out2:
456 rpcrdma_ia_close(&new_xprt->rx_ia); 436 rpcrdma_ia_close(&new_xprt->rx_ia);
457out1: 437out1:
438 trace_xprtrdma_destroy(new_xprt);
439 xprt_rdma_free_addresses(xprt);
458 xprt_free(xprt); 440 xprt_free(xprt);
459 return ERR_PTR(rc); 441 return ERR_PTR(rc);
460} 442}
@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
488 rpcrdma_ep_disconnect(ep, ia); 470 rpcrdma_ep_disconnect(ep, ia);
489} 471}
490 472
473/**
474 * xprt_rdma_set_port - update server port with rpcbind result
475 * @xprt: controlling RPC transport
476 * @port: new port value
477 *
478 * Transport connect status is unchanged.
479 */
491static void 480static void
492xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) 481xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
493{ 482{
494 struct sockaddr_in *sap; 483 struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
484 char buf[8];
495 485
496 sap = (struct sockaddr_in *)&xprt->addr; 486 dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
497 sap->sin_port = htons(port); 487 __func__, xprt,
498 sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; 488 xprt->address_strings[RPC_DISPLAY_ADDR],
499 sap->sin_port = htons(port); 489 xprt->address_strings[RPC_DISPLAY_PORT],
500 dprintk("RPC: %s: %u\n", __func__, port); 490 port);
491
492 rpc_set_port(sap, port);
493
494 kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
495 snprintf(buf, sizeof(buf), "%u", port);
496 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
497
498 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
499 snprintf(buf, sizeof(buf), "%4hx", port);
500 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
501} 501}
502 502
503/** 503/**
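The rewritten xprt_rdma_set_port() above updates the stored sockaddr with rpc_set_port() and then regenerates the two cached display strings: the decimal port and a four-character hex form. The string formatting on its own, as a quick C check (buf[8] matches the on-stack buffer in the patch; 2049 is just a sample port):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t port = 2049;	/* example rpcbind result */
	char buf[8];

	snprintf(buf, sizeof(buf), "%u", port);
	printf("RPC_DISPLAY_PORT     = \"%s\"\n", buf);

	snprintf(buf, sizeof(buf), "%4hx", port);
	printf("RPC_DISPLAY_HEX_PORT = \"%s\"\n", buf);
	return 0;
}
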
@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
516static void 516static void
517xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) 517xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
518{ 518{
519 dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
520
521 xprt_force_disconnect(xprt); 519 xprt_force_disconnect(xprt);
522} 520}
523 521
@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
640 638
641 req = rpcrdma_buffer_get(&r_xprt->rx_buf); 639 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
642 if (req == NULL) 640 if (req == NULL)
643 return -ENOMEM; 641 goto out_get;
644 642
645 flags = RPCRDMA_DEF_GFP; 643 flags = RPCRDMA_DEF_GFP;
646 if (RPC_IS_SWAPPER(task)) 644 if (RPC_IS_SWAPPER(task))
@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
653 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 651 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
654 goto out_fail; 652 goto out_fail;
655 653
656 dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
657 task->tk_pid, __func__, rqst->rq_callsize,
658 rqst->rq_rcvsize, req);
659
660 req->rl_cpu = smp_processor_id(); 654 req->rl_cpu = smp_processor_id();
661 req->rl_connect_cookie = 0; /* our reserved value */ 655 req->rl_connect_cookie = 0; /* our reserved value */
662 rpcrdma_set_xprtdata(rqst, req); 656 rpcrdma_set_xprtdata(rqst, req);
663 rqst->rq_buffer = req->rl_sendbuf->rg_base; 657 rqst->rq_buffer = req->rl_sendbuf->rg_base;
664 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 658 rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
659 trace_xprtrdma_allocate(task, req);
665 return 0; 660 return 0;
666 661
667out_fail: 662out_fail:
668 rpcrdma_buffer_put(req); 663 rpcrdma_buffer_put(req);
664out_get:
665 trace_xprtrdma_allocate(task, NULL);
669 return -ENOMEM; 666 return -ENOMEM;
670} 667}
671 668
@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
682 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 679 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
683 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 680 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
684 681
685 if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
686 return;
687
688 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
689
690 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 682 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
691 rpcrdma_release_rqst(r_xprt, req); 683 rpcrdma_release_rqst(r_xprt, req);
684 trace_xprtrdma_rpc_done(task, req);
692 rpcrdma_buffer_put(req); 685 rpcrdma_buffer_put(req);
693} 686}
694 687
@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
698 * 691 *
699 * Caller holds the transport's write lock. 692 * Caller holds the transport's write lock.
700 * 693 *
701 * Return values: 694 * Returns:
702 * 0: The request has been sent 695 * %0 if the RPC message has been sent
703 * ENOTCONN: Caller needs to invoke connect logic then call again 696 * %-ENOTCONN if the caller should reconnect and call again
704 * ENOBUFS: Call again later to send the request 697 * %-ENOBUFS if the caller should call again later
705 * EIO: A permanent error occurred. The request was not sent, 698 * %-EIO if a permanent error occurred and the request was not
706 * and don't try it again 699 * sent. Do not try to send this message again.
707 *
708 * send_request invokes the meat of RPC RDMA. It must do the following:
709 *
710 * 1. Marshal the RPC request into an RPC RDMA request, which means
711 * putting a header in front of data, and creating IOVs for RDMA
712 * from those in the request.
713 * 2. In marshaling, detect opportunities for RDMA, and use them.
714 * 3. Post a recv message to set up asynch completion, then send
715 * the request (rpcrdma_ep_post).
716 * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
717 */ 700 */
718static int 701static int
719xprt_rdma_send_request(struct rpc_task *task) 702xprt_rdma_send_request(struct rpc_task *task)
@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
724 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 707 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
725 int rc = 0; 708 int rc = 0;
726 709
710#if defined(CONFIG_SUNRPC_BACKCHANNEL)
711 if (unlikely(!rqst->rq_buffer))
712 return xprt_rdma_bc_send_reply(rqst);
713#endif /* CONFIG_SUNRPC_BACKCHANNEL */
714
727 if (!xprt_connected(xprt)) 715 if (!xprt_connected(xprt))
728 goto drop_connection; 716 goto drop_connection;
729 717
730 /* On retransmit, remove any previously registered chunks */
731 if (unlikely(!list_empty(&req->rl_registered)))
732 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
733 &req->rl_registered);
734
735 rc = rpcrdma_marshal_req(r_xprt, rqst); 718 rc = rpcrdma_marshal_req(r_xprt, rqst);
736 if (rc < 0) 719 if (rc < 0)
737 goto failed_marshal; 720 goto failed_marshal;
@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
744 goto drop_connection; 727 goto drop_connection;
745 req->rl_connect_cookie = xprt->connect_cookie; 728 req->rl_connect_cookie = xprt->connect_cookie;
746 729
747 set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 730 __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
748 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 731 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
749 goto drop_connection; 732 goto drop_connection;
750 733
@@ -904,8 +887,7 @@ int xprt_rdma_init(void)
904 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", 887 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
905 xprt_rdma_slot_table_entries, 888 xprt_rdma_slot_table_entries,
906 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); 889 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
907 dprintk("\tPadding %d\n\tMemreg %d\n", 890 dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
908 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
909 891
910#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 892#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
911 if (!sunrpc_table_header) 893 if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8607c029c0dd..f4eb63e8e689 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
71/* 71/*
72 * internal functions 72 * internal functions
73 */ 73 */
74static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); 74static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
75static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); 75static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
77 77
78struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 78struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -108,7 +108,10 @@ static void
108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) 108rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
109{ 109{
110 struct rpcrdma_ep *ep = context; 110 struct rpcrdma_ep *ep = context;
111 struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
112 rx_ep);
111 113
114 trace_xprtrdma_qp_error(r_xprt, event);
112 pr_err("rpcrdma: %s on device %s ep %p\n", 115 pr_err("rpcrdma: %s on device %s ep %p\n",
113 ib_event_msg(event->event), event->device->name, context); 116 ib_event_msg(event->event), event->device->name, context);
114 117
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
133 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 136 container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
134 137
135 /* WARNING: Only wr_cqe and status are reliable at this point */ 138 /* WARNING: Only wr_cqe and status are reliable at this point */
139 trace_xprtrdma_wc_send(sc, wc);
136 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) 140 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
137 pr_err("rpcrdma: Send: %s (%u/0x%x)\n", 141 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
138 ib_wc_status_msg(wc->status), 142 ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
155 rr_cqe); 159 rr_cqe);
156 160
157 /* WARNING: Only wr_id and status are reliable at this point */ 161 /* WARNING: Only wr_id and status are reliable at this point */
162 trace_xprtrdma_wc_receive(rep, wc);
158 if (wc->status != IB_WC_SUCCESS) 163 if (wc->status != IB_WC_SUCCESS)
159 goto out_fail; 164 goto out_fail;
160 165
161 /* status == SUCCESS means all fields in wc are trustworthy */ 166 /* status == SUCCESS means all fields in wc are trustworthy */
162 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
163 __func__, rep, wc->byte_len);
164
165 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 167 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
166 rep->rr_wc_flags = wc->wc_flags; 168 rep->rr_wc_flags = wc->wc_flags;
167 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 169 rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
192 unsigned int rsize, wsize; 194 unsigned int rsize, wsize;
193 195
194 /* Default settings for RPC-over-RDMA Version One */ 196 /* Default settings for RPC-over-RDMA Version One */
195 r_xprt->rx_ia.ri_reminv_expected = false;
196 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; 197 r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
197 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
198 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 199 wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
200 if (pmsg && 201 if (pmsg &&
201 pmsg->cp_magic == rpcrdma_cmp_magic && 202 pmsg->cp_magic == rpcrdma_cmp_magic &&
202 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 203 pmsg->cp_version == RPCRDMA_CMP_VERSION) {
203 r_xprt->rx_ia.ri_reminv_expected = true;
204 r_xprt->rx_ia.ri_implicit_roundup = true; 204 r_xprt->rx_ia.ri_implicit_roundup = true;
205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 205 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 206 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
221 struct rpcrdma_xprt *xprt = id->context; 221 struct rpcrdma_xprt *xprt = id->context;
222 struct rpcrdma_ia *ia = &xprt->rx_ia; 222 struct rpcrdma_ia *ia = &xprt->rx_ia;
223 struct rpcrdma_ep *ep = &xprt->rx_ep; 223 struct rpcrdma_ep *ep = &xprt->rx_ep;
224#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
225 struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
226#endif
227 int connstate = 0; 224 int connstate = 0;
228 225
226 trace_xprtrdma_conn_upcall(xprt, event);
229 switch (event->event) { 227 switch (event->event) {
230 case RDMA_CM_EVENT_ADDR_RESOLVED: 228 case RDMA_CM_EVENT_ADDR_RESOLVED:
231 case RDMA_CM_EVENT_ROUTE_RESOLVED: 229 case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
234 break; 232 break;
235 case RDMA_CM_EVENT_ADDR_ERROR: 233 case RDMA_CM_EVENT_ADDR_ERROR:
236 ia->ri_async_rc = -EHOSTUNREACH; 234 ia->ri_async_rc = -EHOSTUNREACH;
237 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
238 __func__, ep);
239 complete(&ia->ri_done); 235 complete(&ia->ri_done);
240 break; 236 break;
241 case RDMA_CM_EVENT_ROUTE_ERROR: 237 case RDMA_CM_EVENT_ROUTE_ERROR:
242 ia->ri_async_rc = -ENETUNREACH; 238 ia->ri_async_rc = -ENETUNREACH;
243 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
244 __func__, ep);
245 complete(&ia->ri_done); 239 complete(&ia->ri_done);
246 break; 240 break;
247 case RDMA_CM_EVENT_DEVICE_REMOVAL: 241 case RDMA_CM_EVENT_DEVICE_REMOVAL:
248#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 242#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
249 pr_info("rpcrdma: removing device %s for %pIS:%u\n", 243 pr_info("rpcrdma: removing device %s for %s:%s\n",
250 ia->ri_device->name, 244 ia->ri_device->name,
251 sap, rpc_get_port(sap)); 245 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
252#endif 246#endif
253 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); 247 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
254 ep->rep_connected = -ENODEV; 248 ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
271 connstate = -ENETDOWN; 265 connstate = -ENETDOWN;
272 goto connected; 266 goto connected;
273 case RDMA_CM_EVENT_REJECTED: 267 case RDMA_CM_EVENT_REJECTED:
274 dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", 268 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
275 sap, rpc_get_port(sap), 269 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
276 rdma_reject_msg(id, event->status)); 270 rdma_reject_msg(id, event->status));
277 connstate = -ECONNREFUSED; 271 connstate = -ECONNREFUSED;
278 if (event->status == IB_CM_REJ_STALE_CONN) 272 if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
287 wake_up_all(&ep->rep_connect_wait); 281 wake_up_all(&ep->rep_connect_wait);
288 /*FALLTHROUGH*/ 282 /*FALLTHROUGH*/
289 default: 283 default:
290 dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", 284 dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
291 __func__, sap, rpc_get_port(sap), 285 __func__,
286 rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
292 ia->ri_device->name, ia->ri_ops->ro_displayname, 287 ia->ri_device->name, ia->ri_ops->ro_displayname,
293 ep, rdma_event_msg(event->event)); 288 ep, rdma_event_msg(event->event));
294 break; 289 break;
@@ -298,13 +293,14 @@ connected:
298} 293}
299 294
300static struct rdma_cm_id * 295static struct rdma_cm_id *
301rpcrdma_create_id(struct rpcrdma_xprt *xprt, 296rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
302 struct rpcrdma_ia *ia, struct sockaddr *addr)
303{ 297{
304 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 298 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
305 struct rdma_cm_id *id; 299 struct rdma_cm_id *id;
306 int rc; 300 int rc;
307 301
302 trace_xprtrdma_conn_start(xprt);
303
308 init_completion(&ia->ri_done); 304 init_completion(&ia->ri_done);
309 init_completion(&ia->ri_remove_done); 305 init_completion(&ia->ri_remove_done);
310 306
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
318 } 314 }
319 315
320 ia->ri_async_rc = -ETIMEDOUT; 316 ia->ri_async_rc = -ETIMEDOUT;
321 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); 317 rc = rdma_resolve_addr(id, NULL,
318 (struct sockaddr *)&xprt->rx_xprt.addr,
319 RDMA_RESOLVE_TIMEOUT);
322 if (rc) { 320 if (rc) {
323 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", 321 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
324 __func__, rc); 322 __func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
326 } 324 }
327 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 325 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
328 if (rc < 0) { 326 if (rc < 0) {
329 dprintk("RPC: %s: wait() exited: %i\n", 327 trace_xprtrdma_conn_tout(xprt);
330 __func__, rc);
331 goto out; 328 goto out;
332 } 329 }
333 330
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
344 } 341 }
345 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 342 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
346 if (rc < 0) { 343 if (rc < 0) {
347 dprintk("RPC: %s: wait() exited: %i\n", 344 trace_xprtrdma_conn_tout(xprt);
348 __func__, rc);
349 goto out; 345 goto out;
350 } 346 }
351 rc = ia->ri_async_rc; 347 rc = ia->ri_async_rc;
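Address and route resolution complete asynchronously through the CM event handler, so rpcrdma_create_id() pre-loads ri_async_rc with -ETIMEDOUT and blocks on ri_done with wait_for_completion_interruptible_timeout(); the dprintk on the timed-out wait is replaced by the xprtrdma_conn_tout tracepoint. The following is a rough user-space model of that wait-with-timeout handshake, assuming pthreads; struct completion, cm_event_thread() and the five-second deadline are all illustrative, not taken from the kernel.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool done;
};

static void complete(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = true;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

/* Returns 0 if completed, -1 if the deadline expired first. */
static int wait_for_completion_timeout(struct completion *c, int secs)
{
        struct timespec deadline;
        int rc = 0;

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += secs;

        pthread_mutex_lock(&c->lock);
        while (!c->done && rc == 0)
                rc = pthread_cond_timedwait(&c->cond, &c->lock, &deadline);
        pthread_mutex_unlock(&c->lock);
        return c->done ? 0 : -1;
}

static struct completion resolve_done = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
};

static void *cm_event_thread(void *arg)
{
        (void)arg;
        sleep(1);               /* stand-in for an ADDR_RESOLVED event */
        complete(&resolve_done);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, cm_event_thread, NULL);
        if (wait_for_completion_timeout(&resolve_done, 5))
                fprintf(stderr, "address resolution timed out\n");
        else
                printf("address resolved\n");
        pthread_join(&t, NULL);
        return 0;
}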
@@ -365,19 +361,18 @@ out:
365 361
366/** 362/**
367 * rpcrdma_ia_open - Open and initialize an Interface Adapter. 363 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
368 * @xprt: controlling transport 364 * @xprt: transport with IA to (re)initialize
369 * @addr: IP address of remote peer
370 * 365 *
371 * Returns 0 on success, negative errno if an appropriate 366 * Returns 0 on success, negative errno if an appropriate
372 * Interface Adapter could not be found and opened. 367 * Interface Adapter could not be found and opened.
373 */ 368 */
374int 369int
375rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) 370rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
376{ 371{
377 struct rpcrdma_ia *ia = &xprt->rx_ia; 372 struct rpcrdma_ia *ia = &xprt->rx_ia;
378 int rc; 373 int rc;
379 374
380 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 375 ia->ri_id = rpcrdma_create_id(xprt, ia);
381 if (IS_ERR(ia->ri_id)) { 376 if (IS_ERR(ia->ri_id)) {
382 rc = PTR_ERR(ia->ri_id); 377 rc = PTR_ERR(ia->ri_id);
383 goto out_err; 378 goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
392 } 387 }
393 388
394 switch (xprt_rdma_memreg_strategy) { 389 switch (xprt_rdma_memreg_strategy) {
395 case RPCRDMA_FRMR: 390 case RPCRDMA_FRWR:
396 if (frwr_is_supported(ia)) { 391 if (frwr_is_supported(ia)) {
397 ia->ri_ops = &rpcrdma_frwr_memreg_ops; 392 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
398 break; 393 break;
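xprt_rdma_memreg_strategy still selects a registration method by enum value, now spelled RPCRDMA_FRWR to match the rest of the rename; the switch resolves that choice once, at IA open time, into the ops vtable stored in ia->ri_ops, so the hot paths never re-test device capabilities. A compact sketch of that selection pattern follows, with invented memreg_ops/select_ops names standing in for the rpcrdma structures.

#include <stdbool.h>
#include <stdio.h>

enum memreg { MEMREG_FRWR, MEMREG_FMR };

struct memreg_ops {
        const char *displayname;
        int (*init_mr)(void);           /* one slot of a larger vtable */
};

static int frwr_init_mr(void) { return 0; }
static int fmr_init_mr(void)  { return 0; }

static const struct memreg_ops frwr_ops = { "frwr", frwr_init_mr };
static const struct memreg_ops fmr_ops  = { "fmr",  fmr_init_mr };

/* Pretend capability probes; a real IA would ask the device. */
static bool frwr_is_supported(void) { return true; }
static bool fmr_is_supported(void)  { return true; }

static const struct memreg_ops *select_ops(enum memreg strategy)
{
        switch (strategy) {
        case MEMREG_FRWR:
                if (frwr_is_supported())
                        return &frwr_ops;
                /* fall through and try the next method */
        case MEMREG_FMR:
                if (fmr_is_supported())
                        return &fmr_ops;
        }
        return NULL;                    /* no usable registration mode */
}

int main(void)
{
        const struct memreg_ops *ops = select_ops(MEMREG_FRWR);

        if (ops)
                printf("using %s registration\n", ops->displayname);
        return 0;
}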
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
462 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); 457 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
463 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); 458 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
464 } 459 }
465 rpcrdma_destroy_mrs(buf); 460 rpcrdma_mrs_destroy(buf);
466 461
467 /* Allow waiters to continue */ 462 /* Allow waiters to continue */
468 complete(&ia->ri_remove_done); 463 complete(&ia->ri_remove_done);
464
465 trace_xprtrdma_remove(r_xprt);
469} 466}
470 467
471/** 468/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
476void 473void
477rpcrdma_ia_close(struct rpcrdma_ia *ia) 474rpcrdma_ia_close(struct rpcrdma_ia *ia)
478{ 475{
479 dprintk("RPC: %s: entering\n", __func__);
480 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { 476 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
481 if (ia->ri_id->qp) 477 if (ia->ri_id->qp)
482 rdma_destroy_qp(ia->ri_id); 478 rdma_destroy_qp(ia->ri_id);
@@ -630,9 +626,6 @@ out1:
630void 626void
631rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 627rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
632{ 628{
633 dprintk("RPC: %s: entering, connected is %d\n",
634 __func__, ep->rep_connected);
635
636 cancel_delayed_work_sync(&ep->rep_connect_worker); 629 cancel_delayed_work_sync(&ep->rep_connect_worker);
637 630
638 if (ia->ri_id->qp) { 631 if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
653rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, 646rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
654 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) 647 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
655{ 648{
656 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
657 int rc, err; 649 int rc, err;
658 650
659 pr_info("%s: r_xprt = %p\n", __func__, r_xprt); 651 trace_xprtrdma_reinsert(r_xprt);
660 652
661 rc = -EHOSTUNREACH; 653 rc = -EHOSTUNREACH;
662 if (rpcrdma_ia_open(r_xprt, sap)) 654 if (rpcrdma_ia_open(r_xprt))
663 goto out1; 655 goto out1;
664 656
665 rc = -ENOMEM; 657 rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
676 goto out3; 668 goto out3;
677 } 669 }
678 670
679 rpcrdma_create_mrs(r_xprt); 671 rpcrdma_mrs_create(r_xprt);
680 return 0; 672 return 0;
681 673
682out3: 674out3:
@@ -691,16 +683,15 @@ static int
691rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, 683rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
692 struct rpcrdma_ia *ia) 684 struct rpcrdma_ia *ia)
693{ 685{
694 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
695 struct rdma_cm_id *id, *old; 686 struct rdma_cm_id *id, *old;
696 int err, rc; 687 int err, rc;
697 688
698 dprintk("RPC: %s: reconnecting...\n", __func__); 689 trace_xprtrdma_reconnect(r_xprt);
699 690
700 rpcrdma_ep_disconnect(ep, ia); 691 rpcrdma_ep_disconnect(ep, ia);
701 692
702 rc = -EHOSTUNREACH; 693 rc = -EHOSTUNREACH;
703 id = rpcrdma_create_id(r_xprt, ia, sap); 694 id = rpcrdma_create_id(r_xprt, ia);
704 if (IS_ERR(id)) 695 if (IS_ERR(id))
705 goto out; 696 goto out;
706 697
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
817 int rc; 808 int rc;
818 809
819 rc = rdma_disconnect(ia->ri_id); 810 rc = rdma_disconnect(ia->ri_id);
820 if (!rc) { 811 if (!rc)
821 /* returns without wait if not connected */ 812 /* returns without wait if not connected */
822 wait_event_interruptible(ep->rep_connect_wait, 813 wait_event_interruptible(ep->rep_connect_wait,
823 ep->rep_connected != 1); 814 ep->rep_connected != 1);
824 dprintk("RPC: %s: after wait, %sconnected\n", __func__, 815 else
825 (ep->rep_connected == 1) ? "still " : "dis");
826 } else {
827 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
828 ep->rep_connected = rc; 816 ep->rep_connected = rc;
829 } 817 trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
818 rx_ep), rc);
830 819
831 ib_drain_qp(ia->ri_id->qp); 820 ib_drain_qp(ia->ri_id->qp);
832} 821}
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
998{ 987{
999 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 988 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
1000 rb_recovery_worker.work); 989 rb_recovery_worker.work);
1001 struct rpcrdma_mw *mw; 990 struct rpcrdma_mr *mr;
1002 991
1003 spin_lock(&buf->rb_recovery_lock); 992 spin_lock(&buf->rb_recovery_lock);
1004 while (!list_empty(&buf->rb_stale_mrs)) { 993 while (!list_empty(&buf->rb_stale_mrs)) {
1005 mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); 994 mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
1006 spin_unlock(&buf->rb_recovery_lock); 995 spin_unlock(&buf->rb_recovery_lock);
1007 996
1008 dprintk("RPC: %s: recovering MR %p\n", __func__, mw); 997 trace_xprtrdma_recover_mr(mr);
1009 mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); 998 mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
1010 999
1011 spin_lock(&buf->rb_recovery_lock); 1000 spin_lock(&buf->rb_recovery_lock);
1012 } 1001 }
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
1014} 1003}
1015 1004
1016void 1005void
1017rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) 1006rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
1018{ 1007{
1019 struct rpcrdma_xprt *r_xprt = mw->mw_xprt; 1008 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1020 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1009 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1021 1010
1022 spin_lock(&buf->rb_recovery_lock); 1011 spin_lock(&buf->rb_recovery_lock);
1023 rpcrdma_push_mw(mw, &buf->rb_stale_mrs); 1012 rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
1024 spin_unlock(&buf->rb_recovery_lock); 1013 spin_unlock(&buf->rb_recovery_lock);
1025 1014
1026 schedule_delayed_work(&buf->rb_recovery_worker, 0); 1015 schedule_delayed_work(&buf->rb_recovery_worker, 0);
1027} 1016}
1028 1017
1029static void 1018static void
1030rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) 1019rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
1031{ 1020{
1032 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1021 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1033 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1022 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
1036 LIST_HEAD(all); 1025 LIST_HEAD(all);
1037 1026
1038 for (count = 0; count < 32; count++) { 1027 for (count = 0; count < 32; count++) {
1039 struct rpcrdma_mw *mw; 1028 struct rpcrdma_mr *mr;
1040 int rc; 1029 int rc;
1041 1030
1042 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1031 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1043 if (!mw) 1032 if (!mr)
1044 break; 1033 break;
1045 1034
1046 rc = ia->ri_ops->ro_init_mr(ia, mw); 1035 rc = ia->ri_ops->ro_init_mr(ia, mr);
1047 if (rc) { 1036 if (rc) {
1048 kfree(mw); 1037 kfree(mr);
1049 break; 1038 break;
1050 } 1039 }
1051 1040
1052 mw->mw_xprt = r_xprt; 1041 mr->mr_xprt = r_xprt;
1053 1042
1054 list_add(&mw->mw_list, &free); 1043 list_add(&mr->mr_list, &free);
1055 list_add(&mw->mw_all, &all); 1044 list_add(&mr->mr_all, &all);
1056 } 1045 }
1057 1046
1058 spin_lock(&buf->rb_mwlock); 1047 spin_lock(&buf->rb_mrlock);
1059 list_splice(&free, &buf->rb_mws); 1048 list_splice(&free, &buf->rb_mrs);
1060 list_splice(&all, &buf->rb_all); 1049 list_splice(&all, &buf->rb_all);
1061 r_xprt->rx_stats.mrs_allocated += count; 1050 r_xprt->rx_stats.mrs_allocated += count;
1062 spin_unlock(&buf->rb_mwlock); 1051 spin_unlock(&buf->rb_mrlock);
1063 1052
1064 dprintk("RPC: %s: created %u MRs\n", __func__, count); 1053 trace_xprtrdma_createmrs(r_xprt, count);
1065} 1054}
1066 1055
1067static void 1056static void
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
1072 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1061 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1073 rx_buf); 1062 rx_buf);
1074 1063
1075 rpcrdma_create_mrs(r_xprt); 1064 rpcrdma_mrs_create(r_xprt);
1076} 1065}
1077 1066
1078struct rpcrdma_req * 1067struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1093 return req; 1082 return req;
1094} 1083}
1095 1084
1096struct rpcrdma_rep * 1085/**
1086 * rpcrdma_create_rep - Allocate an rpcrdma_rep object
1087 * @r_xprt: controlling transport
1088 *
1089 * Returns 0 on success or a negative errno on failure.
1090 */
1091int
1097rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 1092rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1098{ 1093{
1099 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1094 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1095 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1100 struct rpcrdma_rep *rep; 1096 struct rpcrdma_rep *rep;
1101 int rc; 1097 int rc;
1102 1098
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1121 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1117 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
1122 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1118 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1123 rep->rr_recv_wr.num_sge = 1; 1119 rep->rr_recv_wr.num_sge = 1;
1124 return rep; 1120
1121 spin_lock(&buf->rb_lock);
1122 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1123 spin_unlock(&buf->rb_lock);
1124 return 0;
1125 1125
1126out_free: 1126out_free:
1127 kfree(rep); 1127 kfree(rep);
1128out: 1128out:
1129 return ERR_PTR(rc); 1129 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1130 __func__, rc);
1131 return rc;
1130} 1132}
1131 1133
1132int 1134int
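rpcrdma_create_rep() also changes constructor conventions: instead of returning the new rep or an ERR_PTR-encoded errno, it now returns 0 or a negative errno and links the rep onto rb_recv_bufs itself, so callers only have to check an integer. The sketch below contrasts the two conventions in plain user-space C; the ERR_PTR/PTR_ERR/IS_ERR helpers are re-created here purely for illustration, and the rep/pool types are invented.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct rep { int id; };

/* Old convention: hand back the object, or an encoded errno. */
static struct rep *create_rep_ptr(int id)
{
        struct rep *rep = malloc(sizeof(*rep));

        if (!rep)
                return ERR_PTR(-ENOMEM);
        rep->id = id;
        return rep;
}

/* New convention: return 0 or -errno; the constructor itself
 * adds the object to the shared pool. */
static struct rep *pool[8];
static int pool_count;

static int create_rep_int(int id)
{
        struct rep *rep = malloc(sizeof(*rep));

        if (!rep)
                return -ENOMEM;
        rep->id = id;
        pool[pool_count++] = rep;
        return 0;
}

int main(void)
{
        struct rep *r = create_rep_ptr(1);

        if (IS_ERR(r))
                return (int)-PTR_ERR(r);
        printf("ptr-style rep %d\n", r->id);
        free(r);

        if (create_rep_int(2) == 0)
                printf("int-style rep %d added to pool\n", pool[0]->id);
        free(pool[0]);
        return 0;
}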
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1137 1139
1138 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1140 buf->rb_max_requests = r_xprt->rx_data.max_requests;
1139 buf->rb_bc_srv_max_requests = 0; 1141 buf->rb_bc_srv_max_requests = 0;
1140 spin_lock_init(&buf->rb_mwlock); 1142 spin_lock_init(&buf->rb_mrlock);
1141 spin_lock_init(&buf->rb_lock); 1143 spin_lock_init(&buf->rb_lock);
1142 spin_lock_init(&buf->rb_recovery_lock); 1144 spin_lock_init(&buf->rb_recovery_lock);
1143 INIT_LIST_HEAD(&buf->rb_mws); 1145 INIT_LIST_HEAD(&buf->rb_mrs);
1144 INIT_LIST_HEAD(&buf->rb_all); 1146 INIT_LIST_HEAD(&buf->rb_all);
1145 INIT_LIST_HEAD(&buf->rb_stale_mrs); 1147 INIT_LIST_HEAD(&buf->rb_stale_mrs);
1146 INIT_DELAYED_WORK(&buf->rb_refresh_worker, 1148 INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1148 INIT_DELAYED_WORK(&buf->rb_recovery_worker, 1150 INIT_DELAYED_WORK(&buf->rb_recovery_worker,
1149 rpcrdma_mr_recovery_worker); 1151 rpcrdma_mr_recovery_worker);
1150 1152
1151 rpcrdma_create_mrs(r_xprt); 1153 rpcrdma_mrs_create(r_xprt);
1152 1154
1153 INIT_LIST_HEAD(&buf->rb_send_bufs); 1155 INIT_LIST_HEAD(&buf->rb_send_bufs);
1154 INIT_LIST_HEAD(&buf->rb_allreqs); 1156 INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1167 } 1169 }
1168 1170
1169 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1171 INIT_LIST_HEAD(&buf->rb_recv_bufs);
1170 for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { 1172 for (i = 0; i <= buf->rb_max_requests; i++) {
1171 struct rpcrdma_rep *rep; 1173 rc = rpcrdma_create_rep(r_xprt);
1172 1174 if (rc)
1173 rep = rpcrdma_create_rep(r_xprt);
1174 if (IS_ERR(rep)) {
1175 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1176 __func__, i);
1177 rc = PTR_ERR(rep);
1178 goto out; 1175 goto out;
1179 }
1180 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1181 } 1176 }
1182 1177
1183 rc = rpcrdma_sendctxs_create(r_xprt); 1178 rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
1229} 1224}
1230 1225
1231static void 1226static void
1232rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) 1227rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
1233{ 1228{
1234 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1229 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
1235 rx_buf); 1230 rx_buf);
1236 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1231 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1237 struct rpcrdma_mw *mw; 1232 struct rpcrdma_mr *mr;
1238 unsigned int count; 1233 unsigned int count;
1239 1234
1240 count = 0; 1235 count = 0;
1241 spin_lock(&buf->rb_mwlock); 1236 spin_lock(&buf->rb_mrlock);
1242 while (!list_empty(&buf->rb_all)) { 1237 while (!list_empty(&buf->rb_all)) {
1243 mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 1238 mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
1244 list_del(&mw->mw_all); 1239 list_del(&mr->mr_all);
1245 1240
1246 spin_unlock(&buf->rb_mwlock); 1241 spin_unlock(&buf->rb_mrlock);
1247 ia->ri_ops->ro_release_mr(mw); 1242 ia->ri_ops->ro_release_mr(mr);
1248 count++; 1243 count++;
1249 spin_lock(&buf->rb_mwlock); 1244 spin_lock(&buf->rb_mrlock);
1250 } 1245 }
1251 spin_unlock(&buf->rb_mwlock); 1246 spin_unlock(&buf->rb_mrlock);
1252 r_xprt->rx_stats.mrs_allocated = 0; 1247 r_xprt->rx_stats.mrs_allocated = 0;
1253 1248
1254 dprintk("RPC: %s: released %u MRs\n", __func__, count); 1249 dprintk("RPC: %s: released %u MRs\n", __func__, count);
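rpcrdma_mrs_destroy() keeps its shape under the new names: it detaches one MR from rb_all while holding rb_mrlock, then drops the lock around ro_release_mr(), which may block, and re-takes it before examining the list again. A small pthread-based sketch of that unlock-around-the-expensive-call loop follows; the singly linked pool, release() and the struct mr fields are all illustrative.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mr {
        struct mr *next;
        int id;
};

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mr *pool;                 /* every MR the transport owns */

static void release(struct mr *mr)
{
        /* Stand-in for a teardown call that must not run under the lock. */
        printf("releasing MR %d\n", mr->id);
        free(mr);
}

static void pool_destroy(void)
{
        unsigned int count = 0;

        pthread_mutex_lock(&pool_lock);
        while (pool) {
                struct mr *mr = pool;

                pool = mr->next;                /* unlink while locked */
                pthread_mutex_unlock(&pool_lock);
                release(mr);                    /* expensive work, unlocked */
                count++;
                pthread_mutex_lock(&pool_lock);
        }
        pthread_mutex_unlock(&pool_lock);
        printf("released %u MRs\n", count);
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct mr *mr = malloc(sizeof(*mr));

                mr->id = i;
                mr->next = pool;
                pool = mr;
        }
        pool_destroy();
        return 0;
}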
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1285 spin_unlock(&buf->rb_reqslock); 1280 spin_unlock(&buf->rb_reqslock);
1286 buf->rb_recv_count = 0; 1281 buf->rb_recv_count = 0;
1287 1282
1288 rpcrdma_destroy_mrs(buf); 1283 rpcrdma_mrs_destroy(buf);
1289} 1284}
1290 1285
1291struct rpcrdma_mw * 1286/**
1292rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) 1287 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
1288 * @r_xprt: controlling transport
1289 *
1290 * Returns an initialized rpcrdma_mr or NULL if no free
1291 * rpcrdma_mr objects are available.
1292 */
1293struct rpcrdma_mr *
1294rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
1293{ 1295{
1294 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1296 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1295 struct rpcrdma_mw *mw = NULL; 1297 struct rpcrdma_mr *mr = NULL;
1296 1298
1297 spin_lock(&buf->rb_mwlock); 1299 spin_lock(&buf->rb_mrlock);
1298 if (!list_empty(&buf->rb_mws)) 1300 if (!list_empty(&buf->rb_mrs))
1299 mw = rpcrdma_pop_mw(&buf->rb_mws); 1301 mr = rpcrdma_mr_pop(&buf->rb_mrs);
1300 spin_unlock(&buf->rb_mwlock); 1302 spin_unlock(&buf->rb_mrlock);
1301 1303
1302 if (!mw) 1304 if (!mr)
1303 goto out_nomws; 1305 goto out_nomrs;
1304 mw->mw_flags = 0; 1306 return mr;
1305 return mw;
1306 1307
1307out_nomws: 1308out_nomrs:
1308 dprintk("RPC: %s: no MWs available\n", __func__); 1309 trace_xprtrdma_nomrs(r_xprt);
1309 if (r_xprt->rx_ep.rep_connected != -ENODEV) 1310 if (r_xprt->rx_ep.rep_connected != -ENODEV)
1310 schedule_delayed_work(&buf->rb_refresh_worker, 0); 1311 schedule_delayed_work(&buf->rb_refresh_worker, 0);
1311 1312
@@ -1315,14 +1316,39 @@ out_nomws:
1315 return NULL; 1316 return NULL;
1316} 1317}
1317 1318
1319static void
1320__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
1321{
1322 spin_lock(&buf->rb_mrlock);
1323 rpcrdma_mr_push(mr, &buf->rb_mrs);
1324 spin_unlock(&buf->rb_mrlock);
1325}
1326
1327/**
1328 * rpcrdma_mr_put - Release an rpcrdma_mr object
1329 * @mr: object to release
1330 *
1331 */
1318void 1332void
1319rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) 1333rpcrdma_mr_put(struct rpcrdma_mr *mr)
1320{ 1334{
1321 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1335 __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
1336}
1337
1338/**
1339 * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
1340 * @mr: object to release
1341 *
1342 */
1343void
1344rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
1345{
1346 struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
1322 1347
1323 spin_lock(&buf->rb_mwlock); 1348 trace_xprtrdma_dma_unmap(mr);
1324 rpcrdma_push_mw(mw, &buf->rb_mws); 1349 ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
1325 spin_unlock(&buf->rb_mwlock); 1350 mr->mr_sg, mr->mr_nents, mr->mr_dir);
1351 __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
1326} 1352}
1327 1353
1328static struct rpcrdma_rep * 1354static struct rpcrdma_rep *
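The MW-to-MR rename also settles the ownership API: rpcrdma_mr_get() takes an MR off rb_mrs under rb_mrlock (and schedules the refresh worker when the list is empty and the device has not been removed), rpcrdma_mr_put() pushes it back, and rpcrdma_mr_unmap_and_put() DMA-unmaps the scatterlist before returning it. A user-space sketch of that get/put pairing follows, with a mutex-protected array standing in for the free list and dma_unmap() standing in for ib_dma_unmap_sg().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct mr {
        int id;
        bool mapped;
};

#define POOL_SIZE 4

static pthread_mutex_t mr_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mr *free_mrs[POOL_SIZE];
static int nfree;

static struct mr *mr_get(void)
{
        struct mr *mr = NULL;

        pthread_mutex_lock(&mr_lock);
        if (nfree > 0)
                mr = free_mrs[--nfree];
        pthread_mutex_unlock(&mr_lock);
        return mr;              /* NULL tells the caller to trigger a refresh */
}

static void mr_put(struct mr *mr)
{
        pthread_mutex_lock(&mr_lock);
        free_mrs[nfree++] = mr;
        pthread_mutex_unlock(&mr_lock);
}

static void dma_unmap(struct mr *mr)
{
        mr->mapped = false;     /* stand-in for ib_dma_unmap_sg() */
}

static void mr_unmap_and_put(struct mr *mr)
{
        dma_unmap(mr);
        mr_put(mr);
}

int main(void)
{
        static struct mr mrs[POOL_SIZE];

        for (int i = 0; i < POOL_SIZE; i++) {
                mrs[i].id = i;
                mr_put(&mrs[i]);
        }

        struct mr *mr = mr_get();

        if (mr) {
                mr->mapped = true;      /* pretend it carried an RDMA segment */
                mr_unmap_and_put(mr);
        }
        return 0;
}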
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1359 req = rpcrdma_buffer_get_req_locked(buffers); 1385 req = rpcrdma_buffer_get_req_locked(buffers);
1360 req->rl_reply = rpcrdma_buffer_get_rep(buffers); 1386 req->rl_reply = rpcrdma_buffer_get_rep(buffers);
1361 spin_unlock(&buffers->rb_lock); 1387 spin_unlock(&buffers->rb_lock);
1388
1362 return req; 1389 return req;
1363 1390
1364out_reqbuf: 1391out_reqbuf:
1365 spin_unlock(&buffers->rb_lock); 1392 spin_unlock(&buffers->rb_lock);
1366 pr_warn("RPC: %s: out of request buffers\n", __func__);
1367 return NULL; 1393 return NULL;
1368} 1394}
1369 1395
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1519 req->rl_reply = NULL; 1545 req->rl_reply = NULL;
1520 } 1546 }
1521 1547
1522 dprintk("RPC: %s: posting %d s/g entries\n",
1523 __func__, send_wr->num_sge);
1524
1525 if (!ep->rep_send_count || 1548 if (!ep->rep_send_count ||
1526 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1549 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1527 send_wr->send_flags |= IB_SEND_SIGNALED; 1550 send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1530 send_wr->send_flags &= ~IB_SEND_SIGNALED; 1553 send_wr->send_flags &= ~IB_SEND_SIGNALED;
1531 --ep->rep_send_count; 1554 --ep->rep_send_count;
1532 } 1555 }
1556
1533 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1557 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1558 trace_xprtrdma_post_send(req, rc);
1534 if (rc) 1559 if (rc)
1535 goto out_postsend_err; 1560 return -ENOTCONN;
1536 return 0; 1561 return 0;
1537
1538out_postsend_err:
1539 pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
1540 return -ENOTCONN;
1541} 1562}
1542 1563
1543int 1564int
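rpcrdma_ep_post() keeps the send-completion throttling: a Send is flagged IB_SEND_SIGNALED when rep_send_count has run out or the request holds TX resources, otherwise the flag is cleared and the counter is decremented; the posting result is now recorded by trace_xprtrdma_post_send() rather than a dprintk/pr_err pair. The counter logic reduces to a few lines, sketched here with an invented SEND_BUDGET and need_signal() helper rather than the real rpcrdma fields.

#include <stdbool.h>
#include <stdio.h>

#define SEND_BUDGET 16          /* hypothetical: signal at least every 16 sends */

static unsigned int send_count;

/* Returns true when this send should ask for a completion. */
static bool need_signal(bool resources_held)
{
        if (send_count == 0 || resources_held) {
                send_count = SEND_BUDGET;       /* replenish the budget */
                return true;
        }
        --send_count;
        return false;
}

int main(void)
{
        for (int i = 0; i < 40; i++)
                printf("send %2d: %s\n", i,
                       need_signal(false) ? "SIGNALED" : "unsignaled");
        return 0;
}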
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1550 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) 1571 if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
1551 goto out_map; 1572 goto out_map;
1552 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); 1573 rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
1574 trace_xprtrdma_post_recv(rep, rc);
1553 if (rc) 1575 if (rc)
1554 goto out_postrecv; 1576 return -ENOTCONN;
1555 return 0; 1577 return 0;
1556 1578
1557out_map: 1579out_map:
1558 pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); 1580 pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
1559 return -EIO; 1581 return -EIO;
1560
1561out_postrecv:
1562 pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
1563 return -ENOTCONN;
1564} 1582}
1565 1583
1566/** 1584/**
1567 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests 1585 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
1568 * @r_xprt: transport associated with these backchannel resources 1586 * @r_xprt: transport associated with these backchannel resources
1569 * @min_reqs: minimum number of incoming requests expected 1587 * @count: minimum number of incoming requests expected
1570 * 1588 *
1571 * Returns zero if all requested buffers were posted, or a negative errno. 1589 * Returns zero if all requested buffers were posted, or a negative errno.
1572 */ 1590 */
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1594 1612
1595out_reqbuf: 1613out_reqbuf:
1596 spin_unlock(&buffers->rb_lock); 1614 spin_unlock(&buffers->rb_lock);
1597 pr_warn("%s: no extra receive buffers\n", __func__); 1615 trace_xprtrdma_noreps(r_xprt);
1598 return -ENOMEM; 1616 return -ENOMEM;
1599 1617
1600out_rc: 1618out_rc:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1342f743f1c4..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
73 struct completion ri_remove_done; 73 struct completion ri_remove_done;
74 int ri_async_rc; 74 int ri_async_rc;
75 unsigned int ri_max_segs; 75 unsigned int ri_max_segs;
76 unsigned int ri_max_frmr_depth; 76 unsigned int ri_max_frwr_depth;
77 unsigned int ri_max_inline_write; 77 unsigned int ri_max_inline_write;
78 unsigned int ri_max_inline_read; 78 unsigned int ri_max_inline_read;
79 unsigned int ri_max_send_sges; 79 unsigned int ri_max_send_sges;
80 bool ri_reminv_expected;
81 bool ri_implicit_roundup; 80 bool ri_implicit_roundup;
82 enum ib_mr_type ri_mrtype; 81 enum ib_mr_type ri_mrtype;
83 unsigned long ri_flags; 82 unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
101 wait_queue_head_t rep_connect_wait; 100 wait_queue_head_t rep_connect_wait;
102 struct rpcrdma_connect_private rep_cm_private; 101 struct rpcrdma_connect_private rep_cm_private;
103 struct rdma_conn_param rep_remote_cma; 102 struct rdma_conn_param rep_remote_cma;
104 struct sockaddr_storage rep_remote_addr;
105 struct delayed_work rep_connect_worker; 103 struct delayed_work rep_connect_worker;
106}; 104};
107 105
@@ -232,29 +230,29 @@ enum {
232}; 230};
233 231
234/* 232/*
235 * struct rpcrdma_mw - external memory region metadata 233 * struct rpcrdma_mr - external memory region metadata
236 * 234 *
237 * An external memory region is any buffer or page that is registered 235 * An external memory region is any buffer or page that is registered
238 * on the fly (ie, not pre-registered). 236 * on the fly (ie, not pre-registered).
239 * 237 *
240 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During 238 * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
241 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in 239 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
242 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep 240 * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
243 * track of registration metadata while each RPC is pending. 241 * track of registration metadata while each RPC is pending.
244 * rpcrdma_deregister_external() uses this metadata to unmap and 242 * rpcrdma_deregister_external() uses this metadata to unmap and
245 * release these resources when an RPC is complete. 243 * release these resources when an RPC is complete.
246 */ 244 */
247enum rpcrdma_frmr_state { 245enum rpcrdma_frwr_state {
248 FRMR_IS_INVALID, /* ready to be used */ 246 FRWR_IS_INVALID, /* ready to be used */
249 FRMR_IS_VALID, /* in use */ 247 FRWR_IS_VALID, /* in use */
250 FRMR_FLUSHED_FR, /* flushed FASTREG WR */ 248 FRWR_FLUSHED_FR, /* flushed FASTREG WR */
251 FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ 249 FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
252}; 250};
253 251
254struct rpcrdma_frmr { 252struct rpcrdma_frwr {
255 struct ib_mr *fr_mr; 253 struct ib_mr *fr_mr;
256 struct ib_cqe fr_cqe; 254 struct ib_cqe fr_cqe;
257 enum rpcrdma_frmr_state fr_state; 255 enum rpcrdma_frwr_state fr_state;
258 struct completion fr_linv_done; 256 struct completion fr_linv_done;
259 union { 257 union {
260 struct ib_reg_wr fr_regwr; 258 struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
267 u64 *fm_physaddrs; 265 u64 *fm_physaddrs;
268}; 266};
269 267
270struct rpcrdma_mw { 268struct rpcrdma_mr {
271 struct list_head mw_list; 269 struct list_head mr_list;
272 struct scatterlist *mw_sg; 270 struct scatterlist *mr_sg;
273 int mw_nents; 271 int mr_nents;
274 enum dma_data_direction mw_dir; 272 enum dma_data_direction mr_dir;
275 unsigned long mw_flags;
276 union { 273 union {
277 struct rpcrdma_fmr fmr; 274 struct rpcrdma_fmr fmr;
278 struct rpcrdma_frmr frmr; 275 struct rpcrdma_frwr frwr;
279 }; 276 };
280 struct rpcrdma_xprt *mw_xprt; 277 struct rpcrdma_xprt *mr_xprt;
281 u32 mw_handle; 278 u32 mr_handle;
282 u32 mw_length; 279 u32 mr_length;
283 u64 mw_offset; 280 u64 mr_offset;
284 struct list_head mw_all; 281 struct list_head mr_all;
285};
286
287/* mw_flags */
288enum {
289 RPCRDMA_MW_F_RI = 1,
290}; 282};
291 283
292/* 284/*
@@ -362,8 +354,7 @@ struct rpcrdma_req {
362 354
363/* rl_flags */ 355/* rl_flags */
364enum { 356enum {
365 RPCRDMA_REQ_F_BACKCHANNEL = 0, 357 RPCRDMA_REQ_F_PENDING = 0,
366 RPCRDMA_REQ_F_PENDING,
367 RPCRDMA_REQ_F_TX_RESOURCES, 358 RPCRDMA_REQ_F_TX_RESOURCES,
368}; 359};
369 360
@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
374} 365}
375 366
376static inline struct rpcrdma_req * 367static inline struct rpcrdma_req *
377rpcr_to_rdmar(struct rpc_rqst *rqst) 368rpcr_to_rdmar(const struct rpc_rqst *rqst)
378{ 369{
379 return rqst->rq_xprtdata; 370 return rqst->rq_xprtdata;
380} 371}
381 372
382static inline void 373static inline void
383rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) 374rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
384{ 375{
385 list_add_tail(&mw->mw_list, list); 376 list_add_tail(&mr->mr_list, list);
386} 377}
387 378
388static inline struct rpcrdma_mw * 379static inline struct rpcrdma_mr *
389rpcrdma_pop_mw(struct list_head *list) 380rpcrdma_mr_pop(struct list_head *list)
390{ 381{
391 struct rpcrdma_mw *mw; 382 struct rpcrdma_mr *mr;
392 383
393 mw = list_first_entry(list, struct rpcrdma_mw, mw_list); 384 mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
394 list_del(&mw->mw_list); 385 list_del(&mr->mr_list);
395 return mw; 386 return mr;
396} 387}
397 388
398/* 389/*
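rpcrdma_mr_push() and rpcrdma_mr_pop() are the renamed intrusive-list helpers behind the MR free list: the linkage (mr_list) lives inside struct rpcrdma_mr, so a push is list_add_tail() and a pop is list_first_entry() followed by list_del(). The stand-alone sketch below re-creates just enough of <linux/list.h> to show the mechanics; the cut-down struct mr and the helper names are illustrative.

#include <stddef.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
}

static int list_empty(const struct list_head *head)
{
        return head->next == head;
}

struct mr {
        struct list_head mr_list;       /* linkage embedded in the MR */
        int id;
};

static void mr_push(struct mr *mr, struct list_head *list)
{
        list_add_tail(&mr->mr_list, list);
}

static struct mr *mr_pop(struct list_head *list)
{
        struct mr *mr = container_of(list->next, struct mr, mr_list);

        list_del(&mr->mr_list);
        return mr;
}

int main(void)
{
        struct list_head free_mrs = LIST_HEAD_INIT(free_mrs);
        struct mr mrs[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

        for (int i = 0; i < 3; i++)
                mr_push(&mrs[i], &free_mrs);

        while (!list_empty(&free_mrs))
                printf("popped MR %d\n", mr_pop(&free_mrs)->id);
        return 0;
}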
@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
402 * One of these is associated with a transport instance 393 * One of these is associated with a transport instance
403 */ 394 */
404struct rpcrdma_buffer { 395struct rpcrdma_buffer {
405 spinlock_t rb_mwlock; /* protect rb_mws list */ 396 spinlock_t rb_mrlock; /* protect rb_mrs list */
406 struct list_head rb_mws; 397 struct list_head rb_mrs;
407 struct list_head rb_all; 398 struct list_head rb_all;
408 399
409 unsigned long rb_sc_head; 400 unsigned long rb_sc_head;
@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
438 * This data should be set with mount options 429 * This data should be set with mount options
439 */ 430 */
440struct rpcrdma_create_data_internal { 431struct rpcrdma_create_data_internal {
441 struct sockaddr_storage addr; /* RDMA server address */
442 unsigned int max_requests; /* max requests (slots) in flight */ 432 unsigned int max_requests; /* max requests (slots) in flight */
443 unsigned int rsize; /* mount rsize - max read hdr+data */ 433 unsigned int rsize; /* mount rsize - max read hdr+data */
444 unsigned int wsize; /* mount wsize - max write hdr+data */ 434 unsigned int wsize; /* mount wsize - max write hdr+data */
445 unsigned int inline_rsize; /* max non-rdma read data payload */ 435 unsigned int inline_rsize; /* max non-rdma read data payload */
446 unsigned int inline_wsize; /* max non-rdma write data payload */ 436 unsigned int inline_wsize; /* max non-rdma write data payload */
447 unsigned int padding; /* non-rdma write header padding */
448}; 437};
449 438
450/* 439/*
@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
484 struct rpcrdma_mr_seg * 473 struct rpcrdma_mr_seg *
485 (*ro_map)(struct rpcrdma_xprt *, 474 (*ro_map)(struct rpcrdma_xprt *,
486 struct rpcrdma_mr_seg *, int, bool, 475 struct rpcrdma_mr_seg *, int, bool,
487 struct rpcrdma_mw **); 476 struct rpcrdma_mr **);
477 void (*ro_reminv)(struct rpcrdma_rep *rep,
478 struct list_head *mrs);
488 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 479 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
489 struct list_head *); 480 struct list_head *);
490 void (*ro_recover_mr)(struct rpcrdma_mw *); 481 void (*ro_recover_mr)(struct rpcrdma_mr *mr);
491 int (*ro_open)(struct rpcrdma_ia *, 482 int (*ro_open)(struct rpcrdma_ia *,
492 struct rpcrdma_ep *, 483 struct rpcrdma_ep *,
493 struct rpcrdma_create_data_internal *); 484 struct rpcrdma_create_data_internal *);
494 size_t (*ro_maxpages)(struct rpcrdma_xprt *); 485 size_t (*ro_maxpages)(struct rpcrdma_xprt *);
495 int (*ro_init_mr)(struct rpcrdma_ia *, 486 int (*ro_init_mr)(struct rpcrdma_ia *,
496 struct rpcrdma_mw *); 487 struct rpcrdma_mr *);
497 void (*ro_release_mr)(struct rpcrdma_mw *); 488 void (*ro_release_mr)(struct rpcrdma_mr *mr);
498 const char *ro_displayname; 489 const char *ro_displayname;
499 const int ro_send_w_inv_ok; 490 const int ro_send_w_inv_ok;
500}; 491};
@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
525#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) 516#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
526#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 517#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
527 518
519static inline const char *
520rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
521{
522 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
523}
524
525static inline const char *
526rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
527{
528 return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
529}
530
528/* Setting this to 0 ensures interoperability with early servers. 531/* Setting this to 0 ensures interoperability with early servers.
529 * Setting this to 1 enhances certain unaligned read/write performance. 532 * Setting this to 1 enhances certain unaligned read/write performance.
530 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ 533 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
538/* 541/*
539 * Interface Adapter calls - xprtrdma/verbs.c 542 * Interface Adapter calls - xprtrdma/verbs.c
540 */ 543 */
541int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); 544int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
542void rpcrdma_ia_remove(struct rpcrdma_ia *ia); 545void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
543void rpcrdma_ia_close(struct rpcrdma_ia *); 546void rpcrdma_ia_close(struct rpcrdma_ia *);
544bool frwr_is_supported(struct rpcrdma_ia *); 547bool frwr_is_supported(struct rpcrdma_ia *);
@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
564 * Buffer calls - xprtrdma/verbs.c 567 * Buffer calls - xprtrdma/verbs.c
565 */ 568 */
566struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 569struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
567struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
568void rpcrdma_destroy_req(struct rpcrdma_req *); 570void rpcrdma_destroy_req(struct rpcrdma_req *);
571int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
569int rpcrdma_buffer_create(struct rpcrdma_xprt *); 572int rpcrdma_buffer_create(struct rpcrdma_xprt *);
570void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 573void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
571struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 574struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
572void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 575void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
573 576
574struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); 577struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
575void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); 578void rpcrdma_mr_put(struct rpcrdma_mr *mr);
579void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
580void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
581
576struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 582struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
577void rpcrdma_buffer_put(struct rpcrdma_req *); 583void rpcrdma_buffer_put(struct rpcrdma_req *);
578void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 584void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
579void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 585void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
580 586
581void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
582
583struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, 587struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
584 gfp_t); 588 gfp_t);
585bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); 589bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
663size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); 667size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
664int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); 668int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
665void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); 669void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
666int rpcrdma_bc_marshal_reply(struct rpc_rqst *); 670int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
667void xprt_rdma_bc_free_rqst(struct rpc_rqst *); 671void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
668void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); 672void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
669#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 673#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
671extern struct xprt_class xprt_rdma_bc; 675extern struct xprt_class xprt_rdma_bc;
672 676
673#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 677#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
678
679#include <trace/events/rpcrdma.h>