author		Trond Myklebust <trond.myklebust@primarydata.com>	2014-11-26 17:37:13 -0500
committer	Trond Myklebust <trond.myklebust@primarydata.com>	2014-11-26 17:37:13 -0500
commit		ea5264138d240825a7e3f11d5945d568b74efb91 (patch)
tree		602ceb3d2ed3ff51ef2ea6f7cf60b5a38a79db8a /net/sunrpc
parent		1702562db4bf6d43f60c2263ac7bec1f58d49c33 (diff)
parent		7ff11de1bae02a41cac6503f858218ac1b9a3cbe (diff)
Merge tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma into linux-next
Pull NFS client RDMA changes for 3.19 from Anna Schumaker:
"NFS: Client side changes for RDMA
These patches provide various bugfixes and cleanups for using NFS over RDMA, including
better error handling and performance improvements by using pad optimization.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>"
* tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
xprtrdma: Display async errors
xprtrdma: Enable pad optimization
xprtrdma: Re-write rpcrdma_flush_cqs()
xprtrdma: Refactor tasklet scheduling
xprtrdma: unmap all FMRs during transport disconnect
xprtrdma: Cap req_cqinit
xprtrdma: Return an errno from rpcrdma_register_external()
Diffstat (limited to 'net/sunrpc')
-rw-r--r--	net/sunrpc/xprtrdma/transport.c	4
-rw-r--r--	net/sunrpc/xprtrdma/verbs.c	114
-rw-r--r--	net/sunrpc/xprtrdma/xprt_rdma.h	6
3 files changed, 107 insertions, 17 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ef58ebadb3ae..bbd6155d3e34 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 0;
+int xprt_rdma_pad_optimize = 1;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	if (req->rl_niovs == 0)
 		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
 		rc = rpcrdma_marshal_chunks(rqst, 0);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b92b04083e40..c98e40643910 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -62,6 +62,7 @@
 #endif
 
 static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
 
 /*
  * internal functions
@@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data)
 
 static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
 
+static const char * const async_event[] = {
+	"CQ error",
+	"QP fatal error",
+	"QP request error",
+	"QP access error",
+	"communication established",
+	"send queue drained",
+	"path migration successful",
+	"path mig error",
+	"device fatal error",
+	"port active",
+	"port error",
+	"LID change",
+	"P_key change",
+	"SM change",
+	"SRQ error",
+	"SRQ limit reached",
+	"last WQE reached",
+	"client reregister",
+	"GID change",
+};
+
+#define ASYNC_MSG(status)				\
+	((status) < ARRAY_SIZE(async_event) ?		\
+		async_event[(status)] : "unknown async error")
+
+static void
+rpcrdma_schedule_tasklet(struct list_head *sched_list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC: %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
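
Both upcalls above now report errors through the shared async_event[] table: ASYNC_MSG() bounds-checks the event code and falls back to "unknown async error" rather than indexing past the end of the array. Below is a minimal userspace sketch of the same lookup pattern; the table is abbreviated and main() exists only for illustration, so it is a sketch of the idiom, not the kernel code itself.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Abbreviated stand-in for the kernel's async_event[] table. */
static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
};

/* Same guard as the kernel macro: out-of-range codes never index
 * past the end of the table. */
#define ASYNC_MSG(status) \
	((unsigned int)(status) < ARRAY_SIZE(async_event) ? \
		async_event[(status)] : "unknown async error")

int main(void)
{
	printf("%s\n", ASYNC_MSG(1));	/* prints "QP fatal error" */
	printf("%s\n", ASYNC_MSG(99));	/* prints "unknown async error" */
	return 0;
}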
@@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	struct list_head sched_list;
 	struct ib_wc *wcs;
 	int budget, count, rc;
-	unsigned long flags;
 
 	INIT_LIST_HEAD(&sched_list);
 	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
@@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	rc = 0;
 
 out_schedule:
-	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
-	list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
-	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
-	tasklet_schedule(&rpcrdma_tasklet_g);
+	rpcrdma_schedule_tasklet(&sched_list);
 	return rc;
 }
 
@@ -309,8 +345,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
-	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
-	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+	struct ib_wc wc;
+	LIST_HEAD(sched_list);
+
+	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
+		rpcrdma_recvcq_process_wc(&wc, &sched_list);
+	if (!list_empty(&sched_list))
+		rpcrdma_schedule_tasklet(&sched_list);
+	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
+		rpcrdma_sendcq_process_wc(&wc);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
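
The rewritten rpcrdma_flush_cqs() no longer fakes completion upcalls; it polls each CQ dry, one work completion at a time, and hands any receive work to the tasklet as a single batch. A standalone sketch of that drain idiom follows, with a stub standing in for ib_poll_cq(); all names in the sketch are hypothetical.

#include <stdio.h>

struct fake_wc { int id; };

/* Stub with ib_poll_cq()-like semantics: returns the number of
 * completions copied out (at most one here), 0 once the CQ is empty. */
static int poll_one(int *pending, struct fake_wc *wc)
{
	if (*pending <= 0)
		return 0;
	wc->id = (*pending)--;
	return 1;
}

int main(void)
{
	struct fake_wc wc;
	int pending = 3;

	/* Mirrors: while (ib_poll_cq(cq, 1, &wc) > 0) process(&wc); */
	while (poll_one(&pending, &wc) > 0)
		printf("flushed wc %d\n", wc.id);
	return 0;
}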
@@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
+	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
+		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
+	else if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
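
With the cap in place, rep_cqinit is at most RPCRDMA_MAX_UNSIGNALED_SENDS (32), so on a large send queue a completion is requested at least every 32 SENDs instead of every max_send_wr/2 - 1. The userspace sketch below just checks the clamping arithmetic; the max_send_wr values are examples, not values taken from any real provider.

#include <stdio.h>

#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)

static int cqinit(int max_send_wr)
{
	int v = max_send_wr / 2 - 1;

	if (v > RPCRDMA_MAX_UNSIGNALED_SENDS)
		v = RPCRDMA_MAX_UNSIGNALED_SENDS; /* new cap */
	else if (v <= 2)
		v = 0;			/* tiny queue: signal every SEND */
	return v;
}

int main(void)
{
	int wrs[] = { 4, 8, 64, 1024 };
	unsigned int i;

	/* 4 -> 0, 8 -> 3, 64 -> 31, 1024 -> 32 (was 511 before the cap) */
	for (i = 0; i < sizeof(wrs) / sizeof(wrs[0]); i++)
		printf("max_send_wr = %4d -> rep_cqinit = %d\n",
		       wrs[i], cqinit(wrs[i]));
	return 0;
}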
@@ -866,8 +911,19 @@ retry:
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
 			rpcrdma_reset_frmrs(ia);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rpcrdma_reset_fmrs(ia);
+			break;
+		case RPCRDMA_ALLPHYSICAL:
+			break;
+		default:
+			rc = -EIO;
+			goto out;
+		}
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
@@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(l);
+	int rc;
+
+	list_for_each(pos, &buf->rb_all) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+		INIT_LIST_HEAD(&l);
+		list_add(&r->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		if (rc)
+			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
+				__func__, rc);
+	}
+}
+
 /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
  * an unusable state. Find FRMRs in this state and dereg / reg
  * each. FRMRs that are VALID and attached to an rpcrdma_req are
@@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 
 	default:
-		return -1;
+		return -EIO;
 	}
 	if (rc)
-		return -1;
+		return rc;
 
 	return nsegs;
 }
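
Returning -EIO, or the rc from the lower layer, instead of a bare -1 follows the usual kernel convention of negative errno return values, so callers can report why registration failed rather than only that it failed. A toy userspace illustration of that convention; the function and values are made up for the sketch.

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for rpcrdma_register_external(): returns a
 * segment count on success, a negative errno on failure. */
static int register_stub(int fail_errno)
{
	if (fail_errno)
		return -fail_errno;	/* e.g. -EIO, not -1 */
	return 4;			/* success: segments registered */
}

int main(void)
{
	int rc = register_stub(EIO);

	if (rc < 0)
		printf("registration failed: %s\n", strerror(-rc));
	else
		printf("registered %d segments\n", rc);
	return 0;
}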
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7fc9a31342..b799041b75bf 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -97,6 +97,12 @@ struct rpcrdma_ep {
 	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
+/*
+ * Force a signaled SEND Work Request every so often,
+ * in case the provider needs to do some housekeeping.
+ */
+#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)
+
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
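
Elsewhere in verbs.c (not part of this diff), INIT_CQCOUNT() and DECR_CQCOUNT() drive the unsignaled-send accounting: each posted SEND decrements the counter, and roughly speaking, when it reaches zero the SEND is posted signaled and the counter resets to rep_cqinit. A simplified, non-atomic userspace sketch of that accounting:

#include <stdio.h>

#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)

static int cqcount = RPCRDMA_MAX_UNSIGNALED_SENDS;

/* Returns 1 when a SEND should be posted signaled, 0 otherwise.
 * --cqcount mimics DECR_CQCOUNT(); the reset mimics INIT_CQCOUNT(). */
static int post_send(void)
{
	if (--cqcount > 0)
		return 0;			/* unsignaled SEND */
	cqcount = RPCRDMA_MAX_UNSIGNALED_SENDS;	/* reset the budget */
	return 1;				/* signaled SEND */
}

int main(void)
{
	int i, signaled = 0;

	for (i = 0; i < 100; i++)
		signaled += post_send();
	printf("signaled %d of 100 SENDs\n", signaled);	/* 3 of 100 */
	return 0;
}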