author		Chuck Lever <chuck.lever@oracle.com>	2018-05-07 15:27:48 -0400
committer	J. Bruce Fields <bfields@redhat.com>	2018-05-11 15:48:57 -0400
commit		eb5d7a622e0bbe3fd316b2325d3840a0e030a3c4 (patch)
tree		1f90beaea9186a0f9d393b943be13ec12f355567
parent		3316f0631139c87631f2652c118da1a0354bd40d (diff)
svcrdma: Allocate recv_ctxt's on CPU handling Receives
There is a significant latency penalty when processing an ingress
Receive if the Receive buffer resides in memory that is not on the
same NUMA node as the CPU handling completions for a CQ.

The system administrator and the device driver determine which CPU
handles completions. This CPU does not change during the life of the
CQ. Further, the Upper Layer has no visibility of which CPU it is.

Allocating Receive buffers in the Receive completion handler
guarantees that Receive buffers are allocated on the preferred NUMA
node for that CQ.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
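The mechanism being exploited is the allocator's default node-local
policy: memory allocated by a given CPU is taken from that CPU's NUMA
node. Below is a minimal sketch of the pattern, assuming a
workqueue-polled CQ (so a sleeping allocation is safe); the
example_-prefixed names are hypothetical stand-ins for the real
svc_rdma_recv_ctxt_get() / __svc_rdma_post_recv() pair changed below.

/* Sketch only, not part of this patch: replenish the Receive Queue
 * from inside the Receive completion handler.
 */
static void example_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	void *buffer;

	if (wc->status != IB_WC_SUCCESS)
		return;

	/* This handler always runs on the CPU assigned to service
	 * this CQ, and that assignment does not change for the life
	 * of the CQ. A plain kmalloc() here is therefore satisfied
	 * from that CPU's local NUMA node, the same node the device
	 * will DMA the next ingress Receive into.
	 */
	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
	if (buffer)
		example_post_recv(rdma, buffer); /* hypothetical helper */
}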
-rw-r--r--	include/linux/sunrpc/svc_rdma.h	1
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	52
2 files changed, 37 insertions, 16 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 01baabfb863b..27cf59c7085f 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -151,6 +151,7 @@ struct svc_rdma_recv_ctxt {
 	struct ib_sge		rc_recv_sge;
 	void			*rc_recv_buf;
 	struct xdr_buf		rc_arg;
+	bool			rc_temp;
 	u32			rc_byte_len;
 	unsigned int		rc_page_count;
 	unsigned int		rc_hdr_count;
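The new rc_temp flag marks the Receive contexts allocated at
transport set-up time, before any completion has run on the CQ's CPU.
As the recvfrom.c hunks below show, svc_rdma_recv_ctxt_put() frees a
flagged context after its first use instead of recycling it, so every
steady-state context ends up allocated CQ-locally. Assembled from
those hunks (comments added, no new logic), the resulting put path is:

void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_recv_ctxt *ctxt)
{
	unsigned int i;

	for (i = 0; i < ctxt->rc_page_count; i++)
		put_page(ctxt->rc_pages[i]);

	if (!ctxt->rc_temp) {
		/* Steady-state context: recycle it via the free list. */
		spin_lock(&rdma->sc_recv_lock);
		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
		spin_unlock(&rdma->sc_recv_lock);
	} else {
		/* Set-up time context: destroy it; its replacement
		 * will be allocated on the CQ's NUMA node.
		 */
		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
	}
}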
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index d4ccd1c0142c..0445e75d76a2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
+	ctxt->rc_temp = false;
 	return ctxt;
 
 fail2:
@@ -154,6 +155,15 @@ fail0:
 	return NULL;
 }
 
+static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
+				       struct svc_rdma_recv_ctxt *ctxt)
+{
+	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
+			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
+	kfree(ctxt->rc_recv_buf);
+	kfree(ctxt);
+}
+
 /**
  * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
  * @rdma: svcxprt_rdma being torn down
@@ -165,12 +175,7 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 
 	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
 		list_del(&ctxt->rc_list);
-		ib_dma_unmap_single(rdma->sc_pd->device,
-				    ctxt->rc_recv_sge.addr,
-				    ctxt->rc_recv_sge.length,
-				    DMA_FROM_DEVICE);
-		kfree(ctxt->rc_recv_buf);
-		kfree(ctxt);
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 	}
 }
 
@@ -212,21 +217,21 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		put_page(ctxt->rc_pages[i]);
-	spin_lock(&rdma->sc_recv_lock);
-	list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-	spin_unlock(&rdma->sc_recv_lock);
+
+	if (!ctxt->rc_temp) {
+		spin_lock(&rdma->sc_recv_lock);
+		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
+		spin_unlock(&rdma->sc_recv_lock);
+	} else
+		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+				struct svc_rdma_recv_ctxt *ctxt)
 {
-	struct svc_rdma_recv_ctxt *ctxt;
 	struct ib_recv_wr *bad_recv_wr;
 	int ret;
 
-	ctxt = svc_rdma_recv_ctxt_get(rdma);
-	if (!ctxt)
-		return -ENOMEM;
-
 	svc_xprt_get(&rdma->sc_xprt);
 	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
 	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
@@ -240,6 +245,16 @@ err_post:
 	return ret;
 }
 
+static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_recv_ctxt *ctxt;
+
+	ctxt = svc_rdma_recv_ctxt_get(rdma);
+	if (!ctxt)
+		return -ENOMEM;
+	return __svc_rdma_post_recv(rdma, ctxt);
+}
+
 /**
  * svc_rdma_post_recvs - Post initial set of Recv WRs
  * @rdma: fresh svcxprt_rdma
@@ -248,11 +263,16 @@ err_post:
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
+	struct svc_rdma_recv_ctxt *ctxt;
 	unsigned int i;
 	int ret;
 
 	for (i = 0; i < rdma->sc_max_requests; i++) {
-		ret = svc_rdma_post_recv(rdma);
+		ctxt = svc_rdma_recv_ctxt_get(rdma);
+		if (!ctxt)
+			return -ENOMEM;
+		ctxt->rc_temp = true;
+		ret = __svc_rdma_post_recv(rdma, ctxt);
 		if (ret) {
 			pr_err("svcrdma: failure posting recv buffers: %d\n",
 			       ret);