aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chuck Lever <chuck.lever@oracle.com> 2018-02-28 15:30:27 -0500
committer: Anna Schumaker <Anna.Schumaker@Netapp.com> 2018-04-10 16:06:22 -0400
commit: 6720a89933739cb8dec748cd253f7c8df2c0ae4d (patch)
tree: 87c4122ed89edbbdd64de3d7018cff264a4d64ab
parent: 661e50bc853209e41a5c14a290ca4decc43cbfd1 (diff)
xprtrdma: Fix latency regression on NUMA NFS/RDMA clients
With v4.15, on one of my NFS/RDMA clients I measured a near doubling of the latency of small read and write system calls. There was no change in server round trip time. The extra latency appears in the whole RPC execution path. "git bisect" settled on commit ccede7598588 ("xprtrdma: Spread reply processing over more CPUs"). After some experimentation, I found that leaving the WQ bound and allowing the scheduler to pick the dispatch CPU seems to eliminate the long latencies, and it does not introduce any new regressions. The fix is implemented by reverting only the part of commit ccede7598588 ("xprtrdma: Spread reply processing over more CPUs") that dispatches RPC replies specifically on the CPU where the matching RPC call was made. Interestingly, saving the CPU number and later queuing reply processing there was effective _only_ for NFS READ and WRITE requests. On my NUMA client, in-kernel RPC reply processing for asynchronous RPCs was dispatched on the same CPU where the RPC call was made, as expected. However, synchronous RPCs seem to get their reply dispatched on a CPU other than the one where the call was placed, every time. Fixes: ccede7598588 ("xprtrdma: Spread reply processing over ... ") Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 2
-rw-r--r-- net/sunrpc/xprtrdma/transport.c 2
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 1
3 files changed, 1 insertion, 4 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f0855a959a27..4bc0f4d94a01 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1366,7 +1366,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1366 1366
1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
1368 1368
1369 queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); 1369 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1370 return; 1370 return;
1371 1371
1372out_badstatus: 1372out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4b1ecfe979cf..f86021e3b853 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,7 +52,6 @@
52#include <linux/slab.h> 52#include <linux/slab.h>
53#include <linux/seq_file.h> 53#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 54#include <linux/sunrpc/addr.h>
55#include <linux/smp.h>
56 55
57#include "xprt_rdma.h" 56#include "xprt_rdma.h"
58 57
@@ -651,7 +650,6 @@ xprt_rdma_allocate(struct rpc_task *task)
651 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 650 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
652 goto out_fail; 651 goto out_fail;
653 652
654 req->rl_cpu = smp_processor_id();
655 req->rl_connect_cookie = 0; /* our reserved value */ 653 req->rl_connect_cookie = 0; /* our reserved value */
656 rpcrdma_set_xprtdata(rqst, req); 654 rpcrdma_set_xprtdata(rqst, req);
657 rqst->rq_buffer = req->rl_sendbuf->rg_base; 655 rqst->rq_buffer = req->rl_sendbuf->rg_base;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 69883a960a3f..430a6de8300e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,7 +334,6 @@ enum {
334struct rpcrdma_buffer; 334struct rpcrdma_buffer;
335struct rpcrdma_req { 335struct rpcrdma_req {
336 struct list_head rl_list; 336 struct list_head rl_list;
337 int rl_cpu;
338 unsigned int rl_connect_cookie; 337 unsigned int rl_connect_cookie;
339 struct rpcrdma_buffer *rl_buffer; 338 struct rpcrdma_buffer *rl_buffer;
340 struct rpcrdma_rep *rl_reply; 339 struct rpcrdma_rep *rl_reply;