path: root/net/sunrpc
author		Chuck Lever <chuck.lever@oracle.com>	2008-06-06 13:22:25 -0400
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2008-07-09 12:09:15 -0400
commit		b22602a673b1743bba4b62bb404ffd3b269d2f09 (patch)
tree		70bb9bd6a6dd3339dbd3729ff70cbeab2a050ad5 /net/sunrpc
parent		659bfcd6dd88919a5ad453f62afbeffcb3106847 (diff)
SUNRPC: Ensure all transports set rq_xtime consistently
The RPC client uses the rq_xtime field in each RPC request to determine the round-trip time of the request. Currently, the rq_xtime field is initialized by each transport just before it starts enqueuing a request to be sent. However, transports do not handle initializing this value consistently; sometimes they don't initialize it at all.

To make the measurement of request round-trip time consistent for all RPC client transports, pull rq_xtime initialization into the RPC client's generic transport logic. Now all transports get a standardized RTT measure automatically, from:

  xprt_transmit() to xprt_complete_rqst()

This makes round-trip time calculation more accurate for the TCP transport. The socket ->sendmsg() method can return -EAGAIN if the socket's output buffer is full, so the TCP transport's ->send_request() method may call ->sendmsg() repeatedly until all of the request's bytes are queued in the socket's buffer.

Currently, the TCP transport sets the rq_xtime field every time through that loop, so the final value is the timestamp taken just before the *last* call to the underlying socket's ->sendmsg() method. After this patch, the rq_xtime field contains a timestamp taken just before the *first* call to ->sendmsg().

This is consequential under heavy workloads, because large requests often take multiple ->sendmsg() calls to get all of a request's bytes queued. The TCP transport causes the request to sleep until the remote end of the socket has received enough bytes to clear space in the socket's local output buffer, and this delay can be significant. The new approach is a more accurate measure of RTT for stream transports, since the server can generate enough back pressure to delay (i.e., increase the latency of) requests from the client.

Additionally, this patch corrects the behavior of the RDMA transport, which never initialized the rq_xtime field at all. RPC performance metrics for RDMA transports now display correct RPC request round-trip times.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Tom Talpey <thomas.talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
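For illustration only, the following is a minimal userspace C sketch of the timing pattern this patch standardizes: stamp the request once in the generic transmit path, before the first send attempt, and compute the round-trip time when the reply completes. The names here (rpc_rqst_demo, demo_transmit, demo_complete) are hypothetical stand-ins, not the kernel's actual structures or functions.

/*
 * Simplified sketch of the RTT measurement pattern: one timestamp,
 * taken before the first send attempt, consumed at completion.
 * Illustrative names only; this is not the kernel code.
 */
#include <stdio.h>
#include <time.h>

struct rpc_rqst_demo {
	struct timespec rq_xtime;	/* set once, just before the first send */
	double rq_rtt_ms;		/* filled in on completion */
};

/* Generic transmit path: the single place the timestamp is taken. */
static void demo_transmit(struct rpc_rqst_demo *req)
{
	clock_gettime(CLOCK_MONOTONIC, &req->rq_xtime);
	/* ...a transport-specific send_request() would run here, possibly
	 * looping over partial sends without touching rq_xtime again... */
}

/* Completion path: RTT is the elapsed time since the pre-send timestamp. */
static void demo_complete(struct rpc_rqst_demo *req)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	req->rq_rtt_ms = (now.tv_sec - req->rq_xtime.tv_sec) * 1e3 +
			 (now.tv_nsec - req->rq_xtime.tv_nsec) / 1e6;
}

int main(void)
{
	struct rpc_rqst_demo req;

	demo_transmit(&req);
	/* pretend the server took a moment to reply */
	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 5000000 }, NULL);
	demo_complete(&req);
	printf("measured RTT: %.3f ms\n", req.rq_rtt_ms);
	return 0;
}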
Diffstat (limited to 'net/sunrpc')
-rw-r--r--	net/sunrpc/xprt.c	1
-rw-r--r--	net/sunrpc/xprtsock.c	2
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 67996bd7fbf9..99a52aabe332 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -872,6 +872,7 @@ void xprt_transmit(struct rpc_task *task)
 		return;
 
 	req->rq_connect_cookie = xprt->connect_cookie;
+	req->rq_xtime = jiffies;
 	status = xprt->ops->send_request(task);
 	if (status == 0) {
 		dprintk("RPC: %5u xmit complete\n", task->tk_pid);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ddbe981ab516..4486c59c3aca 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task)
 				req->rq_svec->iov_base,
 				req->rq_svec->iov_len);
 
-	req->rq_xtime = jiffies;
 	status = xs_sendpages(transport->sock,
 			      xs_addr(xprt),
 			      xprt->addrlen, xdr,
@@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
 	while (1) {
-		req->rq_xtime = jiffies;
 		status = xs_sendpages(transport->sock,
 					NULL, 0, xdr, req->rq_bytes_sent);
 
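As a rough illustration of why the hunk above matters, the sketch below mimics a stream transport's send loop in userspace: a large request may need several send attempts before it is fully queued, so the timestamp is taken once, before the first attempt, rather than inside the loop. send_some() and its return convention are hypothetical stand-ins for illustration only, not the kernel's xs_sendpages().

/*
 * Illustrative-only sketch of a stream send loop with a single
 * pre-send timestamp (the patched behaviour).
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>

/* Pretend transport: queues at most 16 bytes per call and reports
 * -EAGAIN on every other attempt to mimic a full output buffer. */
static ssize_t send_some(const char *buf, size_t len)
{
	static int busy;

	(void)buf;
	if ((busy = !busy))
		return -EAGAIN;
	return len < 16 ? (ssize_t)len : 16;
}

static int send_request_demo(const char *buf, size_t len,
			     struct timespec *xtime)
{
	size_t sent = 0;

	/* Stamp once, before the first send attempt. */
	clock_gettime(CLOCK_MONOTONIC, xtime);

	while (sent < len) {
		ssize_t n = send_some(buf + sent, len - sent);

		if (n == -EAGAIN)
			continue;	/* real code waits for write space here */
		if (n < 0)
			return (int)n;	/* hard error */
		sent += (size_t)n;
	}
	return 0;
}

int main(void)
{
	const char msg[] = "an RPC request larger than one send attempt can queue";
	struct timespec xtime;

	if (send_request_demo(msg, strlen(msg), &xtime) == 0)
		printf("request fully queued; timestamp taken once before first send\n");
	return 0;
}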