about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Chuck Lever <cel@netapp.com> 2005-08-25 19:25:51 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2005-09-23 12:38:38 -0400
commit 12a804698b29d040b7cdd92e8a44b0e75164dae9 (patch)
tree 9885cf95a0a2945ad8cd00de59633a0aa16a3599
parent fe3aca290f17ae4978bd73d02aa4029f1c9c024c (diff)
[PATCH] RPC: expose API for serializing access to RPC transports
The next several patches introduce an API that allows transports to choose whether the RPC client provides congestion control or whether the transport itself provides it.

The first method we abstract is the one that serializes access to the RPC transport to prevent the bytes from different requests from mingling together. This method provides proper request serialization and the opportunity to prevent new requests from being started because the transport is congested.

The normal situation is for the transport to handle congestion control itself. Although NFS over UDP was first, it has been recognized after years of experience that having the transport provide congestion control is much better than doing it in the RPC client. Thus TCP, and probably every future transport implementation, will use the default method, xprt_lock_write, provided in xprt.c, which does not provide any kind of congestion control. UDP can continue using the xprt.c-provided Van Jacobson congestion avoidance implementation.

Test-plan:
Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- include/linux/sunrpc/xprt.h 3
-rw-r--r-- net/sunrpc/xprt.c 64
-rw-r--r-- net/sunrpc/xprtsock.c 2
3 files changed, 57 insertions, 12 deletions
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ac08e99a81cb..eee1c6877851 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -132,6 +132,7 @@ struct rpc_xprt;
132 132
133struct rpc_xprt_ops { 133struct rpc_xprt_ops {
134 void (*set_buffer_size)(struct rpc_xprt *xprt); 134 void (*set_buffer_size)(struct rpc_xprt *xprt);
135 int (*reserve_xprt)(struct rpc_task *task);
135 void (*connect)(struct rpc_task *task); 136 void (*connect)(struct rpc_task *task);
136 int (*send_request)(struct rpc_task *task); 137 int (*send_request)(struct rpc_task *task);
137 void (*set_retrans_timeout)(struct rpc_task *task); 138 void (*set_retrans_timeout)(struct rpc_task *task);
@@ -232,6 +233,8 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
232 */ 233 */
233void xprt_connect(struct rpc_task *task); 234void xprt_connect(struct rpc_task *task);
234void xprt_reserve(struct rpc_task *task); 235void xprt_reserve(struct rpc_task *task);
236int xprt_reserve_xprt(struct rpc_task *task);
237int xprt_reserve_xprt_cong(struct rpc_task *task);
235int xprt_prepare_transmit(struct rpc_task *task); 238int xprt_prepare_transmit(struct rpc_task *task);
236void xprt_transmit(struct rpc_task *task); 239void xprt_transmit(struct rpc_task *task);
237int xprt_adjust_timeout(struct rpc_rqst *req); 240int xprt_adjust_timeout(struct rpc_rqst *req);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1ac2fbe05102..2d1e8b83dd68 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -64,14 +64,56 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
64 64
65static int xprt_clear_backlog(struct rpc_xprt *xprt); 65static int xprt_clear_backlog(struct rpc_xprt *xprt);
66 66
67/**
68 * xprt_reserve_xprt - serialize write access to transports
69 * @task: task that is requesting access to the transport
70 *
71 * This prevents mixing the payload of separate requests, and prevents
72 * transport connects from colliding with writes. No congestion control
73 * is provided.
74 */
75int xprt_reserve_xprt(struct rpc_task *task)
76{
77 struct rpc_xprt *xprt = task->tk_xprt;
78 struct rpc_rqst *req = task->tk_rqstp;
79
80 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
81 if (task == xprt->snd_task)
82 return 1;
83 if (task == NULL)
84 return 0;
85 goto out_sleep;
86 }
87 xprt->snd_task = task;
88 if (req) {
89 req->rq_bytes_sent = 0;
90 req->rq_ntrans++;
91 }
92 return 1;
93
94out_sleep:
95 dprintk("RPC: %4d failed to lock transport %p\n",
96 task->tk_pid, xprt);
97 task->tk_timeout = 0;
98 task->tk_status = -EAGAIN;
99 if (req && req->rq_ntrans)
100 rpc_sleep_on(&xprt->resend, task, NULL, NULL);
101 else
102 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
103 return 0;
104}
105
67/* 106/*
68 * Serialize write access to transports, in order to prevent different 107 * xprt_reserve_xprt_cong - serialize write access to transports
69 * requests from interfering with each other. 108 * @task: task that is requesting access to the transport
70 * Also prevents transport connects from colliding with writes. 109 *
110 * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
111 * integrated into the decision of whether a request is allowed to be
112 * woken up and given access to the transport.
71 */ 113 */
72static int 114int xprt_reserve_xprt_cong(struct rpc_task *task)
73__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
74{ 115{
116 struct rpc_xprt *xprt = task->tk_xprt;
75 struct rpc_rqst *req = task->tk_rqstp; 117 struct rpc_rqst *req = task->tk_rqstp;
76 118
77 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 119 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
@@ -79,7 +121,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
79 return 1; 121 return 1;
80 goto out_sleep; 122 goto out_sleep;
81 } 123 }
82 if (xprt->nocong || __xprt_get_cong(xprt, task)) { 124 if (__xprt_get_cong(xprt, task)) {
83 xprt->snd_task = task; 125 xprt->snd_task = task;
84 if (req) { 126 if (req) {
85 req->rq_bytes_sent = 0; 127 req->rq_bytes_sent = 0;
@@ -101,20 +143,18 @@ out_sleep:
101 return 0; 143 return 0;
102} 144}
103 145
104static inline int 146static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
105xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
106{ 147{
107 int retval; 148 int retval;
108 149
109 spin_lock_bh(&xprt->transport_lock); 150 spin_lock_bh(&xprt->transport_lock);
110 retval = __xprt_lock_write(xprt, task); 151 retval = xprt->ops->reserve_xprt(task);
111 spin_unlock_bh(&xprt->transport_lock); 152 spin_unlock_bh(&xprt->transport_lock);
112 return retval; 153 return retval;
113} 154}
114 155
115 156
116static void 157static void __xprt_lock_write_next(struct rpc_xprt *xprt)
117__xprt_lock_write_next(struct rpc_xprt *xprt)
118{ 158{
119 struct rpc_task *task; 159 struct rpc_task *task;
120 160
@@ -598,7 +638,7 @@ int xprt_prepare_transmit(struct rpc_task *task)
598 err = req->rq_received; 638 err = req->rq_received;
599 goto out_unlock; 639 goto out_unlock;
600 } 640 }
601 if (!__xprt_lock_write(xprt, task)) { 641 if (!xprt->ops->reserve_xprt(task)) {
602 err = -EAGAIN; 642 err = -EAGAIN;
603 goto out_unlock; 643 goto out_unlock;
604 } 644 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 79433ffd1df0..fc4fbe8ea346 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1045,6 +1045,7 @@ static void xs_connect(struct rpc_task *task)
1045 1045
1046static struct rpc_xprt_ops xs_udp_ops = { 1046static struct rpc_xprt_ops xs_udp_ops = {
1047 .set_buffer_size = xs_udp_set_buffer_size, 1047 .set_buffer_size = xs_udp_set_buffer_size,
1048 .reserve_xprt = xprt_reserve_xprt_cong,
1048 .connect = xs_connect, 1049 .connect = xs_connect,
1049 .send_request = xs_udp_send_request, 1050 .send_request = xs_udp_send_request,
1050 .set_retrans_timeout = xprt_set_retrans_timeout_rtt, 1051 .set_retrans_timeout = xprt_set_retrans_timeout_rtt,
@@ -1054,6 +1055,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
1054 1055
1055static struct rpc_xprt_ops xs_tcp_ops = { 1056static struct rpc_xprt_ops xs_tcp_ops = {
1056 .set_buffer_size = xs_tcp_set_buffer_size, 1057 .set_buffer_size = xs_tcp_set_buffer_size,
1058 .reserve_xprt = xprt_reserve_xprt,
1057 .connect = xs_connect, 1059 .connect = xs_connect,
1058 .send_request = xs_tcp_send_request, 1060 .send_request = xs_tcp_send_request,
1059 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1061 .set_retrans_timeout = xprt_set_retrans_timeout_def,