diff options
author | Chuck Lever <cel@netapp.com> | 2005-08-25 19:25:51 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2005-09-23 12:38:38 -0400 |
commit | 12a804698b29d040b7cdd92e8a44b0e75164dae9 (patch) | |
tree | 9885cf95a0a2945ad8cd00de59633a0aa16a3599 | |
parent | fe3aca290f17ae4978bd73d02aa4029f1c9c024c (diff) |
[PATCH] RPC: expose API for serializing access to RPC transports
The next several patches introduce an API that allows transports to
choose whether the RPC client provides congestion control or whether
the transport itself provides it.
The first method we abstract is the one that serializes access to the
RPC transport to prevent the bytes from different requests from mingling
together. This method provides proper request serialization and the
opportunity to hold off starting new requests while the transport is
congested.
The normal situation is for the transport to handle congestion control
itself. Although NFS over UDP was first, it has been recognized after
years of experience that having the transport provide congestion control
is much better than doing it in the RPC client. Thus TCP, and probably
every future transport implementation, will use the default method,
xprt_reserve_xprt, provided in xprt.c, which does not provide any kind
of congestion control. UDP can continue using the xprt.c-provided
Van Jacobson congestion avoidance implementation, xprt_reserve_xprt_cong.
Test-plan:
Use WAN simulation to cause sporadic bursty packet loss. Look for significant
regression in performance or client stability.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | include/linux/sunrpc/xprt.h | 3 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 64 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 2 |
3 files changed, 57 insertions, 12 deletions
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ac08e99a81cb..eee1c6877851 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -132,6 +132,7 @@ struct rpc_xprt; | |||
132 | 132 | ||
133 | struct rpc_xprt_ops { | 133 | struct rpc_xprt_ops { |
134 | void (*set_buffer_size)(struct rpc_xprt *xprt); | 134 | void (*set_buffer_size)(struct rpc_xprt *xprt); |
135 | int (*reserve_xprt)(struct rpc_task *task); | ||
135 | void (*connect)(struct rpc_task *task); | 136 | void (*connect)(struct rpc_task *task); |
136 | int (*send_request)(struct rpc_task *task); | 137 | int (*send_request)(struct rpc_task *task); |
137 | void (*set_retrans_timeout)(struct rpc_task *task); | 138 | void (*set_retrans_timeout)(struct rpc_task *task); |
@@ -232,6 +233,8 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long | |||
232 | */ | 233 | */ |
233 | void xprt_connect(struct rpc_task *task); | 234 | void xprt_connect(struct rpc_task *task); |
234 | void xprt_reserve(struct rpc_task *task); | 235 | void xprt_reserve(struct rpc_task *task); |
236 | int xprt_reserve_xprt(struct rpc_task *task); | ||
237 | int xprt_reserve_xprt_cong(struct rpc_task *task); | ||
235 | int xprt_prepare_transmit(struct rpc_task *task); | 238 | int xprt_prepare_transmit(struct rpc_task *task); |
236 | void xprt_transmit(struct rpc_task *task); | 239 | void xprt_transmit(struct rpc_task *task); |
237 | int xprt_adjust_timeout(struct rpc_rqst *req); | 240 | int xprt_adjust_timeout(struct rpc_rqst *req); |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1ac2fbe05102..2d1e8b83dd68 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -64,14 +64,56 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | |||
64 | 64 | ||
65 | static int xprt_clear_backlog(struct rpc_xprt *xprt); | 65 | static int xprt_clear_backlog(struct rpc_xprt *xprt); |
66 | 66 | ||
67 | /** | ||
68 | * xprt_reserve_xprt - serialize write access to transports | ||
69 | * @task: task that is requesting access to the transport | ||
70 | * | ||
71 | * This prevents mixing the payload of separate requests, and prevents | ||
72 | * transport connects from colliding with writes. No congestion control | ||
73 | * is provided. | ||
74 | */ | ||
75 | int xprt_reserve_xprt(struct rpc_task *task) | ||
76 | { | ||
77 | struct rpc_xprt *xprt = task->tk_xprt; | ||
78 | struct rpc_rqst *req = task->tk_rqstp; | ||
79 | |||
80 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { | ||
81 | if (task == xprt->snd_task) | ||
82 | return 1; | ||
83 | if (task == NULL) | ||
84 | return 0; | ||
85 | goto out_sleep; | ||
86 | } | ||
87 | xprt->snd_task = task; | ||
88 | if (req) { | ||
89 | req->rq_bytes_sent = 0; | ||
90 | req->rq_ntrans++; | ||
91 | } | ||
92 | return 1; | ||
93 | |||
94 | out_sleep: | ||
95 | dprintk("RPC: %4d failed to lock transport %p\n", | ||
96 | task->tk_pid, xprt); | ||
97 | task->tk_timeout = 0; | ||
98 | task->tk_status = -EAGAIN; | ||
99 | if (req && req->rq_ntrans) | ||
100 | rpc_sleep_on(&xprt->resend, task, NULL, NULL); | ||
101 | else | ||
102 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
67 | /* | 106 | /* |
68 | * Serialize write access to transports, in order to prevent different | 107 | * xprt_reserve_xprt_cong - serialize write access to transports |
69 | * requests from interfering with each other. | 108 | * @task: task that is requesting access to the transport |
70 | * Also prevents transport connects from colliding with writes. | 109 | * |
110 | * Same as xprt_reserve_xprt, but Van Jacobson congestion control is | ||
111 | * integrated into the decision of whether a request is allowed to be | ||
112 | * woken up and given access to the transport. | ||
71 | */ | 113 | */ |
72 | static int | 114 | int xprt_reserve_xprt_cong(struct rpc_task *task) |
73 | __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
74 | { | 115 | { |
116 | struct rpc_xprt *xprt = task->tk_xprt; | ||
75 | struct rpc_rqst *req = task->tk_rqstp; | 117 | struct rpc_rqst *req = task->tk_rqstp; |
76 | 118 | ||
77 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { | 119 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { |
@@ -79,7 +121,7 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | |||
79 | return 1; | 121 | return 1; |
80 | goto out_sleep; | 122 | goto out_sleep; |
81 | } | 123 | } |
82 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 124 | if (__xprt_get_cong(xprt, task)) { |
83 | xprt->snd_task = task; | 125 | xprt->snd_task = task; |
84 | if (req) { | 126 | if (req) { |
85 | req->rq_bytes_sent = 0; | 127 | req->rq_bytes_sent = 0; |
@@ -101,20 +143,18 @@ out_sleep: | |||
101 | return 0; | 143 | return 0; |
102 | } | 144 | } |
103 | 145 | ||
104 | static inline int | 146 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
105 | xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
106 | { | 147 | { |
107 | int retval; | 148 | int retval; |
108 | 149 | ||
109 | spin_lock_bh(&xprt->transport_lock); | 150 | spin_lock_bh(&xprt->transport_lock); |
110 | retval = __xprt_lock_write(xprt, task); | 151 | retval = xprt->ops->reserve_xprt(task); |
111 | spin_unlock_bh(&xprt->transport_lock); | 152 | spin_unlock_bh(&xprt->transport_lock); |
112 | return retval; | 153 | return retval; |
113 | } | 154 | } |
114 | 155 | ||
115 | 156 | ||
116 | static void | 157 | static void __xprt_lock_write_next(struct rpc_xprt *xprt) |
117 | __xprt_lock_write_next(struct rpc_xprt *xprt) | ||
118 | { | 158 | { |
119 | struct rpc_task *task; | 159 | struct rpc_task *task; |
120 | 160 | ||
@@ -598,7 +638,7 @@ int xprt_prepare_transmit(struct rpc_task *task) | |||
598 | err = req->rq_received; | 638 | err = req->rq_received; |
599 | goto out_unlock; | 639 | goto out_unlock; |
600 | } | 640 | } |
601 | if (!__xprt_lock_write(xprt, task)) { | 641 | if (!xprt->ops->reserve_xprt(task)) { |
602 | err = -EAGAIN; | 642 | err = -EAGAIN; |
603 | goto out_unlock; | 643 | goto out_unlock; |
604 | } | 644 | } |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 79433ffd1df0..fc4fbe8ea346 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -1045,6 +1045,7 @@ static void xs_connect(struct rpc_task *task) | |||
1045 | 1045 | ||
1046 | static struct rpc_xprt_ops xs_udp_ops = { | 1046 | static struct rpc_xprt_ops xs_udp_ops = { |
1047 | .set_buffer_size = xs_udp_set_buffer_size, | 1047 | .set_buffer_size = xs_udp_set_buffer_size, |
1048 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
1048 | .connect = xs_connect, | 1049 | .connect = xs_connect, |
1049 | .send_request = xs_udp_send_request, | 1050 | .send_request = xs_udp_send_request, |
1050 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | 1051 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, |
@@ -1054,6 +1055,7 @@ static struct rpc_xprt_ops xs_udp_ops = { | |||
1054 | 1055 | ||
1055 | static struct rpc_xprt_ops xs_tcp_ops = { | 1056 | static struct rpc_xprt_ops xs_tcp_ops = { |
1056 | .set_buffer_size = xs_tcp_set_buffer_size, | 1057 | .set_buffer_size = xs_tcp_set_buffer_size, |
1058 | .reserve_xprt = xprt_reserve_xprt, | ||
1057 | .connect = xs_connect, | 1059 | .connect = xs_connect, |
1058 | .send_request = xs_tcp_send_request, | 1060 | .send_request = xs_tcp_send_request, |
1059 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | 1061 | .set_retrans_timeout = xprt_set_retrans_timeout_def, |