author     Andy Grover <andy.grover@oracle.com>  2010-03-29 20:08:49 -0400
committer  Andy Grover <andy.grover@oracle.com>  2010-09-08 21:12:21 -0400
commit     fcc5450c6386526034edc437e4cb2c67a6fdd7e9 (patch)
tree       87b98163a69413de3c14a37220edf22350cb25d9 /net/rds/send.c
parent     51e2cba8b5936c13b40f0fa11aa4e84683dbc751 (diff)
RDS: Remove send_quota from send_xmit()
The purpose of the send quota was really to give fairness
when different connections were all using the same
workq thread to send backlogged msgs -- they could only send
so many before another connection could make progress.
Now that each connection is pushing the backlog from its
completion handler, they are all guaranteed to make progress
and the quota isn't needed any longer.
A thread *will* have to send all previously queued data, as well
as any further msgs placed on the queue while c_send_lock
was held. In a pathological case a single process can get
roped into doing this for long periods while other threads
get off free. But, since it can only do this until the transport
reports full, this is a bounded scenario.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
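
To make the new control flow easier to follow outside the diff below, here is a minimal sketch of the pattern this patch introduces: loop with no quota, bail out when the transport reports full, and when the queue looks empty recheck it under the lock and restart if another sender raced a message in. The helpers (queue_empty(), queue_pop(), transmit_one()) and the struct names are placeholders for illustration, not the real RDS API.

```c
/*
 * Minimal model of the reworked rds_send_xmit() control flow.
 * queue_empty(), queue_pop() and transmit_one() are hypothetical
 * stand-ins for the real connection/transport calls.
 */
#include <stdbool.h>

struct conn;                                      /* placeholder connection */
struct msg;                                       /* placeholder message    */

extern bool queue_empty(struct conn *c);          /* models c_send_queue check */
extern struct msg *queue_pop(struct conn *c);     /* next backlogged message   */
extern int transmit_one(struct conn *c, struct msg *m); /* push to transport   */

static int send_xmit_model(struct conn *c)
{
	struct msg *m;
	int ret = 0;

restart:
	/* Unbounded loop: there is no send_quota any more. */
	while (1) {
		m = queue_pop(c);
		if (!m)
			break;                    /* queue looked empty */

		ret = transmit_one(c, m);
		if (ret) {
			/*
			 * Transport is full; it must call back into the
			 * send path from its completion handler when
			 * room frees up, so just return.
			 */
			return ret;
		}
	}

	/*
	 * Another sender may have queued a message while we still held
	 * the send lock; recheck and restart so that message does not
	 * just sit on the queue.
	 */
	if (ret == 0 && !queue_empty(c))
		goto restart;

	return ret;
}
```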
Diffstat (limited to 'net/rds/send.c')
-rw-r--r--  net/rds/send.c  30
1 file changed, 15 insertions, 15 deletions
diff --git a/net/rds/send.c b/net/rds/send.c
index ecda3e6c432c..656c5c1b32bc 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -110,12 +110,11 @@ int rds_send_xmit(struct rds_connection *conn)
 	struct rds_message *rm;
 	unsigned long flags;
 	unsigned int tmp;
-	unsigned int send_quota = send_batch_count;
 	struct scatterlist *sg;
 	int ret = 0;
-	int was_empty = 0;
 	LIST_HEAD(to_be_dropped);
 
+restart:
 	if (!rds_conn_up(conn))
 		goto out;
 
@@ -139,7 +138,7 @@ int rds_send_xmit(struct rds_connection *conn)
 	 * spin trying to push headers and data down the connection until
 	 * the connection doesn't make forward progress.
 	 */
-	while (--send_quota) {
+	while (1) {
 
 		rm = conn->c_xmit_rm;
 
@@ -185,10 +184,8 @@ int rds_send_xmit(struct rds_connection *conn)
 
 		spin_unlock(&conn->c_lock);
 
-		if (!rm) {
-			was_empty = 1;
+		if (!rm)
 			break;
-		}
 
 		/* Unfortunately, the way Infiniband deals with
 		 * RDMA to a bad MR key is by moving the entire
@@ -350,20 +347,23 @@ int rds_send_xmit(struct rds_connection *conn)
 		rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
 	}
 
-	if (send_quota == 0 && !was_empty) {
-		/* We exhausted the send quota, but there's work left to
-		 * do. Return and (re-)schedule the send worker.
-		 */
-		ret = -EAGAIN;
-	}
-
-	if (ret == 0 && was_empty) {
+	/*
+	 * Other senders will see we have c_send_lock and exit. We
+	 * need to recheck the send queue and race again for c_send_lock
+	 * to make sure messages don't just sit on the send queue.
+	 *
+	 * If the transport cannot continue (i.e ret != 0), then it must
+	 * call us when more room is available, such as from the tx
+	 * completion handler.
+	 */
+	if (ret == 0) {
 		/* A simple bit test would be way faster than taking the
 		 * spin lock */
 		spin_lock_irqsave(&conn->c_lock, flags);
 		if (!list_empty(&conn->c_send_queue)) {
 			rds_stats_inc(s_send_lock_queue_raced);
-			ret = -EAGAIN;
+			spin_unlock_irqrestore(&conn->c_lock, flags);
+			goto restart;
 		}
 		spin_unlock_irqrestore(&conn->c_lock, flags);
 	}
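
The comment added in the last hunk relies on the transport driving the send path again once ring space is reclaimed. A minimal, hedged sketch of that contract follows; example_tx_complete() is a hypothetical handler written for illustration, and only rds_send_xmit() is the real entry point.

```c
/*
 * Hypothetical transport tx-completion handler illustrating the
 * contract described above: after reclaiming send ring space, the
 * transport pushes any backlog by re-entering rds_send_xmit().
 */
static void example_tx_complete(struct rds_connection *conn)
{
	/* ... reap completed work requests and free ring slots ... */

	/*
	 * Room is available again, so push the backlog. If the ring
	 * fills up once more, rds_send_xmit() stops and we will run
	 * again on the next completion.
	 */
	rds_send_xmit(conn);
}
```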