about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Miller <davem@davemloft.net> 2010-11-11 00:56:39 -0500
committer David Teigland <teigland@redhat.com> 2010-11-11 14:05:12 -0500
commit b36930dd508e00f0c5083bcd57d25de6d0375c76 (patch)
tree 802e874ff1f53289250c13ac260a8df033b8f47d
parent f6614b7bb405a9b35dd28baea989a749492c46b2 (diff)
dlm: Handle application limited situations properly.
In the normal regime where an application uses non-blocking I/O writes on a socket, it will handle -EAGAIN and use poll() to wait for send space. It doesn't actually sleep on the socket I/O write.

But kernel level RPC layers that do socket I/O operations directly and key off of -EAGAIN on the write() to "try again later" don't use poll(), they instead have their own sleeping mechanism and rely upon ->sk_write_space() to trigger the wakeup. So they do effectively sleep on the write(), but this mechanism alone does not let the socket layers know what's going on. Therefore they must emulate what would have happened, otherwise TCP cannot possibly see that the connection is application window size limited.

Handle this, therefore, like SUNRPC by setting SOCK_NOSPACE and bumping ->sk_write_pending as needed when we hit the send buffer limits. This should make TCP send buffer size auto-tuning and the ->sk_write_space() callback invocations actually happen.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r-- fs/dlm/lowcomms.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 37a34c2c622a..77720f89c879 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -108,6 +108,7 @@ struct connection {
108#define CF_INIT_PENDING 4 108#define CF_INIT_PENDING 4
109#define CF_IS_OTHERCON 5 109#define CF_IS_OTHERCON 5
110#define CF_CLOSE 6 110#define CF_CLOSE 6
111#define CF_APP_LIMITED 7
111 struct list_head writequeue; /* List of outgoing writequeue_entries */ 112 struct list_head writequeue; /* List of outgoing writequeue_entries */
112 spinlock_t writequeue_lock; 113 spinlock_t writequeue_lock;
113 int (*rx_action) (struct connection *); /* What to do when active */ 114 int (*rx_action) (struct connection *); /* What to do when active */
@@ -295,7 +296,17 @@ static void lowcomms_write_space(struct sock *sk)
295{ 296{
296 struct connection *con = sock2con(sk); 297 struct connection *con = sock2con(sk);
297 298
298 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 299 if (!con)
300 return;
301
302 clear_bit(SOCK_NOSPACE, &con->sock->flags);
303
304 if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
305 con->sock->sk->sk_write_pending--;
306 clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
307 }
308
309 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
299 queue_work(send_workqueue, &con->swork); 310 queue_work(send_workqueue, &con->swork);
300} 311}
301 312
@@ -1319,6 +1330,15 @@ static void send_to_sock(struct connection *con)
1319 ret = kernel_sendpage(con->sock, e->page, offset, len, 1330 ret = kernel_sendpage(con->sock, e->page, offset, len,
1320 msg_flags); 1331 msg_flags);
1321 if (ret == -EAGAIN || ret == 0) { 1332 if (ret == -EAGAIN || ret == 0) {
1333 if (ret == -EAGAIN &&
1334 test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
1335 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1336 /* Notify TCP that we're limited by the
1337 * application window size.
1338 */
1339 set_bit(SOCK_NOSPACE, &con->sock->flags);
1340 con->sock->sk->sk_write_pending++;
1341 }
1322 cond_resched(); 1342 cond_resched();
1323 goto out; 1343 goto out;
1324 } 1344 }