diff options
author | Patrick Caulfield <pcaulfie@redhat.com> | 2007-02-01 11:46:33 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-02-05 13:38:27 -0500 |
commit | a34fbc6363256387372331000462691bc4b3f5a9 (patch) | |
tree | d211465adf24f06d772cfbc78b92269771f46b07 /fs | |
parent | 62a0f62369b0fece37f6652d69b918c89d53c3b3 (diff) |
[DLM] fix softlockup in dlm_recv
This patch stops the dlm_recv workqueue from busy-waiting when a node
disconnects. Such busy-waiting can cause soft lockup errors on debug systems and bad
performance generally.
Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dlm/lowcomms-tcp.c | 15 |
1 files changed, 9 insertions, 6 deletions
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c index 18ade44287e7..f1efd17b2614 100644 --- a/fs/dlm/lowcomms-tcp.c +++ b/fs/dlm/lowcomms-tcp.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -109,7 +109,6 @@ struct connection { | |||
109 | struct page *rx_page; | 109 | struct page *rx_page; |
110 | struct cbuf cb; | 110 | struct cbuf cb; |
111 | int retries; | 111 | int retries; |
112 | atomic_t waiting_requests; | ||
113 | #define MAX_CONNECT_RETRIES 3 | 112 | #define MAX_CONNECT_RETRIES 3 |
114 | struct connection *othercon; | 113 | struct connection *othercon; |
115 | struct work_struct rwork; /* Receive workqueue */ | 114 | struct work_struct rwork; /* Receive workqueue */ |
@@ -278,8 +277,11 @@ static int receive_from_sock(struct connection *con) | |||
278 | 277 | ||
279 | mutex_lock(&con->sock_mutex); | 278 | mutex_lock(&con->sock_mutex); |
280 | 279 | ||
281 | if (con->sock == NULL) | 280 | if (con->sock == NULL) { |
282 | goto out; | 281 | ret = -EAGAIN; |
282 | goto out_close; | ||
283 | } | ||
284 | |||
283 | if (con->rx_page == NULL) { | 285 | if (con->rx_page == NULL) { |
284 | /* | 286 | /* |
285 | * This doesn't need to be atomic, but I think it should | 287 | * This doesn't need to be atomic, but I think it should |
@@ -352,7 +354,6 @@ static int receive_from_sock(struct connection *con) | |||
352 | con->rx_page = NULL; | 354 | con->rx_page = NULL; |
353 | } | 355 | } |
354 | 356 | ||
355 | out: | ||
356 | if (call_again_soon) | 357 | if (call_again_soon) |
357 | goto out_resched; | 358 | goto out_resched; |
358 | mutex_unlock(&con->sock_mutex); | 359 | mutex_unlock(&con->sock_mutex); |
@@ -370,6 +371,9 @@ out_close: | |||
370 | close_connection(con, false); | 371 | close_connection(con, false); |
371 | /* Reconnect when there is something to send */ | 372 | /* Reconnect when there is something to send */ |
372 | } | 373 | } |
374 | /* Don't return success if we really got EOF */ | ||
375 | if (ret == 0) | ||
376 | ret = -EAGAIN; | ||
373 | 377 | ||
374 | return ret; | 378 | return ret; |
375 | } | 379 | } |
@@ -847,7 +851,6 @@ int dlm_lowcomms_close(int nodeid) | |||
847 | if (con) { | 851 | if (con) { |
848 | clean_one_writequeue(con); | 852 | clean_one_writequeue(con); |
849 | close_connection(con, true); | 853 | close_connection(con, true); |
850 | atomic_set(&con->waiting_requests, 0); | ||
851 | } | 854 | } |
852 | return 0; | 855 | return 0; |
853 | 856 | ||