aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Patrick Caulfield <pcaulfie@redhat.com> 2007-02-01 11:46:33 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2007-02-05 13:38:27 -0500
commit: a34fbc6363256387372331000462691bc4b3f5a9 (patch)
tree: d211465adf24f06d772cfbc78b92269771f46b07
parent: 62a0f62369b0fece37f6652d69b918c89d53c3b3 (diff)
[DLM] fix softlockup in dlm_recv
This patch stops the dlm_recv workqueue from busy-waiting when a node disconnects. The busy-wait can cause soft lockup errors on debug systems and generally bad performance.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- fs/dlm/lowcomms-tcp.c | 15
1 files changed, 9 insertions, 6 deletions
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 18ade44287e..f1efd17b261 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -109,7 +109,6 @@ struct connection {
109 struct page *rx_page; 109 struct page *rx_page;
110 struct cbuf cb; 110 struct cbuf cb;
111 int retries; 111 int retries;
112 atomic_t waiting_requests;
113#define MAX_CONNECT_RETRIES 3 112#define MAX_CONNECT_RETRIES 3
114 struct connection *othercon; 113 struct connection *othercon;
115 struct work_struct rwork; /* Receive workqueue */ 114 struct work_struct rwork; /* Receive workqueue */
@@ -278,8 +277,11 @@ static int receive_from_sock(struct connection *con)
278 277
279 mutex_lock(&con->sock_mutex); 278 mutex_lock(&con->sock_mutex);
280 279
281 if (con->sock == NULL) 280 if (con->sock == NULL) {
282 goto out; 281 ret = -EAGAIN;
282 goto out_close;
283 }
284
283 if (con->rx_page == NULL) { 285 if (con->rx_page == NULL) {
284 /* 286 /*
285 * This doesn't need to be atomic, but I think it should 287 * This doesn't need to be atomic, but I think it should
@@ -352,7 +354,6 @@ static int receive_from_sock(struct connection *con)
352 con->rx_page = NULL; 354 con->rx_page = NULL;
353 } 355 }
354 356
355out:
356 if (call_again_soon) 357 if (call_again_soon)
357 goto out_resched; 358 goto out_resched;
358 mutex_unlock(&con->sock_mutex); 359 mutex_unlock(&con->sock_mutex);
@@ -370,6 +371,9 @@ out_close:
370 close_connection(con, false); 371 close_connection(con, false);
371 /* Reconnect when there is something to send */ 372 /* Reconnect when there is something to send */
372 } 373 }
374 /* Don't return success if we really got EOF */
375 if (ret == 0)
376 ret = -EAGAIN;
373 377
374 return ret; 378 return ret;
375} 379}
@@ -847,7 +851,6 @@ int dlm_lowcomms_close(int nodeid)
847 if (con) { 851 if (con) {
848 clean_one_writequeue(con); 852 clean_one_writequeue(con);
849 close_connection(con, true); 853 close_connection(con, true);
850 atomic_set(&con->waiting_requests, 0);
851 } 854 }
852 return 0; 855 return 0;
853 856