diff options
author | Lars Marowsky-Bree <lmb@suse.de> | 2009-08-11 17:18:23 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2009-08-24 14:13:56 -0400 |
commit | 063c4c99630c0b06afad080d2a18bda64172c1a2 (patch) | |
tree | 5ceca753d0366f9af6e2afbc9151494a6cfcac7d /fs/dlm | |
parent | b5711b8e5a437ca7d35321d19de568b4f76a7739 (diff) |
dlm: fix connection close handling
Closing a connection to a node can create problems if there are
outstanding messages for that node. The problems include dlm_send
spinning while attempting to reconnect, or a BUG from
tcp_connect_to_sock() when it attempts to use a partially closed connection.
To cleanly close a connection, we now first attempt to send any pending
messages, cancel any remaining workqueue work, and flag the connection
as closed to avoid reconnect attempts.
Signed-off-by: Lars Marowsky-Bree <lmb@suse.de>
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/lowcomms.c | 15 |
1 files changed, 13 insertions, 2 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 210d52c48808..bda690cd3640 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -106,6 +106,7 @@ struct connection { | |||
106 | #define CF_CONNECT_PENDING 3 | 106 | #define CF_CONNECT_PENDING 3 |
107 | #define CF_INIT_PENDING 4 | 107 | #define CF_INIT_PENDING 4 |
108 | #define CF_IS_OTHERCON 5 | 108 | #define CF_IS_OTHERCON 5 |
109 | #define CF_CLOSE 6 | ||
109 | struct list_head writequeue; /* List of outgoing writequeue_entries */ | 110 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
110 | spinlock_t writequeue_lock; | 111 | spinlock_t writequeue_lock; |
111 | int (*rx_action) (struct connection *); /* What to do when active */ | 112 | int (*rx_action) (struct connection *); /* What to do when active */ |
@@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk) | |||
299 | 300 | ||
300 | static inline void lowcomms_connect_sock(struct connection *con) | 301 | static inline void lowcomms_connect_sock(struct connection *con) |
301 | { | 302 | { |
303 | if (test_bit(CF_CLOSE, &con->flags)) | ||
304 | return; | ||
302 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | 305 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) |
303 | queue_work(send_workqueue, &con->swork); | 306 | queue_work(send_workqueue, &con->swork); |
304 | } | 307 | } |
@@ -1368,6 +1371,13 @@ int dlm_lowcomms_close(int nodeid) | |||
1368 | log_print("closing connection to node %d", nodeid); | 1371 | log_print("closing connection to node %d", nodeid); |
1369 | con = nodeid2con(nodeid, 0); | 1372 | con = nodeid2con(nodeid, 0); |
1370 | if (con) { | 1373 | if (con) { |
1374 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
1375 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
1376 | set_bit(CF_CLOSE, &con->flags); | ||
1377 | if (cancel_work_sync(&con->swork)) | ||
1378 | log_print("canceled swork for node %d", nodeid); | ||
1379 | if (cancel_work_sync(&con->rwork)) | ||
1380 | log_print("canceled rwork for node %d", nodeid); | ||
1371 | clean_one_writequeue(con); | 1381 | clean_one_writequeue(con); |
1372 | close_connection(con, true); | 1382 | close_connection(con, true); |
1373 | } | 1383 | } |
@@ -1393,9 +1403,10 @@ static void process_send_sockets(struct work_struct *work) | |||
1393 | 1403 | ||
1394 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 1404 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { |
1395 | con->connect_action(con); | 1405 | con->connect_action(con); |
1406 | set_bit(CF_WRITE_PENDING, &con->flags); | ||
1396 | } | 1407 | } |
1397 | clear_bit(CF_WRITE_PENDING, &con->flags); | 1408 | if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) |
1398 | send_to_sock(con); | 1409 | send_to_sock(con); |
1399 | } | 1410 | } |
1400 | 1411 | ||
1401 | 1412 | ||