diff options
author | Tao Ma <tao.ma@oracle.com> | 2008-03-05 02:50:12 -0500 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2008-04-18 11:56:10 -0400 |
commit | 5cc3bf2786f63cceb191c3c02ddd83c6f38a7d64 (patch) | |
tree | a9d7f6fa7d251cff67d6b177835ff1f43d23ab2d /fs/ocfs2/cluster/tcp.c | |
parent | 8f50eb978935431ccbf89b0344efd4ce6a924875 (diff) |
ocfs2: Reconnect after idle time out.
Currently, o2net connects to a node on hb_up and disconnects on
hb_down and net timeout.
Disconnecting on net timeout is ok, but o2net should then attempt to
reconnect. This is because nodes sometimes get overloaded enough
that the network connection breaks but the disk hb does not.
And if we get into that situation, we either fence (unnecessarily)
or wait for its disk hb to die (and sometimes hang in the process).
So in this updated scheme, when the network disconnects, we keep
attempting to reconnect till we succeed or we get a disk hb down
event.
If the other node is really dead, then we will eventually get a
node down event. If not, we should be able to connect again and
continue.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 51 |
1 files changed, 36 insertions, 15 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..4ea4b0a26975 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); | 399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); |
400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); | 400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); |
401 | 401 | ||
402 | /* we won't reconnect after our valid conn goes away for | ||
403 | * this hb iteration.. here so it shows up in the logs */ | ||
404 | if (was_valid && !valid && err == 0) | 402 | if (was_valid && !valid && err == 0) |
405 | err = -ENOTCONN; | 403 | err = -ENOTCONN; |
406 | 404 | ||
@@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
430 | 428 | ||
431 | if (!was_valid && valid) { | 429 | if (!was_valid && valid) { |
432 | o2quo_conn_up(o2net_num_from_nn(nn)); | 430 | o2quo_conn_up(o2net_num_from_nn(nn)); |
433 | /* this is a bit of a hack. we only try reconnecting | ||
434 | * when heartbeating starts until we get a connection. | ||
435 | * if that connection then dies we don't try reconnecting. | ||
436 | * the only way to start connecting again is to down | ||
437 | * heartbeat and bring it back up. */ | ||
438 | cancel_delayed_work(&nn->nn_connect_expired); | 431 | cancel_delayed_work(&nn->nn_connect_expired); |
439 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 432 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", |
440 | o2nm_this_node() > sc->sc_node->nd_num ? | 433 | o2nm_this_node() > sc->sc_node->nd_num ? |
@@ -457,6 +450,18 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
457 | delay = 0; | 450 | delay = 0; |
458 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 451 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
459 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 452 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
453 | |||
454 | /* | ||
455 | * Delay the expired work after idle timeout. | ||
456 | * | ||
457 | * We might have lots of failed connection attempts that run | ||
458 | * through here but we only cancel the connect_expired work when | ||
459 | * a connection attempt succeeds. So only the first enqueue of | ||
460 | * the connect_expired work will do anything. The rest will see | ||
461 | * that it's already queued and do nothing. | ||
462 | */ | ||
463 | delay += msecs_to_jiffies(o2net_idle_timeout(NULL)); | ||
464 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); | ||
460 | } | 465 | } |
461 | 466 | ||
462 | /* keep track of the nn's sc ref for the caller */ | 467 | /* keep track of the nn's sc ref for the caller */ |
@@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1193 | * shut down already */ | 1198 | * shut down already */ |
1194 | if (nn->nn_sc == sc) { | 1199 | if (nn->nn_sc == sc) { |
1195 | o2net_sc_reset_idle_timer(sc); | 1200 | o2net_sc_reset_idle_timer(sc); |
1201 | atomic_set(&nn->nn_timeout, 0); | ||
1196 | o2net_set_nn_state(nn, sc, 1, 0); | 1202 | o2net_set_nn_state(nn, sc, 1, 0); |
1197 | } | 1203 | } |
1198 | spin_unlock(&nn->nn_lock); | 1204 | spin_unlock(&nn->nn_lock); |
@@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) | |||
1391 | static void o2net_idle_timer(unsigned long data) | 1397 | static void o2net_idle_timer(unsigned long data) |
1392 | { | 1398 | { |
1393 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1399 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
1400 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
1394 | struct timeval now; | 1401 | struct timeval now; |
1395 | 1402 | ||
1396 | do_gettimeofday(&now); | 1403 | do_gettimeofday(&now); |
@@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data) | |||
1413 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, | 1420 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, |
1414 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); | 1421 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); |
1415 | 1422 | ||
1423 | /* | ||
1424 | * Initialize the nn_timeout so that the next connection attempt | ||
1425 | * will continue in o2net_start_connect. | ||
1426 | */ | ||
1427 | atomic_set(&nn->nn_timeout, 1); | ||
1428 | |||
1416 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1429 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1417 | } | 1430 | } |
1418 | 1431 | ||
@@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work) | |||
1447 | struct socket *sock = NULL; | 1460 | struct socket *sock = NULL; |
1448 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1461 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
1449 | int ret = 0, stop; | 1462 | int ret = 0, stop; |
1463 | unsigned int timeout; | ||
1450 | 1464 | ||
1451 | /* if we're greater we initiate tx, otherwise we accept */ | 1465 | /* if we're greater we initiate tx, otherwise we accept */ |
1452 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1466 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
@@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work) | |||
1466 | } | 1480 | } |
1467 | 1481 | ||
1468 | spin_lock(&nn->nn_lock); | 1482 | spin_lock(&nn->nn_lock); |
1469 | /* see if we already have one pending or have given up */ | 1483 | /* |
1470 | stop = (nn->nn_sc || nn->nn_persistent_error); | 1484 | * see if we already have one pending or have given up. |
1485 | * For nn_timeout, it is set when we close the connection | ||
1486 | * because of the idle time out. So it means that we have | ||
1487 | * at least connected to that node successfully once, | ||
1488 | * now try to connect to it again. | ||
1489 | */ | ||
1490 | timeout = atomic_read(&nn->nn_timeout); | ||
1491 | stop = (nn->nn_sc || | ||
1492 | (nn->nn_persistent_error && | ||
1493 | (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); | ||
1471 | spin_unlock(&nn->nn_lock); | 1494 | spin_unlock(&nn->nn_lock); |
1472 | if (stop) | 1495 | if (stop) |
1473 | goto out; | 1496 | goto out; |
@@ -1579,6 +1602,7 @@ void o2net_disconnect_node(struct o2nm_node *node) | |||
1579 | 1602 | ||
1580 | /* don't reconnect until it's heartbeating again */ | 1603 | /* don't reconnect until it's heartbeating again */ |
1581 | spin_lock(&nn->nn_lock); | 1604 | spin_lock(&nn->nn_lock); |
1605 | atomic_set(&nn->nn_timeout, 0); | ||
1582 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1606 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1583 | spin_unlock(&nn->nn_lock); | 1607 | spin_unlock(&nn->nn_lock); |
1584 | 1608 | ||
@@ -1613,17 +1637,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1613 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); | 1637 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); |
1614 | 1638 | ||
1615 | if (node_num != o2nm_this_node()) { | 1639 | if (node_num != o2nm_this_node()) { |
1616 | /* heartbeat doesn't work unless a local node number is | ||
1617 | * configured and doing so brings up the o2net_wq, so we can | ||
1618 | * use it.. */ | ||
1619 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | ||
1620 | msecs_to_jiffies(o2net_idle_timeout(node))); | ||
1621 | |||
1622 | /* believe it or not, accept and node hearbeating testing | 1640 | /* believe it or not, accept and node hearbeating testing |
1623 | * can succeed for this node before we got here.. so | 1641 | * can succeed for this node before we got here.. so |
1624 | * only use set_nn_state to clear the persistent error | 1642 | * only use set_nn_state to clear the persistent error |
1625 | * if that hasn't already happened */ | 1643 | * if that hasn't already happened */ |
1626 | spin_lock(&nn->nn_lock); | 1644 | spin_lock(&nn->nn_lock); |
1645 | atomic_set(&nn->nn_timeout, 0); | ||
1627 | if (nn->nn_persistent_error) | 1646 | if (nn->nn_persistent_error) |
1628 | o2net_set_nn_state(nn, NULL, 0, 0); | 1647 | o2net_set_nn_state(nn, NULL, 0, 0); |
1629 | spin_unlock(&nn->nn_lock); | 1648 | spin_unlock(&nn->nn_lock); |
@@ -1747,6 +1766,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1747 | new_sock = NULL; | 1766 | new_sock = NULL; |
1748 | 1767 | ||
1749 | spin_lock(&nn->nn_lock); | 1768 | spin_lock(&nn->nn_lock); |
1769 | atomic_set(&nn->nn_timeout, 0); | ||
1750 | o2net_set_nn_state(nn, sc, 0, 0); | 1770 | o2net_set_nn_state(nn, sc, 0, 0); |
1751 | spin_unlock(&nn->nn_lock); | 1771 | spin_unlock(&nn->nn_lock); |
1752 | 1772 | ||
@@ -1941,6 +1961,7 @@ int o2net_init(void) | |||
1941 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { | 1961 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { |
1942 | struct o2net_node *nn = o2net_nn_from_num(i); | 1962 | struct o2net_node *nn = o2net_nn_from_num(i); |
1943 | 1963 | ||
1964 | atomic_set(&nn->nn_timeout, 0); | ||
1944 | spin_lock_init(&nn->nn_lock); | 1965 | spin_lock_init(&nn->nn_lock); |
1945 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); | 1966 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); |
1946 | INIT_DELAYED_WORK(&nn->nn_connect_expired, | 1967 | INIT_DELAYED_WORK(&nn->nn_connect_expired, |