diff options
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 51 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 2 |
2 files changed, 38 insertions, 15 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..4ea4b0a26975 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); | 399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); |
400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); | 400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); |
401 | 401 | ||
402 | /* we won't reconnect after our valid conn goes away for | ||
403 | * this hb iteration.. here so it shows up in the logs */ | ||
404 | if (was_valid && !valid && err == 0) | 402 | if (was_valid && !valid && err == 0) |
405 | err = -ENOTCONN; | 403 | err = -ENOTCONN; |
406 | 404 | ||
@@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
430 | 428 | ||
431 | if (!was_valid && valid) { | 429 | if (!was_valid && valid) { |
432 | o2quo_conn_up(o2net_num_from_nn(nn)); | 430 | o2quo_conn_up(o2net_num_from_nn(nn)); |
433 | /* this is a bit of a hack. we only try reconnecting | ||
434 | * when heartbeating starts until we get a connection. | ||
435 | * if that connection then dies we don't try reconnecting. | ||
436 | * the only way to start connecting again is to down | ||
437 | * heartbeat and bring it back up. */ | ||
438 | cancel_delayed_work(&nn->nn_connect_expired); | 431 | cancel_delayed_work(&nn->nn_connect_expired); |
439 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 432 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", |
440 | o2nm_this_node() > sc->sc_node->nd_num ? | 433 | o2nm_this_node() > sc->sc_node->nd_num ? |
@@ -457,6 +450,18 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
457 | delay = 0; | 450 | delay = 0; |
458 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 451 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
459 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 452 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
453 | |||
454 | /* | ||
455 | * Schedule the connect_expired work to run one idle timeout after the queued connection attempt. | ||
456 | * | ||
457 | * We might have lots of failed connection attempts that run | ||
458 | * through here but we only cancel the connect_expired work when | ||
459 | * a connection attempt succeeds. So only the first enqueue of | ||
460 | * the connect_expired work will do anything. The rest will see | ||
461 | * that it's already queued and do nothing. | ||
462 | */ | ||
463 | delay += msecs_to_jiffies(o2net_idle_timeout(NULL)); | ||
464 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); | ||
460 | } | 465 | } |
461 | 466 | ||
462 | /* keep track of the nn's sc ref for the caller */ | 467 | /* keep track of the nn's sc ref for the caller */ |
@@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1193 | * shut down already */ | 1198 | * shut down already */ |
1194 | if (nn->nn_sc == sc) { | 1199 | if (nn->nn_sc == sc) { |
1195 | o2net_sc_reset_idle_timer(sc); | 1200 | o2net_sc_reset_idle_timer(sc); |
1201 | atomic_set(&nn->nn_timeout, 0); | ||
1196 | o2net_set_nn_state(nn, sc, 1, 0); | 1202 | o2net_set_nn_state(nn, sc, 1, 0); |
1197 | } | 1203 | } |
1198 | spin_unlock(&nn->nn_lock); | 1204 | spin_unlock(&nn->nn_lock); |
@@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) | |||
1391 | static void o2net_idle_timer(unsigned long data) | 1397 | static void o2net_idle_timer(unsigned long data) |
1392 | { | 1398 | { |
1393 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1399 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
1400 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
1394 | struct timeval now; | 1401 | struct timeval now; |
1395 | 1402 | ||
1396 | do_gettimeofday(&now); | 1403 | do_gettimeofday(&now); |
@@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data) | |||
1413 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, | 1420 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, |
1414 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); | 1421 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); |
1415 | 1422 | ||
1423 | /* | ||
1424 | * Set nn_timeout so that the next connection attempt | ||
1425 | * is allowed to proceed in o2net_start_connect. | ||
1426 | */ | ||
1427 | atomic_set(&nn->nn_timeout, 1); | ||
1428 | |||
1416 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1429 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1417 | } | 1430 | } |
1418 | 1431 | ||
@@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work) | |||
1447 | struct socket *sock = NULL; | 1460 | struct socket *sock = NULL; |
1448 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1461 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
1449 | int ret = 0, stop; | 1462 | int ret = 0, stop; |
1463 | unsigned int timeout; | ||
1450 | 1464 | ||
1451 | /* if we're greater we initiate tx, otherwise we accept */ | 1465 | /* if we're greater we initiate tx, otherwise we accept */ |
1452 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1466 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
@@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work) | |||
1466 | } | 1480 | } |
1467 | 1481 | ||
1468 | spin_lock(&nn->nn_lock); | 1482 | spin_lock(&nn->nn_lock); |
1469 | /* see if we already have one pending or have given up */ | 1483 | /* |
1470 | stop = (nn->nn_sc || nn->nn_persistent_error); | 1484 | * see if we already have one pending or have given up. |
1485 | * nn_timeout is set when we close the connection | ||
1486 | * because of an idle timeout, which means we have | ||
1487 | * connected to that node successfully at least once, | ||
1488 | * so we try to connect to it again. | ||
1489 | */ | ||
1490 | timeout = atomic_read(&nn->nn_timeout); | ||
1491 | stop = (nn->nn_sc || | ||
1492 | (nn->nn_persistent_error && | ||
1493 | (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); | ||
1471 | spin_unlock(&nn->nn_lock); | 1494 | spin_unlock(&nn->nn_lock); |
1472 | if (stop) | 1495 | if (stop) |
1473 | goto out; | 1496 | goto out; |
@@ -1579,6 +1602,7 @@ void o2net_disconnect_node(struct o2nm_node *node) | |||
1579 | 1602 | ||
1580 | /* don't reconnect until it's heartbeating again */ | 1603 | /* don't reconnect until it's heartbeating again */ |
1581 | spin_lock(&nn->nn_lock); | 1604 | spin_lock(&nn->nn_lock); |
1605 | atomic_set(&nn->nn_timeout, 0); | ||
1582 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1606 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1583 | spin_unlock(&nn->nn_lock); | 1607 | spin_unlock(&nn->nn_lock); |
1584 | 1608 | ||
@@ -1613,17 +1637,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1613 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); | 1637 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); |
1614 | 1638 | ||
1615 | if (node_num != o2nm_this_node()) { | 1639 | if (node_num != o2nm_this_node()) { |
1616 | /* heartbeat doesn't work unless a local node number is | ||
1617 | * configured and doing so brings up the o2net_wq, so we can | ||
1618 | * use it.. */ | ||
1619 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | ||
1620 | msecs_to_jiffies(o2net_idle_timeout(node))); | ||
1621 | |||
1622 | /* believe it or not, accept and node hearbeating testing | 1640 | /* believe it or not, accept and node hearbeating testing |
1623 | * can succeed for this node before we got here.. so | 1641 | * can succeed for this node before we got here.. so |
1624 | * only use set_nn_state to clear the persistent error | 1642 | * only use set_nn_state to clear the persistent error |
1625 | * if that hasn't already happened */ | 1643 | * if that hasn't already happened */ |
1626 | spin_lock(&nn->nn_lock); | 1644 | spin_lock(&nn->nn_lock); |
1645 | atomic_set(&nn->nn_timeout, 0); | ||
1627 | if (nn->nn_persistent_error) | 1646 | if (nn->nn_persistent_error) |
1628 | o2net_set_nn_state(nn, NULL, 0, 0); | 1647 | o2net_set_nn_state(nn, NULL, 0, 0); |
1629 | spin_unlock(&nn->nn_lock); | 1648 | spin_unlock(&nn->nn_lock); |
@@ -1747,6 +1766,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1747 | new_sock = NULL; | 1766 | new_sock = NULL; |
1748 | 1767 | ||
1749 | spin_lock(&nn->nn_lock); | 1768 | spin_lock(&nn->nn_lock); |
1769 | atomic_set(&nn->nn_timeout, 0); | ||
1750 | o2net_set_nn_state(nn, sc, 0, 0); | 1770 | o2net_set_nn_state(nn, sc, 0, 0); |
1751 | spin_unlock(&nn->nn_lock); | 1771 | spin_unlock(&nn->nn_lock); |
1752 | 1772 | ||
@@ -1941,6 +1961,7 @@ int o2net_init(void) | |||
1941 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { | 1961 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { |
1942 | struct o2net_node *nn = o2net_nn_from_num(i); | 1962 | struct o2net_node *nn = o2net_nn_from_num(i); |
1943 | 1963 | ||
1964 | atomic_set(&nn->nn_timeout, 0); | ||
1944 | spin_lock_init(&nn->nn_lock); | 1965 | spin_lock_init(&nn->nn_lock); |
1945 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); | 1966 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); |
1946 | INIT_DELAYED_WORK(&nn->nn_connect_expired, | 1967 | INIT_DELAYED_WORK(&nn->nn_connect_expired, |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..b4c5586f46ea 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -95,6 +95,8 @@ struct o2net_node { | |||
95 | unsigned nn_sc_valid:1; | 95 | unsigned nn_sc_valid:1; |
96 | /* if this is set tx just returns it */ | 96 | /* if this is set tx just returns it */ |
97 | int nn_persistent_error; | 97 | int nn_persistent_error; |
98 | /* Set to 1 only after the idle timeout fires. */ | ||
99 | atomic_t nn_timeout; | ||
98 | 100 | ||
99 | /* threads waiting for an sc to arrive wait on the wq for generation | 101 | /* threads waiting for an sc to arrive wait on the wq for generation |
100 | * to increase. it is increased when a connecting socket succeeds | 102 | * to increase. it is increased when a connecting socket succeeds |