diff options
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 60 |
1 files changed, 48 insertions, 12 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9b3209dc0b16..ebbaee664c66 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes); | |||
147 | static void o2net_sc_send_keep_req(struct work_struct *work); | 147 | static void o2net_sc_send_keep_req(struct work_struct *work); |
148 | static void o2net_idle_timer(unsigned long data); | 148 | static void o2net_idle_timer(unsigned long data); |
149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
150 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | ||
151 | |||
152 | /* | ||
153 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | ||
154 | * losing our parent link to the cluster during shutdown. This can be | ||
155 | * solved by adding a pre-removal callback to configfs, or passing | ||
156 | * around the cluster with the node. -jeffm | ||
157 | */ | ||
158 | static inline int o2net_reconnect_delay(struct o2nm_node *node) | ||
159 | { | ||
160 | return o2nm_single_cluster->cl_reconnect_delay_ms; | ||
161 | } | ||
162 | |||
163 | static inline int o2net_keepalive_delay(struct o2nm_node *node) | ||
164 | { | ||
165 | return o2nm_single_cluster->cl_keepalive_delay_ms; | ||
166 | } | ||
167 | |||
168 | static inline int o2net_idle_timeout(struct o2nm_node *node) | ||
169 | { | ||
170 | return o2nm_single_cluster->cl_idle_timeout_ms; | ||
171 | } | ||
150 | 172 | ||
151 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) | 173 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) |
152 | { | 174 | { |
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref) | |||
271 | { | 293 | { |
272 | struct o2net_sock_container *sc = container_of(kref, | 294 | struct o2net_sock_container *sc = container_of(kref, |
273 | struct o2net_sock_container, sc_kref); | 295 | struct o2net_sock_container, sc_kref); |
296 | BUG_ON(timer_pending(&sc->sc_idle_timeout)); | ||
297 | |||
274 | sclog(sc, "releasing\n"); | 298 | sclog(sc, "releasing\n"); |
275 | 299 | ||
276 | if (sc->sc_sock) { | 300 | if (sc->sc_sock) { |
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
424 | /* delay if we're within a RECONNECT_DELAY of the | 448 | /* delay if we're within a RECONNECT_DELAY of the |
425 | * last attempt */ | 449 | * last attempt */ |
426 | delay = (nn->nn_last_connect_attempt + | 450 | delay = (nn->nn_last_connect_attempt + |
427 | msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 451 | msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
428 | - jiffies; | 452 | - jiffies; |
429 | if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 453 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
430 | delay = 0; | 454 | delay = 0; |
431 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 455 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
432 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 456 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
@@ -1105,7 +1129,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1105 | /* set valid and queue the idle timers only if it hasn't been | 1129 | /* set valid and queue the idle timers only if it hasn't been |
1106 | * shut down already */ | 1130 | * shut down already */ |
1107 | if (nn->nn_sc == sc) { | 1131 | if (nn->nn_sc == sc) { |
1108 | o2net_sc_postpone_idle(sc); | 1132 | o2net_sc_reset_idle_timer(sc); |
1109 | o2net_set_nn_state(nn, sc, 1, 0); | 1133 | o2net_set_nn_state(nn, sc, 1, 0); |
1110 | } | 1134 | } |
1111 | spin_unlock(&nn->nn_lock); | 1135 | spin_unlock(&nn->nn_lock); |
@@ -1287,8 +1311,10 @@ static void o2net_idle_timer(unsigned long data) | |||
1287 | 1311 | ||
1288 | do_gettimeofday(&now); | 1312 | do_gettimeofday(&now); |
1289 | 1313 | ||
1290 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " | 1314 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
1291 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); | 1315 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
1316 | o2net_idle_timeout(sc->sc_node) / 1000, | ||
1317 | o2net_idle_timeout(sc->sc_node) % 1000); | ||
1292 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1318 | mlog(ML_NOTICE, "here are some times that might help debug the " |
1293 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1319 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
1294 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1320 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
@@ -1306,14 +1332,21 @@ static void o2net_idle_timer(unsigned long data) | |||
1306 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1332 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1307 | } | 1333 | } |
1308 | 1334 | ||
1309 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1335 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) |
1310 | { | 1336 | { |
1311 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1337 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
1312 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1338 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
1313 | O2NET_KEEPALIVE_DELAY_SECS * HZ); | 1339 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); |
1314 | do_gettimeofday(&sc->sc_tv_timer); | 1340 | do_gettimeofday(&sc->sc_tv_timer); |
1315 | mod_timer(&sc->sc_idle_timeout, | 1341 | mod_timer(&sc->sc_idle_timeout, |
1316 | jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); | 1342 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); |
1343 | } | ||
1344 | |||
1345 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | ||
1346 | { | ||
1347 | /* Only push out an existing timer */ | ||
1348 | if (timer_pending(&sc->sc_idle_timeout)) | ||
1349 | o2net_sc_reset_idle_timer(sc); | ||
1317 | } | 1350 | } |
1318 | 1351 | ||
1319 | /* this work func is kicked whenever a path sets the nn state which doesn't | 1352 | /* this work func is kicked whenever a path sets the nn state which doesn't |
@@ -1435,9 +1468,12 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1435 | 1468 | ||
1436 | spin_lock(&nn->nn_lock); | 1469 | spin_lock(&nn->nn_lock); |
1437 | if (!nn->nn_sc_valid) { | 1470 | if (!nn->nn_sc_valid) { |
1471 | struct o2nm_node *node = nn->nn_sc->sc_node; | ||
1438 | mlog(ML_ERROR, "no connection established with node %u after " | 1472 | mlog(ML_ERROR, "no connection established with node %u after " |
1439 | "%u seconds, giving up and returning errors.\n", | 1473 | "%u.%u seconds, giving up and returning errors.\n", |
1440 | o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); | 1474 | o2net_num_from_nn(nn), |
1475 | o2net_idle_timeout(node) / 1000, | ||
1476 | o2net_idle_timeout(node) % 1000); | ||
1441 | 1477 | ||
1442 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1478 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1443 | } | 1479 | } |
@@ -1489,14 +1525,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1489 | 1525 | ||
1490 | /* ensure an immediate connect attempt */ | 1526 | /* ensure an immediate connect attempt */ |
1491 | nn->nn_last_connect_attempt = jiffies - | 1527 | nn->nn_last_connect_attempt = jiffies - |
1492 | (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); | 1528 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); |
1493 | 1529 | ||
1494 | if (node_num != o2nm_this_node()) { | 1530 | if (node_num != o2nm_this_node()) { |
1495 | /* heartbeat doesn't work unless a local node number is | 1531 | /* heartbeat doesn't work unless a local node number is |
1496 | * configured and doing so brings up the o2net_wq, so we can | 1532 | * configured and doing so brings up the o2net_wq, so we can |
1497 | * use it.. */ | 1533 | * use it.. */ |
1498 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | 1534 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, |
1499 | O2NET_IDLE_TIMEOUT_SECS * HZ); | 1535 | msecs_to_jiffies(o2net_idle_timeout(node))); |
1500 | 1536 | ||
1501 | /* believe it or not, accept and node heartbeating testing | 1537 | /* believe it or not, accept and node heartbeating testing |
1502 | * can succeed for this node before we got here.. so | 1538 | * can succeed for this node before we got here.. so |