about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2/cluster/tcp.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 13:15:22 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 13:15:22 -0400
commit: e675349e2bdbfb661fa0d8ff2441b4cf48fb7e48 (patch)
tree: 7443e324c951f375945905dc436b012c98a00e05 /fs/ocfs2/cluster/tcp.c
parent: ef38ff9d372d4fe69e415370939a0f1fb5783af1 (diff)
parent: 2309e9e040fe29469fb85a384636c455b62fe525 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (64 commits)
  ocfs2/net: Add debug interface to o2net
  ocfs2: Only build ocfs2/dlm with the o2cb stack module
  ocfs2/cluster: Get rid of arguments to the timeout routines
  ocfs2: Put tree in MAINTAINERS
  ocfs2: Use BUG_ON
  ocfs2: Convert ocfs2 over to unlocked_ioctl
  ocfs2: Improve rename locking
  fs/ocfs2/aops.c: test for IS_ERR rather than 0
  ocfs2: Add inode stealing for ocfs2_reserve_new_inode
  ocfs2: Add ac_alloc_slot in ocfs2_alloc_context
  ocfs2: Add a new parameter for ocfs2_reserve_suballoc_bits
  ocfs2: Enable cross extent block merge.
  ocfs2: Add support for cross extent block
  ocfs2: Move /sys/o2cb to /sys/fs/o2cb
  sysfs: Allow removal of symlinks in the sysfs root
  ocfs2: Reconnect after idle time out.
  ocfs2/dlm: Cleanup lockres print
  ocfs2/dlm: Fix lockname in lockres print function
  ocfs2/dlm: Move dlm_print_one_mle() from dlmmaster.c to dlmdebug.c
  ocfs2/dlm: Dumps the purgelist into a debugfs file
  ...
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r-- fs/ocfs2/cluster/tcp.c 164
1 files changed, 123 insertions, 41 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index b8057c51b205..1e44ad14881a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data);
142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145/* 145static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
146 * FIXME: These should use to_o2nm_cluster_from_node(), but we end up 146 u32 msgkey, struct task_struct *task, u8 node)
147 * losing our parent link to the cluster during shutdown. This can be 147{
148 * solved by adding a pre-removal callback to configfs, or passing 148#ifdef CONFIG_DEBUG_FS
149 * around the cluster with the node. -jeffm 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 */ 150 nst->st_task = task;
151static inline int o2net_reconnect_delay(struct o2nm_node *node) 151 nst->st_msg_type = msgtype;
152 nst->st_msg_key = msgkey;
153 nst->st_node = node;
154#endif
155}
156
157static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
158{
159#ifdef CONFIG_DEBUG_FS
160 do_gettimeofday(&nst->st_sock_time);
161#endif
162}
163
164static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
165{
166#ifdef CONFIG_DEBUG_FS
167 do_gettimeofday(&nst->st_send_time);
168#endif
169}
170
171static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
172{
173#ifdef CONFIG_DEBUG_FS
174 do_gettimeofday(&nst->st_status_time);
175#endif
176}
177
178static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
179 struct o2net_sock_container *sc)
180{
181#ifdef CONFIG_DEBUG_FS
182 nst->st_sc = sc;
183#endif
184}
185
186static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
187{
188#ifdef CONFIG_DEBUG_FS
189 nst->st_id = msg_id;
190#endif
191}
192
193static inline int o2net_reconnect_delay(void)
152{ 194{
153 return o2nm_single_cluster->cl_reconnect_delay_ms; 195 return o2nm_single_cluster->cl_reconnect_delay_ms;
154} 196}
155 197
156static inline int o2net_keepalive_delay(struct o2nm_node *node) 198static inline int o2net_keepalive_delay(void)
157{ 199{
158 return o2nm_single_cluster->cl_keepalive_delay_ms; 200 return o2nm_single_cluster->cl_keepalive_delay_ms;
159} 201}
160 202
161static inline int o2net_idle_timeout(struct o2nm_node *node) 203static inline int o2net_idle_timeout(void)
162{ 204{
163 return o2nm_single_cluster->cl_idle_timeout_ms; 205 return o2nm_single_cluster->cl_idle_timeout_ms;
164} 206}
@@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref)
296 o2nm_node_put(sc->sc_node); 338 o2nm_node_put(sc->sc_node);
297 sc->sc_node = NULL; 339 sc->sc_node = NULL;
298 340
341 o2net_debug_del_sc(sc);
299 kfree(sc); 342 kfree(sc);
300} 343}
301 344
@@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
336 379
337 ret = sc; 380 ret = sc;
338 sc->sc_page = page; 381 sc->sc_page = page;
382 o2net_debug_add_sc(sc);
339 sc = NULL; 383 sc = NULL;
340 page = NULL; 384 page = NULL;
341 385
@@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
399 mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); 443 mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid);
400 mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); 444 mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc);
401 445
402 /* we won't reconnect after our valid conn goes away for
403 * this hb iteration.. here so it shows up in the logs */
404 if (was_valid && !valid && err == 0) 446 if (was_valid && !valid && err == 0)
405 err = -ENOTCONN; 447 err = -ENOTCONN;
406 448
@@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
430 472
431 if (!was_valid && valid) { 473 if (!was_valid && valid) {
432 o2quo_conn_up(o2net_num_from_nn(nn)); 474 o2quo_conn_up(o2net_num_from_nn(nn));
433 /* this is a bit of a hack. we only try reconnecting
434 * when heartbeating starts until we get a connection.
435 * if that connection then dies we don't try reconnecting.
436 * the only way to start connecting again is to down
437 * heartbeat and bring it back up. */
438 cancel_delayed_work(&nn->nn_connect_expired); 475 cancel_delayed_work(&nn->nn_connect_expired);
439 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", 476 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
440 o2nm_this_node() > sc->sc_node->nd_num ? 477 o2nm_this_node() > sc->sc_node->nd_num ?
@@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn,
451 /* delay if we're withing a RECONNECT_DELAY of the 488 /* delay if we're withing a RECONNECT_DELAY of the
452 * last attempt */ 489 * last attempt */
453 delay = (nn->nn_last_connect_attempt + 490 delay = (nn->nn_last_connect_attempt +
454 msecs_to_jiffies(o2net_reconnect_delay(NULL))) 491 msecs_to_jiffies(o2net_reconnect_delay()))
455 - jiffies; 492 - jiffies;
456 if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL))) 493 if (delay > msecs_to_jiffies(o2net_reconnect_delay()))
457 delay = 0; 494 delay = 0;
458 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 495 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
459 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 496 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
497
498 /*
499 * Delay the expired work after idle timeout.
500 *
501 * We might have lots of failed connection attempts that run
502 * through here but we only cancel the connect_expired work when
503 * a connection attempt succeeds. So only the first enqueue of
504 * the connect_expired work will do anything. The rest will see
505 * that it's already queued and do nothing.
506 */
507 delay += msecs_to_jiffies(o2net_idle_timeout());
508 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay);
460 } 509 }
461 510
462 /* keep track of the nn's sc ref for the caller */ 511 /* keep track of the nn's sc ref for the caller */
@@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
914 struct o2net_status_wait nsw = { 963 struct o2net_status_wait nsw = {
915 .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), 964 .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item),
916 }; 965 };
966 struct o2net_send_tracking nst;
967
968 o2net_init_nst(&nst, msg_type, key, current, target_node);
917 969
918 if (o2net_wq == NULL) { 970 if (o2net_wq == NULL) {
919 mlog(0, "attempt to tx without o2netd running\n"); 971 mlog(0, "attempt to tx without o2netd running\n");
@@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
939 goto out; 991 goto out;
940 } 992 }
941 993
994 o2net_debug_add_nst(&nst);
995
996 o2net_set_nst_sock_time(&nst);
997
942 ret = wait_event_interruptible(nn->nn_sc_wq, 998 ret = wait_event_interruptible(nn->nn_sc_wq,
943 o2net_tx_can_proceed(nn, &sc, &error)); 999 o2net_tx_can_proceed(nn, &sc, &error));
944 if (!ret && error) 1000 if (!ret && error)
@@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
946 if (ret) 1002 if (ret)
947 goto out; 1003 goto out;
948 1004
1005 o2net_set_nst_sock_container(&nst, sc);
1006
949 veclen = caller_veclen + 1; 1007 veclen = caller_veclen + 1;
950 vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); 1008 vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC);
951 if (vec == NULL) { 1009 if (vec == NULL) {
@@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
972 goto out; 1030 goto out;
973 1031
974 msg->msg_num = cpu_to_be32(nsw.ns_id); 1032 msg->msg_num = cpu_to_be32(nsw.ns_id);
1033 o2net_set_nst_msg_id(&nst, nsw.ns_id);
1034
1035 o2net_set_nst_send_time(&nst);
975 1036
976 /* finally, convert the message header to network byte-order 1037 /* finally, convert the message header to network byte-order
977 * and send */ 1038 * and send */
@@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
986 } 1047 }
987 1048
988 /* wait on other node's handler */ 1049 /* wait on other node's handler */
1050 o2net_set_nst_status_time(&nst);
989 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 1051 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
990 1052
991 /* Note that we avoid overwriting the callers status return 1053 /* Note that we avoid overwriting the callers status return
@@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
998 mlog(0, "woken, returning system status %d, user status %d\n", 1060 mlog(0, "woken, returning system status %d, user status %d\n",
999 ret, nsw.ns_status); 1061 ret, nsw.ns_status);
1000out: 1062out:
1063 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1001 if (sc) 1064 if (sc)
1002 sc_put(sc); 1065 sc_put(sc);
1003 if (vec) 1066 if (vec)
@@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1154 * but isn't. This can ultimately cause corruption. 1217 * but isn't. This can ultimately cause corruption.
1155 */ 1218 */
1156 if (be32_to_cpu(hand->o2net_idle_timeout_ms) != 1219 if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
1157 o2net_idle_timeout(sc->sc_node)) { 1220 o2net_idle_timeout()) {
1158 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " 1221 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
1159 "%u ms, but we use %u ms locally. disconnecting\n", 1222 "%u ms, but we use %u ms locally. disconnecting\n",
1160 SC_NODEF_ARGS(sc), 1223 SC_NODEF_ARGS(sc),
1161 be32_to_cpu(hand->o2net_idle_timeout_ms), 1224 be32_to_cpu(hand->o2net_idle_timeout_ms),
1162 o2net_idle_timeout(sc->sc_node)); 1225 o2net_idle_timeout());
1163 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1226 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1164 return -1; 1227 return -1;
1165 } 1228 }
1166 1229
1167 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != 1230 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
1168 o2net_keepalive_delay(sc->sc_node)) { 1231 o2net_keepalive_delay()) {
1169 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " 1232 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
1170 "%u ms, but we use %u ms locally. disconnecting\n", 1233 "%u ms, but we use %u ms locally. disconnecting\n",
1171 SC_NODEF_ARGS(sc), 1234 SC_NODEF_ARGS(sc),
1172 be32_to_cpu(hand->o2net_keepalive_delay_ms), 1235 be32_to_cpu(hand->o2net_keepalive_delay_ms),
1173 o2net_keepalive_delay(sc->sc_node)); 1236 o2net_keepalive_delay());
1174 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1237 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1175 return -1; 1238 return -1;
1176 } 1239 }
@@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1193 * shut down already */ 1256 * shut down already */
1194 if (nn->nn_sc == sc) { 1257 if (nn->nn_sc == sc) {
1195 o2net_sc_reset_idle_timer(sc); 1258 o2net_sc_reset_idle_timer(sc);
1259 atomic_set(&nn->nn_timeout, 0);
1196 o2net_set_nn_state(nn, sc, 1, 0); 1260 o2net_set_nn_state(nn, sc, 1, 0);
1197 } 1261 }
1198 spin_unlock(&nn->nn_lock); 1262 spin_unlock(&nn->nn_lock);
@@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void)
1347{ 1411{
1348 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 1412 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
1349 O2HB_MAX_WRITE_TIMEOUT_MS); 1413 O2HB_MAX_WRITE_TIMEOUT_MS);
1350 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( 1414 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout());
1351 o2net_idle_timeout(NULL));
1352 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( 1415 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
1353 o2net_keepalive_delay(NULL)); 1416 o2net_keepalive_delay());
1354 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( 1417 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
1355 o2net_reconnect_delay(NULL)); 1418 o2net_reconnect_delay());
1356} 1419}
1357 1420
1358/* ------------------------------------------------------------ */ 1421/* ------------------------------------------------------------ */
@@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
1391static void o2net_idle_timer(unsigned long data) 1454static void o2net_idle_timer(unsigned long data)
1392{ 1455{
1393 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1456 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1457 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1394 struct timeval now; 1458 struct timeval now;
1395 1459
1396 do_gettimeofday(&now); 1460 do_gettimeofday(&now);
1397 1461
1398 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1462 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1399 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1463 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1400 o2net_idle_timeout(sc->sc_node) / 1000, 1464 o2net_idle_timeout() / 1000,
1401 o2net_idle_timeout(sc->sc_node) % 1000); 1465 o2net_idle_timeout() % 1000);
1402 mlog(ML_NOTICE, "here are some times that might help debug the " 1466 mlog(ML_NOTICE, "here are some times that might help debug the "
1403 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1467 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1404 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1468 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data)
1413 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, 1477 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
1414 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); 1478 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
1415 1479
1480 /*
1481 * Initialize the nn_timeout so that the next connection attempt
1482 * will continue in o2net_start_connect.
1483 */
1484 atomic_set(&nn->nn_timeout, 1);
1485
1416 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1486 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1417} 1487}
1418 1488
@@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1420{ 1490{
1421 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1491 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1422 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1492 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1423 msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); 1493 msecs_to_jiffies(o2net_keepalive_delay()));
1424 do_gettimeofday(&sc->sc_tv_timer); 1494 do_gettimeofday(&sc->sc_tv_timer);
1425 mod_timer(&sc->sc_idle_timeout, 1495 mod_timer(&sc->sc_idle_timeout,
1426 jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); 1496 jiffies + msecs_to_jiffies(o2net_idle_timeout()));
1427} 1497}
1428 1498
1429static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1499static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
@@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work)
1447 struct socket *sock = NULL; 1517 struct socket *sock = NULL;
1448 struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; 1518 struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
1449 int ret = 0, stop; 1519 int ret = 0, stop;
1520 unsigned int timeout;
1450 1521
1451 /* if we're greater we initiate tx, otherwise we accept */ 1522 /* if we're greater we initiate tx, otherwise we accept */
1452 if (o2nm_this_node() <= o2net_num_from_nn(nn)) 1523 if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work)
1466 } 1537 }
1467 1538
1468 spin_lock(&nn->nn_lock); 1539 spin_lock(&nn->nn_lock);
1469 /* see if we already have one pending or have given up */ 1540 /*
1470 stop = (nn->nn_sc || nn->nn_persistent_error); 1541 * see if we already have one pending or have given up.
1542 * For nn_timeout, it is set when we close the connection
1543 * because of the idle time out. So it means that we have
1544 * at least connected to that node successfully once,
1545 * now try to connect to it again.
1546 */
1547 timeout = atomic_read(&nn->nn_timeout);
1548 stop = (nn->nn_sc ||
1549 (nn->nn_persistent_error &&
1550 (nn->nn_persistent_error != -ENOTCONN || timeout == 0)));
1471 spin_unlock(&nn->nn_lock); 1551 spin_unlock(&nn->nn_lock);
1472 if (stop) 1552 if (stop)
1473 goto out; 1553 goto out;
@@ -1555,8 +1635,8 @@ static void o2net_connect_expired(struct work_struct *work)
1555 mlog(ML_ERROR, "no connection established with node %u after " 1635 mlog(ML_ERROR, "no connection established with node %u after "
1556 "%u.%u seconds, giving up and returning errors.\n", 1636 "%u.%u seconds, giving up and returning errors.\n",
1557 o2net_num_from_nn(nn), 1637 o2net_num_from_nn(nn),
1558 o2net_idle_timeout(NULL) / 1000, 1638 o2net_idle_timeout() / 1000,
1559 o2net_idle_timeout(NULL) % 1000); 1639 o2net_idle_timeout() % 1000);
1560 1640
1561 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1641 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1562 } 1642 }
@@ -1579,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node)
1579 1659
1580 /* don't reconnect until it's heartbeating again */ 1660 /* don't reconnect until it's heartbeating again */
1581 spin_lock(&nn->nn_lock); 1661 spin_lock(&nn->nn_lock);
1662 atomic_set(&nn->nn_timeout, 0);
1582 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1663 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1583 spin_unlock(&nn->nn_lock); 1664 spin_unlock(&nn->nn_lock);
1584 1665
@@ -1610,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1610 1691
1611 /* ensure an immediate connect attempt */ 1692 /* ensure an immediate connect attempt */
1612 nn->nn_last_connect_attempt = jiffies - 1693 nn->nn_last_connect_attempt = jiffies -
1613 (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); 1694 (msecs_to_jiffies(o2net_reconnect_delay()) + 1);
1614 1695
1615 if (node_num != o2nm_this_node()) { 1696 if (node_num != o2nm_this_node()) {
1616 /* heartbeat doesn't work unless a local node number is
1617 * configured and doing so brings up the o2net_wq, so we can
1618 * use it.. */
1619 queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
1620 msecs_to_jiffies(o2net_idle_timeout(node)));
1621
1622 /* believe it or not, accept and node hearbeating testing 1697 /* believe it or not, accept and node hearbeating testing
1623 * can succeed for this node before we got here.. so 1698 * can succeed for this node before we got here.. so
1624 * only use set_nn_state to clear the persistent error 1699 * only use set_nn_state to clear the persistent error
1625 * if that hasn't already happened */ 1700 * if that hasn't already happened */
1626 spin_lock(&nn->nn_lock); 1701 spin_lock(&nn->nn_lock);
1702 atomic_set(&nn->nn_timeout, 0);
1627 if (nn->nn_persistent_error) 1703 if (nn->nn_persistent_error)
1628 o2net_set_nn_state(nn, NULL, 0, 0); 1704 o2net_set_nn_state(nn, NULL, 0, 0);
1629 spin_unlock(&nn->nn_lock); 1705 spin_unlock(&nn->nn_lock);
@@ -1747,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock)
1747 new_sock = NULL; 1823 new_sock = NULL;
1748 1824
1749 spin_lock(&nn->nn_lock); 1825 spin_lock(&nn->nn_lock);
1826 atomic_set(&nn->nn_timeout, 0);
1750 o2net_set_nn_state(nn, sc, 0, 0); 1827 o2net_set_nn_state(nn, sc, 0, 0);
1751 spin_unlock(&nn->nn_lock); 1828 spin_unlock(&nn->nn_lock);
1752 1829
@@ -1922,6 +1999,9 @@ int o2net_init(void)
1922 1999
1923 o2quo_init(); 2000 o2quo_init();
1924 2001
2002 if (o2net_debugfs_init())
2003 return -ENOMEM;
2004
1925 o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); 2005 o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
1926 o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 2006 o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
1927 o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); 2007 o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
@@ -1941,6 +2021,7 @@ int o2net_init(void)
1941 for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { 2021 for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) {
1942 struct o2net_node *nn = o2net_nn_from_num(i); 2022 struct o2net_node *nn = o2net_nn_from_num(i);
1943 2023
2024 atomic_set(&nn->nn_timeout, 0);
1944 spin_lock_init(&nn->nn_lock); 2025 spin_lock_init(&nn->nn_lock);
1945 INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); 2026 INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
1946 INIT_DELAYED_WORK(&nn->nn_connect_expired, 2027 INIT_DELAYED_WORK(&nn->nn_connect_expired,
@@ -1962,4 +2043,5 @@ void o2net_exit(void)
1962 kfree(o2net_hand); 2043 kfree(o2net_hand);
1963 kfree(o2net_keep_req); 2044 kfree(o2net_keep_req);
1964 kfree(o2net_keep_resp); 2045 kfree(o2net_keep_resp);
2046 o2net_debugfs_exit();
1965} 2047}