aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/cluster/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r--fs/ocfs2/cluster/tcp.c152
1 files changed, 131 insertions, 21 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9b3209dc0b16..457753df1ae7 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes);
147static void o2net_sc_send_keep_req(struct work_struct *work); 147static void o2net_sc_send_keep_req(struct work_struct *work);
148static void o2net_idle_timer(unsigned long data); 148static void o2net_idle_timer(unsigned long data);
149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
150static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
151
152/*
153 * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
154 * losing our parent link to the cluster during shutdown. This can be
155 * solved by adding a pre-removal callback to configfs, or passing
156 * around the cluster with the node. -jeffm
157 */
158static inline int o2net_reconnect_delay(struct o2nm_node *node)
159{
160 return o2nm_single_cluster->cl_reconnect_delay_ms;
161}
162
163static inline int o2net_keepalive_delay(struct o2nm_node *node)
164{
165 return o2nm_single_cluster->cl_keepalive_delay_ms;
166}
167
168static inline int o2net_idle_timeout(struct o2nm_node *node)
169{
170 return o2nm_single_cluster->cl_idle_timeout_ms;
171}
150 172
151static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 173static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
152{ 174{
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
271{ 293{
272 struct o2net_sock_container *sc = container_of(kref, 294 struct o2net_sock_container *sc = container_of(kref,
273 struct o2net_sock_container, sc_kref); 295 struct o2net_sock_container, sc_kref);
296 BUG_ON(timer_pending(&sc->sc_idle_timeout));
297
274 sclog(sc, "releasing\n"); 298 sclog(sc, "releasing\n");
275 299
276 if (sc->sc_sock) { 300 if (sc->sc_sock) {
@@ -356,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
356 sc_put(sc); 380 sc_put(sc);
357} 381}
358 382
383static atomic_t o2net_connected_peers = ATOMIC_INIT(0);
384
385int o2net_num_connected_peers(void)
386{
387 return atomic_read(&o2net_connected_peers);
388}
389
359static void o2net_set_nn_state(struct o2net_node *nn, 390static void o2net_set_nn_state(struct o2net_node *nn,
360 struct o2net_sock_container *sc, 391 struct o2net_sock_container *sc,
361 unsigned valid, int err) 392 unsigned valid, int err)
@@ -366,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn,
366 397
367 assert_spin_locked(&nn->nn_lock); 398 assert_spin_locked(&nn->nn_lock);
368 399
400 if (old_sc && !sc)
401 atomic_dec(&o2net_connected_peers);
402 else if (!old_sc && sc)
403 atomic_inc(&o2net_connected_peers);
404
369 /* the node num comparison and single connect/accept path should stop 405 /* the node num comparison and single connect/accept path should stop
370 * an non-null sc from being overwritten with another */ 406 * an non-null sc from being overwritten with another */
371 BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); 407 BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc);
@@ -424,9 +460,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
424 /* delay if we're withing a RECONNECT_DELAY of the 460 /* delay if we're withing a RECONNECT_DELAY of the
425 * last attempt */ 461 * last attempt */
426 delay = (nn->nn_last_connect_attempt + 462 delay = (nn->nn_last_connect_attempt +
427 msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 463 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
428 - jiffies; 464 - jiffies;
429 if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 465 if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
430 delay = 0; 466 delay = 0;
431 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 467 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
432 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 468 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1099,13 +1135,51 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1099 return -1; 1135 return -1;
1100 } 1136 }
1101 1137
1138 /*
1139 * Ensure timeouts are consistent with other nodes, otherwise
1140 * we can end up with one node thinking that the other must be down,
1141 * but isn't. This can ultimately cause corruption.
1142 */
1143 if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
1144 o2net_idle_timeout(sc->sc_node)) {
1145 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
1146 "%u ms, but we use %u ms locally. disconnecting\n",
1147 SC_NODEF_ARGS(sc),
1148 be32_to_cpu(hand->o2net_idle_timeout_ms),
1149 o2net_idle_timeout(sc->sc_node));
1150 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1151 return -1;
1152 }
1153
1154 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
1155 o2net_keepalive_delay(sc->sc_node)) {
1156 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
1157 "%u ms, but we use %u ms locally. disconnecting\n",
1158 SC_NODEF_ARGS(sc),
1159 be32_to_cpu(hand->o2net_keepalive_delay_ms),
1160 o2net_keepalive_delay(sc->sc_node));
1161 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1162 return -1;
1163 }
1164
1165 if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) !=
1166 O2HB_MAX_WRITE_TIMEOUT_MS) {
1167 mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of "
1168 "%u ms, but we use %u ms locally. disconnecting\n",
1169 SC_NODEF_ARGS(sc),
1170 be32_to_cpu(hand->o2hb_heartbeat_timeout_ms),
1171 O2HB_MAX_WRITE_TIMEOUT_MS);
1172 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1173 return -1;
1174 }
1175
1102 sc->sc_handshake_ok = 1; 1176 sc->sc_handshake_ok = 1;
1103 1177
1104 spin_lock(&nn->nn_lock); 1178 spin_lock(&nn->nn_lock);
1105 /* set valid and queue the idle timers only if it hasn't been 1179 /* set valid and queue the idle timers only if it hasn't been
1106 * shut down already */ 1180 * shut down already */
1107 if (nn->nn_sc == sc) { 1181 if (nn->nn_sc == sc) {
1108 o2net_sc_postpone_idle(sc); 1182 o2net_sc_reset_idle_timer(sc);
1109 o2net_set_nn_state(nn, sc, 1, 0); 1183 o2net_set_nn_state(nn, sc, 1, 0);
1110 } 1184 }
1111 spin_unlock(&nn->nn_lock); 1185 spin_unlock(&nn->nn_lock);
@@ -1131,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1131 sclog(sc, "receiving\n"); 1205 sclog(sc, "receiving\n");
1132 do_gettimeofday(&sc->sc_tv_advance_start); 1206 do_gettimeofday(&sc->sc_tv_advance_start);
1133 1207
1208 if (unlikely(sc->sc_handshake_ok == 0)) {
1209 if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
1210 data = page_address(sc->sc_page) + sc->sc_page_off;
1211 datalen = sizeof(struct o2net_handshake) - sc->sc_page_off;
1212 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
1213 if (ret > 0)
1214 sc->sc_page_off += ret;
1215 }
1216
1217 if (sc->sc_page_off == sizeof(struct o2net_handshake)) {
1218 o2net_check_handshake(sc);
1219 if (unlikely(sc->sc_handshake_ok == 0))
1220 ret = -EPROTO;
1221 }
1222 goto out;
1223 }
1224
1134 /* do we need more header? */ 1225 /* do we need more header? */
1135 if (sc->sc_page_off < sizeof(struct o2net_msg)) { 1226 if (sc->sc_page_off < sizeof(struct o2net_msg)) {
1136 data = page_address(sc->sc_page) + sc->sc_page_off; 1227 data = page_address(sc->sc_page) + sc->sc_page_off;
@@ -1138,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1138 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 1229 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
1139 if (ret > 0) { 1230 if (ret > 0) {
1140 sc->sc_page_off += ret; 1231 sc->sc_page_off += ret;
1141
1142 /* this working relies on the handshake being
1143 * smaller than the normal message header */
1144 if (sc->sc_page_off >= sizeof(struct o2net_handshake)&&
1145 !sc->sc_handshake_ok && o2net_check_handshake(sc)) {
1146 ret = -EPROTO;
1147 goto out;
1148 }
1149
1150 /* only swab incoming here.. we can 1232 /* only swab incoming here.. we can
1151 * only get here once as we cross from 1233 * only get here once as we cross from
1152 * being under to over */ 1234 * being under to over */
@@ -1248,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock)
1248 return ret; 1330 return ret;
1249} 1331}
1250 1332
1333static void o2net_initialize_handshake(void)
1334{
1335 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
1336 O2HB_MAX_WRITE_TIMEOUT_MS);
1337 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
1338 o2net_idle_timeout(NULL));
1339 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
1340 o2net_keepalive_delay(NULL));
1341 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
1342 o2net_reconnect_delay(NULL));
1343}
1344
1251/* ------------------------------------------------------------ */ 1345/* ------------------------------------------------------------ */
1252 1346
1253/* called when a connect completes and after a sock is accepted. the 1347/* called when a connect completes and after a sock is accepted. the
@@ -1262,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work)
1262 (unsigned long long)O2NET_PROTOCOL_VERSION, 1356 (unsigned long long)O2NET_PROTOCOL_VERSION,
1263 (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); 1357 (unsigned long long)be64_to_cpu(o2net_hand->connector_id));
1264 1358
1359 o2net_initialize_handshake();
1265 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1360 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
1266 sc_put(sc); 1361 sc_put(sc);
1267} 1362}
@@ -1287,8 +1382,10 @@ static void o2net_idle_timer(unsigned long data)
1287 1382
1288 do_gettimeofday(&now); 1383 do_gettimeofday(&now);
1289 1384
1290 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " 1385 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1291 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1386 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1387 o2net_idle_timeout(sc->sc_node) / 1000,
1388 o2net_idle_timeout(sc->sc_node) % 1000);
1292 mlog(ML_NOTICE, "here are some times that might help debug the " 1389 mlog(ML_NOTICE, "here are some times that might help debug the "
1293 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1390 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1294 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1391 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1306,14 +1403,21 @@ static void o2net_idle_timer(unsigned long data)
1306 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1403 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1307} 1404}
1308 1405
1309static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1406static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1310{ 1407{
1311 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1408 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1312 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1409 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1313 O2NET_KEEPALIVE_DELAY_SECS * HZ); 1410 msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
1314 do_gettimeofday(&sc->sc_tv_timer); 1411 do_gettimeofday(&sc->sc_tv_timer);
1315 mod_timer(&sc->sc_idle_timeout, 1412 mod_timer(&sc->sc_idle_timeout,
1316 jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); 1413 jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
1414}
1415
1416static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
1417{
1418 /* Only push out an existing timer */
1419 if (timer_pending(&sc->sc_idle_timeout))
1420 o2net_sc_reset_idle_timer(sc);
1317} 1421}
1318 1422
1319/* this work func is kicked whenever a path sets the nn state which doesn't 1423/* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1435,9 +1539,12 @@ static void o2net_connect_expired(struct work_struct *work)
1435 1539
1436 spin_lock(&nn->nn_lock); 1540 spin_lock(&nn->nn_lock);
1437 if (!nn->nn_sc_valid) { 1541 if (!nn->nn_sc_valid) {
1542 struct o2nm_node *node = nn->nn_sc->sc_node;
1438 mlog(ML_ERROR, "no connection established with node %u after " 1543 mlog(ML_ERROR, "no connection established with node %u after "
1439 "%u seconds, giving up and returning errors.\n", 1544 "%u.%u seconds, giving up and returning errors.\n",
1440 o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); 1545 o2net_num_from_nn(nn),
1546 o2net_idle_timeout(node) / 1000,
1547 o2net_idle_timeout(node) % 1000);
1441 1548
1442 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1549 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1443 } 1550 }
@@ -1478,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
1478 1585
1479 if (node_num != o2nm_this_node()) 1586 if (node_num != o2nm_this_node())
1480 o2net_disconnect_node(node); 1587 o2net_disconnect_node(node);
1588
1589 BUG_ON(atomic_read(&o2net_connected_peers) < 0);
1481} 1590}
1482 1591
1483static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, 1592static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
@@ -1489,14 +1598,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1489 1598
1490 /* ensure an immediate connect attempt */ 1599 /* ensure an immediate connect attempt */
1491 nn->nn_last_connect_attempt = jiffies - 1600 nn->nn_last_connect_attempt = jiffies -
1492 (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); 1601 (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
1493 1602
1494 if (node_num != o2nm_this_node()) { 1603 if (node_num != o2nm_this_node()) {
1495 /* heartbeat doesn't work unless a local node number is 1604 /* heartbeat doesn't work unless a local node number is
1496 * configured and doing so brings up the o2net_wq, so we can 1605 * configured and doing so brings up the o2net_wq, so we can
1497 * use it.. */ 1606 * use it.. */
1498 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1607 queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
1499 O2NET_IDLE_TIMEOUT_SECS * HZ); 1608 msecs_to_jiffies(o2net_idle_timeout(node)));
1500 1609
1501 /* believe it or not, accept and node hearbeating testing 1610 /* believe it or not, accept and node hearbeating testing
1502 * can succeed for this node before we got here.. so 1611 * can succeed for this node before we got here.. so
@@ -1641,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock)
1641 o2net_register_callbacks(sc->sc_sock->sk, sc); 1750 o2net_register_callbacks(sc->sc_sock->sk, sc);
1642 o2net_sc_queue_work(sc, &sc->sc_rx_work); 1751 o2net_sc_queue_work(sc, &sc->sc_rx_work);
1643 1752
1753 o2net_initialize_handshake();
1644 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1754 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
1645 1755
1646out: 1756out: