diff options
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 152 |
1 files changed, 131 insertions, 21 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9b3209dc0b16..457753df1ae7 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes); | |||
147 | static void o2net_sc_send_keep_req(struct work_struct *work); | 147 | static void o2net_sc_send_keep_req(struct work_struct *work); |
148 | static void o2net_idle_timer(unsigned long data); | 148 | static void o2net_idle_timer(unsigned long data); |
149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
150 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | ||
151 | |||
152 | /* | ||
153 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | ||
154 | * losing our parent link to the cluster during shutdown. This can be | ||
155 | * solved by adding a pre-removal callback to configfs, or passing | ||
156 | * around the cluster with the node. -jeffm | ||
157 | */ | ||
158 | static inline int o2net_reconnect_delay(struct o2nm_node *node) | ||
159 | { | ||
160 | return o2nm_single_cluster->cl_reconnect_delay_ms; | ||
161 | } | ||
162 | |||
163 | static inline int o2net_keepalive_delay(struct o2nm_node *node) | ||
164 | { | ||
165 | return o2nm_single_cluster->cl_keepalive_delay_ms; | ||
166 | } | ||
167 | |||
168 | static inline int o2net_idle_timeout(struct o2nm_node *node) | ||
169 | { | ||
170 | return o2nm_single_cluster->cl_idle_timeout_ms; | ||
171 | } | ||
150 | 172 | ||
151 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) | 173 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) |
152 | { | 174 | { |
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref) | |||
271 | { | 293 | { |
272 | struct o2net_sock_container *sc = container_of(kref, | 294 | struct o2net_sock_container *sc = container_of(kref, |
273 | struct o2net_sock_container, sc_kref); | 295 | struct o2net_sock_container, sc_kref); |
296 | BUG_ON(timer_pending(&sc->sc_idle_timeout)); | ||
297 | |||
274 | sclog(sc, "releasing\n"); | 298 | sclog(sc, "releasing\n"); |
275 | 299 | ||
276 | if (sc->sc_sock) { | 300 | if (sc->sc_sock) { |
@@ -356,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc, | |||
356 | sc_put(sc); | 380 | sc_put(sc); |
357 | } | 381 | } |
358 | 382 | ||
383 | static atomic_t o2net_connected_peers = ATOMIC_INIT(0); | ||
384 | |||
385 | int o2net_num_connected_peers(void) | ||
386 | { | ||
387 | return atomic_read(&o2net_connected_peers); | ||
388 | } | ||
389 | |||
359 | static void o2net_set_nn_state(struct o2net_node *nn, | 390 | static void o2net_set_nn_state(struct o2net_node *nn, |
360 | struct o2net_sock_container *sc, | 391 | struct o2net_sock_container *sc, |
361 | unsigned valid, int err) | 392 | unsigned valid, int err) |
@@ -366,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
366 | 397 | ||
367 | assert_spin_locked(&nn->nn_lock); | 398 | assert_spin_locked(&nn->nn_lock); |
368 | 399 | ||
400 | if (old_sc && !sc) | ||
401 | atomic_dec(&o2net_connected_peers); | ||
402 | else if (!old_sc && sc) | ||
403 | atomic_inc(&o2net_connected_peers); | ||
404 | |||
369 | /* the node num comparison and single connect/accept path should stop | 405 | /* the node num comparison and single connect/accept path should stop |
370 | * an non-null sc from being overwritten with another */ | 406 | * an non-null sc from being overwritten with another */ |
371 | BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); | 407 | BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); |
@@ -424,9 +460,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
424 | /* delay if we're withing a RECONNECT_DELAY of the | 460 | /* delay if we're withing a RECONNECT_DELAY of the |
425 | * last attempt */ | 461 | * last attempt */ |
426 | delay = (nn->nn_last_connect_attempt + | 462 | delay = (nn->nn_last_connect_attempt + |
427 | msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 463 | msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
428 | - jiffies; | 464 | - jiffies; |
429 | if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 465 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
430 | delay = 0; | 466 | delay = 0; |
431 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 467 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
432 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 468 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
@@ -1099,13 +1135,51 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1099 | return -1; | 1135 | return -1; |
1100 | } | 1136 | } |
1101 | 1137 | ||
1138 | /* | ||
1139 | * Ensure timeouts are consistent with other nodes, otherwise | ||
1140 | * we can end up with one node thinking that the other must be down, | ||
1141 | * but isn't. This can ultimately cause corruption. | ||
1142 | */ | ||
1143 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | ||
1144 | o2net_idle_timeout(sc->sc_node)) { | ||
1145 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | ||
1146 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1147 | SC_NODEF_ARGS(sc), | ||
1148 | be32_to_cpu(hand->o2net_idle_timeout_ms), | ||
1149 | o2net_idle_timeout(sc->sc_node)); | ||
1150 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1151 | return -1; | ||
1152 | } | ||
1153 | |||
1154 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | ||
1155 | o2net_keepalive_delay(sc->sc_node)) { | ||
1156 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | ||
1157 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1158 | SC_NODEF_ARGS(sc), | ||
1159 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | ||
1160 | o2net_keepalive_delay(sc->sc_node)); | ||
1161 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1162 | return -1; | ||
1163 | } | ||
1164 | |||
1165 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != | ||
1166 | O2HB_MAX_WRITE_TIMEOUT_MS) { | ||
1167 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " | ||
1168 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1169 | SC_NODEF_ARGS(sc), | ||
1170 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), | ||
1171 | O2HB_MAX_WRITE_TIMEOUT_MS); | ||
1172 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1173 | return -1; | ||
1174 | } | ||
1175 | |||
1102 | sc->sc_handshake_ok = 1; | 1176 | sc->sc_handshake_ok = 1; |
1103 | 1177 | ||
1104 | spin_lock(&nn->nn_lock); | 1178 | spin_lock(&nn->nn_lock); |
1105 | /* set valid and queue the idle timers only if it hasn't been | 1179 | /* set valid and queue the idle timers only if it hasn't been |
1106 | * shut down already */ | 1180 | * shut down already */ |
1107 | if (nn->nn_sc == sc) { | 1181 | if (nn->nn_sc == sc) { |
1108 | o2net_sc_postpone_idle(sc); | 1182 | o2net_sc_reset_idle_timer(sc); |
1109 | o2net_set_nn_state(nn, sc, 1, 0); | 1183 | o2net_set_nn_state(nn, sc, 1, 0); |
1110 | } | 1184 | } |
1111 | spin_unlock(&nn->nn_lock); | 1185 | spin_unlock(&nn->nn_lock); |
@@ -1131,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) | |||
1131 | sclog(sc, "receiving\n"); | 1205 | sclog(sc, "receiving\n"); |
1132 | do_gettimeofday(&sc->sc_tv_advance_start); | 1206 | do_gettimeofday(&sc->sc_tv_advance_start); |
1133 | 1207 | ||
1208 | if (unlikely(sc->sc_handshake_ok == 0)) { | ||
1209 | if(sc->sc_page_off < sizeof(struct o2net_handshake)) { | ||
1210 | data = page_address(sc->sc_page) + sc->sc_page_off; | ||
1211 | datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; | ||
1212 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); | ||
1213 | if (ret > 0) | ||
1214 | sc->sc_page_off += ret; | ||
1215 | } | ||
1216 | |||
1217 | if (sc->sc_page_off == sizeof(struct o2net_handshake)) { | ||
1218 | o2net_check_handshake(sc); | ||
1219 | if (unlikely(sc->sc_handshake_ok == 0)) | ||
1220 | ret = -EPROTO; | ||
1221 | } | ||
1222 | goto out; | ||
1223 | } | ||
1224 | |||
1134 | /* do we need more header? */ | 1225 | /* do we need more header? */ |
1135 | if (sc->sc_page_off < sizeof(struct o2net_msg)) { | 1226 | if (sc->sc_page_off < sizeof(struct o2net_msg)) { |
1136 | data = page_address(sc->sc_page) + sc->sc_page_off; | 1227 | data = page_address(sc->sc_page) + sc->sc_page_off; |
@@ -1138,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) | |||
1138 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); | 1229 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); |
1139 | if (ret > 0) { | 1230 | if (ret > 0) { |
1140 | sc->sc_page_off += ret; | 1231 | sc->sc_page_off += ret; |
1141 | |||
1142 | /* this working relies on the handshake being | ||
1143 | * smaller than the normal message header */ | ||
1144 | if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& | ||
1145 | !sc->sc_handshake_ok && o2net_check_handshake(sc)) { | ||
1146 | ret = -EPROTO; | ||
1147 | goto out; | ||
1148 | } | ||
1149 | |||
1150 | /* only swab incoming here.. we can | 1232 | /* only swab incoming here.. we can |
1151 | * only get here once as we cross from | 1233 | * only get here once as we cross from |
1152 | * being under to over */ | 1234 | * being under to over */ |
@@ -1248,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock) | |||
1248 | return ret; | 1330 | return ret; |
1249 | } | 1331 | } |
1250 | 1332 | ||
1333 | static void o2net_initialize_handshake(void) | ||
1334 | { | ||
1335 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | ||
1336 | O2HB_MAX_WRITE_TIMEOUT_MS); | ||
1337 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( | ||
1338 | o2net_idle_timeout(NULL)); | ||
1339 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( | ||
1340 | o2net_keepalive_delay(NULL)); | ||
1341 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( | ||
1342 | o2net_reconnect_delay(NULL)); | ||
1343 | } | ||
1344 | |||
1251 | /* ------------------------------------------------------------ */ | 1345 | /* ------------------------------------------------------------ */ |
1252 | 1346 | ||
1253 | /* called when a connect completes and after a sock is accepted. the | 1347 | /* called when a connect completes and after a sock is accepted. the |
@@ -1262,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work) | |||
1262 | (unsigned long long)O2NET_PROTOCOL_VERSION, | 1356 | (unsigned long long)O2NET_PROTOCOL_VERSION, |
1263 | (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); | 1357 | (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); |
1264 | 1358 | ||
1359 | o2net_initialize_handshake(); | ||
1265 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); | 1360 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); |
1266 | sc_put(sc); | 1361 | sc_put(sc); |
1267 | } | 1362 | } |
@@ -1287,8 +1382,10 @@ static void o2net_idle_timer(unsigned long data) | |||
1287 | 1382 | ||
1288 | do_gettimeofday(&now); | 1383 | do_gettimeofday(&now); |
1289 | 1384 | ||
1290 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " | 1385 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
1291 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); | 1386 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
1387 | o2net_idle_timeout(sc->sc_node) / 1000, | ||
1388 | o2net_idle_timeout(sc->sc_node) % 1000); | ||
1292 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1389 | mlog(ML_NOTICE, "here are some times that might help debug the " |
1293 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1390 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
1294 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1391 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
@@ -1306,14 +1403,21 @@ static void o2net_idle_timer(unsigned long data) | |||
1306 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1403 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1307 | } | 1404 | } |
1308 | 1405 | ||
1309 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1406 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) |
1310 | { | 1407 | { |
1311 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1408 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
1312 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1409 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
1313 | O2NET_KEEPALIVE_DELAY_SECS * HZ); | 1410 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); |
1314 | do_gettimeofday(&sc->sc_tv_timer); | 1411 | do_gettimeofday(&sc->sc_tv_timer); |
1315 | mod_timer(&sc->sc_idle_timeout, | 1412 | mod_timer(&sc->sc_idle_timeout, |
1316 | jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); | 1413 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); |
1414 | } | ||
1415 | |||
1416 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | ||
1417 | { | ||
1418 | /* Only push out an existing timer */ | ||
1419 | if (timer_pending(&sc->sc_idle_timeout)) | ||
1420 | o2net_sc_reset_idle_timer(sc); | ||
1317 | } | 1421 | } |
1318 | 1422 | ||
1319 | /* this work func is kicked whenever a path sets the nn state which doesn't | 1423 | /* this work func is kicked whenever a path sets the nn state which doesn't |
@@ -1435,9 +1539,12 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1435 | 1539 | ||
1436 | spin_lock(&nn->nn_lock); | 1540 | spin_lock(&nn->nn_lock); |
1437 | if (!nn->nn_sc_valid) { | 1541 | if (!nn->nn_sc_valid) { |
1542 | struct o2nm_node *node = nn->nn_sc->sc_node; | ||
1438 | mlog(ML_ERROR, "no connection established with node %u after " | 1543 | mlog(ML_ERROR, "no connection established with node %u after " |
1439 | "%u seconds, giving up and returning errors.\n", | 1544 | "%u.%u seconds, giving up and returning errors.\n", |
1440 | o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); | 1545 | o2net_num_from_nn(nn), |
1546 | o2net_idle_timeout(node) / 1000, | ||
1547 | o2net_idle_timeout(node) % 1000); | ||
1441 | 1548 | ||
1442 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1549 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1443 | } | 1550 | } |
@@ -1478,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
1478 | 1585 | ||
1479 | if (node_num != o2nm_this_node()) | 1586 | if (node_num != o2nm_this_node()) |
1480 | o2net_disconnect_node(node); | 1587 | o2net_disconnect_node(node); |
1588 | |||
1589 | BUG_ON(atomic_read(&o2net_connected_peers) < 0); | ||
1481 | } | 1590 | } |
1482 | 1591 | ||
1483 | static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | 1592 | static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, |
@@ -1489,14 +1598,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1489 | 1598 | ||
1490 | /* ensure an immediate connect attempt */ | 1599 | /* ensure an immediate connect attempt */ |
1491 | nn->nn_last_connect_attempt = jiffies - | 1600 | nn->nn_last_connect_attempt = jiffies - |
1492 | (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); | 1601 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); |
1493 | 1602 | ||
1494 | if (node_num != o2nm_this_node()) { | 1603 | if (node_num != o2nm_this_node()) { |
1495 | /* heartbeat doesn't work unless a local node number is | 1604 | /* heartbeat doesn't work unless a local node number is |
1496 | * configured and doing so brings up the o2net_wq, so we can | 1605 | * configured and doing so brings up the o2net_wq, so we can |
1497 | * use it.. */ | 1606 | * use it.. */ |
1498 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | 1607 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, |
1499 | O2NET_IDLE_TIMEOUT_SECS * HZ); | 1608 | msecs_to_jiffies(o2net_idle_timeout(node))); |
1500 | 1609 | ||
1501 | /* believe it or not, accept and node hearbeating testing | 1610 | /* believe it or not, accept and node hearbeating testing |
1502 | * can succeed for this node before we got here.. so | 1611 | * can succeed for this node before we got here.. so |
@@ -1641,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1641 | o2net_register_callbacks(sc->sc_sock->sk, sc); | 1750 | o2net_register_callbacks(sc->sc_sock->sk, sc); |
1642 | o2net_sc_queue_work(sc, &sc->sc_rx_work); | 1751 | o2net_sc_queue_work(sc, &sc->sc_rx_work); |
1643 | 1752 | ||
1753 | o2net_initialize_handshake(); | ||
1644 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); | 1754 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); |
1645 | 1755 | ||
1646 | out: | 1756 | out: |