diff options
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 192 | ||||
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.h | 17 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 152 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 15 |
5 files changed, 344 insertions, 40 deletions
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index d11753c50bc1..357f1d551771 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -35,7 +35,7 @@ | |||
35 | /* for now we operate under the assertion that there can be only one | 35 | /* for now we operate under the assertion that there can be only one |
36 | * cluster active at a time. Changing this will require trickling | 36 | * cluster active at a time. Changing this will require trickling |
37 | * cluster references throughout where nodes are looked up */ | 37 | * cluster references throughout where nodes are looked up */ |
38 | static struct o2nm_cluster *o2nm_single_cluster = NULL; | 38 | struct o2nm_cluster *o2nm_single_cluster = NULL; /* no longer static: nodemanager.h declares it extern and tcp.c reads the cluster timeouts through it */ |
39 | 39 | ||
40 | #define OCFS2_MAX_HB_CTL_PATH 256 | 40 | #define OCFS2_MAX_HB_CTL_PATH 256 |
41 | static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; | 41 | static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; |
@@ -97,17 +97,6 @@ const char *o2nm_get_hb_ctl_path(void) | |||
97 | } | 97 | } |
98 | EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); | 98 | EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); |
99 | 99 | ||
100 | struct o2nm_cluster { | ||
101 | struct config_group cl_group; | ||
102 | unsigned cl_has_local:1; | ||
103 | u8 cl_local_node; | ||
104 | rwlock_t cl_nodes_lock; | ||
105 | struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; | ||
106 | struct rb_root cl_node_ip_tree; | ||
107 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ | ||
108 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
109 | }; | ||
110 | |||
111 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 100 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
112 | { | 101 | { |
113 | struct o2nm_node *node = NULL; | 102 | struct o2nm_node *node = NULL; |
@@ -543,6 +532,179 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) | |||
543 | } | 532 | } |
544 | #endif | 533 | #endif |
545 | 534 | ||
535 | struct o2nm_cluster_attribute { /* a configfs attribute paired with typed accessors that take the cluster directly */ | ||
536 | struct configfs_attribute attr; /* embedded attribute; o2nm_cluster_show/store recover this wrapper from it with container_of() */ | ||
537 | ssize_t (*show)(struct o2nm_cluster *, char *); /* writes the current value into the page buffer; may be NULL */ | ||
538 | ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); /* parses a new value from the page buffer; may be NULL */ | ||
539 | }; | ||
540 | |||
541 | static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, /* shared numeric parser for the cluster timeout attributes */ | ||
542 | unsigned int *val) /* *val is written only on the success path; returns count, -EINVAL or -ERANGE */ | ||
543 | { | ||
544 | unsigned long tmp; | ||
545 | char *p = (char *)page; /* const is cast away so p can also receive the end pointer below */ | ||
546 | |||
547 | tmp = simple_strtoul(p, &p, 0); /* NOTE(review): base 0 presumably lets the prefix select the radix - confirm against simple_strtoul() */ | ||
548 | if (!p || (*p && (*p != '\n'))) /* after the number only end-of-string or a newline is accepted */ | ||
549 | return -EINVAL; | ||
550 | |||
551 | if (tmp == 0) /* zero is rejected */ | ||
552 | return -EINVAL; | ||
553 | if (tmp >= (u32)-1) /* values at or above the u32 maximum are out of range */ | ||
554 | return -ERANGE; | ||
555 | |||
556 | *val = tmp; | ||
557 | |||
558 | return count; /* success: hand the caller's byte count back unchanged */ | ||
559 | } | ||
560 | |||
561 | static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( /* .show handler of the idle_timeout_ms attribute */ | ||
562 | struct o2nm_cluster *cluster, char *page) | ||
563 | { | ||
564 | return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); /* value plus newline; the result is sprintf's character count */ | ||
565 | } | ||
566 | |||
567 | static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( /* .store handler of the idle_timeout_ms attribute */ | ||
568 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
569 | { | ||
570 | ssize_t ret; | ||
571 | unsigned int val; | ||
572 | |||
573 | ret = o2nm_cluster_attr_write(page, count, &val); /* val is consulted only when ret > 0 */ | ||
574 | |||
575 | if (ret > 0) { | ||
576 | if (cluster->cl_idle_timeout_ms != val /* an actual change of the value ... */ | ||
577 | && o2net_num_connected_peers()) { /* ... is refused while any peer is connected; rewriting the same value is allowed */ | ||
578 | mlog(ML_NOTICE, | ||
579 | "o2net: cannot change idle timeout after " | ||
580 | "the first peer has agreed to it." | ||
581 | " %d connected peers\n", | ||
582 | o2net_num_connected_peers()); | ||
583 | ret = -EINVAL; | ||
584 | } else if (val <= cluster->cl_keepalive_delay_ms) { /* idle timeout must stay strictly above the keepalive delay */ | ||
585 | mlog(ML_NOTICE, "o2net: idle timeout must be larger " | ||
586 | "than keepalive delay\n"); | ||
587 | ret = -EINVAL; | ||
588 | } else { | ||
589 | cluster->cl_idle_timeout_ms = val; | ||
590 | } | ||
591 | } | ||
592 | |||
593 | return ret; /* count on success, otherwise the negative errno chosen above or by the parser */ | ||
594 | } | ||
595 | |||
596 | static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( /* .show handler of the keepalive_delay_ms attribute */ | ||
597 | struct o2nm_cluster *cluster, char *page) | ||
598 | { | ||
599 | return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); /* value plus newline; the result is sprintf's character count */ | ||
600 | } | ||
601 | |||
602 | static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( /* .store handler of the keepalive_delay_ms attribute */ | ||
603 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
604 | { | ||
605 | ssize_t ret; | ||
606 | unsigned int val; | ||
607 | |||
608 | ret = o2nm_cluster_attr_write(page, count, &val); /* val is consulted only when ret > 0 */ | ||
609 | |||
610 | if (ret > 0) { | ||
611 | if (cluster->cl_keepalive_delay_ms != val /* an actual change of the value ... */ | ||
612 | && o2net_num_connected_peers()) { /* ... is refused while any peer is connected; rewriting the same value is allowed */ | ||
613 | mlog(ML_NOTICE, | ||
614 | "o2net: cannot change keepalive delay after" | ||
615 | " the first peer has agreed to it." | ||
616 | " %d connected peers\n", | ||
617 | o2net_num_connected_peers()); | ||
618 | ret = -EINVAL; | ||
619 | } else if (val >= cluster->cl_idle_timeout_ms) { /* keepalive delay must stay strictly below the idle timeout */ | ||
620 | mlog(ML_NOTICE, "o2net: keepalive delay must be " | ||
621 | "smaller than idle timeout\n"); | ||
622 | ret = -EINVAL; | ||
623 | } else { | ||
624 | cluster->cl_keepalive_delay_ms = val; | ||
625 | } | ||
626 | } | ||
627 | |||
628 | return ret; /* count on success, otherwise the negative errno chosen above or by the parser */ | ||
629 | } | ||
630 | |||
631 | static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( /* .show handler of the reconnect_delay_ms attribute */ | ||
632 | struct o2nm_cluster *cluster, char *page) | ||
633 | { | ||
634 | return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); /* value plus newline; the result is sprintf's character count */ | ||
635 | } | ||
636 | |||
637 | static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( /* .store handler of the reconnect_delay_ms attribute */ | ||
638 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
639 | { | ||
640 | return o2nm_cluster_attr_write(page, count, /* parsed straight into the cluster field; unlike the other two store handlers there is no connected-peer or ordering check */ | ||
641 | &cluster->cl_reconnect_delay_ms); | ||
642 | } | ||
643 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { /* descriptor for the "idle_timeout_ms" cluster attribute */ | ||
644 | .attr = { .ca_owner = THIS_MODULE, | ||
645 | .ca_name = "idle_timeout_ms", | ||
646 | .ca_mode = S_IRUGO | S_IWUSR }, /* read for user/group/other, write for the owner */ | ||
647 | .show = o2nm_cluster_attr_idle_timeout_ms_read, | ||
648 | .store = o2nm_cluster_attr_idle_timeout_ms_write, | ||
649 | }; | ||
650 | |||
651 | static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { /* descriptor for the "keepalive_delay_ms" cluster attribute */ | ||
652 | .attr = { .ca_owner = THIS_MODULE, | ||
653 | .ca_name = "keepalive_delay_ms", | ||
654 | .ca_mode = S_IRUGO | S_IWUSR }, /* read for user/group/other, write for the owner */ | ||
655 | .show = o2nm_cluster_attr_keepalive_delay_ms_read, | ||
656 | .store = o2nm_cluster_attr_keepalive_delay_ms_write, | ||
657 | }; | ||
658 | |||
659 | static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { /* descriptor for the "reconnect_delay_ms" cluster attribute */ | ||
660 | .attr = { .ca_owner = THIS_MODULE, | ||
661 | .ca_name = "reconnect_delay_ms", | ||
662 | .ca_mode = S_IRUGO | S_IWUSR }, /* read for user/group/other, write for the owner */ | ||
663 | .show = o2nm_cluster_attr_reconnect_delay_ms_read, | ||
664 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, | ||
665 | }; | ||
666 | |||
667 | static struct configfs_attribute *o2nm_cluster_attrs[] = { /* attribute list installed as o2nm_cluster_type.ct_attrs */ | ||
668 | &o2nm_cluster_attr_idle_timeout_ms.attr, | ||
669 | &o2nm_cluster_attr_keepalive_delay_ms.attr, | ||
670 | &o2nm_cluster_attr_reconnect_delay_ms.attr, | ||
671 | NULL, /* terminating entry */ | ||
672 | }; | ||
673 | static ssize_t o2nm_cluster_show(struct config_item *item, /* .show_attribute callback: dispatches a read to the attribute's own show() */ | ||
674 | struct configfs_attribute *attr, | ||
675 | char *page) | ||
676 | { | ||
677 | struct o2nm_cluster *cluster = to_o2nm_cluster(item); | ||
678 | struct o2nm_cluster_attribute *o2nm_cluster_attr = | ||
679 | container_of(attr, struct o2nm_cluster_attribute, attr); /* attr is the member embedded in the wrapper struct */ | ||
680 | ssize_t ret = 0; /* returned as-is when the attribute has no show handler */ | ||
681 | |||
682 | if (o2nm_cluster_attr->show) | ||
683 | ret = o2nm_cluster_attr->show(cluster, page); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | static ssize_t o2nm_cluster_store(struct config_item *item, /* .store_attribute callback: dispatches a write to the attribute's own store() */ | ||
688 | struct configfs_attribute *attr, | ||
689 | const char *page, size_t count) | ||
690 | { | ||
691 | struct o2nm_cluster *cluster = to_o2nm_cluster(item); | ||
692 | struct o2nm_cluster_attribute *o2nm_cluster_attr = | ||
693 | container_of(attr, struct o2nm_cluster_attribute, attr); /* attr is the member embedded in the wrapper struct */ | ||
694 | ssize_t ret; | ||
695 | |||
696 | if (o2nm_cluster_attr->store == NULL) { /* an attribute without a store handler cannot be written */ | ||
697 | ret = -EINVAL; | ||
698 | goto out; | ||
699 | } | ||
700 | |||
701 | ret = o2nm_cluster_attr->store(cluster, page, count); | ||
702 | if (ret < count) /* NOTE(review): both outcomes fall through to "out", so this test does not change the result; it also compares ssize_t with size_t */ | ||
703 | goto out; | ||
704 | out: | ||
705 | return ret; /* whatever the handler returned, or -EINVAL from above */ | ||
706 | } | ||
707 | |||
546 | static struct config_item *o2nm_node_group_make_item(struct config_group *group, | 708 | static struct config_item *o2nm_node_group_make_item(struct config_group *group, |
547 | const char *name) | 709 | const char *name) |
548 | { | 710 | { |
@@ -624,10 +786,13 @@ static void o2nm_cluster_release(struct config_item *item) | |||
624 | 786 | ||
625 | static struct configfs_item_operations o2nm_cluster_item_ops = { | 787 | static struct configfs_item_operations o2nm_cluster_item_ops = { |
626 | .release = o2nm_cluster_release, | 788 | .release = o2nm_cluster_release, |
789 | .show_attribute = o2nm_cluster_show, /* reads of cluster attributes go through the per-attribute show() */ | ||
790 | .store_attribute = o2nm_cluster_store, /* writes of cluster attributes go through the per-attribute store() */ | ||
627 | }; | 791 | }; |
628 | 792 | ||
629 | static struct config_item_type o2nm_cluster_type = { | 793 | static struct config_item_type o2nm_cluster_type = { |
630 | .ct_item_ops = &o2nm_cluster_item_ops, | 794 | .ct_item_ops = &o2nm_cluster_item_ops, |
795 | .ct_attrs = o2nm_cluster_attrs, /* idle_timeout_ms, keepalive_delay_ms and reconnect_delay_ms */ | ||
631 | .ct_owner = THIS_MODULE, | 796 | .ct_owner = THIS_MODULE, |
632 | }; | 797 | }; |
633 | 798 | ||
@@ -678,6 +843,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g | |||
678 | cluster->cl_group.default_groups[2] = NULL; | 843 | cluster->cl_group.default_groups[2] = NULL; |
679 | rwlock_init(&cluster->cl_nodes_lock); | 844 | rwlock_init(&cluster->cl_nodes_lock); |
680 | cluster->cl_node_ip_tree = RB_ROOT; | 845 | cluster->cl_node_ip_tree = RB_ROOT; |
846 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; | ||
847 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; | ||
848 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; | ||
681 | 849 | ||
682 | ret = &cluster->cl_group; | 850 | ret = &cluster->cl_group; |
683 | o2nm_single_cluster = cluster; | 851 | o2nm_single_cluster = cluster; |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index fce8033c310f..8fb23cacc2f5 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
@@ -53,6 +53,23 @@ struct o2nm_node { | |||
53 | unsigned long nd_set_attributes; | 53 | unsigned long nd_set_attributes; |
54 | }; | 54 | }; |
55 | 55 | ||
56 | struct o2nm_cluster { /* moved here from nodemanager.c so that code outside nodemanager.c can read its fields */ | ||
57 | struct config_group cl_group; | ||
58 | unsigned cl_has_local:1; | ||
59 | u8 cl_local_node; | ||
60 | rwlock_t cl_nodes_lock; | ||
61 | struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; | ||
62 | struct rb_root cl_node_ip_tree; | ||
63 | unsigned int cl_idle_timeout_ms; /* milliseconds; set through the idle_timeout_ms attribute, whose store handler keeps it above cl_keepalive_delay_ms */ | ||
64 | unsigned int cl_keepalive_delay_ms; /* milliseconds; set through the keepalive_delay_ms attribute, whose store handler keeps it below cl_idle_timeout_ms */ | ||
65 | unsigned int cl_reconnect_delay_ms; /* milliseconds; set through the reconnect_delay_ms attribute */ | ||
66 | |||
67 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ | ||
68 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
69 | }; | ||
70 | |||
71 | extern struct o2nm_cluster *o2nm_single_cluster; | ||
72 | |||
56 | u8 o2nm_this_node(void); | 73 | u8 o2nm_this_node(void); |
57 | 74 | ||
58 | int o2nm_configured_node_map(unsigned long *map, unsigned bytes); | 75 | int o2nm_configured_node_map(unsigned long *map, unsigned bytes); |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9b3209dc0b16..457753df1ae7 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes); | |||
147 | static void o2net_sc_send_keep_req(struct work_struct *work); | 147 | static void o2net_sc_send_keep_req(struct work_struct *work); |
148 | static void o2net_idle_timer(unsigned long data); | 148 | static void o2net_idle_timer(unsigned long data); |
149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 149 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
150 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | ||
151 | |||
152 | /* | ||
153 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | ||
154 | * losing our parent link to the cluster during shutdown. This can be | ||
155 | * solved by adding a pre-removal callback to configfs, or passing | ||
156 | * around the cluster with the node. -jeffm | ||
157 | */ | ||
158 | static inline int o2net_reconnect_delay(struct o2nm_node *node) /* reconnect delay in ms; node is not used by the body */ | ||
159 | { | ||
160 | return o2nm_single_cluster->cl_reconnect_delay_ms; /* NOTE(review): dereferences the global cluster pointer without a NULL check; unsigned field returned as int */ | ||
161 | } | ||
162 | |||
163 | static inline int o2net_keepalive_delay(struct o2nm_node *node) /* keepalive delay in ms; node is not used by the body */ | ||
164 | { | ||
165 | return o2nm_single_cluster->cl_keepalive_delay_ms; /* NOTE(review): dereferences the global cluster pointer without a NULL check; unsigned field returned as int */ | ||
166 | } | ||
167 | |||
168 | static inline int o2net_idle_timeout(struct o2nm_node *node) /* idle timeout in ms; node is not used by the body */ | ||
169 | { | ||
170 | return o2nm_single_cluster->cl_idle_timeout_ms; /* NOTE(review): dereferences the global cluster pointer without a NULL check; unsigned field returned as int */ | ||
171 | } | ||
150 | 172 | ||
151 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) | 173 | static inline int o2net_sys_err_to_errno(enum o2net_system_error err) |
152 | { | 174 | { |
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref) | |||
271 | { | 293 | { |
272 | struct o2net_sock_container *sc = container_of(kref, | 294 | struct o2net_sock_container *sc = container_of(kref, |
273 | struct o2net_sock_container, sc_kref); | 295 | struct o2net_sock_container, sc_kref); |
296 | BUG_ON(timer_pending(&sc->sc_idle_timeout)); | ||
297 | |||
274 | sclog(sc, "releasing\n"); | 298 | sclog(sc, "releasing\n"); |
275 | 299 | ||
276 | if (sc->sc_sock) { | 300 | if (sc->sc_sock) { |
@@ -356,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc, | |||
356 | sc_put(sc); | 380 | sc_put(sc); |
357 | } | 381 | } |
358 | 382 | ||
383 | static atomic_t o2net_connected_peers = ATOMIC_INIT(0); /* adjusted in o2net_set_nn_state(): +1 when old_sc is unset and sc is set, -1 for the reverse */ | ||
384 | |||
385 | int o2net_num_connected_peers(void) /* snapshot of the counter; the nodemanager store handlers use it to refuse timeout changes */ | ||
386 | { | ||
387 | return atomic_read(&o2net_connected_peers); | ||
388 | } | ||
389 | |||
359 | static void o2net_set_nn_state(struct o2net_node *nn, | 390 | static void o2net_set_nn_state(struct o2net_node *nn, |
360 | struct o2net_sock_container *sc, | 391 | struct o2net_sock_container *sc, |
361 | unsigned valid, int err) | 392 | unsigned valid, int err) |
@@ -366,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
366 | 397 | ||
367 | assert_spin_locked(&nn->nn_lock); | 398 | assert_spin_locked(&nn->nn_lock); |
368 | 399 | ||
400 | if (old_sc && !sc) | ||
401 | atomic_dec(&o2net_connected_peers); | ||
402 | else if (!old_sc && sc) | ||
403 | atomic_inc(&o2net_connected_peers); | ||
404 | |||
369 | /* the node num comparison and single connect/accept path should stop | 405 | /* the node num comparison and single connect/accept path should stop |
370 | * a non-null sc from being overwritten with another */ | 406 | * a non-null sc from being overwritten with another */ |
371 | BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); | 407 | BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); |
@@ -424,9 +460,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
424 | /* delay if we're within a RECONNECT_DELAY of the | 460 | /* delay if we're within a RECONNECT_DELAY of the |
425 | * last attempt */ | 461 | * last attempt */ |
426 | delay = (nn->nn_last_connect_attempt + | 462 | delay = (nn->nn_last_connect_attempt + |
427 | msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 463 | msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
428 | - jiffies; | 464 | - jiffies; |
429 | if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) | 465 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) |
430 | delay = 0; | 466 | delay = 0; |
431 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 467 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
432 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 468 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
@@ -1099,13 +1135,51 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1099 | return -1; | 1135 | return -1; |
1100 | } | 1136 | } |
1101 | 1137 | ||
1138 | /* | ||
1139 | * Ensure timeouts are consistent with other nodes, otherwise | ||
1140 | * we can end up with one node thinking that the other must be down, | ||
1141 | * but isn't. This can ultimately cause corruption. | ||
1142 | */ | ||
1143 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | ||
1144 | o2net_idle_timeout(sc->sc_node)) { | ||
1145 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | ||
1146 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1147 | SC_NODEF_ARGS(sc), | ||
1148 | be32_to_cpu(hand->o2net_idle_timeout_ms), | ||
1149 | o2net_idle_timeout(sc->sc_node)); | ||
1150 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1151 | return -1; | ||
1152 | } | ||
1153 | |||
1154 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | ||
1155 | o2net_keepalive_delay(sc->sc_node)) { | ||
1156 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | ||
1157 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1158 | SC_NODEF_ARGS(sc), | ||
1159 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | ||
1160 | o2net_keepalive_delay(sc->sc_node)); | ||
1161 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1162 | return -1; | ||
1163 | } | ||
1164 | |||
1165 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != | ||
1166 | O2HB_MAX_WRITE_TIMEOUT_MS) { | ||
1167 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " | ||
1168 | "%u ms, but we use %u ms locally. disconnecting\n", | ||
1169 | SC_NODEF_ARGS(sc), | ||
1170 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), | ||
1171 | O2HB_MAX_WRITE_TIMEOUT_MS); | ||
1172 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | ||
1173 | return -1; | ||
1174 | } | ||
1175 | |||
1102 | sc->sc_handshake_ok = 1; | 1176 | sc->sc_handshake_ok = 1; |
1103 | 1177 | ||
1104 | spin_lock(&nn->nn_lock); | 1178 | spin_lock(&nn->nn_lock); |
1105 | /* set valid and queue the idle timers only if it hasn't been | 1179 | /* set valid and queue the idle timers only if it hasn't been |
1106 | * shut down already */ | 1180 | * shut down already */ |
1107 | if (nn->nn_sc == sc) { | 1181 | if (nn->nn_sc == sc) { |
1108 | o2net_sc_postpone_idle(sc); | 1182 | o2net_sc_reset_idle_timer(sc); |
1109 | o2net_set_nn_state(nn, sc, 1, 0); | 1183 | o2net_set_nn_state(nn, sc, 1, 0); |
1110 | } | 1184 | } |
1111 | spin_unlock(&nn->nn_lock); | 1185 | spin_unlock(&nn->nn_lock); |
@@ -1131,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) | |||
1131 | sclog(sc, "receiving\n"); | 1205 | sclog(sc, "receiving\n"); |
1132 | do_gettimeofday(&sc->sc_tv_advance_start); | 1206 | do_gettimeofday(&sc->sc_tv_advance_start); |
1133 | 1207 | ||
1208 | if (unlikely(sc->sc_handshake_ok == 0)) { | ||
1209 | if(sc->sc_page_off < sizeof(struct o2net_handshake)) { | ||
1210 | data = page_address(sc->sc_page) + sc->sc_page_off; | ||
1211 | datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; | ||
1212 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); | ||
1213 | if (ret > 0) | ||
1214 | sc->sc_page_off += ret; | ||
1215 | } | ||
1216 | |||
1217 | if (sc->sc_page_off == sizeof(struct o2net_handshake)) { | ||
1218 | o2net_check_handshake(sc); | ||
1219 | if (unlikely(sc->sc_handshake_ok == 0)) | ||
1220 | ret = -EPROTO; | ||
1221 | } | ||
1222 | goto out; | ||
1223 | } | ||
1224 | |||
1134 | /* do we need more header? */ | 1225 | /* do we need more header? */ |
1135 | if (sc->sc_page_off < sizeof(struct o2net_msg)) { | 1226 | if (sc->sc_page_off < sizeof(struct o2net_msg)) { |
1136 | data = page_address(sc->sc_page) + sc->sc_page_off; | 1227 | data = page_address(sc->sc_page) + sc->sc_page_off; |
@@ -1138,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) | |||
1138 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); | 1229 | ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); |
1139 | if (ret > 0) { | 1230 | if (ret > 0) { |
1140 | sc->sc_page_off += ret; | 1231 | sc->sc_page_off += ret; |
1141 | |||
1142 | /* this working relies on the handshake being | ||
1143 | * smaller than the normal message header */ | ||
1144 | if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& | ||
1145 | !sc->sc_handshake_ok && o2net_check_handshake(sc)) { | ||
1146 | ret = -EPROTO; | ||
1147 | goto out; | ||
1148 | } | ||
1149 | |||
1150 | /* only swab incoming here.. we can | 1232 | /* only swab incoming here.. we can |
1151 | * only get here once as we cross from | 1233 | * only get here once as we cross from |
1152 | * being under to over */ | 1234 | * being under to over */ |
@@ -1248,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock) | |||
1248 | return ret; | 1330 | return ret; |
1249 | } | 1331 | } |
1250 | 1332 | ||
1333 | static void o2net_initialize_handshake(void) /* refreshes the timeout fields of the shared o2net_hand; called right before o2net_hand is sent */ | ||
1334 | { | ||
1335 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | ||
1336 | O2HB_MAX_WRITE_TIMEOUT_MS); | ||
1337 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( | ||
1338 | o2net_idle_timeout(NULL)); /* NULL is fine: the helper ignores its node argument */ | ||
1339 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( | ||
1340 | o2net_keepalive_delay(NULL)); | ||
1341 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( | ||
1342 | o2net_reconnect_delay(NULL)); | ||
1343 | } | ||
1344 | |||
1251 | /* ------------------------------------------------------------ */ | 1345 | /* ------------------------------------------------------------ */ |
1252 | 1346 | ||
1253 | /* called when a connect completes and after a sock is accepted. the | 1347 | /* called when a connect completes and after a sock is accepted. the |
@@ -1262,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work) | |||
1262 | (unsigned long long)O2NET_PROTOCOL_VERSION, | 1356 | (unsigned long long)O2NET_PROTOCOL_VERSION, |
1263 | (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); | 1357 | (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); |
1264 | 1358 | ||
1359 | o2net_initialize_handshake(); | ||
1265 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); | 1360 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); |
1266 | sc_put(sc); | 1361 | sc_put(sc); |
1267 | } | 1362 | } |
@@ -1287,8 +1382,10 @@ static void o2net_idle_timer(unsigned long data) | |||
1287 | 1382 | ||
1288 | do_gettimeofday(&now); | 1383 | do_gettimeofday(&now); |
1289 | 1384 | ||
1290 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " | 1385 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
1291 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); | 1386 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
1387 | o2net_idle_timeout(sc->sc_node) / 1000, | ||
1388 | o2net_idle_timeout(sc->sc_node) % 1000); | ||
1292 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1389 | mlog(ML_NOTICE, "here are some times that might help debug the " |
1293 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1390 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
1294 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1391 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
@@ -1306,14 +1403,21 @@ static void o2net_idle_timer(unsigned long data) | |||
1306 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1403 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1307 | } | 1404 | } |
1308 | 1405 | ||
1309 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1406 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) /* unconditionally restarts the keepalive work and the idle timer for sc */ |
1310 | { | 1407 | { |
1311 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1408 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
1312 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1409 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
1313 | O2NET_KEEPALIVE_DELAY_SECS * HZ); | 1410 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); /* configurable delay in ms replaces the fixed seconds constant */ |
1314 | do_gettimeofday(&sc->sc_tv_timer); | 1411 | do_gettimeofday(&sc->sc_tv_timer); |
1315 | mod_timer(&sc->sc_idle_timeout, | 1412 | mod_timer(&sc->sc_idle_timeout, |
1316 | jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); | 1413 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); /* expiry is now plus the configurable idle timeout */ |
1414 | } | ||
1415 | |||
1416 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) /* pushes the idle deadline out, but never arms a timer that is not already pending */ | ||
1417 | { | ||
1418 | /* Only push out an existing timer */ | ||
1419 | if (timer_pending(&sc->sc_idle_timeout)) | ||
1420 | o2net_sc_reset_idle_timer(sc); | ||
1317 | } | 1421 | } |
1318 | 1422 | ||
1319 | /* this work func is kicked whenever a path sets the nn state which doesn't | 1423 | /* this work func is kicked whenever a path sets the nn state which doesn't |
@@ -1435,9 +1539,12 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1435 | 1539 | ||
1436 | spin_lock(&nn->nn_lock); | 1540 | spin_lock(&nn->nn_lock); |
1437 | if (!nn->nn_sc_valid) { | 1541 | if (!nn->nn_sc_valid) { |
1542 | struct o2nm_node *node = nn->nn_sc->sc_node; | ||
1438 | mlog(ML_ERROR, "no connection established with node %u after " | 1543 | mlog(ML_ERROR, "no connection established with node %u after " |
1439 | "%u seconds, giving up and returning errors.\n", | 1544 | "%u.%u seconds, giving up and returning errors.\n", |
1440 | o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); | 1545 | o2net_num_from_nn(nn), |
1546 | o2net_idle_timeout(node) / 1000, | ||
1547 | o2net_idle_timeout(node) % 1000); | ||
1441 | 1548 | ||
1442 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1549 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1443 | } | 1550 | } |
@@ -1478,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
1478 | 1585 | ||
1479 | if (node_num != o2nm_this_node()) | 1586 | if (node_num != o2nm_this_node()) |
1480 | o2net_disconnect_node(node); | 1587 | o2net_disconnect_node(node); |
1588 | |||
1589 | BUG_ON(atomic_read(&o2net_connected_peers) < 0); | ||
1481 | } | 1590 | } |
1482 | 1591 | ||
1483 | static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | 1592 | static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, |
@@ -1489,14 +1598,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1489 | 1598 | ||
1490 | /* ensure an immediate connect attempt */ | 1599 | /* ensure an immediate connect attempt */ |
1491 | nn->nn_last_connect_attempt = jiffies - | 1600 | nn->nn_last_connect_attempt = jiffies - |
1492 | (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); | 1601 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); |
1493 | 1602 | ||
1494 | if (node_num != o2nm_this_node()) { | 1603 | if (node_num != o2nm_this_node()) { |
1495 | /* heartbeat doesn't work unless a local node number is | 1604 | /* heartbeat doesn't work unless a local node number is |
1496 | * configured and doing so brings up the o2net_wq, so we can | 1605 | * configured and doing so brings up the o2net_wq, so we can |
1497 | * use it.. */ | 1606 | * use it.. */ |
1498 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | 1607 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, |
1499 | O2NET_IDLE_TIMEOUT_SECS * HZ); | 1608 | msecs_to_jiffies(o2net_idle_timeout(node))); |
1500 | 1609 | ||
1501 | /* believe it or not, accept and node heartbeating testing | 1611 | /* believe it or not, accept and node heartbeating testing |
1502 | * can succeed for this node before we got here.. so | 1611 | * can succeed for this node before we got here.. so |
@@ -1641,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1641 | o2net_register_callbacks(sc->sc_sock->sk, sc); | 1750 | o2net_register_callbacks(sc->sc_sock->sk, sc); |
1642 | o2net_sc_queue_work(sc, &sc->sc_rx_work); | 1751 | o2net_sc_queue_work(sc, &sc->sc_rx_work); |
1643 | 1752 | ||
1753 | o2net_initialize_handshake(); | ||
1644 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); | 1754 | o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); |
1645 | 1755 | ||
1646 | out: | 1756 | out: |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 616ff2b8434a..21a4e43df836 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data) | |||
54 | 54 | ||
55 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) | 55 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) |
56 | 56 | ||
57 | /* same as hb delay, we're waiting for another node to recognize our hb */ | ||
58 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 /* initial cl_reconnect_delay_ms of a new cluster, in ms */ | ||
59 | |||
60 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 /* initial cl_keepalive_delay_ms of a new cluster, in ms */ | ||
61 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 /* initial cl_idle_timeout_ms of a new cluster, in ms */ | ||
62 | |||
63 | |||
57 | /* TODO: figure this out.... */ | 64 | /* TODO: figure this out.... */ |
58 | static inline int o2net_link_down(int err, struct socket *sock) | 65 | static inline int o2net_link_down(int err, struct socket *sock) |
59 | { | 66 | { |
@@ -101,6 +108,7 @@ void o2net_unregister_hb_callbacks(void); | |||
101 | int o2net_start_listening(struct o2nm_node *node); | 108 | int o2net_start_listening(struct o2nm_node *node); |
102 | void o2net_stop_listening(struct o2nm_node *node); | 109 | void o2net_stop_listening(struct o2nm_node *node); |
103 | void o2net_disconnect_node(struct o2nm_node *node); | 110 | void o2net_disconnect_node(struct o2nm_node *node); |
111 | int o2net_num_connected_peers(void); | ||
104 | 112 | ||
105 | int o2net_init(void); | 113 | int o2net_init(void); |
106 | void o2net_exit(void); | 114 | void o2net_exit(void); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index daebbd3a2c8c..b700dc9624d1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -27,23 +27,20 @@ | |||
27 | #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) | 27 | #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) |
28 | #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) | 28 | #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) |
29 | 29 | ||
30 | /* same as hb delay, we're waiting for another node to recognize our hb */ | ||
31 | #define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS | ||
32 | |||
33 | /* we're delaying our quorum decision so that heartbeat will have timed | 30 | /* we're delaying our quorum decision so that heartbeat will have timed |
34 | * out truly dead nodes by the time we come around to making decisions | 31 | * out truly dead nodes by the time we come around to making decisions |
35 | * on their number */ | 32 | * on their number */ |
36 | #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) | 33 | #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) |
37 | 34 | ||
38 | #define O2NET_KEEPALIVE_DELAY_SECS 5 | ||
39 | #define O2NET_IDLE_TIMEOUT_SECS 10 | ||
40 | |||
41 | /* | 35 | /* |
42 | * This version number represents quite a lot, unfortunately. It not | 36 | * This version number represents quite a lot, unfortunately. It not |
43 | * only represents the raw network message protocol on the wire but also | 37 | * only represents the raw network message protocol on the wire but also |
44 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
45 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
46 | * | 40 | * |
41 | * New in version 5: | ||
42 | * - Network timeout checking protocol | ||
43 | * | ||
47 | * New in version 4: | 44 | * New in version 4: |
48 | * - Remove i_generation from lock names for better stat performance. | 45 | * - Remove i_generation from lock names for better stat performance. |
49 | * | 46 | * |
@@ -54,10 +51,14 @@ | |||
54 | * - full 64 bit i_size in the metadata lock lvbs | 51 | * - full 64 bit i_size in the metadata lock lvbs |
55 | * - introduction of "rw" lock and pushing meta/data locking down | 52 | * - introduction of "rw" lock and pushing meta/data locking down |
56 | */ | 53 | */ |
57 | #define O2NET_PROTOCOL_VERSION 4ULL | 54 | #define O2NET_PROTOCOL_VERSION 5ULL |
58 | struct o2net_handshake { | 55 | struct o2net_handshake { |
59 | __be64 protocol_version; | 56 | __be64 protocol_version; |
60 | __be64 connector_id; | 57 | __be64 connector_id; |
58 | __be32 o2hb_heartbeat_timeout_ms; /* o2net_check_handshake() compares it with O2HB_MAX_WRITE_TIMEOUT_MS */ | ||
59 | __be32 o2net_idle_timeout_ms; /* o2net_check_handshake() requires it to equal the local idle timeout */ | ||
60 | __be32 o2net_keepalive_delay_ms; /* o2net_check_handshake() requires it to equal the local keepalive delay */ | ||
61 | __be32 o2net_reconnect_delay_ms; /* filled in by o2net_initialize_handshake(); NOTE(review): no comparison against it is visible in the handshake check shown here */ | ||
61 | }; | 62 | }; |
62 | 63 | ||
63 | struct o2net_node { | 64 | struct o2net_node { |