aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ocfs2/cluster/nodemanager.c161
-rw-r--r--fs/ocfs2/cluster/nodemanager.h3
-rw-r--r--fs/ocfs2/cluster/tcp.c60
-rw-r--r--fs/ocfs2/cluster/tcp.h7
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h6
5 files changed, 219 insertions, 18 deletions
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index dd4aefa11b3d..234f83f2897f 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -532,6 +532,161 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
532} 532}
533#endif 533#endif
534 534
535struct o2nm_cluster_attribute {
536 struct configfs_attribute attr;
537 ssize_t (*show)(struct o2nm_cluster *, char *);
538 ssize_t (*store)(struct o2nm_cluster *, const char *, size_t);
539};
540
541static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count,
542 unsigned int *val)
543{
544 unsigned long tmp;
545 char *p = (char *)page;
546
547 tmp = simple_strtoul(p, &p, 0);
548 if (!p || (*p && (*p != '\n')))
549 return -EINVAL;
550
551 if (tmp == 0)
552 return -EINVAL;
553 if (tmp >= (u32)-1)
554 return -ERANGE;
555
556 *val = tmp;
557
558 return count;
559}
560
561static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(
562 struct o2nm_cluster *cluster, char *page)
563{
564 return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
565}
566
567static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(
568 struct o2nm_cluster *cluster, const char *page, size_t count)
569{
570 ssize_t ret;
571 unsigned int val;
572
573 ret = o2nm_cluster_attr_write(page, count, &val);
574
575 if (ret > 0) {
576 if (val <= cluster->cl_keepalive_delay_ms) {
577 mlog(ML_NOTICE, "o2net: idle timeout must be larger "
578 "than keepalive delay\n");
579 return -EINVAL;
580 }
581 cluster->cl_idle_timeout_ms = val;
582 }
583
584 return ret;
585}
586
587static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
591}
592
593static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(
594 struct o2nm_cluster *cluster, const char *page, size_t count)
595{
596 ssize_t ret;
597 unsigned int val;
598
599 ret = o2nm_cluster_attr_write(page, count, &val);
600
601 if (ret > 0) {
602 if (val >= cluster->cl_idle_timeout_ms) {
603 mlog(ML_NOTICE, "o2net: keepalive delay must be "
604 "smaller than idle timeout\n");
605 return -EINVAL;
606 }
607 cluster->cl_keepalive_delay_ms = val;
608 }
609
610 return ret;
611}
612
613static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(
614 struct o2nm_cluster *cluster, char *page)
615{
616 return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms);
617}
618
619static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
620 struct o2nm_cluster *cluster, const char *page, size_t count)
621{
622 return o2nm_cluster_attr_write(page, count,
623 &cluster->cl_reconnect_delay_ms);
624}
625static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
626 .attr = { .ca_owner = THIS_MODULE,
627 .ca_name = "idle_timeout_ms",
628 .ca_mode = S_IRUGO | S_IWUSR },
629 .show = o2nm_cluster_attr_idle_timeout_ms_read,
630 .store = o2nm_cluster_attr_idle_timeout_ms_write,
631};
632
633static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = {
634 .attr = { .ca_owner = THIS_MODULE,
635 .ca_name = "keepalive_delay_ms",
636 .ca_mode = S_IRUGO | S_IWUSR },
637 .show = o2nm_cluster_attr_keepalive_delay_ms_read,
638 .store = o2nm_cluster_attr_keepalive_delay_ms_write,
639};
640
641static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
642 .attr = { .ca_owner = THIS_MODULE,
643 .ca_name = "reconnect_delay_ms",
644 .ca_mode = S_IRUGO | S_IWUSR },
645 .show = o2nm_cluster_attr_reconnect_delay_ms_read,
646 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
647};
648
649static struct configfs_attribute *o2nm_cluster_attrs[] = {
650 &o2nm_cluster_attr_idle_timeout_ms.attr,
651 &o2nm_cluster_attr_keepalive_delay_ms.attr,
652 &o2nm_cluster_attr_reconnect_delay_ms.attr,
653 NULL,
654};
655static ssize_t o2nm_cluster_show(struct config_item *item,
656 struct configfs_attribute *attr,
657 char *page)
658{
659 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
660 struct o2nm_cluster_attribute *o2nm_cluster_attr =
661 container_of(attr, struct o2nm_cluster_attribute, attr);
662 ssize_t ret = 0;
663
664 if (o2nm_cluster_attr->show)
665 ret = o2nm_cluster_attr->show(cluster, page);
666 return ret;
667}
668
669static ssize_t o2nm_cluster_store(struct config_item *item,
670 struct configfs_attribute *attr,
671 const char *page, size_t count)
672{
673 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
674 struct o2nm_cluster_attribute *o2nm_cluster_attr =
675 container_of(attr, struct o2nm_cluster_attribute, attr);
676 ssize_t ret;
677
678 if (o2nm_cluster_attr->store == NULL) {
679 ret = -EINVAL;
680 goto out;
681 }
682
683 ret = o2nm_cluster_attr->store(cluster, page, count);
684 if (ret < count)
685 goto out;
686out:
687 return ret;
688}
689
535static struct config_item *o2nm_node_group_make_item(struct config_group *group, 690static struct config_item *o2nm_node_group_make_item(struct config_group *group,
536 const char *name) 691 const char *name)
537{ 692{
@@ -613,10 +768,13 @@ static void o2nm_cluster_release(struct config_item *item)
613 768
614static struct configfs_item_operations o2nm_cluster_item_ops = { 769static struct configfs_item_operations o2nm_cluster_item_ops = {
615 .release = o2nm_cluster_release, 770 .release = o2nm_cluster_release,
771 .show_attribute = o2nm_cluster_show,
772 .store_attribute = o2nm_cluster_store,
616}; 773};
617 774
618static struct config_item_type o2nm_cluster_type = { 775static struct config_item_type o2nm_cluster_type = {
619 .ct_item_ops = &o2nm_cluster_item_ops, 776 .ct_item_ops = &o2nm_cluster_item_ops,
777 .ct_attrs = o2nm_cluster_attrs,
620 .ct_owner = THIS_MODULE, 778 .ct_owner = THIS_MODULE,
621}; 779};
622 780
@@ -667,6 +825,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
667 cluster->cl_group.default_groups[2] = NULL; 825 cluster->cl_group.default_groups[2] = NULL;
668 rwlock_init(&cluster->cl_nodes_lock); 826 rwlock_init(&cluster->cl_nodes_lock);
669 cluster->cl_node_ip_tree = RB_ROOT; 827 cluster->cl_node_ip_tree = RB_ROOT;
828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
670 831
671 ret = &cluster->cl_group; 832 ret = &cluster->cl_group;
672 o2nm_single_cluster = cluster; 833 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index b571cda9fbb7..8fb23cacc2f5 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -60,6 +60,9 @@ struct o2nm_cluster {
60 rwlock_t cl_nodes_lock; 60 rwlock_t cl_nodes_lock;
61 struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; 61 struct o2nm_node *cl_nodes[O2NM_MAX_NODES];
62 struct rb_root cl_node_ip_tree; 62 struct rb_root cl_node_ip_tree;
63 unsigned int cl_idle_timeout_ms;
64 unsigned int cl_keepalive_delay_ms;
65 unsigned int cl_reconnect_delay_ms;
63 66
64 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 67 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
65 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 68 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9b3209dc0b16..ebbaee664c66 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes);
147static void o2net_sc_send_keep_req(struct work_struct *work); 147static void o2net_sc_send_keep_req(struct work_struct *work);
148static void o2net_idle_timer(unsigned long data); 148static void o2net_idle_timer(unsigned long data);
149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
150static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
151
152/*
153 * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
154 * losing our parent link to the cluster during shutdown. This can be
155 * solved by adding a pre-removal callback to configfs, or passing
156 * around the cluster with the node. -jeffm
157 */
158static inline int o2net_reconnect_delay(struct o2nm_node *node)
159{
160 return o2nm_single_cluster->cl_reconnect_delay_ms;
161}
162
163static inline int o2net_keepalive_delay(struct o2nm_node *node)
164{
165 return o2nm_single_cluster->cl_keepalive_delay_ms;
166}
167
168static inline int o2net_idle_timeout(struct o2nm_node *node)
169{
170 return o2nm_single_cluster->cl_idle_timeout_ms;
171}
150 172
151static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 173static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
152{ 174{
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
271{ 293{
272 struct o2net_sock_container *sc = container_of(kref, 294 struct o2net_sock_container *sc = container_of(kref,
273 struct o2net_sock_container, sc_kref); 295 struct o2net_sock_container, sc_kref);
296 BUG_ON(timer_pending(&sc->sc_idle_timeout));
297
274 sclog(sc, "releasing\n"); 298 sclog(sc, "releasing\n");
275 299
276 if (sc->sc_sock) { 300 if (sc->sc_sock) {
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
424 /* delay if we're within a RECONNECT_DELAY of the 448 /* delay if we're within a RECONNECT_DELAY of the
425 * last attempt */ 449 * last attempt */
426 delay = (nn->nn_last_connect_attempt + 450 delay = (nn->nn_last_connect_attempt +
427 msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 451 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
428 - jiffies; 452 - jiffies;
429 if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 453 if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
430 delay = 0; 454 delay = 0;
431 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 455 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
432 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 456 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1105,7 +1129,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1105 /* set valid and queue the idle timers only if it hasn't been 1129 /* set valid and queue the idle timers only if it hasn't been
1106 * shut down already */ 1130 * shut down already */
1107 if (nn->nn_sc == sc) { 1131 if (nn->nn_sc == sc) {
1108 o2net_sc_postpone_idle(sc); 1132 o2net_sc_reset_idle_timer(sc);
1109 o2net_set_nn_state(nn, sc, 1, 0); 1133 o2net_set_nn_state(nn, sc, 1, 0);
1110 } 1134 }
1111 spin_unlock(&nn->nn_lock); 1135 spin_unlock(&nn->nn_lock);
@@ -1287,8 +1311,10 @@ static void o2net_idle_timer(unsigned long data)
1287 1311
1288 do_gettimeofday(&now); 1312 do_gettimeofday(&now);
1289 1313
1290 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " 1314 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1291 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1315 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1316 o2net_idle_timeout(sc->sc_node) / 1000,
1317 o2net_idle_timeout(sc->sc_node) % 1000);
1292 mlog(ML_NOTICE, "here are some times that might help debug the " 1318 mlog(ML_NOTICE, "here are some times that might help debug the "
1293 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1319 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1294 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1320 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1306,14 +1332,21 @@ static void o2net_idle_timer(unsigned long data)
1306 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1332 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1307} 1333}
1308 1334
1309static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1335static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1310{ 1336{
1311 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1337 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1312 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1338 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1313 O2NET_KEEPALIVE_DELAY_SECS * HZ); 1339 msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
1314 do_gettimeofday(&sc->sc_tv_timer); 1340 do_gettimeofday(&sc->sc_tv_timer);
1315 mod_timer(&sc->sc_idle_timeout, 1341 mod_timer(&sc->sc_idle_timeout,
1316 jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); 1342 jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
1343}
1344
1345static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
1346{
1347 /* Only push out an existing timer */
1348 if (timer_pending(&sc->sc_idle_timeout))
1349 o2net_sc_reset_idle_timer(sc);
1317} 1350}
1318 1351
1319/* this work func is kicked whenever a path sets the nn state which doesn't 1352/* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1435,9 +1468,12 @@ static void o2net_connect_expired(struct work_struct *work)
1435 1468
1436 spin_lock(&nn->nn_lock); 1469 spin_lock(&nn->nn_lock);
1437 if (!nn->nn_sc_valid) { 1470 if (!nn->nn_sc_valid) {
1471 struct o2nm_node *node = nn->nn_sc->sc_node;
1438 mlog(ML_ERROR, "no connection established with node %u after " 1472 mlog(ML_ERROR, "no connection established with node %u after "
1439 "%u seconds, giving up and returning errors.\n", 1473 "%u.%u seconds, giving up and returning errors.\n",
1440 o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); 1474 o2net_num_from_nn(nn),
1475 o2net_idle_timeout(node) / 1000,
1476 o2net_idle_timeout(node) % 1000);
1441 1477
1442 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1478 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1443 } 1479 }
@@ -1489,14 +1525,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1489 1525
1490 /* ensure an immediate connect attempt */ 1526 /* ensure an immediate connect attempt */
1491 nn->nn_last_connect_attempt = jiffies - 1527 nn->nn_last_connect_attempt = jiffies -
1492 (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); 1528 (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
1493 1529
1494 if (node_num != o2nm_this_node()) { 1530 if (node_num != o2nm_this_node()) {
1495 /* heartbeat doesn't work unless a local node number is 1531 /* heartbeat doesn't work unless a local node number is
1496 * configured and doing so brings up the o2net_wq, so we can 1532 * configured and doing so brings up the o2net_wq, so we can
1497 * use it.. */ 1533 * use it.. */
1498 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1534 queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
1499 O2NET_IDLE_TIMEOUT_SECS * HZ); 1535 msecs_to_jiffies(o2net_idle_timeout(node)));
1500 1536
1501 /* believe it or not, accept and node heartbeating testing 1537 /* believe it or not, accept and node heartbeating testing
1502 * can succeed for this node before we got here.. so 1538 * can succeed for this node before we got here.. so
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 616ff2b8434a..2e08976050fb 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data)
54 54
55#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 55#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg))
56 56
57/* same as hb delay, we're waiting for another node to recognize our hb */
58#define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000
59
60#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000
61#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000
62
63
57/* TODO: figure this out.... */ 64/* TODO: figure this out.... */
58static inline int o2net_link_down(int err, struct socket *sock) 65static inline int o2net_link_down(int err, struct socket *sock)
59{ 66{
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index daebbd3a2c8c..56f7ee1d2547 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -27,17 +27,11 @@
27#define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) 27#define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57)
28#define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) 28#define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58)
29 29
30/* same as hb delay, we're waiting for another node to recognize our hb */
31#define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS
32
33/* we're delaying our quorum decision so that heartbeat will have timed 30/* we're delaying our quorum decision so that heartbeat will have timed
34 * out truly dead nodes by the time we come around to making decisions 31 * out truly dead nodes by the time we come around to making decisions
35 * on their number */ 32 * on their number */
36#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
37 34
38#define O2NET_KEEPALIVE_DELAY_SECS 5
39#define O2NET_IDLE_TIMEOUT_SECS 10
40
41/* 35/*
42 * This version number represents quite a lot, unfortunately. It not 36 * This version number represents quite a lot, unfortunately. It not
43 * only represents the raw network message protocol on the wire but also 37 * only represents the raw network message protocol on the wire but also