aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author: Sunil Mushran <sunil.mushran@oracle.com> 2009-11-17 19:29:19 -0500
committer: Joel Becker <joel.becker@oracle.com> 2009-12-02 19:49:26 -0500
commit: f6656d26d17b2598f43cd41be088853fa2a03397 (patch)
tree: 5fdf95fdfab4302e28d9f00cf89ea4785ea9325c /fs/ocfs2
parent: 57b09bb5e492c37c1e4273fe4e435ffd1d2ddbe0 (diff)
ocfs2/cluster: Make fence method configurable - v2
By default, o2cb fences the box by calling emergency_restart(). While this scheme works well in production, it gets in the way during testing as it does not let the tester take stack/core dumps for analysis.

This patch allows the user to dynamically change the fence method to panic() by:

# echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- fs/ocfs2/cluster/nodemanager.c 51
-rw-r--r-- fs/ocfs2/cluster/nodemanager.h 7
-rw-r--r-- fs/ocfs2/cluster/quorum.c 16
3 files changed, 72 insertions, 2 deletions
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79a..c81142e3ef84 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
35 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
36struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
37 37
38char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
39 "reset", /* O2NM_FENCE_RESET */
40 "panic", /* O2NM_FENCE_PANIC */
41};
38 42
39struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 43struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
40{ 44{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
579 return o2nm_cluster_attr_write(page, count, 583 return o2nm_cluster_attr_write(page, count,
580 &cluster->cl_reconnect_delay_ms); 584 &cluster->cl_reconnect_delay_ms);
581} 585}
586
587static ssize_t o2nm_cluster_attr_fence_method_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 ssize_t ret = 0;
591
592 if (cluster)
593 ret = sprintf(page, "%s\n",
594 o2nm_fence_method_desc[cluster->cl_fence_method]);
595 return ret;
596}
597
598static ssize_t o2nm_cluster_attr_fence_method_write(
599 struct o2nm_cluster *cluster, const char *page, size_t count)
600{
601 unsigned int i;
602
603 if (page[count - 1] != '\n')
604 goto bail;
605
606 for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
607 if (count != strlen(o2nm_fence_method_desc[i]) + 1)
608 continue;
609 if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
610 continue;
611 if (cluster->cl_fence_method != i) {
612 printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
613 o2nm_fence_method_desc[i]);
614 cluster->cl_fence_method = i;
615 }
616 return count;
617 }
618
619bail:
620 return -EINVAL;
621}
622
582static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 623static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
583 .attr = { .ca_owner = THIS_MODULE, 624 .attr = { .ca_owner = THIS_MODULE,
584 .ca_name = "idle_timeout_ms", 625 .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
603 .store = o2nm_cluster_attr_reconnect_delay_ms_write, 644 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
604}; 645};
605 646
647static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
648 .attr = { .ca_owner = THIS_MODULE,
649 .ca_name = "fence_method",
650 .ca_mode = S_IRUGO | S_IWUSR },
651 .show = o2nm_cluster_attr_fence_method_read,
652 .store = o2nm_cluster_attr_fence_method_write,
653};
654
606static struct configfs_attribute *o2nm_cluster_attrs[] = { 655static struct configfs_attribute *o2nm_cluster_attrs[] = {
607 &o2nm_cluster_attr_idle_timeout_ms.attr, 656 &o2nm_cluster_attr_idle_timeout_ms.attr,
608 &o2nm_cluster_attr_keepalive_delay_ms.attr, 657 &o2nm_cluster_attr_keepalive_delay_ms.attr,
609 &o2nm_cluster_attr_reconnect_delay_ms.attr, 658 &o2nm_cluster_attr_reconnect_delay_ms.attr,
659 &o2nm_cluster_attr_fence_method.attr,
610 NULL, 660 NULL,
611}; 661};
612static ssize_t o2nm_cluster_show(struct config_item *item, 662static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
778 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
831 cluster->cl_fence_method = O2NM_FENCE_RESET;
781 832
782 ret = &cluster->cl_group; 833 ret = &cluster->cl_group;
783 o2nm_single_cluster = cluster; 834 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4ad..09ea2d388bbb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36enum o2nm_fence_method {
37 O2NM_FENCE_RESET = 0,
38 O2NM_FENCE_PANIC,
39 O2NM_FENCE_METHODS, /* Number of fence methods */
40};
41
36struct o2nm_node { 42struct o2nm_node {
37 spinlock_t nd_lock; 43 spinlock_t nd_lock;
38 struct config_item nd_item; 44 struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
58 unsigned int cl_idle_timeout_ms; 64 unsigned int cl_idle_timeout_ms;
59 unsigned int cl_keepalive_delay_ms; 65 unsigned int cl_keepalive_delay_ms;
60 unsigned int cl_reconnect_delay_ms; 66 unsigned int cl_reconnect_delay_ms;
67 enum o2nm_fence_method cl_fence_method;
61 68
62 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 69 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
63 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 70 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a4..639024033fce 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
74 * threads can still schedule, etc, etc */ 74 * threads can still schedule, etc, etc */
75 o2hb_stop_all_regions(); 75 o2hb_stop_all_regions();
76 76
77 printk("ocfs2 is very sorry to be fencing this system by restarting\n"); 77 switch (o2nm_single_cluster->cl_fence_method) {
78 emergency_restart(); 78 case O2NM_FENCE_PANIC:
79 panic("*** ocfs2 is very sorry to be fencing this system by "
80 "panicing ***\n");
81 break;
82 default:
83 WARN_ON(o2nm_single_cluster->cl_fence_method >=
84 O2NM_FENCE_METHODS);
85 case O2NM_FENCE_RESET:
86 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
87 "system by restarting ***\n");
88 emergency_restart();
89 break;
90 };
79} 91}
80 92
81/* Indicate that a timeout occured on a hearbeat region write. The 93/* Indicate that a timeout occured on a hearbeat region write. The