diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2009-11-17 19:29:19 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2009-12-02 19:49:26 -0500 |
commit | f6656d26d17b2598f43cd41be088853fa2a03397 (patch) | |
tree | 5fdf95fdfab4302e28d9f00cf89ea4785ea9325c | |
parent | 57b09bb5e492c37c1e4273fe4e435ffd1d2ddbe0 (diff) |
ocfs2/cluster: Make fence method configurable - v2
By default, o2cb fences the box by calling emergency_restart(). While this
scheme works well in production, it gets in the way during testing, as it
does not let the tester take stack/core dumps for analysis.
This patch allows the user to dynamically change the fence method to panic() by:
# echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 51 | ||||
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.h | 7 | ||||
-rw-r--r-- | fs/ocfs2/cluster/quorum.c | 16 |
3 files changed, 72 insertions, 2 deletions
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 7ee6188bc79a..c81142e3ef84 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -35,6 +35,10 @@ | |||
35 | * cluster references throughout where nodes are looked up */ | 35 | * cluster references throughout where nodes are looked up */ |
36 | struct o2nm_cluster *o2nm_single_cluster = NULL; | 36 | struct o2nm_cluster *o2nm_single_cluster = NULL; |
37 | 37 | ||
38 | char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { | ||
39 | "reset", /* O2NM_FENCE_RESET */ | ||
40 | "panic", /* O2NM_FENCE_PANIC */ | ||
41 | }; | ||
38 | 42 | ||
39 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 43 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
40 | { | 44 | { |
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( | |||
579 | return o2nm_cluster_attr_write(page, count, | 583 | return o2nm_cluster_attr_write(page, count, |
580 | &cluster->cl_reconnect_delay_ms); | 584 | &cluster->cl_reconnect_delay_ms); |
581 | } | 585 | } |
586 | |||
587 | static ssize_t o2nm_cluster_attr_fence_method_read( | ||
588 | struct o2nm_cluster *cluster, char *page) | ||
589 | { | ||
590 | ssize_t ret = 0; | ||
591 | |||
592 | if (cluster) | ||
593 | ret = sprintf(page, "%s\n", | ||
594 | o2nm_fence_method_desc[cluster->cl_fence_method]); | ||
595 | return ret; | ||
596 | } | ||
597 | |||
598 | static ssize_t o2nm_cluster_attr_fence_method_write( | ||
599 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
600 | { | ||
601 | unsigned int i; | ||
602 | |||
603 | if (page[count - 1] != '\n') | ||
604 | goto bail; | ||
605 | |||
606 | for (i = 0; i < O2NM_FENCE_METHODS; ++i) { | ||
607 | if (count != strlen(o2nm_fence_method_desc[i]) + 1) | ||
608 | continue; | ||
609 | if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) | ||
610 | continue; | ||
611 | if (cluster->cl_fence_method != i) { | ||
612 | printk(KERN_INFO "ocfs2: Changing fence method to %s\n", | ||
613 | o2nm_fence_method_desc[i]); | ||
614 | cluster->cl_fence_method = i; | ||
615 | } | ||
616 | return count; | ||
617 | } | ||
618 | |||
619 | bail: | ||
620 | return -EINVAL; | ||
621 | } | ||
622 | |||
582 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { | 623 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { |
583 | .attr = { .ca_owner = THIS_MODULE, | 624 | .attr = { .ca_owner = THIS_MODULE, |
584 | .ca_name = "idle_timeout_ms", | 625 | .ca_name = "idle_timeout_ms", |
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { | |||
603 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, | 644 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, |
604 | }; | 645 | }; |
605 | 646 | ||
647 | static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = { | ||
648 | .attr = { .ca_owner = THIS_MODULE, | ||
649 | .ca_name = "fence_method", | ||
650 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
651 | .show = o2nm_cluster_attr_fence_method_read, | ||
652 | .store = o2nm_cluster_attr_fence_method_write, | ||
653 | }; | ||
654 | |||
606 | static struct configfs_attribute *o2nm_cluster_attrs[] = { | 655 | static struct configfs_attribute *o2nm_cluster_attrs[] = { |
607 | &o2nm_cluster_attr_idle_timeout_ms.attr, | 656 | &o2nm_cluster_attr_idle_timeout_ms.attr, |
608 | &o2nm_cluster_attr_keepalive_delay_ms.attr, | 657 | &o2nm_cluster_attr_keepalive_delay_ms.attr, |
609 | &o2nm_cluster_attr_reconnect_delay_ms.attr, | 658 | &o2nm_cluster_attr_reconnect_delay_ms.attr, |
659 | &o2nm_cluster_attr_fence_method.attr, | ||
610 | NULL, | 660 | NULL, |
611 | }; | 661 | }; |
612 | static ssize_t o2nm_cluster_show(struct config_item *item, | 662 | static ssize_t o2nm_cluster_show(struct config_item *item, |
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g | |||
778 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; | 828 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; |
779 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; | 829 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; |
780 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; | 830 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; |
831 | cluster->cl_fence_method = O2NM_FENCE_RESET; | ||
781 | 832 | ||
782 | ret = &cluster->cl_group; | 833 | ret = &cluster->cl_group; |
783 | o2nm_single_cluster = cluster; | 834 | o2nm_single_cluster = cluster; |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index c992ea0da4ad..09ea2d388bbb 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
@@ -33,6 +33,12 @@ | |||
33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
34 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
35 | 35 | ||
36 | enum o2nm_fence_method { | ||
37 | O2NM_FENCE_RESET = 0, | ||
38 | O2NM_FENCE_PANIC, | ||
39 | O2NM_FENCE_METHODS, /* Number of fence methods */ | ||
40 | }; | ||
41 | |||
36 | struct o2nm_node { | 42 | struct o2nm_node { |
37 | spinlock_t nd_lock; | 43 | spinlock_t nd_lock; |
38 | struct config_item nd_item; | 44 | struct config_item nd_item; |
@@ -58,6 +64,7 @@ struct o2nm_cluster { | |||
58 | unsigned int cl_idle_timeout_ms; | 64 | unsigned int cl_idle_timeout_ms; |
59 | unsigned int cl_keepalive_delay_ms; | 65 | unsigned int cl_keepalive_delay_ms; |
60 | unsigned int cl_reconnect_delay_ms; | 66 | unsigned int cl_reconnect_delay_ms; |
67 | enum o2nm_fence_method cl_fence_method; | ||
61 | 68 | ||
62 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ | 69 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ |
63 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 70 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index bbacf7da48a4..639024033fce 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void) | |||
74 | * threads can still schedule, etc, etc */ | 74 | * threads can still schedule, etc, etc */ |
75 | o2hb_stop_all_regions(); | 75 | o2hb_stop_all_regions(); |
76 | 76 | ||
77 | printk("ocfs2 is very sorry to be fencing this system by restarting\n"); | 77 | switch (o2nm_single_cluster->cl_fence_method) { |
78 | emergency_restart(); | 78 | case O2NM_FENCE_PANIC: |
79 | panic("*** ocfs2 is very sorry to be fencing this system by " | ||
80 | "panicing ***\n"); | ||
81 | break; | ||
82 | default: | ||
83 | WARN_ON(o2nm_single_cluster->cl_fence_method >= | ||
84 | O2NM_FENCE_METHODS); | ||
85 | case O2NM_FENCE_RESET: | ||
86 | printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this " | ||
87 | "system by restarting ***\n"); | ||
88 | emergency_restart(); | ||
89 | break; | ||
90 | }; | ||
79 | } | 91 | } |
80 | 92 | ||
81 | /* Indicate that a timeout occured on a hearbeat region write. The | 93 | /* Indicate that a timeout occured on a hearbeat region write. The |