diff options
| author | Sunil Mushran <sunil.mushran@oracle.com> | 2009-11-17 19:29:19 -0500 |
|---|---|---|
| committer | Joel Becker <joel.becker@oracle.com> | 2009-12-02 19:49:26 -0500 |
| commit | f6656d26d17b2598f43cd41be088853fa2a03397 (patch) | |
| tree | 5fdf95fdfab4302e28d9f00cf89ea4785ea9325c | |
| parent | 57b09bb5e492c37c1e4273fe4e435ffd1d2ddbe0 (diff) | |
ocfs2/cluster: Make fence method configurable - v2
By default, o2cb fences the box by calling emergency_restart(). While this
scheme works well in production, it gets in the way during testing as it
does not let the tester take stack/core dumps for analysis.
This patch allows the user to dynamically change the fence method to panic() by running:
# echo "panic" > /sys/kernel/config/cluster/<clustername>/fence_method
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
| -rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 51 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/nodemanager.h | 7 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/quorum.c | 16 |
3 files changed, 72 insertions, 2 deletions
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 7ee6188bc79a..c81142e3ef84 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -35,6 +35,10 @@ | |||
| 35 | * cluster references throughout where nodes are looked up */ | 35 | * cluster references throughout where nodes are looked up */ |
| 36 | struct o2nm_cluster *o2nm_single_cluster = NULL; | 36 | struct o2nm_cluster *o2nm_single_cluster = NULL; |
| 37 | 37 | ||
| 38 | char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { | ||
| 39 | "reset", /* O2NM_FENCE_RESET */ | ||
| 40 | "panic", /* O2NM_FENCE_PANIC */ | ||
| 41 | }; | ||
| 38 | 42 | ||
| 39 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 43 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
| 40 | { | 44 | { |
| @@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( | |||
| 579 | return o2nm_cluster_attr_write(page, count, | 583 | return o2nm_cluster_attr_write(page, count, |
| 580 | &cluster->cl_reconnect_delay_ms); | 584 | &cluster->cl_reconnect_delay_ms); |
| 581 | } | 585 | } |
| 586 | |||
| 587 | static ssize_t o2nm_cluster_attr_fence_method_read( | ||
| 588 | struct o2nm_cluster *cluster, char *page) | ||
| 589 | { | ||
| 590 | ssize_t ret = 0; | ||
| 591 | |||
| 592 | if (cluster) | ||
| 593 | ret = sprintf(page, "%s\n", | ||
| 594 | o2nm_fence_method_desc[cluster->cl_fence_method]); | ||
| 595 | return ret; | ||
| 596 | } | ||
| 597 | |||
| 598 | static ssize_t o2nm_cluster_attr_fence_method_write( | ||
| 599 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
| 600 | { | ||
| 601 | unsigned int i; | ||
| 602 | |||
| 603 | if (page[count - 1] != '\n') | ||
| 604 | goto bail; | ||
| 605 | |||
| 606 | for (i = 0; i < O2NM_FENCE_METHODS; ++i) { | ||
| 607 | if (count != strlen(o2nm_fence_method_desc[i]) + 1) | ||
| 608 | continue; | ||
| 609 | if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) | ||
| 610 | continue; | ||
| 611 | if (cluster->cl_fence_method != i) { | ||
| 612 | printk(KERN_INFO "ocfs2: Changing fence method to %s\n", | ||
| 613 | o2nm_fence_method_desc[i]); | ||
| 614 | cluster->cl_fence_method = i; | ||
| 615 | } | ||
| 616 | return count; | ||
| 617 | } | ||
| 618 | |||
| 619 | bail: | ||
| 620 | return -EINVAL; | ||
| 621 | } | ||
| 622 | |||
| 582 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { | 623 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { |
| 583 | .attr = { .ca_owner = THIS_MODULE, | 624 | .attr = { .ca_owner = THIS_MODULE, |
| 584 | .ca_name = "idle_timeout_ms", | 625 | .ca_name = "idle_timeout_ms", |
| @@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { | |||
| 603 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, | 644 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, |
| 604 | }; | 645 | }; |
| 605 | 646 | ||
| 647 | static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = { | ||
| 648 | .attr = { .ca_owner = THIS_MODULE, | ||
| 649 | .ca_name = "fence_method", | ||
| 650 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 651 | .show = o2nm_cluster_attr_fence_method_read, | ||
| 652 | .store = o2nm_cluster_attr_fence_method_write, | ||
| 653 | }; | ||
| 654 | |||
| 606 | static struct configfs_attribute *o2nm_cluster_attrs[] = { | 655 | static struct configfs_attribute *o2nm_cluster_attrs[] = { |
| 607 | &o2nm_cluster_attr_idle_timeout_ms.attr, | 656 | &o2nm_cluster_attr_idle_timeout_ms.attr, |
| 608 | &o2nm_cluster_attr_keepalive_delay_ms.attr, | 657 | &o2nm_cluster_attr_keepalive_delay_ms.attr, |
| 609 | &o2nm_cluster_attr_reconnect_delay_ms.attr, | 658 | &o2nm_cluster_attr_reconnect_delay_ms.attr, |
| 659 | &o2nm_cluster_attr_fence_method.attr, | ||
| 610 | NULL, | 660 | NULL, |
| 611 | }; | 661 | }; |
| 612 | static ssize_t o2nm_cluster_show(struct config_item *item, | 662 | static ssize_t o2nm_cluster_show(struct config_item *item, |
| @@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g | |||
| 778 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; | 828 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; |
| 779 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; | 829 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; |
| 780 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; | 830 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; |
| 831 | cluster->cl_fence_method = O2NM_FENCE_RESET; | ||
| 781 | 832 | ||
| 782 | ret = &cluster->cl_group; | 833 | ret = &cluster->cl_group; |
| 783 | o2nm_single_cluster = cluster; | 834 | o2nm_single_cluster = cluster; |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index c992ea0da4ad..09ea2d388bbb 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
| @@ -33,6 +33,12 @@ | |||
| 33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
| 34 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
| 35 | 35 | ||
| 36 | enum o2nm_fence_method { | ||
| 37 | O2NM_FENCE_RESET = 0, | ||
| 38 | O2NM_FENCE_PANIC, | ||
| 39 | O2NM_FENCE_METHODS, /* Number of fence methods */ | ||
| 40 | }; | ||
| 41 | |||
| 36 | struct o2nm_node { | 42 | struct o2nm_node { |
| 37 | spinlock_t nd_lock; | 43 | spinlock_t nd_lock; |
| 38 | struct config_item nd_item; | 44 | struct config_item nd_item; |
| @@ -58,6 +64,7 @@ struct o2nm_cluster { | |||
| 58 | unsigned int cl_idle_timeout_ms; | 64 | unsigned int cl_idle_timeout_ms; |
| 59 | unsigned int cl_keepalive_delay_ms; | 65 | unsigned int cl_keepalive_delay_ms; |
| 60 | unsigned int cl_reconnect_delay_ms; | 66 | unsigned int cl_reconnect_delay_ms; |
| 67 | enum o2nm_fence_method cl_fence_method; | ||
| 61 | 68 | ||
| 62 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ | 69 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ |
| 63 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 70 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index bbacf7da48a4..639024033fce 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
| @@ -74,8 +74,20 @@ static void o2quo_fence_self(void) | |||
| 74 | * threads can still schedule, etc, etc */ | 74 | * threads can still schedule, etc, etc */ |
| 75 | o2hb_stop_all_regions(); | 75 | o2hb_stop_all_regions(); |
| 76 | 76 | ||
| 77 | printk("ocfs2 is very sorry to be fencing this system by restarting\n"); | 77 | switch (o2nm_single_cluster->cl_fence_method) { |
| 78 | emergency_restart(); | 78 | case O2NM_FENCE_PANIC: |
| 79 | panic("*** ocfs2 is very sorry to be fencing this system by " | ||
| 80 | "panicing ***\n"); | ||
| 81 | break; | ||
| 82 | default: | ||
| 83 | WARN_ON(o2nm_single_cluster->cl_fence_method >= | ||
| 84 | O2NM_FENCE_METHODS); | ||
| 85 | case O2NM_FENCE_RESET: | ||
| 86 | printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this " | ||
| 87 | "system by restarting ***\n"); | ||
| 88 | emergency_restart(); | ||
| 89 | break; | ||
| 90 | }; | ||
| 79 | } | 91 | } |
| 80 | 92 | ||
| 81 | /* Indicate that a timeout occured on a hearbeat region write. The | 93 | /* Indicate that a timeout occured on a hearbeat region write. The |
