aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2006-05-09 18:09:35 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2006-06-29 18:45:35 -0400
commit0db638f44e7db9732d9c5704ca837f57ce061f42 (patch)
tree0ac8c092bfa27a9d88431b3c404af19fa0b1bdd1
parent4ba63adce06bc7549e1dd36344123dbaccdaa52f (diff)
ocfs2: warn the user on a dead timeout mismatch
Print a warning to the user when a node with a different dead count joins the region. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/cluster/heartbeat.c20
-rw-r--r--fs/ocfs2/cluster/ocfs2_heartbeat.h1
2 files changed, 21 insertions, 0 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 1d26cfcd9f84..504595d6cf65 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -517,6 +517,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
517 hb_block->hb_seq = cpu_to_le64(cputime); 517 hb_block->hb_seq = cpu_to_le64(cputime);
518 hb_block->hb_node = node_num; 518 hb_block->hb_node = node_num;
519 hb_block->hb_generation = cpu_to_le64(generation); 519 hb_block->hb_generation = cpu_to_le64(generation);
520 hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS);
520 521
521 /* This step must always happen last! */ 522 /* This step must always happen last! */
522 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, 523 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
@@ -645,6 +646,8 @@ static int o2hb_check_slot(struct o2hb_region *reg,
645 struct o2nm_node *node; 646 struct o2nm_node *node;
646 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block; 647 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block;
647 u64 cputime; 648 u64 cputime;
649 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
650 unsigned int slot_dead_ms;
648 651
649 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); 652 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
650 653
@@ -733,6 +736,23 @@ fire_callbacks:
733 &o2hb_live_slots[slot->ds_node_num]); 736 &o2hb_live_slots[slot->ds_node_num]);
734 737
735 slot->ds_equal_samples = 0; 738 slot->ds_equal_samples = 0;
739
740 /* We want to be sure that all nodes agree on the
741 * number of milliseconds before a node will be
742 * considered dead. The self-fencing timeout is
743 * computed from this value, and a discrepancy might
744 * result in heartbeat calling a node dead when it
745 * hasn't self-fenced yet. */
746 slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms);
747 if (slot_dead_ms && slot_dead_ms != dead_ms) {
748 /* TODO: Perhaps we can fail the region here. */
749 mlog(ML_ERROR, "Node %d on device %s has a dead count "
750 "of %u ms, but our count is %u ms.\n"
751 "Please double check your configuration values "
752 "for 'O2CB_HEARTBEAT_THRESHOLD'\n",
753 slot->ds_node_num, reg->hr_dev_name, slot_dead_ms,
754 dead_ms);
755 }
736 goto out; 756 goto out;
737 } 757 }
738 758
diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h
index 94096069cb43..3f4151da9709 100644
--- a/fs/ocfs2/cluster/ocfs2_heartbeat.h
+++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h
@@ -32,6 +32,7 @@ struct o2hb_disk_heartbeat_block {
32 __u8 hb_pad1[3]; 32 __u8 hb_pad1[3];
33 __le32 hb_cksum; 33 __le32 hb_cksum;
34 __le64 hb_generation; 34 __le64 hb_generation;
35 __le32 hb_dead_ms;
35}; 36};
36 37
37#endif /* _OCFS2_HEARTBEAT_H */ 38#endif /* _OCFS2_HEARTBEAT_H */