diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2011-05-04 13:28:01 -0400 |
---|---|---|
committer | Joel Becker <jlbec@evilplan.org> | 2011-05-13 14:27:02 -0400 |
commit | 33c12a5436464f8d4f56d68e5e79e24a3a1f11aa (patch) | |
tree | 8fe03ac8fa06b7681a5256661722bc311eaa8b87 | |
parent | 76d9fc2954d057b19bf5d7b854df2b621b00fdec (diff) |
ocfs2/cluster: Heartbeat mismatch message improved
If o2hb finds unexpected values in the heartbeat slot, it prints a message
"ERROR: Device "dm-6": another node is heartbeating in our slot!"
This message could be misleading. This patch adds two more messages to
help users better diagnose the problem.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 48 |
1 files changed, 31 insertions, 17 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 1d28505caff..9a3e6bbff27 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg, | |||
539 | 539 | ||
540 | /* We want to make sure that nobody is heartbeating on top of us -- | 540 | /* We want to make sure that nobody is heartbeating on top of us -- |
541 | * this will help detect an invalid configuration. */ | 541 | * this will help detect an invalid configuration. */ |
542 | static int o2hb_check_last_timestamp(struct o2hb_region *reg) | 542 | static void o2hb_check_last_timestamp(struct o2hb_region *reg) |
543 | { | 543 | { |
544 | int node_num, ret; | ||
545 | struct o2hb_disk_slot *slot; | 544 | struct o2hb_disk_slot *slot; |
546 | struct o2hb_disk_heartbeat_block *hb_block; | 545 | struct o2hb_disk_heartbeat_block *hb_block; |
546 | char *errstr; | ||
547 | 547 | ||
548 | node_num = o2nm_this_node(); | 548 | slot = ®->hr_slots[o2nm_this_node()]; |
549 | |||
550 | ret = 1; | ||
551 | slot = ®->hr_slots[node_num]; | ||
552 | /* Don't check on our 1st timestamp */ | 549 | /* Don't check on our 1st timestamp */ |
553 | if (slot->ds_last_time) { | 550 | if (!slot->ds_last_time) |
554 | hb_block = slot->ds_raw_block; | 551 | return; |
555 | 552 | ||
556 | if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) | 553 | hb_block = slot->ds_raw_block; |
557 | ret = 0; | 554 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && |
558 | } | 555 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && |
556 | hb_block->hb_node == slot->ds_node_num) | ||
557 | return; | ||
559 | 558 | ||
560 | return ret; | 559 | #define ERRSTR1 "Another node is heartbeating on device" |
560 | #define ERRSTR2 "Heartbeat generation mismatch on device" | ||
561 | #define ERRSTR3 "Heartbeat sequence mismatch on device" | ||
562 | |||
563 | if (hb_block->hb_node != slot->ds_node_num) | ||
564 | errstr = ERRSTR1; | ||
565 | else if (le64_to_cpu(hb_block->hb_generation) != | ||
566 | slot->ds_last_generation) | ||
567 | errstr = ERRSTR2; | ||
568 | else | ||
569 | errstr = ERRSTR3; | ||
570 | |||
571 | mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " | ||
572 | "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, | ||
573 | slot->ds_node_num, (unsigned long long)slot->ds_last_generation, | ||
574 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, | ||
575 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), | ||
576 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); | ||
561 | } | 577 | } |
562 | 578 | ||
563 | static inline void o2hb_prepare_block(struct o2hb_region *reg, | 579 | static inline void o2hb_prepare_block(struct o2hb_region *reg, |
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
983 | /* With an up to date view of the slots, we can check that no | 999 | /* With an up to date view of the slots, we can check that no |
984 | * other node has been improperly configured to heartbeat in | 1000 | * other node has been improperly configured to heartbeat in |
985 | * our slot. */ | 1001 | * our slot. */ |
986 | if (!o2hb_check_last_timestamp(reg)) | 1002 | o2hb_check_last_timestamp(reg); |
987 | mlog(ML_ERROR, "Device \"%s\": another node is heartbeating " | ||
988 | "in our slot!\n", reg->hr_dev_name); | ||
989 | 1003 | ||
990 | /* fill in the proper info for our next heartbeat */ | 1004 | /* fill in the proper info for our next heartbeat */ |
991 | o2hb_prepare_block(reg, reg->hr_generation); | 1005 | o2hb_prepare_block(reg, reg->hr_generation); |
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
999 | } | 1013 | } |
1000 | 1014 | ||
1001 | i = -1; | 1015 | i = -1; |
1002 | while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | 1016 | while((i = find_next_bit(configured_nodes, |
1003 | 1017 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | |
1004 | change |= o2hb_check_slot(reg, ®->hr_slots[i]); | 1018 | change |= o2hb_check_slot(reg, ®->hr_slots[i]); |
1005 | } | 1019 | } |
1006 | 1020 | ||