aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/cluster/heartbeat.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster/heartbeat.c')
-rw-r--r--fs/ocfs2/cluster/heartbeat.c61
1 files changed, 43 insertions, 18 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 643720209a98..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
539 539
540/* We want to make sure that nobody is heartbeating on top of us -- 540/* We want to make sure that nobody is heartbeating on top of us --
541 * this will help detect an invalid configuration. */ 541 * this will help detect an invalid configuration. */
542static int o2hb_check_last_timestamp(struct o2hb_region *reg) 542static void o2hb_check_last_timestamp(struct o2hb_region *reg)
543{ 543{
544 int node_num, ret;
545 struct o2hb_disk_slot *slot; 544 struct o2hb_disk_slot *slot;
546 struct o2hb_disk_heartbeat_block *hb_block; 545 struct o2hb_disk_heartbeat_block *hb_block;
546 char *errstr;
547 547
548 node_num = o2nm_this_node(); 548 slot = &reg->hr_slots[o2nm_this_node()];
549
550 ret = 1;
551 slot = &reg->hr_slots[node_num];
552 /* Don't check on our 1st timestamp */ 549 /* Don't check on our 1st timestamp */
553 if (slot->ds_last_time) { 550 if (!slot->ds_last_time)
554 hb_block = slot->ds_raw_block; 551 return;
555 552
556 if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) 553 hb_block = slot->ds_raw_block;
557 ret = 0; 554 if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
558 } 555 le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
556 hb_block->hb_node == slot->ds_node_num)
557 return;
559 558
560 return ret; 559#define ERRSTR1 "Another node is heartbeating on device"
560#define ERRSTR2 "Heartbeat generation mismatch on device"
561#define ERRSTR3 "Heartbeat sequence mismatch on device"
562
563 if (hb_block->hb_node != slot->ds_node_num)
564 errstr = ERRSTR1;
565 else if (le64_to_cpu(hb_block->hb_generation) !=
566 slot->ds_last_generation)
567 errstr = ERRSTR2;
568 else
569 errstr = ERRSTR3;
570
571 mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
572 "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
573 slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
574 (unsigned long long)slot->ds_last_time, hb_block->hb_node,
575 (unsigned long long)le64_to_cpu(hb_block->hb_generation),
576 (unsigned long long)le64_to_cpu(hb_block->hb_seq));
561} 577}
562 578
563static inline void o2hb_prepare_block(struct o2hb_region *reg, 579static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
983 /* With an up to date view of the slots, we can check that no 999 /* With an up to date view of the slots, we can check that no
984 * other node has been improperly configured to heartbeat in 1000 * other node has been improperly configured to heartbeat in
985 * our slot. */ 1001 * our slot. */
986 if (!o2hb_check_last_timestamp(reg)) 1002 o2hb_check_last_timestamp(reg);
987 mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
988 "in our slot!\n", reg->hr_dev_name);
989 1003
990 /* fill in the proper info for our next heartbeat */ 1004 /* fill in the proper info for our next heartbeat */
991 o2hb_prepare_block(reg, reg->hr_generation); 1005 o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
999 } 1013 }
1000 1014
1001 i = -1; 1015 i = -1;
1002 while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { 1016 while((i = find_next_bit(configured_nodes,
1003 1017 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
1004 change |= o2hb_check_slot(reg, &reg->hr_slots[i]); 1018 change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
1005 } 1019 }
1006 1020
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1690 struct file *filp = NULL; 1704 struct file *filp = NULL;
1691 struct inode *inode = NULL; 1705 struct inode *inode = NULL;
1692 ssize_t ret = -EINVAL; 1706 ssize_t ret = -EINVAL;
1707 int live_threshold;
1693 1708
1694 if (reg->hr_bdev) 1709 if (reg->hr_bdev)
1695 goto out; 1710 goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 * A node is considered live after it has beat LIVE_THRESHOLD 1781 * A node is considered live after it has beat LIVE_THRESHOLD
1767 * times. We're not steady until we've given them a chance 1782 * times. We're not steady until we've given them a chance
1768 * _after_ our first read. 1783 * _after_ our first read.
1784 * The default threshold is bare minimum so as to limit the delay
1785 * during mounts. For global heartbeat, the threshold doubled for the
1786 * first region.
1769 */ 1787 */
1770 atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); 1788 live_threshold = O2HB_LIVE_THRESHOLD;
1789 if (o2hb_global_heartbeat_active()) {
1790 spin_lock(&o2hb_live_lock);
1791 if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
1792 live_threshold <<= 1;
1793 spin_unlock(&o2hb_live_lock);
1794 }
1795 atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
1771 1796
1772 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1797 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
1773 reg->hr_item.ci_name); 1798 reg->hr_item.ci_name);