diff options
Diffstat (limited to 'fs/ocfs2/cluster/heartbeat.c')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 61 |
1 files changed, 43 insertions, 18 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 643720209a98..9a3e6bbff27b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg, | |||
539 | 539 | ||
540 | /* We want to make sure that nobody is heartbeating on top of us -- | 540 | /* We want to make sure that nobody is heartbeating on top of us -- |
541 | * this will help detect an invalid configuration. */ | 541 | * this will help detect an invalid configuration. */ |
542 | static int o2hb_check_last_timestamp(struct o2hb_region *reg) | 542 | static void o2hb_check_last_timestamp(struct o2hb_region *reg) |
543 | { | 543 | { |
544 | int node_num, ret; | ||
545 | struct o2hb_disk_slot *slot; | 544 | struct o2hb_disk_slot *slot; |
546 | struct o2hb_disk_heartbeat_block *hb_block; | 545 | struct o2hb_disk_heartbeat_block *hb_block; |
546 | char *errstr; | ||
547 | 547 | ||
548 | node_num = o2nm_this_node(); | 548 | slot = &reg->hr_slots[o2nm_this_node()]; |
549 | |||
550 | ret = 1; | ||
551 | slot = &reg->hr_slots[node_num]; | ||
552 | /* Don't check on our 1st timestamp */ | 549 | /* Don't check on our 1st timestamp */ |
553 | if (slot->ds_last_time) { | 550 | if (!slot->ds_last_time) |
554 | hb_block = slot->ds_raw_block; | 551 | return; |
555 | 552 | ||
556 | if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) | 553 | hb_block = slot->ds_raw_block; |
557 | ret = 0; | 554 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && |
558 | } | 555 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && |
556 | hb_block->hb_node == slot->ds_node_num) | ||
557 | return; | ||
559 | 558 | ||
560 | return ret; | 559 | #define ERRSTR1 "Another node is heartbeating on device" |
560 | #define ERRSTR2 "Heartbeat generation mismatch on device" | ||
561 | #define ERRSTR3 "Heartbeat sequence mismatch on device" | ||
562 | |||
563 | if (hb_block->hb_node != slot->ds_node_num) | ||
564 | errstr = ERRSTR1; | ||
565 | else if (le64_to_cpu(hb_block->hb_generation) != | ||
566 | slot->ds_last_generation) | ||
567 | errstr = ERRSTR2; | ||
568 | else | ||
569 | errstr = ERRSTR3; | ||
570 | |||
571 | mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " | ||
572 | "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, | ||
573 | slot->ds_node_num, (unsigned long long)slot->ds_last_generation, | ||
574 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, | ||
575 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), | ||
576 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); | ||
561 | } | 577 | } |
562 | 578 | ||
563 | static inline void o2hb_prepare_block(struct o2hb_region *reg, | 579 | static inline void o2hb_prepare_block(struct o2hb_region *reg, |
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
983 | /* With an up to date view of the slots, we can check that no | 999 | /* With an up to date view of the slots, we can check that no |
984 | * other node has been improperly configured to heartbeat in | 1000 | * other node has been improperly configured to heartbeat in |
985 | * our slot. */ | 1001 | * our slot. */ |
986 | if (!o2hb_check_last_timestamp(reg)) | 1002 | o2hb_check_last_timestamp(reg); |
987 | mlog(ML_ERROR, "Device \"%s\": another node is heartbeating " | ||
988 | "in our slot!\n", reg->hr_dev_name); | ||
989 | 1003 | ||
990 | /* fill in the proper info for our next heartbeat */ | 1004 | /* fill in the proper info for our next heartbeat */ |
991 | o2hb_prepare_block(reg, reg->hr_generation); | 1005 | o2hb_prepare_block(reg, reg->hr_generation); |
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
999 | } | 1013 | } |
1000 | 1014 | ||
1001 | i = -1; | 1015 | i = -1; |
1002 | while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | 1016 | while((i = find_next_bit(configured_nodes, |
1003 | 1017 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | |
1004 | change |= o2hb_check_slot(reg, &reg->hr_slots[i]); | 1018 | change |= o2hb_check_slot(reg, &reg->hr_slots[i]); |
1005 | } | 1019 | } |
1006 | 1020 | ||
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1690 | struct file *filp = NULL; | 1704 | struct file *filp = NULL; |
1691 | struct inode *inode = NULL; | 1705 | struct inode *inode = NULL; |
1692 | ssize_t ret = -EINVAL; | 1706 | ssize_t ret = -EINVAL; |
1707 | int live_threshold; | ||
1693 | 1708 | ||
1694 | if (reg->hr_bdev) | 1709 | if (reg->hr_bdev) |
1695 | goto out; | 1710 | goto out; |
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1766 | * A node is considered live after it has beat LIVE_THRESHOLD | 1781 | * A node is considered live after it has beat LIVE_THRESHOLD |
1767 | * times. We're not steady until we've given them a chance | 1782 | * times. We're not steady until we've given them a chance |
1768 | * _after_ our first read. | 1783 | * _after_ our first read. |
1784 | * The default threshold is bare minimum so as to limit the delay | ||
1785 | * during mounts. For global heartbeat, the threshold doubled for the | ||
1786 | * first region. | ||
1769 | */ | 1787 | */ |
1770 | atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); | 1788 | live_threshold = O2HB_LIVE_THRESHOLD; |
1789 | if (o2hb_global_heartbeat_active()) { | ||
1790 | spin_lock(&o2hb_live_lock); | ||
1791 | if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) | ||
1792 | live_threshold <<= 1; | ||
1793 | spin_unlock(&o2hb_live_lock); | ||
1794 | } | ||
1795 | atomic_set(&reg->hr_steady_iterations, live_threshold + 1); | ||
1771 | 1796 | ||
1772 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", | 1797 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", |
1773 | reg->hr_item.ci_name); | 1798 | reg->hr_item.ci_name); |