about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 20
-rw-r--r-- fs/ocfs2/cluster/masklog.h 22
-rw-r--r-- fs/ocfs2/cluster/ocfs2_heartbeat.h 1
-rw-r--r-- fs/ocfs2/cluster/tcp.c 14
4 files changed, 49 insertions, 8 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 1d26cfcd9f84..504595d6cf65 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -517,6 +517,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
517 hb_block->hb_seq = cpu_to_le64(cputime); 517 hb_block->hb_seq = cpu_to_le64(cputime);
518 hb_block->hb_node = node_num; 518 hb_block->hb_node = node_num;
519 hb_block->hb_generation = cpu_to_le64(generation); 519 hb_block->hb_generation = cpu_to_le64(generation);
520 hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS);
520 521
521 /* This step must always happen last! */ 522 /* This step must always happen last! */
522 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, 523 hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
@@ -645,6 +646,8 @@ static int o2hb_check_slot(struct o2hb_region *reg,
645 struct o2nm_node *node; 646 struct o2nm_node *node;
646 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block; 647 struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block;
647 u64 cputime; 648 u64 cputime;
649 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
650 unsigned int slot_dead_ms;
648 651
649 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); 652 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
650 653
@@ -733,6 +736,23 @@ fire_callbacks:
733 &o2hb_live_slots[slot->ds_node_num]); 736 &o2hb_live_slots[slot->ds_node_num]);
734 737
735 slot->ds_equal_samples = 0; 738 slot->ds_equal_samples = 0;
739
740 /* We want to be sure that all nodes agree on the
741 * number of milliseconds before a node will be
742 * considered dead. The self-fencing timeout is
743 * computed from this value, and a discrepancy might
744 * result in heartbeat calling a node dead when it
745 * hasn't self-fenced yet. */
746 slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms);
747 if (slot_dead_ms && slot_dead_ms != dead_ms) {
748 /* TODO: Perhaps we can fail the region here. */
749 mlog(ML_ERROR, "Node %d on device %s has a dead count "
750 "of %u ms, but our count is %u ms.\n"
751 "Please double check your configuration values "
752 "for 'O2CB_HEARTBEAT_THRESHOLD'\n",
753 slot->ds_node_num, reg->hr_dev_name, slot_dead_ms,
754 dead_ms);
755 }
736 goto out; 756 goto out;
737 } 757 }
738 758
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 73edad782537..a42628ba9ddf 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -123,6 +123,17 @@
123#define MLOG_MASK_PREFIX 0 123#define MLOG_MASK_PREFIX 0
124#endif 124#endif
125 125
126/*
127 * When logging is disabled, force the bit test to 0 for anything other
128 * than errors and notices, allowing gcc to remove the code completely.
129 * When enabled, allow all masks.
130 */
131#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
132#define ML_ALLOWED_BITS ~0
133#else
134#define ML_ALLOWED_BITS (ML_ERROR|ML_NOTICE)
135#endif
136
126#define MLOG_MAX_BITS 64 137#define MLOG_MAX_BITS 64
127 138
128struct mlog_bits { 139struct mlog_bits {
@@ -187,7 +198,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
187 198
188#define mlog(mask, fmt, args...) do { \ 199#define mlog(mask, fmt, args...) do { \
189 u64 __m = MLOG_MASK_PREFIX | (mask); \ 200 u64 __m = MLOG_MASK_PREFIX | (mask); \
190 if (__mlog_test_u64(__m, mlog_and_bits) && \ 201 if ((__m & ML_ALLOWED_BITS) && \
202 __mlog_test_u64(__m, mlog_and_bits) && \
191 !__mlog_test_u64(__m, mlog_not_bits)) { \ 203 !__mlog_test_u64(__m, mlog_not_bits)) { \
192 if (__m & ML_ERROR) \ 204 if (__m & ML_ERROR) \
193 __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ 205 __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \
@@ -204,6 +216,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 216 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
205} while (0) 217} while (0)
206 218
219#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
207#define mlog_entry(fmt, args...) do { \ 220#define mlog_entry(fmt, args...) do { \
208 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \ 221 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \
209} while (0) 222} while (0)
@@ -247,6 +260,13 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
247#define mlog_exit_void() do { \ 260#define mlog_exit_void() do { \
248 mlog(ML_EXIT, "EXIT\n"); \ 261 mlog(ML_EXIT, "EXIT\n"); \
249} while (0) 262} while (0)
263#else
264#define mlog_entry(...) do { } while (0)
265#define mlog_entry_void(...) do { } while (0)
266#define mlog_exit(...) do { } while (0)
267#define mlog_exit_ptr(...) do { } while (0)
268#define mlog_exit_void(...) do { } while (0)
269#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */
250 270
251#define mlog_bug_on_msg(cond, fmt, args...) do { \ 271#define mlog_bug_on_msg(cond, fmt, args...) do { \
252 if (cond) { \ 272 if (cond) { \
diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h
index 94096069cb43..3f4151da9709 100644
--- a/fs/ocfs2/cluster/ocfs2_heartbeat.h
+++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h
@@ -32,6 +32,7 @@ struct o2hb_disk_heartbeat_block {
32 __u8 hb_pad1[3]; 32 __u8 hb_pad1[3];
33 __le32 hb_cksum; 33 __le32 hb_cksum;
34 __le64 hb_generation; 34 __le64 hb_generation;
35 __le32 hb_dead_ms;
35}; 36};
36 37
37#endif /* _OCFS2_HEARTBEAT_H */ 38#endif /* _OCFS2_HEARTBEAT_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1591eb37a723..b650efa8c8be 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -396,8 +396,8 @@ static void o2net_set_nn_state(struct o2net_node *nn,
396 } 396 }
397 397
398 if (was_valid && !valid) { 398 if (was_valid && !valid) {
399 mlog(ML_NOTICE, "no longer connected to " SC_NODEF_FMT "\n", 399 printk(KERN_INFO "o2net: no longer connected to "
400 SC_NODEF_ARGS(old_sc)); 400 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
401 o2net_complete_nodes_nsw(nn); 401 o2net_complete_nodes_nsw(nn);
402 } 402 }
403 403
@@ -409,10 +409,10 @@ static void o2net_set_nn_state(struct o2net_node *nn,
409 * the only way to start connecting again is to down 409 * the only way to start connecting again is to down
410 * heartbeat and bring it back up. */ 410 * heartbeat and bring it back up. */
411 cancel_delayed_work(&nn->nn_connect_expired); 411 cancel_delayed_work(&nn->nn_connect_expired);
412 mlog(ML_NOTICE, "%s " SC_NODEF_FMT "\n", 412 printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
413 o2nm_this_node() > sc->sc_node->nd_num ? 413 o2nm_this_node() > sc->sc_node->nd_num ?
414 "connected to" : "accepted connection from", 414 "connected to" : "accepted connection from",
415 SC_NODEF_ARGS(sc)); 415 SC_NODEF_ARGS(sc));
416 } 416 }
417 417
418 /* trigger the connecting worker func as long as we're not valid, 418 /* trigger the connecting worker func as long as we're not valid,
@@ -1280,7 +1280,7 @@ static void o2net_idle_timer(unsigned long data)
1280 1280
1281 do_gettimeofday(&now); 1281 do_gettimeofday(&now);
1282 1282
1283 mlog(ML_NOTICE, "connection to " SC_NODEF_FMT " has been idle for 10 " 1283 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 "
1284 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1284 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
1285 mlog(ML_NOTICE, "here are some times that might help debug the " 1285 mlog(ML_NOTICE, "here are some times that might help debug the "
1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "