diff options
author | Junxiao Bi <junxiao.bi@oracle.com> | 2016-05-27 17:27:07 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-27 17:49:37 -0400 |
commit | 1bd1290283d7939478062e80bdd9719d3a21522f (patch) | |
tree | 94095c855b1b10fcf89c522a90e3358dec01d6e5 /fs | |
parent | e76f8237a2f7b7220980c0fb3c6d0b1d48ba79ad (diff) |
ocfs2: o2hb: add some user/debug log
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Ryan Ding <ryan.ding@oracle.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Gang He <ghe@suse.com>
Cc: rwxybh <rwxybh@126.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 39 |
1 files changed, 32 insertions, 7 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index e929b15d6162..84ebeb5678c6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -292,6 +292,8 @@ struct o2hb_bio_wait_ctxt { | |||
292 | int wc_error; | 292 | int wc_error; |
293 | }; | 293 | }; |
294 | 294 | ||
295 | #define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2) | ||
296 | |||
295 | enum { | 297 | enum { |
296 | O2HB_NEGO_TIMEOUT_MSG = 1, | 298 | O2HB_NEGO_TIMEOUT_MSG = 1, |
297 | O2HB_NEGO_APPROVE_MSG = 2, | 299 | O2HB_NEGO_APPROVE_MSG = 2, |
@@ -358,7 +360,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg) | |||
358 | cancel_delayed_work(&reg->hr_nego_timeout_work); | 360 | cancel_delayed_work(&reg->hr_nego_timeout_work); |
359 | /* negotiate timeout must be less than write timeout. */ | 361 | /* negotiate timeout must be less than write timeout. */ |
360 | schedule_delayed_work(&reg->hr_nego_timeout_work, | 362 | schedule_delayed_work(&reg->hr_nego_timeout_work, |
361 | msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS)/2); | 363 | msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS)); |
362 | memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap)); | 364 | memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap)); |
363 | } | 365 | } |
364 | 366 | ||
@@ -389,7 +391,7 @@ again: | |||
389 | static void o2hb_nego_timeout(struct work_struct *work) | 391 | static void o2hb_nego_timeout(struct work_struct *work) |
390 | { | 392 | { |
391 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 393 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
392 | int master_node, i; | 394 | int master_node, i, ret; |
393 | struct o2hb_region *reg; | 395 | struct o2hb_region *reg; |
394 | 396 | ||
395 | reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work); | 397 | reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work); |
@@ -398,7 +400,12 @@ static void o2hb_nego_timeout(struct work_struct *work) | |||
398 | master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); | 400 | master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); |
399 | 401 | ||
400 | if (master_node == o2nm_this_node()) { | 402 | if (master_node == o2nm_this_node()) { |
401 | set_bit(master_node, reg->hr_nego_node_bitmap); | 403 | if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { |
404 | printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n", | ||
405 | o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, | ||
406 | config_item_name(&reg->hr_item), reg->hr_dev_name); | ||
407 | set_bit(master_node, reg->hr_nego_node_bitmap); | ||
408 | } | ||
402 | if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, | 409 | if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, |
403 | sizeof(reg->hr_nego_node_bitmap))) { | 410 | sizeof(reg->hr_nego_node_bitmap))) { |
404 | /* check negotiate bitmap every second to do timeout | 411 | /* check negotiate bitmap every second to do timeout |
@@ -410,6 +417,8 @@ static void o2hb_nego_timeout(struct work_struct *work) | |||
410 | return; | 417 | return; |
411 | } | 418 | } |
412 | 419 | ||
420 | printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n", | ||
421 | config_item_name(&reg->hr_item), reg->hr_dev_name); | ||
413 | /* approve negotiate timeout request. */ | 422 | /* approve negotiate timeout request. */ |
414 | o2hb_arm_timeout(reg); | 423 | o2hb_arm_timeout(reg); |
415 | 424 | ||
@@ -419,13 +428,23 @@ static void o2hb_nego_timeout(struct work_struct *work) | |||
419 | if (i == master_node) | 428 | if (i == master_node) |
420 | continue; | 429 | continue; |
421 | 430 | ||
422 | o2hb_send_nego_msg(reg->hr_key, | 431 | mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i); |
432 | ret = o2hb_send_nego_msg(reg->hr_key, | ||
423 | O2HB_NEGO_APPROVE_MSG, i); | 433 | O2HB_NEGO_APPROVE_MSG, i); |
434 | if (ret) | ||
435 | mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n", | ||
436 | i, ret); | ||
424 | } | 437 | } |
425 | } else { | 438 | } else { |
426 | /* negotiate timeout with master node. */ | 439 | /* negotiate timeout with master node. */ |
427 | o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, | 440 | printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n", |
428 | master_node); | 441 | o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(®->hr_item), |
442 | reg->hr_dev_name, master_node); | ||
443 | ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, | ||
444 | master_node); | ||
445 | if (ret) | ||
446 | mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n", | ||
447 | master_node, ret); | ||
429 | } | 448 | } |
430 | } | 449 | } |
431 | 450 | ||
@@ -436,6 +455,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, | |||
436 | struct o2hb_nego_msg *nego_msg; | 455 | struct o2hb_nego_msg *nego_msg; |
437 | 456 | ||
438 | nego_msg = (struct o2hb_nego_msg *)msg->buf; | 457 | nego_msg = (struct o2hb_nego_msg *)msg->buf; |
458 | printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n", | ||
459 | nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name); | ||
439 | if (nego_msg->node_num < O2NM_MAX_NODES) | 460 | if (nego_msg->node_num < O2NM_MAX_NODES) |
440 | set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); | 461 | set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); |
441 | else | 462 | else |
@@ -447,7 +468,11 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, | |||
447 | static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, | 468 | static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, |
448 | void **ret_data) | 469 | void **ret_data) |
449 | { | 470 | { |
450 | o2hb_arm_timeout(data); | 471 | struct o2hb_region *reg = data; |
472 | |||
473 | printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n", | ||
474 | config_item_name(&reg->hr_item), reg->hr_dev_name); | ||
475 | o2hb_arm_timeout(reg); | ||
451 | return 0; | 476 | return 0; |
452 | } | 477 | } |
453 | 478 | ||