about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Junxiao Bi <junxiao.bi@oracle.com> 2016-05-27 17:27:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-27 17:49:37 -0400
commit 1bd1290283d7939478062e80bdd9719d3a21522f (patch)
tree 94095c855b1b10fcf89c522a90e3358dec01d6e5 /fs
parent e76f8237a2f7b7220980c0fb3c6d0b1d48ba79ad (diff)
ocfs2: o2hb: add some user/debug log
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Ryan Ding <ryan.ding@oracle.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Gang He <ghe@suse.com>
Cc: rwxybh <rwxybh@126.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 39
1 file changed, 32 insertions, 7 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index e929b15d6162..84ebeb5678c6 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -292,6 +292,8 @@ struct o2hb_bio_wait_ctxt {
292 int wc_error; 292 int wc_error;
293}; 293};
294 294
295#define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2)
296
295enum { 297enum {
296 O2HB_NEGO_TIMEOUT_MSG = 1, 298 O2HB_NEGO_TIMEOUT_MSG = 1,
297 O2HB_NEGO_APPROVE_MSG = 2, 299 O2HB_NEGO_APPROVE_MSG = 2,
@@ -358,7 +360,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg)
358 cancel_delayed_work(&reg->hr_nego_timeout_work); 360 cancel_delayed_work(&reg->hr_nego_timeout_work);
359 /* negotiate timeout must be less than write timeout. */ 361 /* negotiate timeout must be less than write timeout. */
360 schedule_delayed_work(&reg->hr_nego_timeout_work, 362 schedule_delayed_work(&reg->hr_nego_timeout_work,
361 msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS)/2); 363 msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
362 memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap)); 364 memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
363} 365}
364 366
@@ -389,7 +391,7 @@ again:
389static void o2hb_nego_timeout(struct work_struct *work) 391static void o2hb_nego_timeout(struct work_struct *work)
390{ 392{
391 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 393 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
392 int master_node, i; 394 int master_node, i, ret;
393 struct o2hb_region *reg; 395 struct o2hb_region *reg;
394 396
395 reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work); 397 reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work);
@@ -398,7 +400,12 @@ static void o2hb_nego_timeout(struct work_struct *work)
398 master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); 400 master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
399 401
400 if (master_node == o2nm_this_node()) { 402 if (master_node == o2nm_this_node()) {
401 set_bit(master_node, reg->hr_nego_node_bitmap); 403 if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
404 printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n",
405 o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
406 config_item_name(&reg->hr_item), reg->hr_dev_name);
407 set_bit(master_node, reg->hr_nego_node_bitmap);
408 }
402 if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, 409 if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
403 sizeof(reg->hr_nego_node_bitmap))) { 410 sizeof(reg->hr_nego_node_bitmap))) {
404 /* check negotiate bitmap every second to do timeout 411 /* check negotiate bitmap every second to do timeout
@@ -410,6 +417,8 @@ static void o2hb_nego_timeout(struct work_struct *work)
410 return; 417 return;
411 } 418 }
412 419
420 printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n",
421 config_item_name(&reg->hr_item), reg->hr_dev_name);
413 /* approve negotiate timeout request. */ 422 /* approve negotiate timeout request. */
414 o2hb_arm_timeout(reg); 423 o2hb_arm_timeout(reg);
415 424
@@ -419,13 +428,23 @@ static void o2hb_nego_timeout(struct work_struct *work)
419 if (i == master_node) 428 if (i == master_node)
420 continue; 429 continue;
421 430
422 o2hb_send_nego_msg(reg->hr_key, 431 mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i);
432 ret = o2hb_send_nego_msg(reg->hr_key,
423 O2HB_NEGO_APPROVE_MSG, i); 433 O2HB_NEGO_APPROVE_MSG, i);
434 if (ret)
435 mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n",
436 i, ret);
424 } 437 }
425 } else { 438 } else {
426 /* negotiate timeout with master node. */ 439 /* negotiate timeout with master node. */
427 o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, 440 printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n",
428 master_node); 441 o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item),
442 reg->hr_dev_name, master_node);
443 ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
444 master_node);
445 if (ret)
446 mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n",
447 master_node, ret);
429 } 448 }
430} 449}
431 450
@@ -436,6 +455,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
436 struct o2hb_nego_msg *nego_msg; 455 struct o2hb_nego_msg *nego_msg;
437 456
438 nego_msg = (struct o2hb_nego_msg *)msg->buf; 457 nego_msg = (struct o2hb_nego_msg *)msg->buf;
458 printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n",
459 nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name);
439 if (nego_msg->node_num < O2NM_MAX_NODES) 460 if (nego_msg->node_num < O2NM_MAX_NODES)
440 set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); 461 set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
441 else 462 else
@@ -447,7 +468,11 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
447static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, 468static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
448 void **ret_data) 469 void **ret_data)
449{ 470{
450 o2hb_arm_timeout(data); 471 struct o2hb_region *reg = data;
472
473 printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n",
474 config_item_name(&reg->hr_item), reg->hr_dev_name);
475 o2hb_arm_timeout(reg);
451 return 0; 476 return 0;
452} 477}
453 478