aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Junxiao Bi <junxiao.bi@oracle.com> 2016-05-27 17:27:01 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-27 17:49:37 -0400
commit 34069b886f95356d68bf8315fa648c4ab3193cdd (patch)
tree 728201bcfe3ec19215242e1632d7cd1d45c86852 /fs
parent e0cbb79805083b8862182341ebf72266d58f6d12 (diff)
ocfs2: o2hb: add NEGO_TIMEOUT message
This message is sent to the master node when a non-master node's negotiate timer expires. The master node records these nodes in a bitmap, which is used to decide whether to re-queue the write timeout timer. Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com> Reviewed-by: Ryan Ding <ryan.ding@oracle.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Gang He <ghe@suse.com> Cc: rwxybh <rwxybh@126.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Joseph Qi <joseph.qi@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 66
1 files changed, 65 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 750c950f4e1f..454c89076833 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -280,6 +280,10 @@ struct o2hb_region {
280 * being checked because we temporarily have to zero out the 280 * being checked because we temporarily have to zero out the
281 * crc field. */ 281 * crc field. */
282 struct o2hb_disk_heartbeat_block *hr_tmp_block; 282 struct o2hb_disk_heartbeat_block *hr_tmp_block;
283
284 /* Message key for negotiate timeout message. */
285 unsigned int hr_key;
286 struct list_head hr_handler_list;
283}; 287};
284 288
285struct o2hb_bio_wait_ctxt { 289struct o2hb_bio_wait_ctxt {
@@ -288,6 +292,14 @@ struct o2hb_bio_wait_ctxt {
288 int wc_error; 292 int wc_error;
289}; 293};
290 294
/* Message type for the negotiate-timeout message.  It is the type passed
 * to o2hb_send_nego_msg() by o2hb_nego_timeout() and the type under which
 * o2hb_nego_timeout_handler() is registered with o2net_register_handler(). */
295enum {
296 O2HB_NEGO_TIMEOUT_MSG = 1,
297};
298
/* Payload of the negotiate-timeout message. */
299struct o2hb_nego_msg {
/* Sender's node number: filled from o2nm_this_node() by
 * o2hb_send_nego_msg() and used by o2hb_nego_timeout_handler() as the
 * bit index into hr_nego_node_bitmap. */
300 u8 node_num;
301};
302
291static void o2hb_write_timeout(struct work_struct *work) 303static void o2hb_write_timeout(struct work_struct *work)
292{ 304{
293 int failed, quorum; 305 int failed, quorum;
@@ -355,6 +367,24 @@ static void o2hb_disarm_timeout(struct o2hb_region *reg)
355 cancel_delayed_work_sync(&reg->hr_nego_timeout_work); 367 cancel_delayed_work_sync(&reg->hr_nego_timeout_work);
356} 368}
357 369
370static int o2hb_send_nego_msg(int key, int type, u8 target)
371{
372 struct o2hb_nego_msg msg;
373 int status, ret;
374
375 msg.node_num = o2nm_this_node();
376again:
377 ret = o2net_send_message(type, key, &msg, sizeof(msg),
378 target, &status);
379
380 if (ret == -EAGAIN || ret == -ENOMEM) {
381 msleep(100);
382 goto again;
383 }
384
385 return ret;
386}
387
358static void o2hb_nego_timeout(struct work_struct *work) 388static void o2hb_nego_timeout(struct work_struct *work)
359{ 389{
360 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 390 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -382,8 +412,24 @@ static void o2hb_nego_timeout(struct work_struct *work)
382 /* approve negotiate timeout request. */ 412 /* approve negotiate timeout request. */
383 } else { 413 } else {
384 /* negotiate timeout with master node. */ 414 /* negotiate timeout with master node. */
415 o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
416 master_node);
385 } 417 }
418}
419
420static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
421 void **ret_data)
422{
423 struct o2hb_region *reg = data;
424 struct o2hb_nego_msg *nego_msg;
386 425
426 nego_msg = (struct o2hb_nego_msg *)msg->buf;
427 if (nego_msg->node_num < O2NM_MAX_NODES)
428 set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
429 else
430 mlog(ML_ERROR, "got nego timeout message from bad node.\n");
431
432 return 0;
387} 433}
388 434
389static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) 435static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@ -1493,6 +1539,7 @@ static void o2hb_region_release(struct config_item *item)
1493 list_del(&reg->hr_all_item); 1539 list_del(&reg->hr_all_item);
1494 spin_unlock(&o2hb_live_lock); 1540 spin_unlock(&o2hb_live_lock);
1495 1541
1542 o2net_unregister_handler_list(&reg->hr_handler_list);
1496 kfree(reg); 1543 kfree(reg);
1497} 1544}
1498 1545
@@ -2038,13 +2085,30 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
2038 2085
2039 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); 2086 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
2040 2087
2088 /* this is the same way to generate msg key as dlm, for local heartbeat,
2089 * name is also the same, so make initial crc value different to avoid
2090 * message key conflict.
2091 */
2092 reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS,
2093 name, strlen(name));
2094 INIT_LIST_HEAD(&reg->hr_handler_list);
2095 ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key,
2096 sizeof(struct o2hb_nego_msg),
2097 o2hb_nego_timeout_handler,
2098 reg, NULL, &reg->hr_handler_list);
2099 if (ret)
2100 goto free;
2101
2041 ret = o2hb_debug_region_init(reg, o2hb_debug_dir); 2102 ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
2042 if (ret) { 2103 if (ret) {
2043 config_item_put(&reg->hr_item); 2104 config_item_put(&reg->hr_item);
2044 goto free; 2105 goto unregister_handler;
2045 } 2106 }
2046 2107
2047 return &reg->hr_item; 2108 return &reg->hr_item;
2109
2110unregister_handler:
2111 o2net_unregister_handler_list(&reg->hr_handler_list);
2048free: 2112free:
2049 kfree(reg); 2113 kfree(reg);
2050 return ERR_PTR(ret); 2114 return ERR_PTR(ret);