diff options
author | Junxiao Bi <junxiao.bi@oracle.com> | 2016-05-27 17:27:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-27 17:49:37 -0400 |
commit | 34069b886f95356d68bf8315fa648c4ab3193cdd (patch) | |
tree | 728201bcfe3ec19215242e1632d7cd1d45c86852 /fs | |
parent | e0cbb79805083b8862182341ebf72266d58f6d12 (diff) |
ocfs2: o2hb: add NEGO_TIMEOUT message
This message is sent to the master node when a non-master node's negotiate
timer expires. The master node records these nodes in a bitmap, which is
used to decide whether to re-queue the write timeout timer.
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Ryan Ding <ryan.ding@oracle.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Gang He <ghe@suse.com>
Cc: rwxybh <rwxybh@126.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 66 |
1 files changed, 65 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 750c950f4e1f..454c89076833 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -280,6 +280,10 @@ struct o2hb_region { | |||
280 | * being checked because we temporarily have to zero out the | 280 | * being checked because we temporarily have to zero out the |
281 | * crc field. */ | 281 | * crc field. */ |
282 | struct o2hb_disk_heartbeat_block *hr_tmp_block; | 282 | struct o2hb_disk_heartbeat_block *hr_tmp_block; |
283 | |||
284 | /* Message key for negotiate timeout message. */ | ||
285 | unsigned int hr_key; | ||
286 | struct list_head hr_handler_list; | ||
283 | }; | 287 | }; |
284 | 288 | ||
285 | struct o2hb_bio_wait_ctxt { | 289 | struct o2hb_bio_wait_ctxt { |
@@ -288,6 +292,14 @@ struct o2hb_bio_wait_ctxt { | |||
288 | int wc_error; | 292 | int wc_error; |
289 | }; | 293 | }; |
290 | 294 | ||
295 | enum { | ||
296 | O2HB_NEGO_TIMEOUT_MSG = 1, | ||
297 | }; | ||
298 | |||
299 | struct o2hb_nego_msg { | ||
300 | u8 node_num; | ||
301 | }; | ||
302 | |||
291 | static void o2hb_write_timeout(struct work_struct *work) | 303 | static void o2hb_write_timeout(struct work_struct *work) |
292 | { | 304 | { |
293 | int failed, quorum; | 305 | int failed, quorum; |
@@ -355,6 +367,24 @@ static void o2hb_disarm_timeout(struct o2hb_region *reg) | |||
355 | cancel_delayed_work_sync(&reg->hr_nego_timeout_work); | 367 | cancel_delayed_work_sync(&reg->hr_nego_timeout_work); |
356 | } | 368 | } |
357 | 369 | ||
370 | static int o2hb_send_nego_msg(int key, int type, u8 target) | ||
371 | { | ||
372 | struct o2hb_nego_msg msg; | ||
373 | int status, ret; | ||
374 | |||
375 | msg.node_num = o2nm_this_node(); | ||
376 | again: | ||
377 | ret = o2net_send_message(type, key, &msg, sizeof(msg), | ||
378 | target, &status); | ||
379 | |||
380 | if (ret == -EAGAIN || ret == -ENOMEM) { | ||
381 | msleep(100); | ||
382 | goto again; | ||
383 | } | ||
384 | |||
385 | return ret; | ||
386 | } | ||
387 | |||
358 | static void o2hb_nego_timeout(struct work_struct *work) | 388 | static void o2hb_nego_timeout(struct work_struct *work) |
359 | { | 389 | { |
360 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 390 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
@@ -382,8 +412,24 @@ static void o2hb_nego_timeout(struct work_struct *work) | |||
382 | /* approve negotiate timeout request. */ | 412 | /* approve negotiate timeout request. */ |
383 | } else { | 413 | } else { |
384 | /* negotiate timeout with master node. */ | 414 | /* negotiate timeout with master node. */ |
415 | o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, | ||
416 | master_node); | ||
385 | } | 417 | } |
418 | } | ||
419 | |||
420 | static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, | ||
421 | void **ret_data) | ||
422 | { | ||
423 | struct o2hb_region *reg = data; | ||
424 | struct o2hb_nego_msg *nego_msg; | ||
386 | 425 | ||
426 | nego_msg = (struct o2hb_nego_msg *)msg->buf; | ||
427 | if (nego_msg->node_num < O2NM_MAX_NODES) | ||
428 | set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); | ||
429 | else | ||
430 | mlog(ML_ERROR, "got nego timeout message from bad node.\n"); | ||
431 | |||
432 | return 0; | ||
387 | } | 433 | } |
388 | 434 | ||
389 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) | 435 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) |
@@ -1493,6 +1539,7 @@ static void o2hb_region_release(struct config_item *item) | |||
1493 | list_del(&reg->hr_all_item); | 1539 | list_del(&reg->hr_all_item); |
1494 | spin_unlock(&o2hb_live_lock); | 1540 | spin_unlock(&o2hb_live_lock); |
1495 | 1541 | ||
1542 | o2net_unregister_handler_list(&reg->hr_handler_list); | ||
1496 | kfree(reg); | 1543 | kfree(reg); |
1497 | } | 1544 | } |
1498 | 1545 | ||
@@ -2038,13 +2085,30 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g | |||
2038 | 2085 | ||
2039 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | 2086 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); |
2040 | 2087 | ||
2088 | /* this is the same way to generate msg key as dlm, for local heartbeat, | ||
2089 | * name is also the same, so make initial crc value different to avoid | ||
2090 | * message key conflict. | ||
2091 | */ | ||
2092 | reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS, | ||
2093 | name, strlen(name)); | ||
2094 | INIT_LIST_HEAD(&reg->hr_handler_list); | ||
2095 | ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key, | ||
2096 | sizeof(struct o2hb_nego_msg), | ||
2097 | o2hb_nego_timeout_handler, | ||
2098 | reg, NULL, &reg->hr_handler_list); | ||
2099 | if (ret) | ||
2100 | goto free; | ||
2101 | |||
2041 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | 2102 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); |
2042 | if (ret) { | 2103 | if (ret) { |
2043 | config_item_put(&reg->hr_item); | 2104 | config_item_put(&reg->hr_item); |
2044 | goto free; | 2105 | goto unregister_handler; |
2045 | } | 2106 | } |
2046 | 2107 | ||
2047 | return &reg->hr_item; | 2108 | return &reg->hr_item; |
2109 | |||
2110 | unregister_handler: | ||
2111 | o2net_unregister_handler_list(&reg->hr_handler_list); | ||
2048 | free: | 2112 | free: |
2049 | kfree(reg); | 2113 | kfree(reg); |
2050 | return ERR_PTR(ret); | 2114 | return ERR_PTR(ret); |