diff options
| author | Liang Zhen <liang.zhen@intel.com> | 2015-02-01 21:52:06 -0500 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2015-02-07 04:31:10 -0500 |
| commit | af3fa7c71bf61a4ff8e9203860c24795183f9da4 (patch) | |
| tree | 6a9a7c643dfbeba781c351da42859e9f4ea9826e | |
| parent | 62e4941354c38c968474a909a5a89395ccff0067 (diff) | |
staging/lustre/lnet: peer aliveness status and NI status
A couple of changes to improve aliveness detection:
- When LNet receives a message, it can determine that the peer which sent
the message is alive
- When LNet receives a message from a remote network, it can determine that
the router is alive and that the NI status on the router is UP.
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-on: http://review.whamcloud.com/12453
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5485
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Isaac Huang <he.huang@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| -rw-r--r-- | drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 10 | ||||
| -rw-r--r-- | drivers/staging/lustre/lnet/lnet/lib-move.c | 13 | ||||
| -rw-r--r-- | drivers/staging/lustre/lnet/lnet/router.c | 17 |
3 files changed, 39 insertions, 1 deletions
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index 99fb52a98dac..0038d29a37fe 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h | |||
| @@ -636,6 +636,7 @@ lnet_net2rnethash(__u32 net) | |||
| 636 | } | 636 | } |
| 637 | 637 | ||
| 638 | extern lnd_t the_lolnd; | 638 | extern lnd_t the_lolnd; |
| 639 | extern int avoid_asym_router_failure; | ||
| 639 | 640 | ||
| 640 | int lnet_cpt_of_nid_locked(lnet_nid_t nid); | 641 | int lnet_cpt_of_nid_locked(lnet_nid_t nid); |
| 641 | int lnet_cpt_of_nid(lnet_nid_t nid); | 642 | int lnet_cpt_of_nid(lnet_nid_t nid); |
| @@ -851,6 +852,7 @@ int lnet_peer_buffer_credits(lnet_ni_t *ni); | |||
| 851 | 852 | ||
| 852 | int lnet_router_checker_start(void); | 853 | int lnet_router_checker_start(void); |
| 853 | void lnet_router_checker_stop(void); | 854 | void lnet_router_checker_stop(void); |
| 855 | void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net); | ||
| 854 | void lnet_swap_pinginfo(lnet_ping_info_t *info); | 856 | void lnet_swap_pinginfo(lnet_ping_info_t *info); |
| 855 | 857 | ||
| 856 | int lnet_ping_target_init(void); | 858 | int lnet_ping_target_init(void); |
| @@ -870,4 +872,12 @@ void lnet_peer_tables_destroy(void); | |||
| 870 | int lnet_peer_tables_create(void); | 872 | int lnet_peer_tables_create(void); |
| 871 | void lnet_debug_peer(lnet_nid_t nid); | 873 | void lnet_debug_peer(lnet_nid_t nid); |
| 872 | 874 | ||
| 875 | static inline void lnet_peer_set_alive(lnet_peer_t *lp) | ||
| 876 | { | ||
| 877 | lp->lp_last_alive = lp->lp_last_query = get_seconds(); | ||
| 878 | if (!lp->lp_alive) | ||
| 879 | lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); | ||
| 880 | } | ||
| 881 | |||
| 882 | |||
| 873 | #endif | 883 | #endif |
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index ed6eec9bd2cc..0f53c761f1a9 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c | |||
| @@ -1877,6 +1877,19 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, | |||
| 1877 | goto drop; | 1877 | goto drop; |
| 1878 | } | 1878 | } |
| 1879 | 1879 | ||
| 1880 | if (lnet_isrouter(msg->msg_rxpeer)) { | ||
| 1881 | lnet_peer_set_alive(msg->msg_rxpeer); | ||
| 1882 | if (avoid_asym_router_failure && | ||
| 1883 | LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) { | ||
| 1884 | /* received a remote message from router, update | ||
| 1885 | * remote NI status on this router. | ||
| 1886 | * NB: multi-hop routed message will be ignored. | ||
| 1887 | */ | ||
| 1888 | lnet_router_ni_update_locked(msg->msg_rxpeer, | ||
| 1889 | LNET_NIDNET(src_nid)); | ||
| 1890 | } | ||
| 1891 | } | ||
| 1892 | |||
| 1880 | lnet_msg_commit(msg, cpt); | 1893 | lnet_msg_commit(msg, cpt); |
| 1881 | 1894 | ||
| 1882 | if (!for_me) { | 1895 | if (!for_me) { |
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 1bbaa5bae5ab..52ec0ab7e3c3 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c | |||
| @@ -84,7 +84,7 @@ static int check_routers_before_use; | |||
| 84 | module_param(check_routers_before_use, int, 0444); | 84 | module_param(check_routers_before_use, int, 0444); |
| 85 | MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use"); | 85 | MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use"); |
| 86 | 86 | ||
| 87 | static int avoid_asym_router_failure = 1; | 87 | int avoid_asym_router_failure = 1; |
| 88 | module_param(avoid_asym_router_failure, int, 0644); | 88 | module_param(avoid_asym_router_failure, int, 0644); |
| 89 | MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); | 89 | MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); |
| 90 | 90 | ||
| @@ -783,6 +783,21 @@ lnet_wait_known_routerstate(void) | |||
| 783 | } | 783 | } |
| 784 | } | 784 | } |
| 785 | 785 | ||
| 786 | void | ||
| 787 | lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net) | ||
| 788 | { | ||
| 789 | lnet_route_t *rte; | ||
| 790 | |||
| 791 | if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { | ||
| 792 | list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { | ||
| 793 | if (rte->lr_net == net) { | ||
| 794 | rte->lr_downis = 0; | ||
| 795 | break; | ||
| 796 | } | ||
| 797 | } | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 786 | static void | 801 | static void |
| 787 | lnet_update_ni_status_locked(void) | 802 | lnet_update_ni_status_locked(void) |
| 788 | { | 803 | { |
