aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging
diff options
context:
space:
mode:
author Liang Zhen <liang.zhen@intel.com> 2015-02-01 21:52:06 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2015-02-07 04:31:10 -0500
commit af3fa7c71bf61a4ff8e9203860c24795183f9da4 (patch)
tree 6a9a7c643dfbeba781c351da42859e9f4ea9826e /drivers/staging
parent 62e4941354c38c968474a909a5a89395ccff0067 (diff)
staging/lustre/lnet: peer aliveness status and NI status
A couple of changes to improve aliveness detection:
- When LNet receives a message, it can determine that the peer that sent the message is alive.
- When LNet receives a message from a remote network, it can determine that the router is alive and that the NI status on the router is UP.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-on: http://review.whamcloud.com/12453
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5485
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Isaac Huang <he.huang@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging')
-rw-r--r-- drivers/staging/lustre/include/linux/lnet/lib-lnet.h 10
-rw-r--r-- drivers/staging/lustre/lnet/lnet/lib-move.c 13
-rw-r--r-- drivers/staging/lustre/lnet/lnet/router.c 17
3 files changed, 39 insertions, 1 deletions
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 99fb52a98dac..0038d29a37fe 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -636,6 +636,7 @@ lnet_net2rnethash(__u32 net)
636} 636}
637 637
638extern lnd_t the_lolnd; 638extern lnd_t the_lolnd;
639extern int avoid_asym_router_failure;
639 640
640int lnet_cpt_of_nid_locked(lnet_nid_t nid); 641int lnet_cpt_of_nid_locked(lnet_nid_t nid);
641int lnet_cpt_of_nid(lnet_nid_t nid); 642int lnet_cpt_of_nid(lnet_nid_t nid);
@@ -851,6 +852,7 @@ int lnet_peer_buffer_credits(lnet_ni_t *ni);
851 852
852int lnet_router_checker_start(void); 853int lnet_router_checker_start(void);
853void lnet_router_checker_stop(void); 854void lnet_router_checker_stop(void);
855void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net);
854void lnet_swap_pinginfo(lnet_ping_info_t *info); 856void lnet_swap_pinginfo(lnet_ping_info_t *info);
855 857
856int lnet_ping_target_init(void); 858int lnet_ping_target_init(void);
@@ -870,4 +872,12 @@ void lnet_peer_tables_destroy(void);
870int lnet_peer_tables_create(void); 872int lnet_peer_tables_create(void);
871void lnet_debug_peer(lnet_nid_t nid); 873void lnet_debug_peer(lnet_nid_t nid);
872 874
875static inline void lnet_peer_set_alive(lnet_peer_t *lp)
876{
877 lp->lp_last_alive = lp->lp_last_query = get_seconds();
878 if (!lp->lp_alive)
879 lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
880}
881
882
873#endif 883#endif
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index ed6eec9bd2cc..0f53c761f1a9 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -1877,6 +1877,19 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
1877 goto drop; 1877 goto drop;
1878 } 1878 }
1879 1879
1880 if (lnet_isrouter(msg->msg_rxpeer)) {
1881 lnet_peer_set_alive(msg->msg_rxpeer);
1882 if (avoid_asym_router_failure &&
1883 LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
1884 /* received a remote message from router, update
1885 * remote NI status on this router.
1886 * NB: multi-hop routed message will be ignored.
1887 */
1888 lnet_router_ni_update_locked(msg->msg_rxpeer,
1889 LNET_NIDNET(src_nid));
1890 }
1891 }
1892
1880 lnet_msg_commit(msg, cpt); 1893 lnet_msg_commit(msg, cpt);
1881 1894
1882 if (!for_me) { 1895 if (!for_me) {
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 1bbaa5bae5ab..52ec0ab7e3c3 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -84,7 +84,7 @@ static int check_routers_before_use;
84module_param(check_routers_before_use, int, 0444); 84module_param(check_routers_before_use, int, 0444);
85MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use"); 85MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
86 86
87static int avoid_asym_router_failure = 1; 87int avoid_asym_router_failure = 1;
88module_param(avoid_asym_router_failure, int, 0644); 88module_param(avoid_asym_router_failure, int, 0644);
89MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); 89MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
90 90
@@ -783,6 +783,21 @@ lnet_wait_known_routerstate(void)
783 } 783 }
784} 784}
785 785
786void
787lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
788{
789 lnet_route_t *rte;
790
791 if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
792 list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
793 if (rte->lr_net == net) {
794 rte->lr_downis = 0;
795 break;
796 }
797 }
798 }
799}
800
786static void 801static void
787lnet_update_ni_status_locked(void) 802lnet_update_ni_status_locked(void)
788{ 803{