aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Shiraz Saleem <shiraz.saleem@intel.com> 2017-09-19 10:19:13 -0400
committer Doug Ledford <dledford@redhat.com> 2017-09-22 13:43:36 -0400
commit f16dc0aa5ea20a2cf173e82ade5f05bfecaa850a (patch)
tree 5c8c786dd3a266b2596011d0e2fae433c66fc32e
parent dfc612b3407e88913a58db00b3bca93685d4f4f9 (diff)
i40iw: Add support for port reuse on active side connections
During OpenMPI scale-up testing, we observe rdma_connect failures if ports are reused on multiple connections. This is because the Control Queue-Pair (CQP) command to add the reused port to the Accelerated Port Bit Vector Table (APBVT) fails when an entry for that port already exists. Check for a duplicate port before invoking the CQP command to add an APBVT entry, and delete the entry only if the port is no longer in use. Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.c 151
-rw-r--r-- drivers/infiniband/hw/i40iw/i40iw_cm.h 3
2 files changed, 78 insertions, 76 deletions
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index b7215448bb63..5230dd3c938c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1504,23 +1504,40 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
1504} 1504}
1505 1505
1506/** 1506/**
1507 * listen_port_in_use - determine if port is in use 1507 * i40iw_port_in_use - determine if port is in use
1508 * @port: Listen port number 1508 * @port: port number
1509 * @active_side: flag for listener side vs active side
1509 */ 1510 */
1510static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port) 1511static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
1511{ 1512{
1512 struct i40iw_cm_listener *listen_node; 1513 struct i40iw_cm_listener *listen_node;
1514 struct i40iw_cm_node *cm_node;
1513 unsigned long flags; 1515 unsigned long flags;
1514 bool ret = false; 1516 bool ret = false;
1515 1517
1516 spin_lock_irqsave(&cm_core->listen_list_lock, flags); 1518 if (active_side) {
1517 list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { 1519 /* search connected node list */
1518 if (listen_node->loc_port == port) { 1520 spin_lock_irqsave(&cm_core->ht_lock, flags);
1519 ret = true; 1521 list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
1520 break; 1522 if (cm_node->loc_port == port) {
1523 ret = true;
1524 break;
1525 }
1526 }
1527 if (!ret)
1528 clear_bit(port, cm_core->active_side_ports);
1529 spin_unlock_irqrestore(&cm_core->ht_lock, flags);
1530 } else {
1531 spin_lock_irqsave(&cm_core->listen_list_lock, flags);
1532 list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
1533 if (listen_node->loc_port == port) {
1534 ret = true;
1535 break;
1536 }
1521 } 1537 }
1538 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1522 } 1539 }
1523 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 1540
1524 return ret; 1541 return ret;
1525} 1542}
1526 1543
@@ -1868,7 +1885,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
1868 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 1885 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1869 1886
1870 if (listener->iwdev) { 1887 if (listener->iwdev) {
1871 if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port)) 1888 if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
1872 i40iw_manage_apbvt(listener->iwdev, 1889 i40iw_manage_apbvt(listener->iwdev,
1873 listener->loc_port, 1890 listener->loc_port,
1874 I40IW_MANAGE_APBVT_DEL); 1891 I40IW_MANAGE_APBVT_DEL);
@@ -2247,21 +2264,21 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
2247 if (cm_node->listener) { 2264 if (cm_node->listener) {
2248 i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); 2265 i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
2249 } else { 2266 } else {
2250 if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && 2267 if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
2251 cm_node->apbvt_set) {
2252 i40iw_manage_apbvt(cm_node->iwdev, 2268 i40iw_manage_apbvt(cm_node->iwdev,
2253 cm_node->loc_port, 2269 cm_node->loc_port,
2254 I40IW_MANAGE_APBVT_DEL); 2270 I40IW_MANAGE_APBVT_DEL);
2255 i40iw_get_addr_info(cm_node, &nfo); 2271 cm_node->apbvt_set = 0;
2256 if (cm_node->qhash_set) { 2272 }
2257 i40iw_manage_qhash(cm_node->iwdev, 2273 i40iw_get_addr_info(cm_node, &nfo);
2258 &nfo, 2274 if (cm_node->qhash_set) {
2259 I40IW_QHASH_TYPE_TCP_ESTABLISHED, 2275 i40iw_manage_qhash(cm_node->iwdev,
2260 I40IW_QHASH_MANAGE_TYPE_DELETE, 2276 &nfo,
2261 NULL, 2277 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
2262 false); 2278 I40IW_QHASH_MANAGE_TYPE_DELETE,
2263 cm_node->qhash_set = 0; 2279 NULL,
2264 } 2280 false);
2281 cm_node->qhash_set = 0;
2265 } 2282 }
2266 } 2283 }
2267 2284
@@ -3738,10 +3755,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3738 struct sockaddr_in *raddr; 3755 struct sockaddr_in *raddr;
3739 struct sockaddr_in6 *laddr6; 3756 struct sockaddr_in6 *laddr6;
3740 struct sockaddr_in6 *raddr6; 3757 struct sockaddr_in6 *raddr6;
3741 bool qhash_set = false; 3758 int ret = 0;
3742 int apbvt_set = 0; 3759 unsigned long flags;
3743 int err = 0;
3744 enum i40iw_status_code status;
3745 3760
3746 ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); 3761 ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
3747 if (!ibqp) 3762 if (!ibqp)
@@ -3790,32 +3805,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3790 cm_info.user_pri = rt_tos2priority(cm_id->tos); 3805 cm_info.user_pri = rt_tos2priority(cm_id->tos);
3791 i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n", 3806 i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
3792 __func__, cm_id->tos, cm_info.user_pri); 3807 __func__, cm_id->tos, cm_info.user_pri);
3793 if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
3794 (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
3795 raddr6->sin6_addr.in6_u.u6_addr32,
3796 sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
3797 status = i40iw_manage_qhash(iwdev,
3798 &cm_info,
3799 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3800 I40IW_QHASH_MANAGE_TYPE_ADD,
3801 NULL,
3802 true);
3803 if (status)
3804 return -EINVAL;
3805 qhash_set = true;
3806 }
3807 status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
3808 if (status) {
3809 i40iw_manage_qhash(iwdev,
3810 &cm_info,
3811 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3812 I40IW_QHASH_MANAGE_TYPE_DELETE,
3813 NULL,
3814 false);
3815 return -EINVAL;
3816 }
3817
3818 apbvt_set = 1;
3819 cm_id->add_ref(cm_id); 3808 cm_id->add_ref(cm_id);
3820 cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev, 3809 cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
3821 conn_param->private_data_len, 3810 conn_param->private_data_len,
@@ -3823,17 +3812,40 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3823 &cm_info); 3812 &cm_info);
3824 3813
3825 if (IS_ERR(cm_node)) { 3814 if (IS_ERR(cm_node)) {
3826 err = PTR_ERR(cm_node); 3815 ret = PTR_ERR(cm_node);
3827 goto err_out; 3816 cm_id->rem_ref(cm_id);
3817 return ret;
3818 }
3819
3820 if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
3821 (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
3822 raddr6->sin6_addr.in6_u.u6_addr32,
3823 sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
3824 if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3825 I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
3826 ret = -EINVAL;
3827 goto err;
3828 }
3829 cm_node->qhash_set = true;
3828 } 3830 }
3829 3831
3832 spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
3833 if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
3834 spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
3835 if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
3836 ret = -EINVAL;
3837 goto err;
3838 }
3839 } else {
3840 spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
3841 }
3842
3843 cm_node->apbvt_set = true;
3830 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); 3844 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
3831 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && 3845 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
3832 !cm_node->ord_size) 3846 !cm_node->ord_size)
3833 cm_node->ord_size = 1; 3847 cm_node->ord_size = 1;
3834 3848
3835 cm_node->apbvt_set = apbvt_set;
3836 cm_node->qhash_set = qhash_set;
3837 iwqp->cm_node = cm_node; 3849 iwqp->cm_node = cm_node;
3838 cm_node->iwqp = iwqp; 3850 cm_node->iwqp = iwqp;
3839 iwqp->cm_id = cm_id; 3851 iwqp->cm_id = cm_id;
@@ -3841,11 +3853,9 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3841 3853
3842 if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { 3854 if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
3843 cm_node->state = I40IW_CM_STATE_SYN_SENT; 3855 cm_node->state = I40IW_CM_STATE_SYN_SENT;
3844 err = i40iw_send_syn(cm_node, 0); 3856 ret = i40iw_send_syn(cm_node, 0);
3845 if (err) { 3857 if (ret)
3846 i40iw_rem_ref_cm_node(cm_node); 3858 goto err;
3847 goto err_out;
3848 }
3849 } 3859 }
3850 3860
3851 i40iw_debug(cm_node->dev, 3861 i40iw_debug(cm_node->dev,
@@ -3854,9 +3864,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3854 cm_node->rem_port, 3864 cm_node->rem_port,
3855 cm_node, 3865 cm_node,
3856 cm_node->cm_id); 3866 cm_node->cm_id);
3867
3857 return 0; 3868 return 0;
3858 3869
3859err_out: 3870err:
3860 if (cm_info.ipv4) 3871 if (cm_info.ipv4)
3861 i40iw_debug(&iwdev->sc_dev, 3872 i40iw_debug(&iwdev->sc_dev,
3862 I40IW_DEBUG_CM, 3873 I40IW_DEBUG_CM,
@@ -3868,22 +3879,10 @@ err_out:
3868 "Api - connect() FAILED: dest addr=%pI6", 3879 "Api - connect() FAILED: dest addr=%pI6",
3869 cm_info.rem_addr); 3880 cm_info.rem_addr);
3870 3881
3871 if (qhash_set) 3882 i40iw_rem_ref_cm_node(cm_node);
3872 i40iw_manage_qhash(iwdev,
3873 &cm_info,
3874 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
3875 I40IW_QHASH_MANAGE_TYPE_DELETE,
3876 NULL,
3877 false);
3878
3879 if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
3880 cm_info.loc_port))
3881 i40iw_manage_apbvt(iwdev,
3882 cm_info.loc_port,
3883 I40IW_MANAGE_APBVT_DEL);
3884 cm_id->rem_ref(cm_id); 3883 cm_id->rem_ref(cm_id);
3885 iwdev->cm_core.stats_connect_errs++; 3884 iwdev->cm_core.stats_connect_errs++;
3886 return err; 3885 return ret;
3887} 3886}
3888 3887
3889/** 3888/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 8626e7f1fdd3..45abef76295b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -71,6 +71,7 @@
71#define I40IW_HW_IRD_SETTING_32 32 71#define I40IW_HW_IRD_SETTING_32 32
72#define I40IW_HW_IRD_SETTING_64 64 72#define I40IW_HW_IRD_SETTING_64 64
73 73
74#define MAX_PORTS 65536
74#define I40IW_VLAN_PRIO_SHIFT 13 75#define I40IW_VLAN_PRIO_SHIFT 13
75 76
76enum ietf_mpa_flags { 77enum ietf_mpa_flags {
@@ -413,6 +414,8 @@ struct i40iw_cm_core {
413 spinlock_t ht_lock; /* manage hash table */ 414 spinlock_t ht_lock; /* manage hash table */
414 spinlock_t listen_list_lock; /* listen list */ 415 spinlock_t listen_list_lock; /* listen list */
415 416
417 unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
418
416 u64 stats_nodes_created; 419 u64 stats_nodes_created;
417 u64 stats_nodes_destroyed; 420 u64 stats_nodes_destroyed;
418 u64 stats_listen_created; 421 u64 stats_listen_created;