aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSean Hefty <sean.hefty@intel.com>2007-06-18 14:09:37 -0400
committerRoland Dreier <rolandd@cisco.com>2007-07-11 00:50:05 -0400
commit1d84612649427a85e1f311baa7215f9a6252d856 (patch)
treecbb9d614cb56c12f76e438f3f05ca9559a2dee3a
parent24be6e81c78314c91a47200272eb4bc31284bd7b (diff)
IB/cm: Include HCA ACK delay in local ACK timeout
The IB CM should include the HCA ACK delay when calculating the local ACK timeout value to use for RC QPs. If the HCA ACK delay is large enough relative to the packet life time, then if it is not taken into account, the calculated timeout value ends up being too small, which can result in "retry exceeded" errors.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/core/cm.c71
-rw-r--r--drivers/infiniband/core/cma.c1
-rw-r--r--drivers/infiniband/core/ucm.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1
-rw-r--r--include/rdma/ib_cm.h1
5 files changed, 57 insertions, 18 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 16181d655854..c7007c45889a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -87,6 +87,7 @@ struct cm_port {
87struct cm_device { 87struct cm_device {
88 struct list_head list; 88 struct list_head list;
89 struct ib_device *device; 89 struct ib_device *device;
90 u8 ack_delay;
90 struct cm_port port[0]; 91 struct cm_port port[0];
91}; 92};
92 93
@@ -95,7 +96,7 @@ struct cm_av {
95 union ib_gid dgid; 96 union ib_gid dgid;
96 struct ib_ah_attr ah_attr; 97 struct ib_ah_attr ah_attr;
97 u16 pkey_index; 98 u16 pkey_index;
98 u8 packet_life_time; 99 u8 timeout;
99}; 100};
100 101
101struct cm_work { 102struct cm_work {
@@ -154,6 +155,7 @@ struct cm_id_private {
154 u8 retry_count; 155 u8 retry_count;
155 u8 rnr_retry_count; 156 u8 rnr_retry_count;
156 u8 service_timeout; 157 u8 service_timeout;
158 u8 target_ack_delay;
157 159
158 struct list_head work_list; 160 struct list_head work_list;
159 atomic_t work_count; 161 atomic_t work_count;
@@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
293 av->port = port; 295 av->port = port;
294 ib_init_ah_from_path(cm_dev->device, port->port_num, path, 296 ib_init_ah_from_path(cm_dev->device, port->port_num, path,
295 &av->ah_attr); 297 &av->ah_attr);
296 av->packet_life_time = path->packet_life_time; 298 av->timeout = path->packet_life_time + 1;
297 return 0; 299 return 0;
298} 300}
299 301
@@ -643,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time)
643 return 1 << max(iba_time - 8, 0); 645 return 1 << max(iba_time - 8, 0);
644} 646}
645 647
648/*
649 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
650 * Because of how ack_timeout is stored, adding one doubles the timeout.
651 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
652 * increment it (round up) only if the other is within 50%.
653 */
654static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
655{
656 int ack_timeout = packet_life_time + 1;
657
658 if (ack_timeout >= ca_ack_delay)
659 ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
660 else
661 ack_timeout = ca_ack_delay +
662 (ack_timeout >= (ca_ack_delay - 1));
663
664 return min(31, ack_timeout);
665}
666
646static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) 667static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
647{ 668{
648 if (timewait_info->inserted_remote_id) { 669 if (timewait_info->inserted_remote_id) {
@@ -686,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
686 * timewait before notifying the user that we've exited timewait. 707 * timewait before notifying the user that we've exited timewait.
687 */ 708 */
688 cm_id_priv->id.state = IB_CM_TIMEWAIT; 709 cm_id_priv->id.state = IB_CM_TIMEWAIT;
689 wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); 710 wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
690 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, 711 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
691 msecs_to_jiffies(wait_time)); 712 msecs_to_jiffies(wait_time));
692 cm_id_priv->timewait_info = NULL; 713 cm_id_priv->timewait_info = NULL;
@@ -908,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
908 cm_req_set_primary_sl(req_msg, param->primary_path->sl); 929 cm_req_set_primary_sl(req_msg, param->primary_path->sl);
909 cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ 930 cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
910 cm_req_set_primary_local_ack_timeout(req_msg, 931 cm_req_set_primary_local_ack_timeout(req_msg,
911 min(31, param->primary_path->packet_life_time + 1)); 932 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
933 param->primary_path->packet_life_time));
912 934
913 if (param->alternate_path) { 935 if (param->alternate_path) {
914 req_msg->alt_local_lid = param->alternate_path->slid; 936 req_msg->alt_local_lid = param->alternate_path->slid;
@@ -923,7 +945,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
923 cm_req_set_alt_sl(req_msg, param->alternate_path->sl); 945 cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
924 cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ 946 cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
925 cm_req_set_alt_local_ack_timeout(req_msg, 947 cm_req_set_alt_local_ack_timeout(req_msg,
926 min(31, param->alternate_path->packet_life_time + 1)); 948 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
949 param->alternate_path->packet_life_time));
927 } 950 }
928 951
929 if (param->private_data && param->private_data_len) 952 if (param->private_data && param->private_data_len)
@@ -1433,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
1433 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); 1456 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1434 rep_msg->resp_resources = param->responder_resources; 1457 rep_msg->resp_resources = param->responder_resources;
1435 rep_msg->initiator_depth = param->initiator_depth; 1458 rep_msg->initiator_depth = param->initiator_depth;
1436 cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); 1459 cm_rep_set_target_ack_delay(rep_msg,
1460 cm_id_priv->av.port->cm_dev->ack_delay);
1437 cm_rep_set_failover(rep_msg, param->failover_accepted); 1461 cm_rep_set_failover(rep_msg, param->failover_accepted);
1438 cm_rep_set_flow_ctrl(rep_msg, param->flow_control); 1462 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1439 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); 1463 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
@@ -1680,6 +1704,13 @@ static int cm_rep_handler(struct cm_work *work)
1680 cm_id_priv->responder_resources = rep_msg->initiator_depth; 1704 cm_id_priv->responder_resources = rep_msg->initiator_depth;
1681 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); 1705 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1682 cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); 1706 cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1707 cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1708 cm_id_priv->av.timeout =
1709 cm_ack_timeout(cm_id_priv->target_ack_delay,
1710 cm_id_priv->av.timeout - 1);
1711 cm_id_priv->alt_av.timeout =
1712 cm_ack_timeout(cm_id_priv->target_ack_delay,
1713 cm_id_priv->alt_av.timeout - 1);
1683 1714
1684 /* todo: handle peer_to_peer */ 1715 /* todo: handle peer_to_peer */
1685 1716
@@ -2291,7 +2322,7 @@ static int cm_mra_handler(struct cm_work *work)
2291 work->cm_event.param.mra_rcvd.service_timeout = 2322 work->cm_event.param.mra_rcvd.service_timeout =
2292 cm_mra_get_service_timeout(mra_msg); 2323 cm_mra_get_service_timeout(mra_msg);
2293 timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + 2324 timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2294 cm_convert_to_ms(cm_id_priv->av.packet_life_time); 2325 cm_convert_to_ms(cm_id_priv->av.timeout);
2295 2326
2296 spin_lock_irq(&cm_id_priv->lock); 2327 spin_lock_irq(&cm_id_priv->lock);
2297 switch (cm_id_priv->id.state) { 2328 switch (cm_id_priv->id.state) {
@@ -2363,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
2363 cm_lap_set_sl(lap_msg, alternate_path->sl); 2394 cm_lap_set_sl(lap_msg, alternate_path->sl);
2364 cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ 2395 cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2365 cm_lap_set_local_ack_timeout(lap_msg, 2396 cm_lap_set_local_ack_timeout(lap_msg,
2366 min(31, alternate_path->packet_life_time + 1)); 2397 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2398 alternate_path->packet_life_time));
2367 2399
2368 if (private_data && private_data_len) 2400 if (private_data && private_data_len)
2369 memcpy(lap_msg->private_data, private_data, private_data_len); 2401 memcpy(lap_msg->private_data, private_data, private_data_len);
@@ -2394,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
2394 ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); 2426 ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2395 if (ret) 2427 if (ret)
2396 goto out; 2428 goto out;
2429 cm_id_priv->alt_av.timeout =
2430 cm_ack_timeout(cm_id_priv->target_ack_delay,
2431 cm_id_priv->alt_av.timeout - 1);
2397 2432
2398 ret = cm_alloc_msg(cm_id_priv, &msg); 2433 ret = cm_alloc_msg(cm_id_priv, &msg);
2399 if (ret) 2434 if (ret)
@@ -3248,8 +3283,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3248 *qp_attr_mask |= IB_QP_ALT_PATH; 3283 *qp_attr_mask |= IB_QP_ALT_PATH;
3249 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; 3284 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3250 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; 3285 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3251 qp_attr->alt_timeout = 3286 qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3252 cm_id_priv->alt_av.packet_life_time + 1;
3253 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; 3287 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3254 } 3288 }
3255 ret = 0; 3289 ret = 0;
@@ -3287,8 +3321,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3287 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | 3321 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3288 IB_QP_RNR_RETRY | 3322 IB_QP_RNR_RETRY |
3289 IB_QP_MAX_QP_RD_ATOMIC; 3323 IB_QP_MAX_QP_RD_ATOMIC;
3290 qp_attr->timeout = 3324 qp_attr->timeout = cm_id_priv->av.timeout;
3291 cm_id_priv->av.packet_life_time + 1;
3292 qp_attr->retry_cnt = cm_id_priv->retry_count; 3325 qp_attr->retry_cnt = cm_id_priv->retry_count;
3293 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; 3326 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3294 qp_attr->max_rd_atomic = 3327 qp_attr->max_rd_atomic =
@@ -3302,8 +3335,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3302 *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; 3335 *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3303 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; 3336 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3304 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; 3337 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3305 qp_attr->alt_timeout = 3338 qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3306 cm_id_priv->alt_av.packet_life_time + 1;
3307 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; 3339 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3308 qp_attr->path_mig_state = IB_MIG_REARM; 3340 qp_attr->path_mig_state = IB_MIG_REARM;
3309 } 3341 }
@@ -3343,6 +3375,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3343} 3375}
3344EXPORT_SYMBOL(ib_cm_init_qp_attr); 3376EXPORT_SYMBOL(ib_cm_init_qp_attr);
3345 3377
3378void cm_get_ack_delay(struct cm_device *cm_dev)
3379{
3380 struct ib_device_attr attr;
3381
3382 if (ib_query_device(cm_dev->device, &attr))
3383 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3384 else
3385 cm_dev->ack_delay = attr.local_ca_ack_delay;
3386}
3387
3346static void cm_add_one(struct ib_device *device) 3388static void cm_add_one(struct ib_device *device)
3347{ 3389{
3348 struct cm_device *cm_dev; 3390 struct cm_device *cm_dev;
@@ -3367,6 +3409,7 @@ static void cm_add_one(struct ib_device *device)
3367 return; 3409 return;
3368 3410
3369 cm_dev->device = device; 3411 cm_dev->device = device;
3412 cm_get_ack_delay(cm_dev);
3370 3413
3371 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); 3414 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3372 for (i = 1; i <= device->phys_port_cnt; i++) { 3415 for (i = 1; i <= device->phys_port_cnt; i++) {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 32a0e66d2a23..23af7a032a03 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
2326 rep.private_data_len = conn_param->private_data_len; 2326 rep.private_data_len = conn_param->private_data_len;
2327 rep.responder_resources = conn_param->responder_resources; 2327 rep.responder_resources = conn_param->responder_resources;
2328 rep.initiator_depth = conn_param->initiator_depth; 2328 rep.initiator_depth = conn_param->initiator_depth;
2329 rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
2330 rep.failover_accepted = 0; 2329 rep.failover_accepted = 0;
2331 rep.flow_control = conn_param->flow_control; 2330 rep.flow_control = conn_param->flow_control;
2332 rep.rnr_retry_count = conn_param->rnr_retry_count; 2331 rep.rnr_retry_count = conn_param->rnr_retry_count;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 2586a3ee8eba..424983f5b1ee 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
823 param.private_data_len = cmd.len; 823 param.private_data_len = cmd.len;
824 param.responder_resources = cmd.responder_resources; 824 param.responder_resources = cmd.responder_resources;
825 param.initiator_depth = cmd.initiator_depth; 825 param.initiator_depth = cmd.initiator_depth;
826 param.target_ack_delay = cmd.target_ack_delay;
827 param.failover_accepted = cmd.failover_accepted; 826 param.failover_accepted = cmd.failover_accepted;
828 param.flow_control = cmd.flow_control; 827 param.flow_control = cmd.flow_control;
829 param.rnr_retry_count = cmd.rnr_retry_count; 828 param.rnr_retry_count = cmd.rnr_retry_count;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6764d216c887..08b4676a3820 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -281,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
281 rep.private_data_len = sizeof data; 281 rep.private_data_len = sizeof data;
282 rep.flow_control = 0; 282 rep.flow_control = 0;
283 rep.rnr_retry_count = req->rnr_retry_count; 283 rep.rnr_retry_count = req->rnr_retry_count;
284 rep.target_ack_delay = 20; /* FIXME */
285 rep.srq = 1; 284 rep.srq = 1;
286 rep.qp_num = qp->qp_num; 285 rep.qp_num = qp->qp_num;
287 rep.starting_psn = psn; 286 rep.starting_psn = psn;
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 5c070176d9ab..12243e80c706 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -385,7 +385,6 @@ struct ib_cm_rep_param {
385 u8 private_data_len; 385 u8 private_data_len;
386 u8 responder_resources; 386 u8 responder_resources;
387 u8 initiator_depth; 387 u8 initiator_depth;
388 u8 target_ack_delay;
389 u8 failover_accepted; 388 u8 failover_accepted;
390 u8 flow_control; 389 u8 flow_control;
391 u8 rnr_retry_count; 390 u8 rnr_retry_count;