diff options
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 42 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 203 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 |
3 files changed, 211 insertions, 36 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 93d4a9a1e1dd..a0b3782c7625 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
| @@ -132,12 +132,46 @@ struct ipoib_cm_data { | |||
| 132 | __be32 mtu; | 132 | __be32 mtu; |
| 133 | }; | 133 | }; |
| 134 | 134 | ||
| 135 | /* | ||
| 136 | * Quoting the InfiniBand Architecture Specification, section 10.3.1 (Queue Pair and EE Context States): | ||
| 137 | * | ||
| 138 | * Note, for QPs that are associated with an SRQ, the Consumer should take the | ||
| 139 | * QP through the Error State before invoking a Destroy QP or a Modify QP to the | ||
| 140 | * Reset State. The Consumer may invoke the Destroy QP without first performing | ||
| 141 | * a Modify QP to the Error State and waiting for the Affiliated Asynchronous | ||
| 142 | * Last WQE Reached Event. However, if the Consumer does not wait for the | ||
| 143 | * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment | ||
| 144 | * leakage may occur. Therefore, it is good programming practice to tear down a | ||
| 145 | * QP that is associated with an SRQ by using the following process: | ||
| 146 | * | ||
| 147 | * - Put the QP in the Error State | ||
| 148 | * - Wait for the Affiliated Asynchronous Last WQE Reached Event; | ||
| 149 | * - either: | ||
| 150 | * drain the CQ by invoking the Poll CQ verb and either wait for CQ | ||
| 151 | * to be empty or the number of Poll CQ operations has exceeded | ||
| 152 | * CQ capacity size; | ||
| 153 | * - or | ||
| 154 | * post another WR that completes on the same CQ and wait for this | ||
| 155 | * WR to return as a WC; | ||
| 156 | * - and then invoke a Destroy QP or Reset QP. | ||
| 157 | * | ||
| 158 | * We use the second option and wait for a completion on the | ||
| 159 | * rx_drain_qp before destroying QPs attached to our SRQ. | ||
| 160 | */ | ||
| 161 | |||
| 162 | enum ipoib_cm_state { | ||
| 163 | IPOIB_CM_RX_LIVE, | ||
| 164 | IPOIB_CM_RX_ERROR, /* Ignored by stale task */ | ||
| 165 | IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */ | ||
| 166 | }; | ||
| 167 | |||
| 135 | struct ipoib_cm_rx { | 168 | struct ipoib_cm_rx { |
| 136 | struct ib_cm_id *id; | 169 | struct ib_cm_id *id; |
| 137 | struct ib_qp *qp; | 170 | struct ib_qp *qp; |
| 138 | struct list_head list; | 171 | struct list_head list; |
| 139 | struct net_device *dev; | 172 | struct net_device *dev; |
| 140 | unsigned long jiffies; | 173 | unsigned long jiffies; |
| 174 | enum ipoib_cm_state state; | ||
| 141 | }; | 175 | }; |
| 142 | 176 | ||
| 143 | struct ipoib_cm_tx { | 177 | struct ipoib_cm_tx { |
| @@ -165,10 +199,16 @@ struct ipoib_cm_dev_priv { | |||
| 165 | struct ib_srq *srq; | 199 | struct ib_srq *srq; |
| 166 | struct ipoib_cm_rx_buf *srq_ring; | 200 | struct ipoib_cm_rx_buf *srq_ring; |
| 167 | struct ib_cm_id *id; | 201 | struct ib_cm_id *id; |
| 168 | struct list_head passive_ids; | 202 | struct ib_qp *rx_drain_qp; /* generates WR described in 10.3.1 */ |
| 203 | struct list_head passive_ids; /* state: LIVE */ | ||
| 204 | struct list_head rx_error_list; /* state: ERROR */ | ||
| 205 | struct list_head rx_flush_list; /* state: FLUSH, drain not started */ | ||
| 206 | struct list_head rx_drain_list; /* state: FLUSH, drain started */ | ||
| 207 | struct list_head rx_reap_list; /* state: FLUSH, drain done */ | ||
| 169 | struct work_struct start_task; | 208 | struct work_struct start_task; |
| 170 | struct work_struct reap_task; | 209 | struct work_struct reap_task; |
| 171 | struct work_struct skb_task; | 210 | struct work_struct skb_task; |
| 211 | struct work_struct rx_reap_task; | ||
| 172 | struct delayed_work stale_task; | 212 | struct delayed_work stale_task; |
| 173 | struct sk_buff_head skb_queue; | 213 | struct sk_buff_head skb_queue; |
| 174 | struct list_head start_list; | 214 | struct list_head start_list; |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index eec833b81e9b..ffec794b7913 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <net/dst.h> | 37 | #include <net/dst.h> |
| 38 | #include <net/icmp.h> | 38 | #include <net/icmp.h> |
| 39 | #include <linux/icmpv6.h> | 39 | #include <linux/icmpv6.h> |
| 40 | #include <linux/delay.h> | ||
| 40 | 41 | ||
| 41 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA | 42 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA |
| 42 | static int data_debug_level; | 43 | static int data_debug_level; |
| @@ -62,6 +63,16 @@ struct ipoib_cm_id { | |||
| 62 | u32 remote_mtu; | 63 | u32 remote_mtu; |
| 63 | }; | 64 | }; |
| 64 | 65 | ||
| 66 | static struct ib_qp_attr ipoib_cm_err_attr = { | ||
| 67 | .qp_state = IB_QPS_ERR | ||
| 68 | }; | ||
| 69 | |||
| 70 | #define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff | ||
| 71 | |||
| 72 | static struct ib_recv_wr ipoib_cm_rx_drain_wr = { | ||
| 73 | .wr_id = IPOIB_CM_RX_DRAIN_WRID | ||
| 74 | }; | ||
| 75 | |||
| 65 | static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, | 76 | static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, |
| 66 | struct ib_cm_event *event); | 77 | struct ib_cm_event *event); |
| 67 | 78 | ||
| @@ -150,11 +161,44 @@ partial_error: | |||
| 150 | return NULL; | 161 | return NULL; |
| 151 | } | 162 | } |
| 152 | 163 | ||
| 164 | static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv) | ||
| 165 | { | ||
| 166 | struct ib_recv_wr *bad_wr; | ||
| 167 | |||
| 168 | /* rx_drain_qp receive queue depth is 1, so | ||
| 169 | * make sure we have at most 1 outstanding WR. */ | ||
| 170 | if (list_empty(&priv->cm.rx_flush_list) || | ||
| 171 | !list_empty(&priv->cm.rx_drain_list)) | ||
| 172 | return; | ||
| 173 | |||
| 174 | if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr)) | ||
| 175 | ipoib_warn(priv, "failed to post rx_drain wr\n"); | ||
| 176 | |||
| 177 | list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); | ||
| 178 | } | ||
| 179 | |||
| 180 | static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) | ||
| 181 | { | ||
| 182 | struct ipoib_cm_rx *p = ctx; | ||
| 183 | struct ipoib_dev_priv *priv = netdev_priv(p->dev); | ||
| 184 | unsigned long flags; | ||
| 185 | |||
| 186 | if (event->event != IB_EVENT_QP_LAST_WQE_REACHED) | ||
| 187 | return; | ||
| 188 | |||
| 189 | spin_lock_irqsave(&priv->lock, flags); | ||
| 190 | list_move(&p->list, &priv->cm.rx_flush_list); | ||
| 191 | p->state = IPOIB_CM_RX_FLUSH; | ||
| 192 | ipoib_cm_start_rx_drain(priv); | ||
| 193 | spin_unlock_irqrestore(&priv->lock, flags); | ||
| 194 | } | ||
| 195 | |||
| 153 | static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, | 196 | static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, |
| 154 | struct ipoib_cm_rx *p) | 197 | struct ipoib_cm_rx *p) |
| 155 | { | 198 | { |
| 156 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 199 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
| 157 | struct ib_qp_init_attr attr = { | 200 | struct ib_qp_init_attr attr = { |
| 201 | .event_handler = ipoib_cm_rx_event_handler, | ||
| 158 | .send_cq = priv->cq, /* does not matter, we never send anything */ | 202 | .send_cq = priv->cq, /* does not matter, we never send anything */ |
| 159 | .recv_cq = priv->cq, | 203 | .recv_cq = priv->cq, |
| 160 | .srq = priv->cm.srq, | 204 | .srq = priv->cm.srq, |
| @@ -256,6 +300,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even | |||
| 256 | 300 | ||
| 257 | cm_id->context = p; | 301 | cm_id->context = p; |
| 258 | p->jiffies = jiffies; | 302 | p->jiffies = jiffies; |
| 303 | p->state = IPOIB_CM_RX_LIVE; | ||
| 259 | spin_lock_irq(&priv->lock); | 304 | spin_lock_irq(&priv->lock); |
| 260 | if (list_empty(&priv->cm.passive_ids)) | 305 | if (list_empty(&priv->cm.passive_ids)) |
| 261 | queue_delayed_work(ipoib_workqueue, | 306 | queue_delayed_work(ipoib_workqueue, |
| @@ -277,7 +322,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, | |||
| 277 | { | 322 | { |
| 278 | struct ipoib_cm_rx *p; | 323 | struct ipoib_cm_rx *p; |
| 279 | struct ipoib_dev_priv *priv; | 324 | struct ipoib_dev_priv *priv; |
| 280 | int ret; | ||
| 281 | 325 | ||
| 282 | switch (event->event) { | 326 | switch (event->event) { |
| 283 | case IB_CM_REQ_RECEIVED: | 327 | case IB_CM_REQ_RECEIVED: |
| @@ -289,20 +333,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, | |||
| 289 | case IB_CM_REJ_RECEIVED: | 333 | case IB_CM_REJ_RECEIVED: |
| 290 | p = cm_id->context; | 334 | p = cm_id->context; |
| 291 | priv = netdev_priv(p->dev); | 335 | priv = netdev_priv(p->dev); |
| 292 | spin_lock_irq(&priv->lock); | 336 | if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) |
| 293 | if (list_empty(&p->list)) | 337 | ipoib_warn(priv, "unable to move qp to error state\n"); |
| 294 | ret = 0; /* Connection is going away already. */ | 338 | /* Fall through */ |
| 295 | else { | ||
| 296 | list_del_init(&p->list); | ||
| 297 | ret = -ECONNRESET; | ||
| 298 | } | ||
| 299 | spin_unlock_irq(&priv->lock); | ||
| 300 | if (ret) { | ||
| 301 | ib_destroy_qp(p->qp); | ||
| 302 | kfree(p); | ||
| 303 | return ret; | ||
| 304 | } | ||
| 305 | return 0; | ||
| 306 | default: | 339 | default: |
| 307 | return 0; | 340 | return 0; |
| 308 | } | 341 | } |
| @@ -354,8 +387,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) | |||
| 354 | wr_id, wc->status); | 387 | wr_id, wc->status); |
| 355 | 388 | ||
| 356 | if (unlikely(wr_id >= ipoib_recvq_size)) { | 389 | if (unlikely(wr_id >= ipoib_recvq_size)) { |
| 357 | ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", | 390 | if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) { |
| 358 | wr_id, ipoib_recvq_size); | 391 | spin_lock_irqsave(&priv->lock, flags); |
| 392 | list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); | ||
| 393 | ipoib_cm_start_rx_drain(priv); | ||
| 394 | queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); | ||
| 395 | spin_unlock_irqrestore(&priv->lock, flags); | ||
| 396 | } else | ||
| 397 | ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", | ||
| 398 | wr_id, ipoib_recvq_size); | ||
| 359 | return; | 399 | return; |
| 360 | } | 400 | } |
| 361 | 401 | ||
| @@ -374,9 +414,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) | |||
| 374 | if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { | 414 | if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { |
| 375 | spin_lock_irqsave(&priv->lock, flags); | 415 | spin_lock_irqsave(&priv->lock, flags); |
| 376 | p->jiffies = jiffies; | 416 | p->jiffies = jiffies; |
| 377 | /* Move this entry to list head, but do | 417 | /* Move this entry to list head, but do not re-add it |
| 378 | * not re-add it if it has been removed. */ | 418 | * if it has been moved out of the list. */ |
| 379 | if (!list_empty(&p->list)) | 419 | if (p->state == IPOIB_CM_RX_LIVE) |
| 380 | list_move(&p->list, &priv->cm.passive_ids); | 420 | list_move(&p->list, &priv->cm.passive_ids); |
| 381 | spin_unlock_irqrestore(&priv->lock, flags); | 421 | spin_unlock_irqrestore(&priv->lock, flags); |
| 382 | } | 422 | } |
| @@ -583,17 +623,43 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) | |||
| 583 | int ipoib_cm_dev_open(struct net_device *dev) | 623 | int ipoib_cm_dev_open(struct net_device *dev) |
| 584 | { | 624 | { |
| 585 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 625 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
| 626 | struct ib_qp_init_attr qp_init_attr = { | ||
| 627 | .send_cq = priv->cq, /* does not matter, we never send anything */ | ||
| 628 | .recv_cq = priv->cq, | ||
| 629 | .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ | ||
| 630 | .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ | ||
| 631 | .cap.max_recv_wr = 1, | ||
| 632 | .cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */ | ||
| 633 | .sq_sig_type = IB_SIGNAL_ALL_WR, | ||
| 634 | .qp_type = IB_QPT_UC, | ||
| 635 | }; | ||
| 586 | int ret; | 636 | int ret; |
| 587 | 637 | ||
| 588 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) | 638 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) |
| 589 | return 0; | 639 | return 0; |
| 590 | 640 | ||
| 641 | priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr); | ||
| 642 | if (IS_ERR(priv->cm.rx_drain_qp)) { | ||
| 643 | printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); | ||
| 644 | ret = PTR_ERR(priv->cm.rx_drain_qp); | ||
| 645 | return ret; | ||
| 646 | } | ||
| 647 | |||
| 648 | /* | ||
| 649 | * We put the QP in error state directly. This way, a "flush | ||
| 650 | * error" WC will be immediately generated for each WR we post. | ||
| 651 | */ | ||
| 652 | ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE); | ||
| 653 | if (ret) { | ||
| 654 | ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret); | ||
| 655 | goto err_qp; | ||
| 656 | } | ||
| 657 | |||
| 591 | priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); | 658 | priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); |
| 592 | if (IS_ERR(priv->cm.id)) { | 659 | if (IS_ERR(priv->cm.id)) { |
| 593 | printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); | 660 | printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); |
| 594 | ret = PTR_ERR(priv->cm.id); | 661 | ret = PTR_ERR(priv->cm.id); |
| 595 | priv->cm.id = NULL; | 662 | goto err_cm; |
| 596 | return ret; | ||
| 597 | } | 663 | } |
| 598 | 664 | ||
| 599 | ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), | 665 | ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), |
| @@ -601,35 +667,79 @@ int ipoib_cm_dev_open(struct net_device *dev) | |||
| 601 | if (ret) { | 667 | if (ret) { |
| 602 | printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, | 668 | printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, |
| 603 | IPOIB_CM_IETF_ID | priv->qp->qp_num); | 669 | IPOIB_CM_IETF_ID | priv->qp->qp_num); |
| 604 | ib_destroy_cm_id(priv->cm.id); | 670 | goto err_listen; |
| 605 | priv->cm.id = NULL; | ||
| 606 | return ret; | ||
| 607 | } | 671 | } |
| 672 | |||
| 608 | return 0; | 673 | return 0; |
| 674 | |||
| 675 | err_listen: | ||
| 676 | ib_destroy_cm_id(priv->cm.id); | ||
| 677 | err_cm: | ||
| 678 | priv->cm.id = NULL; | ||
| 679 | err_qp: | ||
| 680 | ib_destroy_qp(priv->cm.rx_drain_qp); | ||
| 681 | return ret; | ||
| 609 | } | 682 | } |
| 610 | 683 | ||
| 611 | void ipoib_cm_dev_stop(struct net_device *dev) | 684 | void ipoib_cm_dev_stop(struct net_device *dev) |
| 612 | { | 685 | { |
| 613 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 686 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
| 614 | struct ipoib_cm_rx *p; | 687 | struct ipoib_cm_rx *p, *n; |
| 688 | unsigned long begin; | ||
| 689 | LIST_HEAD(list); | ||
| 690 | int ret; | ||
| 615 | 691 | ||
| 616 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) | 692 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) |
| 617 | return; | 693 | return; |
| 618 | 694 | ||
| 619 | ib_destroy_cm_id(priv->cm.id); | 695 | ib_destroy_cm_id(priv->cm.id); |
| 620 | priv->cm.id = NULL; | 696 | priv->cm.id = NULL; |
| 697 | |||
| 621 | spin_lock_irq(&priv->lock); | 698 | spin_lock_irq(&priv->lock); |
| 622 | while (!list_empty(&priv->cm.passive_ids)) { | 699 | while (!list_empty(&priv->cm.passive_ids)) { |
| 623 | p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); | 700 | p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); |
| 624 | list_del_init(&p->list); | 701 | list_move(&p->list, &priv->cm.rx_error_list); |
| 702 | p->state = IPOIB_CM_RX_ERROR; | ||
| 625 | spin_unlock_irq(&priv->lock); | 703 | spin_unlock_irq(&priv->lock); |
| 704 | ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); | ||
| 705 | if (ret) | ||
| 706 | ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); | ||
| 707 | spin_lock_irq(&priv->lock); | ||
| 708 | } | ||
| 709 | |||
| 710 | /* Wait for all RX to be drained */ | ||
| 711 | begin = jiffies; | ||
| 712 | |||
| 713 | while (!list_empty(&priv->cm.rx_error_list) || | ||
| 714 | !list_empty(&priv->cm.rx_flush_list) || | ||
| 715 | !list_empty(&priv->cm.rx_drain_list)) { | ||
| 716 | if (!time_after(jiffies, begin + 5 * HZ)) { | ||
| 717 | ipoib_warn(priv, "RX drain timing out\n"); | ||
| 718 | |||
| 719 | /* | ||
| 720 | * assume the HW is wedged and just free up everything. | ||
| 721 | */ | ||
| 722 | list_splice_init(&priv->cm.rx_flush_list, &list); | ||
| 723 | list_splice_init(&priv->cm.rx_error_list, &list); | ||
| 724 | list_splice_init(&priv->cm.rx_drain_list, &list); | ||
| 725 | break; | ||
| 726 | } | ||
| 727 | spin_unlock_irq(&priv->lock); | ||
| 728 | msleep(1); | ||
| 729 | spin_lock_irq(&priv->lock); | ||
| 730 | } | ||
| 731 | |||
| 732 | list_splice_init(&priv->cm.rx_reap_list, &list); | ||
| 733 | |||
| 734 | spin_unlock_irq(&priv->lock); | ||
| 735 | |||
| 736 | list_for_each_entry_safe(p, n, &list, list) { | ||
| 626 | ib_destroy_cm_id(p->id); | 737 | ib_destroy_cm_id(p->id); |
| 627 | ib_destroy_qp(p->qp); | 738 | ib_destroy_qp(p->qp); |
| 628 | kfree(p); | 739 | kfree(p); |
| 629 | spin_lock_irq(&priv->lock); | ||
| 630 | } | 740 | } |
| 631 | spin_unlock_irq(&priv->lock); | ||
| 632 | 741 | ||
| 742 | ib_destroy_qp(priv->cm.rx_drain_qp); | ||
| 633 | cancel_delayed_work(&priv->cm.stale_task); | 743 | cancel_delayed_work(&priv->cm.stale_task); |
| 634 | } | 744 | } |
| 635 | 745 | ||
| @@ -1079,24 +1189,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, | |||
| 1079 | queue_work(ipoib_workqueue, &priv->cm.skb_task); | 1189 | queue_work(ipoib_workqueue, &priv->cm.skb_task); |
| 1080 | } | 1190 | } |
| 1081 | 1191 | ||
| 1192 | static void ipoib_cm_rx_reap(struct work_struct *work) | ||
| 1193 | { | ||
| 1194 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, | ||
| 1195 | cm.rx_reap_task); | ||
| 1196 | struct ipoib_cm_rx *p, *n; | ||
| 1197 | LIST_HEAD(list); | ||
| 1198 | |||
| 1199 | spin_lock_irq(&priv->lock); | ||
| 1200 | list_splice_init(&priv->cm.rx_reap_list, &list); | ||
| 1201 | spin_unlock_irq(&priv->lock); | ||
| 1202 | |||
| 1203 | list_for_each_entry_safe(p, n, &list, list) { | ||
| 1204 | ib_destroy_cm_id(p->id); | ||
| 1205 | ib_destroy_qp(p->qp); | ||
| 1206 | kfree(p); | ||
| 1207 | } | ||
| 1208 | } | ||
| 1209 | |||
| 1082 | static void ipoib_cm_stale_task(struct work_struct *work) | 1210 | static void ipoib_cm_stale_task(struct work_struct *work) |
| 1083 | { | 1211 | { |
| 1084 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, | 1212 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
| 1085 | cm.stale_task.work); | 1213 | cm.stale_task.work); |
| 1086 | struct ipoib_cm_rx *p; | 1214 | struct ipoib_cm_rx *p; |
| 1215 | int ret; | ||
| 1087 | 1216 | ||
| 1088 | spin_lock_irq(&priv->lock); | 1217 | spin_lock_irq(&priv->lock); |
| 1089 | while (!list_empty(&priv->cm.passive_ids)) { | 1218 | while (!list_empty(&priv->cm.passive_ids)) { |
| 1090 | /* List if sorted by LRU, start from tail, | 1219 | /* List is sorted by LRU, start from tail, |
| 1091 | * stop when we see a recently used entry */ | 1220 | * stop when we see a recently used entry */ |
| 1092 | p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); | 1221 | p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); |
| 1093 | if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) | 1222 | if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) |
| 1094 | break; | 1223 | break; |
| 1095 | list_del_init(&p->list); | 1224 | list_move(&p->list, &priv->cm.rx_error_list); |
| 1225 | p->state = IPOIB_CM_RX_ERROR; | ||
| 1096 | spin_unlock_irq(&priv->lock); | 1226 | spin_unlock_irq(&priv->lock); |
| 1097 | ib_destroy_cm_id(p->id); | 1227 | ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); |
| 1098 | ib_destroy_qp(p->qp); | 1228 | if (ret) |
| 1099 | kfree(p); | 1229 | ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); |
| 1100 | spin_lock_irq(&priv->lock); | 1230 | spin_lock_irq(&priv->lock); |
| 1101 | } | 1231 | } |
| 1102 | 1232 | ||
| @@ -1164,9 +1294,14 @@ int ipoib_cm_dev_init(struct net_device *dev) | |||
| 1164 | INIT_LIST_HEAD(&priv->cm.passive_ids); | 1294 | INIT_LIST_HEAD(&priv->cm.passive_ids); |
| 1165 | INIT_LIST_HEAD(&priv->cm.reap_list); | 1295 | INIT_LIST_HEAD(&priv->cm.reap_list); |
| 1166 | INIT_LIST_HEAD(&priv->cm.start_list); | 1296 | INIT_LIST_HEAD(&priv->cm.start_list); |
| 1297 | INIT_LIST_HEAD(&priv->cm.rx_error_list); | ||
| 1298 | INIT_LIST_HEAD(&priv->cm.rx_flush_list); | ||
| 1299 | INIT_LIST_HEAD(&priv->cm.rx_drain_list); | ||
| 1300 | INIT_LIST_HEAD(&priv->cm.rx_reap_list); | ||
| 1167 | INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); | 1301 | INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); |
| 1168 | INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); | 1302 | INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); |
| 1169 | INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); | 1303 | INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); |
| 1304 | INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); | ||
| 1170 | INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); | 1305 | INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); |
| 1171 | 1306 | ||
| 1172 | skb_queue_head_init(&priv->cm.skb_queue); | 1307 | skb_queue_head_init(&priv->cm.skb_queue); |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 791252621b26..982eb88e27ec 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c | |||
| @@ -173,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) | |||
| 173 | size = ipoib_sendq_size + ipoib_recvq_size + 1; | 173 | size = ipoib_sendq_size + ipoib_recvq_size + 1; |
| 174 | ret = ipoib_cm_dev_init(dev); | 174 | ret = ipoib_cm_dev_init(dev); |
| 175 | if (!ret) | 175 | if (!ret) |
| 176 | size += ipoib_recvq_size; | 176 | size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; |
| 177 | 177 | ||
| 178 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); | 178 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); |
| 179 | if (IS_ERR(priv->cq)) { | 179 | if (IS_ERR(priv->cq)) { |
