aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Alexander Schmidt <alexs@linux.vnet.ibm.com> 2008-09-20 23:05:21 -0400
committer: Roland Dreier <rolandd@cisco.com> 2008-09-20 23:05:21 -0400
commit: b9012e0a4255c93e1d81f1ccee591de6414b5955 (patch)
tree: f96df62a968eab98722256251914070fa7bb0c2c /drivers
parent: 9824b8f11373b0df806c135a342da9319ef1d893 (diff)
IB/ehca: Generate flush status CQ entries
When a QP goes into error state, it is required that CQ entries with a flush error status are delivered to the application for any outstanding work requests. eHCA does not do this in hardware, so this patch adds software flush CQE generation to the ehca driver.

Whenever a QP gets into error state, it is added to the QP error list of its respective CQ. If the error QP list of a CQ is not empty, poll_cq() generates flush CQEs before polling the actual CQ.

Signed-off-by: Alexander Schmidt <alexs@linux.vnet.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h14
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c225
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c211
5 files changed, 412 insertions, 43 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1ab919f836a8..5d7b7855afb9 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -164,6 +164,13 @@ struct ehca_qmap_entry {
164 u16 reported; 164 u16 reported;
165}; 165};
166 166
/*
 * Bookkeeping for one work queue (send or receive) of a QP, used by the
 * software flush CQE generation.
 */
167struct ehca_queue_map {
/* array of qmap entries, allocated with 'entries' elements */
168 struct ehca_qmap_entry *map;
/* number of elements in 'map' (queue_length / qe_size of the queue) */
169 unsigned int entries;
/* index into 'map'; advanced (with wrap-around) for every reported CQE */
170 unsigned int tail;
/* CQEs that still have to be polled before the QP joins the error list */
171 unsigned int left_to_poll;
172};
173
167struct ehca_qp { 174struct ehca_qp {
168 union { 175 union {
169 struct ib_qp ib_qp; 176 struct ib_qp ib_qp;
@@ -173,8 +180,9 @@ struct ehca_qp {
173 enum ehca_ext_qp_type ext_type; 180 enum ehca_ext_qp_type ext_type;
174 enum ib_qp_state state; 181 enum ib_qp_state state;
175 struct ipz_queue ipz_squeue; 182 struct ipz_queue ipz_squeue;
176 struct ehca_qmap_entry *sq_map; 183 struct ehca_queue_map sq_map;
177 struct ipz_queue ipz_rqueue; 184 struct ipz_queue ipz_rqueue;
185 struct ehca_queue_map rq_map;
178 struct h_galpas galpas; 186 struct h_galpas galpas;
179 u32 qkey; 187 u32 qkey;
180 u32 real_qp_num; 188 u32 real_qp_num;
@@ -204,6 +212,8 @@ struct ehca_qp {
204 atomic_t nr_events; /* events seen */ 212 atomic_t nr_events; /* events seen */
205 wait_queue_head_t wait_completion; 213 wait_queue_head_t wait_completion;
206 int mig_armed; 214 int mig_armed;
215 struct list_head sq_err_node;
216 struct list_head rq_err_node;
207}; 217};
208 218
209#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) 219#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -233,6 +243,8 @@ struct ehca_cq {
233 /* mmap counter for resources mapped into user space */ 243 /* mmap counter for resources mapped into user space */
234 u32 mm_count_queue; 244 u32 mm_count_queue;
235 u32 mm_count_galpa; 245 u32 mm_count_galpa;
246 struct list_head sqp_err_list;
247 struct list_head rqp_err_list;
236}; 248};
237 249
238enum ehca_mr_flag { 250enum ehca_mr_flag {
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 5540b276a33c..33647a95eb9a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
276 for (i = 0; i < QP_HASHTAB_LEN; i++) 276 for (i = 0; i < QP_HASHTAB_LEN; i++)
277 INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); 277 INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
278 278
279 INIT_LIST_HEAD(&my_cq->sqp_err_list);
280 INIT_LIST_HEAD(&my_cq->rqp_err_list);
281
279 if (context) { 282 if (context) {
280 struct ipz_queue *ipz_queue = &my_cq->ipz_queue; 283 struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
281 struct ehca_create_cq_resp resp; 284 struct ehca_create_cq_resp resp;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index a8a2ea585d2f..8f7f282ead65 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
197int ehca_calc_ipd(struct ehca_shca *shca, int port, 197int ehca_calc_ipd(struct ehca_shca *shca, int port,
198 enum ib_rate path_rate, u32 *ipd); 198 enum ib_rate path_rate, u32 *ipd);
199 199
200void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
201
200#ifdef CONFIG_PPC_64K_PAGES 202#ifdef CONFIG_PPC_64K_PAGES
201void *ehca_alloc_fw_ctrlblock(gfp_t flags); 203void *ehca_alloc_fw_ctrlblock(gfp_t flags);
202void ehca_free_fw_ctrlblock(void *ptr); 204void ehca_free_fw_ctrlblock(void *ptr);
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index b6bcee036734..4dbe2870e014 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
396 queue->is_small = (queue->page_size != 0); 396 queue->is_small = (queue->page_size != 0);
397} 397}
398 398
399/* needs to be called with cq->spinlock held */
400void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
401{
402 struct list_head *list, *node;
403
404 /* TODO: support low latency QPs */
405 if (qp->ext_type == EQPT_LLQP)
406 return;
407
408 if (on_sq) {
409 list = &qp->send_cq->sqp_err_list;
410 node = &qp->sq_err_node;
411 } else {
412 list = &qp->recv_cq->rqp_err_list;
413 node = &qp->rq_err_node;
414 }
415
416 if (list_empty(node))
417 list_add_tail(node, list);
418
419 return;
420}
421
422static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
423{
424 unsigned long flags;
425
426 spin_lock_irqsave(&cq->spinlock, flags);
427
428 if (!list_empty(node))
429 list_del_init(node);
430
431 spin_unlock_irqrestore(&cq->spinlock, flags);
432}
433
434static void reset_queue_map(struct ehca_queue_map *qmap)
435{
436 int i;
437
438 qmap->tail = 0;
439 for (i = 0; i < qmap->entries; i++)
440 qmap->map[i].reported = 1;
441}
442
399/* 443/*
400 * Create an ib_qp struct that is either a QP or an SRQ, depending on 444 * Create an ib_qp struct that is either a QP or an SRQ, depending on
401 * the value of the is_srq parameter. If init_attr and srq_init_attr share 445 * the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp(
407 struct ib_srq_init_attr *srq_init_attr, 451 struct ib_srq_init_attr *srq_init_attr,
408 struct ib_udata *udata, int is_srq) 452 struct ib_udata *udata, int is_srq)
409{ 453{
410 struct ehca_qp *my_qp; 454 struct ehca_qp *my_qp, *my_srq = NULL;
411 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); 455 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
412 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, 456 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
413 ib_device); 457 ib_device);
414 struct ib_ucontext *context = NULL; 458 struct ib_ucontext *context = NULL;
415 u32 nr_qes;
416 u64 h_ret; 459 u64 h_ret;
417 int is_llqp = 0, has_srq = 0; 460 int is_llqp = 0, has_srq = 0;
418 int qp_type, max_send_sge, max_recv_sge, ret; 461 int qp_type, max_send_sge, max_recv_sge, ret;
@@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_qp(
457 500
458 /* handle SRQ base QPs */ 501 /* handle SRQ base QPs */
459 if (init_attr->srq) { 502 if (init_attr->srq) {
460 struct ehca_qp *my_srq = 503 my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
461 container_of(init_attr->srq, struct ehca_qp, ib_srq);
462 504
463 has_srq = 1; 505 has_srq = 1;
464 parms.ext_type = EQPT_SRQBASE; 506 parms.ext_type = EQPT_SRQBASE;
@@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_qp(
716 "and pages ret=%i", ret); 758 "and pages ret=%i", ret);
717 goto create_qp_exit2; 759 goto create_qp_exit2;
718 } 760 }
719 nr_qes = my_qp->ipz_squeue.queue_length / 761
762 my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
720 my_qp->ipz_squeue.qe_size; 763 my_qp->ipz_squeue.qe_size;
721 my_qp->sq_map = vmalloc(nr_qes * 764 my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
722 sizeof(struct ehca_qmap_entry)); 765 sizeof(struct ehca_qmap_entry));
723 if (!my_qp->sq_map) { 766 if (!my_qp->sq_map.map) {
724 ehca_err(pd->device, "Couldn't allocate squeue " 767 ehca_err(pd->device, "Couldn't allocate squeue "
725 "map ret=%i", ret); 768 "map ret=%i", ret);
726 goto create_qp_exit3; 769 goto create_qp_exit3;
727 } 770 }
771 INIT_LIST_HEAD(&my_qp->sq_err_node);
772 /* to avoid the generation of bogus flush CQEs */
773 reset_queue_map(&my_qp->sq_map);
728 } 774 }
729 775
730 if (HAS_RQ(my_qp)) { 776 if (HAS_RQ(my_qp)) {
@@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_qp(
736 "and pages ret=%i", ret); 782 "and pages ret=%i", ret);
737 goto create_qp_exit4; 783 goto create_qp_exit4;
738 } 784 }
785
786 my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
787 my_qp->ipz_rqueue.qe_size;
788 my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
789 sizeof(struct ehca_qmap_entry));
790 if (!my_qp->rq_map.map) {
791 ehca_err(pd->device, "Couldn't allocate squeue "
792 "map ret=%i", ret);
793 goto create_qp_exit5;
794 }
795 INIT_LIST_HEAD(&my_qp->rq_err_node);
796 /* to avoid the generation of bogus flush CQEs */
797 reset_queue_map(&my_qp->rq_map);
798 } else if (init_attr->srq) {
799 /* this is a base QP, use the queue map of the SRQ */
800 my_qp->rq_map = my_srq->rq_map;
801 INIT_LIST_HEAD(&my_qp->rq_err_node);
802
803 my_qp->ipz_rqueue = my_srq->ipz_rqueue;
739 } 804 }
740 805
741 if (is_srq) { 806 if (is_srq) {
@@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_qp(
799 if (ret) { 864 if (ret) {
800 ehca_err(pd->device, 865 ehca_err(pd->device,
801 "Couldn't assign qp to send_cq ret=%i", ret); 866 "Couldn't assign qp to send_cq ret=%i", ret);
802 goto create_qp_exit6; 867 goto create_qp_exit7;
803 } 868 }
804 } 869 }
805 870
@@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_qp(
825 if (ib_copy_to_udata(udata, &resp, sizeof resp)) { 890 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
826 ehca_err(pd->device, "Copy to udata failed"); 891 ehca_err(pd->device, "Copy to udata failed");
827 ret = -EINVAL; 892 ret = -EINVAL;
828 goto create_qp_exit7; 893 goto create_qp_exit8;
829 } 894 }
830 } 895 }
831 896
832 return my_qp; 897 return my_qp;
833 898
834create_qp_exit7: 899create_qp_exit8:
835 ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); 900 ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
836 901
837create_qp_exit6: 902create_qp_exit7:
838 kfree(my_qp->mod_qp_parm); 903 kfree(my_qp->mod_qp_parm);
839 904
905create_qp_exit6:
906 if (HAS_RQ(my_qp))
907 vfree(my_qp->rq_map.map);
908
840create_qp_exit5: 909create_qp_exit5:
841 if (HAS_RQ(my_qp)) 910 if (HAS_RQ(my_qp))
842 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); 911 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
843 912
844create_qp_exit4: 913create_qp_exit4:
845 if (HAS_SQ(my_qp)) 914 if (HAS_SQ(my_qp))
846 vfree(my_qp->sq_map); 915 vfree(my_qp->sq_map.map);
847 916
848create_qp_exit3: 917create_qp_exit3:
849 if (HAS_SQ(my_qp)) 918 if (HAS_SQ(my_qp))
@@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
1035 return 0; 1104 return 0;
1036} 1105}
1037 1106
1107static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
1108 struct ehca_queue_map *qmap)
1109{
1110 void *wqe_v;
1111 u64 q_ofs;
1112 u32 wqe_idx;
1113
1114 /* convert real to abs address */
1115 wqe_p = wqe_p & (~(1UL << 63));
1116
1117 wqe_v = abs_to_virt(wqe_p);
1118
1119 if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
1120 ehca_gen_err("Invalid offset for calculating left cqes "
1121 "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
1122 return -EFAULT;
1123 }
1124
1125 wqe_idx = q_ofs / ipz_queue->qe_size;
1126 if (wqe_idx < qmap->tail)
1127 qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
1128 else
1129 qmap->left_to_poll = wqe_idx - qmap->tail;
1130
1131 return 0;
1132}
1133
/*
 * Determine how many CQEs are still outstanding on the send and receive
 * queues of a QP and, if there are none, put the QP on the error lists of
 * its CQs right away.
 *
 * For every QP except SRQ base QPs, hipz_h_disable_and_get_wqe() supplies
 * the send and receive WQE pointers, which calc_left_cqes() turns into
 * sq_map.left_to_poll and rq_map.left_to_poll. For SRQ base QPs both
 * counters are simply set to 0.
 *
 * Returns 0 on success, the result of ehca2ib_return_code() if the hcall
 * fails, or the error code returned by calc_left_cqes().
 */
1134static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
1135{
1136 u64 h_ret;
1137 void *send_wqe_p, *recv_wqe_p;
1138 int ret;
1139 unsigned long flags;
1140 int qp_num = my_qp->ib_qp.qp_num;
1141
1142 /* this hcall is not supported on base QPs */
1143 if (my_qp->ext_type != EQPT_SRQBASE) {
1144 /* get send and receive wqe pointer */
1145 h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
1146 my_qp->ipz_qp_handle, &my_qp->pf,
1147 &send_wqe_p, &recv_wqe_p, 4);
1148 if (h_ret != H_SUCCESS) {
1149 ehca_err(&shca->ib_device, "disable_and_get_wqe() "
1150 "failed ehca_qp=%p qp_num=%x h_ret=%li",
1151 my_qp, qp_num, h_ret);
1152 return ehca2ib_return_code(h_ret);
1153 }
1154
1155 /*
1156 * acquire lock to ensure that nobody is polling the cq which
1157 * could mean that the qmap->tail pointer is in an
1158 * inconsistent state.
1159 */
1160 spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
1161 ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
1162 &my_qp->sq_map);
1163 spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
1164 if (ret)
1165 return ret;
1166
1167
/* same calculation for the receive queue, under the recv CQ lock */
1168 spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
1169 ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
1170 &my_qp->rq_map);
1171 spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
1172 if (ret)
1173 return ret;
1174 } else {
/* SRQ base QP: no hcall available, so treat both queues as drained */
1175 spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
1176 my_qp->sq_map.left_to_poll = 0;
1177 spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
1178
1179 spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
1180 my_qp->rq_map.left_to_poll = 0;
1181 spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
1182 }
1183
1184 /* this assures flush cqes being generated only for pending wqes */
/*
 * NOTE(review): both left_to_poll counters are read here without holding
 * either CQ spinlock - confirm that this is intentional.
 */
1185 if ((my_qp->sq_map.left_to_poll == 0) &&
1186 (my_qp->rq_map.left_to_poll == 0)) {
1187 spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
1188 ehca_add_to_err_list(my_qp, 1);
1189 spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
1190
1191 if (HAS_RQ(my_qp)) {
1192 spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
1193 ehca_add_to_err_list(my_qp, 0);
1194 spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
1195 flags);
1196 }
1197 }
1198
1199 return 0;
1200}
1201
1038/* 1202/*
1039 * internal_modify_qp with circumvention to handle aqp0 properly 1203 * internal_modify_qp with circumvention to handle aqp0 properly
1040 * smi_reset2init indicates if this is an internal reset-to-init-call for 1204 * smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1539 goto modify_qp_exit2; 1703 goto modify_qp_exit2;
1540 } 1704 }
1541 } 1705 }
1706 if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
1707 ret = check_for_left_cqes(my_qp, shca);
1708 if (ret)
1709 goto modify_qp_exit2;
1710 }
1542 1711
1543 if (statetrans == IB_QPST_ANY2RESET) { 1712 if (statetrans == IB_QPST_ANY2RESET) {
1544 ipz_qeit_reset(&my_qp->ipz_rqueue); 1713 ipz_qeit_reset(&my_qp->ipz_rqueue);
1545 ipz_qeit_reset(&my_qp->ipz_squeue); 1714 ipz_qeit_reset(&my_qp->ipz_squeue);
1715
1716 if (qp_cur_state == IB_QPS_ERR) {
1717 del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
1718
1719 if (HAS_RQ(my_qp))
1720 del_from_err_list(my_qp->recv_cq,
1721 &my_qp->rq_err_node);
1722 }
1723 reset_queue_map(&my_qp->sq_map);
1724
1725 if (HAS_RQ(my_qp))
1726 reset_queue_map(&my_qp->rq_map);
1546 } 1727 }
1547 1728
1548 if (attr_mask & IB_QP_QKEY) 1729 if (attr_mask & IB_QP_QKEY)
@@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1958 idr_remove(&ehca_qp_idr, my_qp->token); 2139 idr_remove(&ehca_qp_idr, my_qp->token);
1959 write_unlock_irqrestore(&ehca_qp_idr_lock, flags); 2140 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
1960 2141
2142 /*
2143 * SRQs will never get into an error list and do not have a recv_cq,
2144 * so we need to skip them here.
2145 */
2146 if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
2147 del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
2148
2149 if (HAS_SQ(my_qp))
2150 del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
2151
1961 /* now wait until all pending events have completed */ 2152 /* now wait until all pending events have completed */
1962 wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); 2153 wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
1963 2154
@@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1983 if (qp_type == IB_QPT_GSI) { 2174 if (qp_type == IB_QPT_GSI) {
1984 struct ib_event event; 2175 struct ib_event event;
1985 ehca_info(dev, "device %s: port %x is inactive.", 2176 ehca_info(dev, "device %s: port %x is inactive.",
1986 shca->ib_device.name, port_num); 2177 shca->ib_device.name, port_num);
1987 event.device = &shca->ib_device; 2178 event.device = &shca->ib_device;
1988 event.event = IB_EVENT_PORT_ERR; 2179 event.event = IB_EVENT_PORT_ERR;
1989 event.element.port_num = port_num; 2180 event.element.port_num = port_num;
@@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1991 ib_dispatch_event(&event); 2182 ib_dispatch_event(&event);
1992 } 2183 }
1993 2184
1994 if (HAS_RQ(my_qp)) 2185 if (HAS_RQ(my_qp)) {
1995 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); 2186 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
2187
2188 vfree(my_qp->rq_map.map);
2189 }
1996 if (HAS_SQ(my_qp)) { 2190 if (HAS_SQ(my_qp)) {
1997 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); 2191 ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
1998 vfree(my_qp->sq_map); 2192
2193 vfree(my_qp->sq_map.map);
1999 } 2194 }
2000 kmem_cache_free(qp_cache, my_qp); 2195 kmem_cache_free(qp_cache, my_qp);
2001 atomic_dec(&shca->num_qps); 2196 atomic_dec(&shca->num_qps);
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 4426d82fe798..64928079eafa 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -53,9 +53,25 @@
53/* in RC traffic, insert an empty RDMA READ every this many packets */ 53/* in RC traffic, insert an empty RDMA READ every this many packets */
54#define ACK_CIRC_THRESHOLD 2000000 54#define ACK_CIRC_THRESHOLD 2000000
55 55
56static u64 replace_wr_id(u64 wr_id, u16 idx)
57{
58 u64 ret;
59
60 ret = wr_id & ~QMAP_IDX_MASK;
61 ret |= idx & QMAP_IDX_MASK;
62
63 return ret;
64}
65
66static u16 get_app_wr_id(u64 wr_id)
67{
68 return wr_id & QMAP_IDX_MASK;
69}
70
56static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, 71static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
57 struct ehca_wqe *wqe_p, 72 struct ehca_wqe *wqe_p,
58 struct ib_recv_wr *recv_wr) 73 struct ib_recv_wr *recv_wr,
74 u32 rq_map_idx)
59{ 75{
60 u8 cnt_ds; 76 u8 cnt_ds;
61 if (unlikely((recv_wr->num_sge < 0) || 77 if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
69 /* clear wqe header until sglist */ 85 /* clear wqe header until sglist */
70 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); 86 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
71 87
72 wqe_p->work_request_id = recv_wr->wr_id; 88 wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
73 wqe_p->nr_of_data_seg = recv_wr->num_sge; 89 wqe_p->nr_of_data_seg = recv_wr->num_sge;
74 90
75 for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { 91 for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
146 u64 dma_length; 162 u64 dma_length;
147 struct ehca_av *my_av; 163 struct ehca_av *my_av;
148 u32 remote_qkey = send_wr->wr.ud.remote_qkey; 164 u32 remote_qkey = send_wr->wr.ud.remote_qkey;
165 struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
149 166
150 if (unlikely((send_wr->num_sge < 0) || 167 if (unlikely((send_wr->num_sge < 0) ||
151 (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { 168 (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
158 /* clear wqe header until sglist */ 175 /* clear wqe header until sglist */
159 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); 176 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
160 177
161 wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK; 178 wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
162 wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
163 179
164 qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK; 180 qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
165 qp->sq_map[sq_map_idx].reported = 0; 181 qmap_entry->reported = 0;
166 182
167 switch (send_wr->opcode) { 183 switch (send_wr->opcode) {
168 case IB_WR_SEND: 184 case IB_WR_SEND:
@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp,
496 struct ehca_wqe *wqe_p; 512 struct ehca_wqe *wqe_p;
497 int wqe_cnt = 0; 513 int wqe_cnt = 0;
498 int ret = 0; 514 int ret = 0;
515 u32 rq_map_idx;
499 unsigned long flags; 516 unsigned long flags;
517 struct ehca_qmap_entry *qmap_entry;
500 518
501 if (unlikely(!HAS_RQ(my_qp))) { 519 if (unlikely(!HAS_RQ(my_qp))) {
502 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", 520 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp,
524 } 542 }
525 goto post_recv_exit0; 543 goto post_recv_exit0;
526 } 544 }
545 /*
546 * Get the index of the WQE in the recv queue. The same index
547 * is used for writing into the rq_map.
548 */
549 rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
550
527 /* write a RECV WQE into the QUEUE */ 551 /* write a RECV WQE into the QUEUE */
528 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); 552 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
553 rq_map_idx);
529 /* 554 /*
530 * if something failed, 555 * if something failed,
531 * reset the free entry pointer to the start value 556 * reset the free entry pointer to the start value
@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp,
540 } 565 }
541 goto post_recv_exit0; 566 goto post_recv_exit0;
542 } 567 }
568
569 qmap_entry = &my_qp->rq_map.map[rq_map_idx];
570 qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
571 qmap_entry->reported = 0;
572
543 wqe_cnt++; 573 wqe_cnt++;
544 } /* eof for cur_recv_wr */ 574 } /* eof for cur_recv_wr */
545 575
@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
596/* internal function to poll one entry of cq */ 626/* internal function to poll one entry of cq */
597static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) 627static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
598{ 628{
599 int ret = 0; 629 int ret = 0, qmap_tail_idx;
600 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 630 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
601 struct ehca_cqe *cqe; 631 struct ehca_cqe *cqe;
602 struct ehca_qp *my_qp; 632 struct ehca_qp *my_qp;
633 struct ehca_qmap_entry *qmap_entry;
634 struct ehca_queue_map *qmap;
603 int cqe_count = 0, is_error; 635 int cqe_count = 0, is_error;
604 636
605repoll: 637repoll:
@@ -674,27 +706,52 @@ repoll:
674 goto repoll; 706 goto repoll;
675 wc->qp = &my_qp->ib_qp; 707 wc->qp = &my_qp->ib_qp;
676 708
677 if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) { 709 if (is_error) {
678 struct ehca_qmap_entry *qmap_entry;
679 /* 710 /*
680 * We got a send completion and need to restore the original 711 * set left_to_poll to 0 because in error state, we will not
681 * wr_id. 712 * get any additional CQEs
682 */ 713 */
683 qmap_entry = &my_qp->sq_map[cqe->work_request_id & 714 ehca_add_to_err_list(my_qp, 1);
684 QMAP_IDX_MASK]; 715 my_qp->sq_map.left_to_poll = 0;
685 716
686 if (qmap_entry->reported) { 717 if (HAS_RQ(my_qp))
687 ehca_warn(cq->device, "Double cqe on qp_num=%#x", 718 ehca_add_to_err_list(my_qp, 0);
688 my_qp->real_qp_num); 719 my_qp->rq_map.left_to_poll = 0;
689 /* found a double cqe, discard it and read next one */ 720 }
690 goto repoll; 721
691 } 722 qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
692 wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK; 723 if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
693 wc->wr_id |= qmap_entry->app_wr_id; 724 /* We got a send completion. */
694 qmap_entry->reported = 1; 725 qmap = &my_qp->sq_map;
695 } else 726 else
696 /* We got a receive completion. */ 727 /* We got a receive completion. */
697 wc->wr_id = cqe->work_request_id; 728 qmap = &my_qp->rq_map;
729
730 qmap_entry = &qmap->map[qmap_tail_idx];
731 if (qmap_entry->reported) {
732 ehca_warn(cq->device, "Double cqe on qp_num=%#x",
733 my_qp->real_qp_num);
734 /* found a double cqe, discard it and read next one */
735 goto repoll;
736 }
737
738 wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
739 qmap_entry->reported = 1;
740
741 /* this is a proper completion, we need to advance the tail pointer */
742 if (++qmap->tail == qmap->entries)
743 qmap->tail = 0;
744
745 /* if left_to_poll is decremented to 0, add the QP to the error list */
746 if (qmap->left_to_poll > 0) {
747 qmap->left_to_poll--;
748 if ((my_qp->sq_map.left_to_poll == 0) &&
749 (my_qp->rq_map.left_to_poll == 0)) {
750 ehca_add_to_err_list(my_qp, 1);
751 if (HAS_RQ(my_qp))
752 ehca_add_to_err_list(my_qp, 0);
753 }
754 }
698 755
699 /* eval ib_wc_opcode */ 756 /* eval ib_wc_opcode */
700 wc->opcode = ib_wc_opcode[cqe->optype]-1; 757 wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -733,13 +790,88 @@ poll_cq_one_exit0:
733 return ret; 790 return ret;
734} 791}
735 792
793static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
794 struct ib_wc *wc, int num_entries,
795 struct ipz_queue *ipz_queue, int on_sq)
796{
797 int nr = 0;
798 struct ehca_wqe *wqe;
799 u64 offset;
800 struct ehca_queue_map *qmap;
801 struct ehca_qmap_entry *qmap_entry;
802
803 if (on_sq)
804 qmap = &my_qp->sq_map;
805 else
806 qmap = &my_qp->rq_map;
807
808 qmap_entry = &qmap->map[qmap->tail];
809
810 while ((nr < num_entries) && (qmap_entry->reported == 0)) {
811 /* generate flush CQE */
812 memset(wc, 0, sizeof(*wc));
813
814 offset = qmap->tail * ipz_queue->qe_size;
815 wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
816 if (!wqe) {
817 ehca_err(cq->device, "Invalid wqe offset=%#lx on "
818 "qp_num=%#x", offset, my_qp->real_qp_num);
819 return nr;
820 }
821
822 wc->wr_id = replace_wr_id(wqe->work_request_id,
823 qmap_entry->app_wr_id);
824
825 if (on_sq) {
826 switch (wqe->optype) {
827 case WQE_OPTYPE_SEND:
828 wc->opcode = IB_WC_SEND;
829 break;
830 case WQE_OPTYPE_RDMAWRITE:
831 wc->opcode = IB_WC_RDMA_WRITE;
832 break;
833 case WQE_OPTYPE_RDMAREAD:
834 wc->opcode = IB_WC_RDMA_READ;
835 break;
836 default:
837 ehca_err(cq->device, "Invalid optype=%x",
838 wqe->optype);
839 return nr;
840 }
841 } else
842 wc->opcode = IB_WC_RECV;
843
844 if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
845 wc->ex.imm_data = wqe->immediate_data;
846 wc->wc_flags |= IB_WC_WITH_IMM;
847 }
848
849 wc->status = IB_WC_WR_FLUSH_ERR;
850
851 wc->qp = &my_qp->ib_qp;
852
853 /* mark as reported and advance tail pointer */
854 qmap_entry->reported = 1;
855 if (++qmap->tail == qmap->entries)
856 qmap->tail = 0;
857 qmap_entry = &qmap->map[qmap->tail];
858
859 wc++; nr++;
860 }
861
862 return nr;
863
864}
865
736int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) 866int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
737{ 867{
738 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 868 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
739 int nr; 869 int nr;
870 struct ehca_qp *err_qp;
740 struct ib_wc *current_wc = wc; 871 struct ib_wc *current_wc = wc;
741 int ret = 0; 872 int ret = 0;
742 unsigned long flags; 873 unsigned long flags;
874 int entries_left = num_entries;
743 875
744 if (num_entries < 1) { 876 if (num_entries < 1) {
745 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " 877 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
749 } 881 }
750 882
751 spin_lock_irqsave(&my_cq->spinlock, flags); 883 spin_lock_irqsave(&my_cq->spinlock, flags);
752 for (nr = 0; nr < num_entries; nr++) { 884
885 /* generate flush cqes for send queues */
886 list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
887 nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
888 &err_qp->ipz_squeue, 1);
889 entries_left -= nr;
890 current_wc += nr;
891
892 if (entries_left == 0)
893 break;
894 }
895
896 /* generate flush cqes for receive queues */
897 list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
898 nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
899 &err_qp->ipz_rqueue, 0);
900 entries_left -= nr;
901 current_wc += nr;
902
903 if (entries_left == 0)
904 break;
905 }
906
907 for (nr = 0; nr < entries_left; nr++) {
753 ret = ehca_poll_cq_one(cq, current_wc); 908 ret = ehca_poll_cq_one(cq, current_wc);
754 if (ret) 909 if (ret)
755 break; 910 break;
756 current_wc++; 911 current_wc++;
757 } /* eof for nr */ 912 } /* eof for nr */
913 entries_left -= nr;
914
758 spin_unlock_irqrestore(&my_cq->spinlock, flags); 915 spin_unlock_irqrestore(&my_cq->spinlock, flags);
759 if (ret == -EAGAIN || !ret) 916 if (ret == -EAGAIN || !ret)
760 ret = nr; 917 ret = num_entries - entries_left;
761 918
762poll_cq_exit0: 919poll_cq_exit0:
763 return ret; 920 return ret;