diff options
author | Steve Wise <swise@opengridcomputing.com> | 2008-07-15 02:48:53 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-07-15 02:48:53 -0400 |
commit | 4ab928f69208d240d3681336f34589e4b151824f (patch) | |
tree | aca612ffbf703870cac63efb5ec5d8955ac2bc3c /drivers/infiniband | |
parent | 96f15c03532282366364ecfd20f04e49b5d96f3a (diff) |
RDMA/cxgb3: Fixes for zero STag
Handling the zero STag in receive work requests requires some extra
logic in the driver:
- Only set the PRIV_QP flag for kernel mode QPs.
- Add a zero STag build function for recv wrs. The uP needs a PBL
allocated and passed down in the recv WR so it can construct a HW
PBL for the zero STag S/G entries. Note: we need to place a few
restrictions on zero STag usage because of this:
1) all SGEs in a recv WR must be either all zero STag or all non-zero STag. No mixing.
2) an individual SGE length cannot exceed 128MB for a zero STag SGE.
This should be OK since it's not really practical to allocate
such a large chunk of pinned contiguous DMA mapped memory.
- Add an optimized non-zero-STag recv wr format for kernel users.
This is needed to optimize both zero and non-zero STag cracking in
the recv path for kernel users.
- Remove the iwch_ prefix from the static build functions.
- Bump required FW version.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_hal.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_wr.h | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_provider.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_qp.c | 127 |
4 files changed, 130 insertions, 26 deletions
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 340e4181c761..f6d5747153a5 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c | |||
@@ -278,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, | |||
278 | if (!wq->qpid) | 278 | if (!wq->qpid) |
279 | return -ENOMEM; | 279 | return -ENOMEM; |
280 | 280 | ||
281 | wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); | 281 | wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); |
282 | if (!wq->rq) | 282 | if (!wq->rq) |
283 | goto err1; | 283 | goto err1; |
284 | 284 | ||
@@ -302,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, | |||
302 | if (!kernel_domain) | 302 | if (!kernel_domain) |
303 | wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + | 303 | wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + |
304 | (wq->qpid << rdev_p->qpshift); | 304 | (wq->qpid << rdev_p->qpshift); |
305 | wq->rdev = rdev_p; | ||
305 | PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, | 306 | PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, |
306 | wq->qpid, wq->doorbell, (unsigned long long) wq->udb); | 307 | wq->qpid, wq->doorbell, (unsigned long long) wq->udb); |
307 | return 0; | 308 | return 0; |
@@ -1266,13 +1267,16 @@ proc_cqe: | |||
1266 | wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); | 1267 | wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); |
1267 | PDBG("%s completing sq idx %ld\n", __func__, | 1268 | PDBG("%s completing sq idx %ld\n", __func__, |
1268 | Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); | 1269 | Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); |
1269 | *cookie = (wq->sq + | 1270 | *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; |
1270 | Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; | ||
1271 | wq->sq_rptr++; | 1271 | wq->sq_rptr++; |
1272 | } else { | 1272 | } else { |
1273 | PDBG("%s completing rq idx %ld\n", __func__, | 1273 | PDBG("%s completing rq idx %ld\n", __func__, |
1274 | Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); | 1274 | Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); |
1275 | *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); | 1275 | *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; |
1276 | if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) | ||
1277 | cxio_hal_pblpool_free(wq->rdev, | ||
1278 | wq->rq[Q_PTR2IDX(wq->rq_rptr, | ||
1279 | wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); | ||
1276 | wq->rq_rptr++; | 1280 | wq->rq_rptr++; |
1277 | } | 1281 | } |
1278 | 1282 | ||
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index de760e9f1cc6..04618f7bfbb3 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h | |||
@@ -39,6 +39,9 @@ | |||
39 | 39 | ||
40 | #define T3_MAX_SGE 4 | 40 | #define T3_MAX_SGE 4 |
41 | #define T3_MAX_INLINE 64 | 41 | #define T3_MAX_INLINE 64 |
42 | #define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) | ||
43 | #define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) | ||
44 | #define T3_STAG0_PAGE_SHIFT 15 | ||
42 | 45 | ||
43 | #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) | 46 | #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) |
44 | #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ | 47 | #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ |
@@ -665,6 +668,11 @@ struct t3_swsq { | |||
665 | int signaled; | 668 | int signaled; |
666 | }; | 669 | }; |
667 | 670 | ||
671 | struct t3_swrq { | ||
672 | __u64 wr_id; | ||
673 | __u32 pbl_addr; | ||
674 | }; | ||
675 | |||
668 | /* | 676 | /* |
669 | * A T3 WQ implements both the SQ and RQ. | 677 | * A T3 WQ implements both the SQ and RQ. |
670 | */ | 678 | */ |
@@ -681,14 +689,15 @@ struct t3_wq { | |||
681 | u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ | 689 | u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ |
682 | u32 sq_rptr; /* pending wrs */ | 690 | u32 sq_rptr; /* pending wrs */ |
683 | u32 sq_size_log2; /* sq size */ | 691 | u32 sq_size_log2; /* sq size */ |
684 | u64 *rq; /* SW RQ (holds consumer wr_ids */ | 692 | struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ |
685 | u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ | 693 | u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ |
686 | u32 rq_rptr; /* pending wrs */ | 694 | u32 rq_rptr; /* pending wrs */ |
687 | u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ | 695 | struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ |
688 | u32 rq_size_log2; /* rq size */ | 696 | u32 rq_size_log2; /* rq size */ |
689 | u32 rq_addr; /* rq adapter address */ | 697 | u32 rq_addr; /* rq adapter address */ |
690 | void __iomem *doorbell; /* kernel db */ | 698 | void __iomem *doorbell; /* kernel db */ |
691 | u64 udb; /* user db if any */ | 699 | u64 udb; /* user db if any */ |
700 | struct cxio_rdev *rdev; | ||
692 | }; | 701 | }; |
693 | 702 | ||
694 | struct t3_cq { | 703 | struct t3_cq { |
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 7ecfd4d638c5..b89640aa6e10 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c | |||
@@ -1007,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, | |||
1007 | qhp->ibqp.qp_num = qhp->wq.qpid; | 1007 | qhp->ibqp.qp_num = qhp->wq.qpid; |
1008 | init_timer(&(qhp->timer)); | 1008 | init_timer(&(qhp->timer)); |
1009 | PDBG("%s sq_num_entries %d, rq_num_entries %d " | 1009 | PDBG("%s sq_num_entries %d, rq_num_entries %d " |
1010 | "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", | 1010 | "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", |
1011 | __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, | 1011 | __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, |
1012 | qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, | 1012 | qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, |
1013 | 1 << qhp->wq.size_log2); | 1013 | 1 << qhp->wq.size_log2, qhp->wq.rq_addr); |
1014 | return &qhp->ibqp; | 1014 | return &qhp->ibqp; |
1015 | } | 1015 | } |
1016 | 1016 | ||
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3b44300a3036..9a3be3a9d5dc 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c | |||
@@ -33,10 +33,11 @@ | |||
33 | #include "iwch.h" | 33 | #include "iwch.h" |
34 | #include "iwch_cm.h" | 34 | #include "iwch_cm.h" |
35 | #include "cxio_hal.h" | 35 | #include "cxio_hal.h" |
36 | #include "cxio_resource.h" | ||
36 | 37 | ||
37 | #define NO_SUPPORT -1 | 38 | #define NO_SUPPORT -1 |
38 | 39 | ||
39 | static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, | 40 | static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, |
40 | u8 * flit_cnt) | 41 | u8 * flit_cnt) |
41 | { | 42 | { |
42 | int i; | 43 | int i; |
@@ -81,7 +82,7 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, | |||
81 | return 0; | 82 | return 0; |
82 | } | 83 | } |
83 | 84 | ||
84 | static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, | 85 | static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, |
85 | u8 *flit_cnt) | 86 | u8 *flit_cnt) |
86 | { | 87 | { |
87 | int i; | 88 | int i; |
@@ -122,7 +123,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, | |||
122 | return 0; | 123 | return 0; |
123 | } | 124 | } |
124 | 125 | ||
125 | static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, | 126 | static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, |
126 | u8 *flit_cnt) | 127 | u8 *flit_cnt) |
127 | { | 128 | { |
128 | if (wr->num_sge > 1) | 129 | if (wr->num_sge > 1) |
@@ -143,7 +144,7 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, | |||
143 | return 0; | 144 | return 0; |
144 | } | 145 | } |
145 | 146 | ||
146 | static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, | 147 | static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, |
147 | u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) | 148 | u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) |
148 | { | 149 | { |
149 | int i; | 150 | int i; |
@@ -185,7 +186,7 @@ static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, | |||
185 | return 0; | 186 | return 0; |
186 | } | 187 | } |
187 | 188 | ||
188 | static int iwch_build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, | 189 | static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, |
189 | u8 *flit_cnt) | 190 | u8 *flit_cnt) |
190 | { | 191 | { |
191 | wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); | 192 | wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); |
@@ -244,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, | |||
244 | return 0; | 245 | return 0; |
245 | } | 246 | } |
246 | 247 | ||
247 | static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, | 248 | static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, |
248 | struct ib_recv_wr *wr) | 249 | struct ib_recv_wr *wr) |
249 | { | 250 | { |
250 | int i; | 251 | int i, err = 0; |
251 | if (wr->num_sge > T3_MAX_SGE) | 252 | u32 pbl_addr[T3_MAX_SGE]; |
252 | return -EINVAL; | 253 | u8 page_size[T3_MAX_SGE]; |
254 | |||
255 | err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, | ||
256 | page_size); | ||
257 | if (err) | ||
258 | return err; | ||
259 | wqe->recv.pagesz[0] = page_size[0]; | ||
260 | wqe->recv.pagesz[1] = page_size[1]; | ||
261 | wqe->recv.pagesz[2] = page_size[2]; | ||
262 | wqe->recv.pagesz[3] = page_size[3]; | ||
253 | wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); | 263 | wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); |
254 | for (i = 0; i < wr->num_sge; i++) { | 264 | for (i = 0; i < wr->num_sge; i++) { |
255 | wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); | 265 | wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); |
256 | wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); | 266 | wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); |
267 | |||
268 | /* to in the WQE == the offset into the page */ | ||
269 | wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % | ||
270 | (1UL << (12 + page_size[i]))); | ||
271 | |||
272 | /* pbl_addr is the adapters address in the PBL */ | ||
273 | wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); | ||
274 | } | ||
275 | for (; i < T3_MAX_SGE; i++) { | ||
276 | wqe->recv.sgl[i].stag = 0; | ||
277 | wqe->recv.sgl[i].len = 0; | ||
278 | wqe->recv.sgl[i].to = 0; | ||
279 | wqe->recv.pbl_addr[i] = 0; | ||
280 | } | ||
281 | qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, | ||
282 | qhp->wq.rq_size_log2)].wr_id = wr->wr_id; | ||
283 | qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, | ||
284 | qhp->wq.rq_size_log2)].pbl_addr = 0; | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, | ||
289 | struct ib_recv_wr *wr) | ||
290 | { | ||
291 | int i; | ||
292 | u32 pbl_addr; | ||
293 | u32 pbl_offset; | ||
294 | |||
295 | |||
296 | /* | ||
297 | * The T3 HW requires the PBL in the HW recv descriptor to reference | ||
298 | * a PBL entry. So we allocate the max needed PBL memory here and pass | ||
299 | * it to the uP in the recv WR. The uP will build the PBL and setup | ||
300 | * the HW recv descriptor. | ||
301 | */ | ||
302 | pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); | ||
303 | if (!pbl_addr) | ||
304 | return -ENOMEM; | ||
305 | |||
306 | /* | ||
307 | * Compute the 8B aligned offset. | ||
308 | */ | ||
309 | pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; | ||
310 | |||
311 | wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); | ||
312 | |||
313 | for (i = 0; i < wr->num_sge; i++) { | ||
314 | |||
315 | /* | ||
316 | * Use a 128MB page size. This and an imposed 128MB | ||
317 | * sge length limit allows us to require only a 2-entry HW | ||
318 | * PBL for each SGE. This restriction is acceptable since | ||
319 | * since it is not possible to allocate 128MB of contiguous | ||
320 | * DMA coherent memory! | ||
321 | */ | ||
322 | if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) | ||
323 | return -EINVAL; | ||
324 | wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; | ||
325 | |||
326 | /* | ||
327 | * T3 restricts a recv to all zero-stag or all non-zero-stag. | ||
328 | */ | ||
329 | if (wr->sg_list[i].lkey != 0) | ||
330 | return -EINVAL; | ||
331 | wqe->recv.sgl[i].stag = 0; | ||
332 | wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); | ||
257 | wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); | 333 | wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); |
334 | wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); | ||
335 | pbl_offset += 2; | ||
258 | } | 336 | } |
259 | for (; i < T3_MAX_SGE; i++) { | 337 | for (; i < T3_MAX_SGE; i++) { |
338 | wqe->recv.pagesz[i] = 0; | ||
260 | wqe->recv.sgl[i].stag = 0; | 339 | wqe->recv.sgl[i].stag = 0; |
261 | wqe->recv.sgl[i].len = 0; | 340 | wqe->recv.sgl[i].len = 0; |
262 | wqe->recv.sgl[i].to = 0; | 341 | wqe->recv.sgl[i].to = 0; |
342 | wqe->recv.pbl_addr[i] = 0; | ||
263 | } | 343 | } |
344 | qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, | ||
345 | qhp->wq.rq_size_log2)].wr_id = wr->wr_id; | ||
346 | qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, | ||
347 | qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; | ||
264 | return 0; | 348 | return 0; |
265 | } | 349 | } |
266 | 350 | ||
@@ -312,18 +396,18 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
312 | if (wr->send_flags & IB_SEND_FENCE) | 396 | if (wr->send_flags & IB_SEND_FENCE) |
313 | t3_wr_flags |= T3_READ_FENCE_FLAG; | 397 | t3_wr_flags |= T3_READ_FENCE_FLAG; |
314 | t3_wr_opcode = T3_WR_SEND; | 398 | t3_wr_opcode = T3_WR_SEND; |
315 | err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); | 399 | err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); |
316 | break; | 400 | break; |
317 | case IB_WR_RDMA_WRITE: | 401 | case IB_WR_RDMA_WRITE: |
318 | case IB_WR_RDMA_WRITE_WITH_IMM: | 402 | case IB_WR_RDMA_WRITE_WITH_IMM: |
319 | t3_wr_opcode = T3_WR_WRITE; | 403 | t3_wr_opcode = T3_WR_WRITE; |
320 | err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); | 404 | err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); |
321 | break; | 405 | break; |
322 | case IB_WR_RDMA_READ: | 406 | case IB_WR_RDMA_READ: |
323 | case IB_WR_RDMA_READ_WITH_INV: | 407 | case IB_WR_RDMA_READ_WITH_INV: |
324 | t3_wr_opcode = T3_WR_READ; | 408 | t3_wr_opcode = T3_WR_READ; |
325 | t3_wr_flags = 0; /* T3 reads are always signaled */ | 409 | t3_wr_flags = 0; /* T3 reads are always signaled */ |
326 | err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); | 410 | err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); |
327 | if (err) | 411 | if (err) |
328 | break; | 412 | break; |
329 | sqp->read_len = wqe->read.local_len; | 413 | sqp->read_len = wqe->read.local_len; |
@@ -332,14 +416,14 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
332 | break; | 416 | break; |
333 | case IB_WR_FAST_REG_MR: | 417 | case IB_WR_FAST_REG_MR: |
334 | t3_wr_opcode = T3_WR_FASTREG; | 418 | t3_wr_opcode = T3_WR_FASTREG; |
335 | err = iwch_build_fastreg(wqe, wr, &t3_wr_flit_cnt, | 419 | err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, |
336 | &wr_cnt, &qhp->wq); | 420 | &wr_cnt, &qhp->wq); |
337 | break; | 421 | break; |
338 | case IB_WR_LOCAL_INV: | 422 | case IB_WR_LOCAL_INV: |
339 | if (wr->send_flags & IB_SEND_FENCE) | 423 | if (wr->send_flags & IB_SEND_FENCE) |
340 | t3_wr_flags |= T3_LOCAL_FENCE_FLAG; | 424 | t3_wr_flags |= T3_LOCAL_FENCE_FLAG; |
341 | t3_wr_opcode = T3_WR_INV_STAG; | 425 | t3_wr_opcode = T3_WR_INV_STAG; |
342 | err = iwch_build_inv_stag(wqe, wr, &t3_wr_flit_cnt); | 426 | err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); |
343 | break; | 427 | break; |
344 | default: | 428 | default: |
345 | PDBG("%s post of type=%d TBD!\n", __func__, | 429 | PDBG("%s post of type=%d TBD!\n", __func__, |
@@ -398,18 +482,24 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, | |||
398 | return -EINVAL; | 482 | return -EINVAL; |
399 | } | 483 | } |
400 | while (wr) { | 484 | while (wr) { |
485 | if (wr->num_sge > T3_MAX_SGE) { | ||
486 | err = -EINVAL; | ||
487 | *bad_wr = wr; | ||
488 | break; | ||
489 | } | ||
401 | idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); | 490 | idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); |
402 | wqe = (union t3_wr *) (qhp->wq.queue + idx); | 491 | wqe = (union t3_wr *) (qhp->wq.queue + idx); |
403 | if (num_wrs) | 492 | if (num_wrs) |
404 | err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); | 493 | if (wr->sg_list[0].lkey) |
494 | err = build_rdma_recv(qhp, wqe, wr); | ||
495 | else | ||
496 | err = build_zero_stag_recv(qhp, wqe, wr); | ||
405 | else | 497 | else |
406 | err = -ENOMEM; | 498 | err = -ENOMEM; |
407 | if (err) { | 499 | if (err) { |
408 | *bad_wr = wr; | 500 | *bad_wr = wr; |
409 | break; | 501 | break; |
410 | } | 502 | } |
411 | qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = | ||
412 | wr->wr_id; | ||
413 | build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, | 503 | build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, |
414 | Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), | 504 | Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), |
415 | 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); | 505 | 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); |
@@ -810,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, | |||
810 | init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); | 900 | init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); |
811 | init_attr.rqe_count = iwch_rqes_posted(qhp); | 901 | init_attr.rqe_count = iwch_rqes_posted(qhp); |
812 | init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; | 902 | init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; |
813 | init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; | 903 | if (!qhp->ibqp.uobject) |
904 | init_attr.flags |= PRIV_QP; | ||
814 | if (peer2peer) { | 905 | if (peer2peer) { |
815 | init_attr.rtr_type = RTR_READ; | 906 | init_attr.rtr_type = RTR_READ; |
816 | if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) | 907 | if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) |