aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorNaresh Gottumukkala <bgottumukkala@emulex.com>2013-08-26 05:57:44 -0400
committerRoland Dreier <roland@purestorage.com>2013-09-03 00:18:32 -0400
commitcffce99051b80c90630a9fff662a1b25e278069d (patch)
tree58257cc50f34458649c412d5a723063d53b417a3 /drivers/infiniband
parent2b51a9b9eb6bf240d2592e10d2f8823dd1f5ee3e (diff)
RDMA/ocrdma: Don't use PD 0 for userspace CQ DB
The create_cq verb doesn't provide a PD pointer, so until now all CQ DB regions (both userspace and kernel) have been created from PD0. This results in PD0 being mmapped into applications, where a rogue userspace application can mess things up. An even more serious issue is that the be2net NIC also uses PD0. This patch addresses the problem as follows: 1) A PD page is created for every userspace application when alloc_ucontext is called; it is destroyed in dealloc_ucontext. 2) All CQs for that context use the PD allocated in the ucontext. 3) The first create_pd call from an application returns the PD address from its ucontext (no new PD is created). 4) For subsequent create_pd calls from the application, new PDs are created for the application. Signed-off-by: Naresh Gottumukkala <bgottumukkala@emulex.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c359
6 files changed, 339 insertions, 40 deletions
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 1c8ba4cefcba..fde8fb097a8c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -324,6 +324,9 @@ struct ocrdma_ucontext {
324 324
325 struct list_head mm_head; 325 struct list_head mm_head;
326 struct mutex mm_list_lock; /* protects list entries of mm type */ 326 struct mutex mm_list_lock; /* protects list entries of mm type */
327 struct ocrdma_pd *cntxt_pd;
328 int pd_in_use;
329
327 struct { 330 struct {
328 u32 *va; 331 u32 *va;
329 dma_addr_t pa; 332 dma_addr_t pa;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 16ce664dc466..618c2124e619 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1309,7 +1309,7 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
1309} 1309}
1310 1310
1311int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, 1311int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1312 int entries, int dpp_cq) 1312 int entries, int dpp_cq, u16 pd_id)
1313{ 1313{
1314 int status = -ENOMEM; int max_hw_cqe; 1314 int status = -ENOMEM; int max_hw_cqe;
1315 struct pci_dev *pdev = dev->nic_info.pdev; 1315 struct pci_dev *pdev = dev->nic_info.pdev;
@@ -1357,7 +1357,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1357 cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; 1357 cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
1358 1358
1359 cq->eqn = ocrdma_bind_eq(dev); 1359 cq->eqn = ocrdma_bind_eq(dev);
1360 cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2; 1360 cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
1361 cqe_count = cq->len / cqe_size; 1361 cqe_count = cq->len / cqe_size;
1362 if (cqe_count > 1024) { 1362 if (cqe_count > 1024) {
1363 /* Set cnt to 3 to indicate more than 1024 cq entries */ 1363 /* Set cnt to 3 to indicate more than 1024 cq entries */
@@ -1393,6 +1393,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1393 cq->phase_change = true; 1393 cq->phase_change = true;
1394 } 1394 }
1395 1395
1396 cmd->cmd.pd_id = pd_id; /* valid only for v3 */
1396 ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); 1397 ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
1397 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); 1398 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1398 if (status) 1399 if (status)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index cc90ac3b6d42..044db74e780a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -78,6 +78,11 @@ static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len)
78#endif 78#endif
79} 79}
80 80
81static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid)
82{
83 return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size);
84}
85
81int ocrdma_init_hw(struct ocrdma_dev *); 86int ocrdma_init_hw(struct ocrdma_dev *);
82void ocrdma_cleanup_hw(struct ocrdma_dev *); 87void ocrdma_cleanup_hw(struct ocrdma_dev *);
83 88
@@ -100,7 +105,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey);
100int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, 105int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
101 u32 pd_id, int acc); 106 u32 pd_id, int acc);
102int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, 107int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
103 int entries, int dpp_cq); 108 int entries, int dpp_cq, u16 pd_id);
104int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); 109int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
105 110
106int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, 111int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 4eeea56f7b31..7d43ba924bfb 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -326,6 +326,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
326 dev->ibdev.req_notify_cq = ocrdma_arm_cq; 326 dev->ibdev.req_notify_cq = ocrdma_arm_cq;
327 327
328 dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; 328 dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
329 dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
329 dev->ibdev.dereg_mr = ocrdma_dereg_mr; 330 dev->ibdev.dereg_mr = ocrdma_dereg_mr;
330 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; 331 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
331 332
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index d1a9fb72a4bd..1e2992fee4be 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -547,6 +547,7 @@ enum {
547 547
548enum { 548enum {
549 OCRDMA_CREATE_CQ_VER2 = 2, 549 OCRDMA_CREATE_CQ_VER2 = 2,
550 OCRDMA_CREATE_CQ_VER3 = 3,
550 551
551 OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, 552 OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
552 OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, 553 OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
@@ -580,7 +581,8 @@ struct ocrdma_create_cq_cmd {
580 u32 pgsz_pgcnt; 581 u32 pgsz_pgcnt;
581 u32 ev_cnt_flags; 582 u32 ev_cnt_flags;
582 u32 eqn; 583 u32 eqn;
583 u32 cqe_count; 584 u16 cqe_count;
585 u16 pd_id;
584 u32 rsvd6; 586 u32 rsvd6;
585 struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; 587 struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
586}; 588};
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 3e80f65f42a3..e554fc258a68 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -215,6 +215,108 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
215 return found; 215 return found;
216} 216}
217 217
218static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
219 struct ocrdma_ucontext *uctx,
220 struct ib_udata *udata)
221{
222 struct ocrdma_pd *pd = NULL;
223 int status = 0;
224
225 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
226 if (!pd)
227 return ERR_PTR(-ENOMEM);
228
229 if (udata && uctx) {
230 pd->dpp_enabled =
231 dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
232 pd->num_dpp_qp =
233 pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
234 }
235
236retry:
237 status = ocrdma_mbx_alloc_pd(dev, pd);
238 if (status) {
239 if (pd->dpp_enabled) {
240 pd->dpp_enabled = false;
241 pd->num_dpp_qp = 0;
242 goto retry;
243 } else {
244 kfree(pd);
245 return ERR_PTR(status);
246 }
247 }
248
249 return pd;
250}
251
252static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
253 struct ocrdma_pd *pd)
254{
255 return (uctx->cntxt_pd == pd ? true : false);
256}
257
258static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
259 struct ocrdma_pd *pd)
260{
261 int status = 0;
262
263 status = ocrdma_mbx_dealloc_pd(dev, pd);
264 kfree(pd);
265 return status;
266}
267
268static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
269 struct ocrdma_ucontext *uctx,
270 struct ib_udata *udata)
271{
272 int status = 0;
273
274 uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
275 if (IS_ERR(uctx->cntxt_pd)) {
276 status = PTR_ERR(uctx->cntxt_pd);
277 uctx->cntxt_pd = NULL;
278 goto err;
279 }
280
281 uctx->cntxt_pd->uctx = uctx;
282 uctx->cntxt_pd->ibpd.device = &dev->ibdev;
283err:
284 return status;
285}
286
287static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
288{
289 int status = 0;
290 struct ocrdma_pd *pd = uctx->cntxt_pd;
291 struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
292
293 BUG_ON(uctx->pd_in_use);
294 uctx->cntxt_pd = NULL;
295 status = _ocrdma_dealloc_pd(dev, pd);
296 return status;
297}
298
299static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
300{
301 struct ocrdma_pd *pd = NULL;
302
303 mutex_lock(&uctx->mm_list_lock);
304 if (!uctx->pd_in_use) {
305 uctx->pd_in_use = true;
306 pd = uctx->cntxt_pd;
307 }
308 mutex_unlock(&uctx->mm_list_lock);
309
310 return pd;
311}
312
313static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
314{
315 mutex_lock(&uctx->mm_list_lock);
316 uctx->pd_in_use = false;
317 mutex_unlock(&uctx->mm_list_lock);
318}
319
218struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, 320struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
219 struct ib_udata *udata) 321 struct ib_udata *udata)
220{ 322{
@@ -249,6 +351,11 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
249 status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); 351 status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
250 if (status) 352 if (status)
251 goto map_err; 353 goto map_err;
354
355 status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
356 if (status)
357 goto pd_err;
358
252 resp.dev_id = dev->id; 359 resp.dev_id = dev->id;
253 resp.max_inline_data = dev->attr.max_inline_data; 360 resp.max_inline_data = dev->attr.max_inline_data;
254 resp.wqe_size = dev->attr.wqe_size; 361 resp.wqe_size = dev->attr.wqe_size;
@@ -262,6 +369,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
262 return &ctx->ibucontext; 369 return &ctx->ibucontext;
263 370
264cpy_err: 371cpy_err:
372pd_err:
265 ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); 373 ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
266map_err: 374map_err:
267 dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, 375 dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
@@ -272,11 +380,14 @@ map_err:
272 380
273int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) 381int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
274{ 382{
383 int status = 0;
275 struct ocrdma_mm *mm, *tmp; 384 struct ocrdma_mm *mm, *tmp;
276 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); 385 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
277 struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); 386 struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
278 struct pci_dev *pdev = dev->nic_info.pdev; 387 struct pci_dev *pdev = dev->nic_info.pdev;
279 388
389 status = ocrdma_dealloc_ucontext_pd(uctx);
390
280 ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); 391 ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
281 dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, 392 dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
282 uctx->ah_tbl.pa); 393 uctx->ah_tbl.pa);
@@ -286,7 +397,7 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
286 kfree(mm); 397 kfree(mm);
287 } 398 }
288 kfree(uctx); 399 kfree(uctx);
289 return 0; 400 return status;
290} 401}
291 402
292int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 403int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -346,8 +457,7 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
346 memset(&rsp, 0, sizeof(rsp)); 457 memset(&rsp, 0, sizeof(rsp));
347 rsp.id = pd->id; 458 rsp.id = pd->id;
348 rsp.dpp_enabled = pd->dpp_enabled; 459 rsp.dpp_enabled = pd->dpp_enabled;
349 db_page_addr = dev->nic_info.unmapped_db + 460 db_page_addr = ocrdma_get_db_addr(dev, pd->id);
350 (pd->id * dev->nic_info.db_page_size);
351 db_page_size = dev->nic_info.db_page_size; 461 db_page_size = dev->nic_info.db_page_size;
352 462
353 status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); 463 status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
@@ -386,31 +496,26 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
386{ 496{
387 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); 497 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
388 struct ocrdma_pd *pd; 498 struct ocrdma_pd *pd;
499 struct ocrdma_ucontext *uctx = NULL;
389 int status; 500 int status;
501 u8 is_uctx_pd = false;
390 502
391 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
392 if (!pd)
393 return ERR_PTR(-ENOMEM);
394 if (udata && context) { 503 if (udata && context) {
395 pd->dpp_enabled = 504 uctx = get_ocrdma_ucontext(context);
396 (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY); 505 pd = ocrdma_get_ucontext_pd(uctx);
397 pd->num_dpp_qp = 506 if (pd) {
398 pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; 507 is_uctx_pd = true;
399 } 508 goto pd_mapping;
400retry:
401 status = ocrdma_mbx_alloc_pd(dev, pd);
402 if (status) {
403 /* try for pd with out dpp */
404 if (pd->dpp_enabled) {
405 pd->dpp_enabled = false;
406 pd->num_dpp_qp = 0;
407 goto retry;
408 } else {
409 kfree(pd);
410 return ERR_PTR(status);
411 } 509 }
412 } 510 }
413 511
512 pd = _ocrdma_alloc_pd(dev, uctx, udata);
513 if (IS_ERR(pd)) {
514 status = PTR_ERR(pd);
515 goto exit;
516 }
517
518pd_mapping:
414 if (udata && context) { 519 if (udata && context) {
415 status = ocrdma_copy_pd_uresp(dev, pd, context, udata); 520 status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
416 if (status) 521 if (status)
@@ -419,8 +524,13 @@ retry:
419 return &pd->ibpd; 524 return &pd->ibpd;
420 525
421err: 526err:
422 status = ocrdma_mbx_dealloc_pd(dev, pd); 527 if (is_uctx_pd) {
423 kfree(pd); 528 ocrdma_release_ucontext_pd(uctx);
529 } else {
530 status = ocrdma_mbx_dealloc_pd(dev, pd);
531 kfree(pd);
532 }
533exit:
424 return ERR_PTR(status); 534 return ERR_PTR(status);
425} 535}
426 536
@@ -428,20 +538,25 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
428{ 538{
429 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); 539 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
430 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); 540 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
431 int status; 541 struct ocrdma_ucontext *uctx = NULL;
542 int status = 0;
432 u64 usr_db; 543 u64 usr_db;
433 544
434 status = ocrdma_mbx_dealloc_pd(dev, pd); 545 uctx = pd->uctx;
435 if (pd->uctx) { 546 if (uctx) {
436 u64 dpp_db = dev->nic_info.dpp_unmapped_addr + 547 u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
437 (pd->id * PAGE_SIZE); 548 (pd->id * PAGE_SIZE);
438 if (pd->dpp_enabled) 549 if (pd->dpp_enabled)
439 ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); 550 ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
440 usr_db = dev->nic_info.unmapped_db + 551 usr_db = ocrdma_get_db_addr(dev, pd->id);
441 (pd->id * dev->nic_info.db_page_size);
442 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); 552 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
553
554 if (is_ucontext_pd(uctx, pd)) {
555 ocrdma_release_ucontext_pd(uctx);
556 return status;
557 }
443 } 558 }
444 kfree(pd); 559 status = _ocrdma_dealloc_pd(dev, pd);
445 return status; 560 return status;
446} 561}
447 562
@@ -701,7 +816,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
701 struct ib_ucontext *ib_ctx) 816 struct ib_ucontext *ib_ctx)
702{ 817{
703 int status; 818 int status;
704 struct ocrdma_ucontext *uctx; 819 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
705 struct ocrdma_create_cq_uresp uresp; 820 struct ocrdma_create_cq_uresp uresp;
706 821
707 memset(&uresp, 0, sizeof(uresp)); 822 memset(&uresp, 0, sizeof(uresp));
@@ -710,7 +825,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
710 uresp.num_pages = 1; 825 uresp.num_pages = 1;
711 uresp.max_hw_cqe = cq->max_hw_cqe; 826 uresp.max_hw_cqe = cq->max_hw_cqe;
712 uresp.page_addr[0] = cq->pa; 827 uresp.page_addr[0] = cq->pa;
713 uresp.db_page_addr = dev->nic_info.unmapped_db; 828 uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
714 uresp.db_page_size = dev->nic_info.db_page_size; 829 uresp.db_page_size = dev->nic_info.db_page_size;
715 uresp.phase_change = cq->phase_change ? 1 : 0; 830 uresp.phase_change = cq->phase_change ? 1 : 0;
716 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 831 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
@@ -719,7 +834,6 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
719 __func__, dev->id, cq->id); 834 __func__, dev->id, cq->id);
720 goto err; 835 goto err;
721 } 836 }
722 uctx = get_ocrdma_ucontext(ib_ctx);
723 status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); 837 status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
724 if (status) 838 if (status)
725 goto err; 839 goto err;
@@ -739,6 +853,8 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
739{ 853{
740 struct ocrdma_cq *cq; 854 struct ocrdma_cq *cq;
741 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); 855 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
856 struct ocrdma_ucontext *uctx = NULL;
857 u16 pd_id = 0;
742 int status; 858 int status;
743 struct ocrdma_create_cq_ureq ureq; 859 struct ocrdma_create_cq_ureq ureq;
744 860
@@ -756,7 +872,12 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
756 INIT_LIST_HEAD(&cq->sq_head); 872 INIT_LIST_HEAD(&cq->sq_head);
757 INIT_LIST_HEAD(&cq->rq_head); 873 INIT_LIST_HEAD(&cq->rq_head);
758 874
759 status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq); 875 if (ib_ctx) {
876 uctx = get_ocrdma_ucontext(ib_ctx);
877 pd_id = uctx->cntxt_pd->id;
878 }
879
880 status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
760 if (status) { 881 if (status) {
761 kfree(cq); 882 kfree(cq);
762 return ERR_PTR(status); 883 return ERR_PTR(status);
@@ -797,13 +918,16 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
797 int status; 918 int status;
798 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); 919 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
799 struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); 920 struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
921 int pdid = 0;
800 922
801 status = ocrdma_mbx_destroy_cq(dev, cq); 923 status = ocrdma_mbx_destroy_cq(dev, cq);
802 924
803 if (cq->ucontext) { 925 if (cq->ucontext) {
926 pdid = cq->ucontext->cntxt_pd->id;
804 ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, 927 ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
805 PAGE_ALIGN(cq->len)); 928 PAGE_ALIGN(cq->len));
806 ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db, 929 ocrdma_del_mmap(cq->ucontext,
930 ocrdma_get_db_addr(dev, pdid),
807 dev->nic_info.db_page_size); 931 dev->nic_info.db_page_size);
808 } 932 }
809 dev->cq_tbl[cq->id] = NULL; 933 dev->cq_tbl[cq->id] = NULL;
@@ -2686,3 +2810,166 @@ void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
2686{ 2810{
2687 kfree(page_list); 2811 kfree(page_list);
2688} 2812}
2813
2814#define MAX_KERNEL_PBE_SIZE 65536
2815static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
2816 int buf_cnt, u32 *pbe_size)
2817{
2818 u64 total_size = 0;
2819 u64 buf_size = 0;
2820 int i;
2821 *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
2822 *pbe_size = roundup_pow_of_two(*pbe_size);
2823
2824 /* find the smallest PBE size that we can have */
2825 for (i = 0; i < buf_cnt; i++) {
2826 /* first addr may not be page aligned, so ignore checking */
2827 if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
2828 (buf_list[i].size & ~PAGE_MASK))) {
2829 return 0;
2830 }
2831
2832 /* if configured PBE size is greater then the chosen one,
2833 * reduce the PBE size.
2834 */
2835 buf_size = roundup(buf_list[i].size, PAGE_SIZE);
2836 /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
2837 buf_size = roundup_pow_of_two(buf_size);
2838 if (*pbe_size > buf_size)
2839 *pbe_size = buf_size;
2840
2841 total_size += buf_size;
2842 }
2843 *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
2844 (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
2845
2846 /* num_pbes = total_size / (*pbe_size); this is implemented below. */
2847
2848 return total_size >> ilog2(*pbe_size);
2849}
2850
2851static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
2852 u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
2853 struct ocrdma_hw_mr *hwmr)
2854{
2855 int i;
2856 int idx;
2857 int pbes_per_buf = 0;
2858 u64 buf_addr = 0;
2859 int num_pbes;
2860 struct ocrdma_pbe *pbe;
2861 int total_num_pbes = 0;
2862
2863 if (!hwmr->num_pbes)
2864 return;
2865
2866 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2867 num_pbes = 0;
2868
2869 /* go through the OS phy regions & fill hw pbe entries into pbls. */
2870 for (i = 0; i < ib_buf_cnt; i++) {
2871 buf_addr = buf_list[i].addr;
2872 pbes_per_buf =
2873 roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
2874 pbe_size;
2875 hwmr->len += buf_list[i].size;
2876 /* number of pbes can be more for one OS buf, when
2877 * buffers are of different sizes.
2878 * split the ib_buf to one or more pbes.
2879 */
2880 for (idx = 0; idx < pbes_per_buf; idx++) {
2881 /* we program always page aligned addresses,
2882 * first unaligned address is taken care by fbo.
2883 */
2884 if (i == 0) {
2885 /* for non zero fbo, assign the
2886 * start of the page.
2887 */
2888 pbe->pa_lo =
2889 cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2890 pbe->pa_hi =
2891 cpu_to_le32((u32) upper_32_bits(buf_addr));
2892 } else {
2893 pbe->pa_lo =
2894 cpu_to_le32((u32) (buf_addr & 0xffffffff));
2895 pbe->pa_hi =
2896 cpu_to_le32((u32) upper_32_bits(buf_addr));
2897 }
2898 buf_addr += pbe_size;
2899 num_pbes += 1;
2900 total_num_pbes += 1;
2901 pbe++;
2902
2903 if (total_num_pbes == hwmr->num_pbes)
2904 goto mr_tbl_done;
2905 /* if the pbl is full storing the pbes,
2906 * move to next pbl.
2907 */
2908 if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
2909 pbl_tbl++;
2910 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2911 num_pbes = 0;
2912 }
2913 }
2914 }
2915mr_tbl_done:
2916 return;
2917}
2918
2919struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
2920 struct ib_phys_buf *buf_list,
2921 int buf_cnt, int acc, u64 *iova_start)
2922{
2923 int status = -ENOMEM;
2924 struct ocrdma_mr *mr;
2925 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2926 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2927 u32 num_pbes;
2928 u32 pbe_size = 0;
2929
2930 if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
2931 return ERR_PTR(-EINVAL);
2932
2933 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2934 if (!mr)
2935 return ERR_PTR(status);
2936
2937 num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
2938 if (num_pbes == 0) {
2939 status = -EINVAL;
2940 goto pbl_err;
2941 }
2942 status = ocrdma_get_pbl_info(dev, mr, num_pbes);
2943 if (status)
2944 goto pbl_err;
2945
2946 mr->hwmr.pbe_size = pbe_size;
2947 mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
2948 mr->hwmr.va = *iova_start;
2949 mr->hwmr.local_rd = 1;
2950 mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2951 mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2952 mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2953 mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2954 mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
2955
2956 status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
2957 if (status)
2958 goto pbl_err;
2959 build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
2960 &mr->hwmr);
2961 status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
2962 if (status)
2963 goto mbx_err;
2964
2965 mr->ibmr.lkey = mr->hwmr.lkey;
2966 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
2967 mr->ibmr.rkey = mr->hwmr.lkey;
2968 return &mr->ibmr;
2969
2970mbx_err:
2971 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
2972pbl_err:
2973 kfree(mr);
2974 return ERR_PTR(status);
2975}