diff options
author | Naresh Gottumukkala <bgottumukkala@emulex.com> | 2013-08-26 05:57:44 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2013-09-03 00:18:32 -0400 |
commit | cffce99051b80c90630a9fff662a1b25e278069d (patch) | |
tree | 58257cc50f34458649c412d5a723063d53b417a3 /drivers/infiniband | |
parent | 2b51a9b9eb6bf240d2592e10d2f8823dd1f5ee3e (diff) |
RDMA/ocrdma: Don't use PD 0 for userspace CQ DB
The Create_CQ verb doesn't provide a PD pointer. So, until now, we have
been creating all CQ DB regions (both userspace and kernel) from PD0. As
a result, PD0 gets mmapped into applications, and a rogue userspace
application can mess things up.
A more serious issue is that even the be2net NIC uses PD0.
This patch addresses this problem by:
1) Create a PD page for every userspace application when
alloc_ucontext is called. This PD is destroyed in
dealloc_ucontext.
2) All CQs for that context will use the PD allocated in ucontext.
3) The first create_PD call from application will result in returning
the PD address from its ucontext (no new PD will be created).
4) For subsequent create_PD calls from the application, we create new PDs
for the application.
Signed-off-by: Naresh Gottumukkala <bgottumukkala@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma.h | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 359 |
6 files changed, 339 insertions, 40 deletions
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 1c8ba4cefcba..fde8fb097a8c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h | |||
@@ -324,6 +324,9 @@ struct ocrdma_ucontext { | |||
324 | 324 | ||
325 | struct list_head mm_head; | 325 | struct list_head mm_head; |
326 | struct mutex mm_list_lock; /* protects list entries of mm type */ | 326 | struct mutex mm_list_lock; /* protects list entries of mm type */ |
327 | struct ocrdma_pd *cntxt_pd; | ||
328 | int pd_in_use; | ||
329 | |||
327 | struct { | 330 | struct { |
328 | u32 *va; | 331 | u32 *va; |
329 | dma_addr_t pa; | 332 | dma_addr_t pa; |
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16ce664dc466..618c2124e619 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c | |||
@@ -1309,7 +1309,7 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) | |||
1309 | } | 1309 | } |
1310 | 1310 | ||
1311 | int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | 1311 | int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, |
1312 | int entries, int dpp_cq) | 1312 | int entries, int dpp_cq, u16 pd_id) |
1313 | { | 1313 | { |
1314 | int status = -ENOMEM; int max_hw_cqe; | 1314 | int status = -ENOMEM; int max_hw_cqe; |
1315 | struct pci_dev *pdev = dev->nic_info.pdev; | 1315 | struct pci_dev *pdev = dev->nic_info.pdev; |
@@ -1357,7 +1357,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | |||
1357 | cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; | 1357 | cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; |
1358 | 1358 | ||
1359 | cq->eqn = ocrdma_bind_eq(dev); | 1359 | cq->eqn = ocrdma_bind_eq(dev); |
1360 | cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2; | 1360 | cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3; |
1361 | cqe_count = cq->len / cqe_size; | 1361 | cqe_count = cq->len / cqe_size; |
1362 | if (cqe_count > 1024) { | 1362 | if (cqe_count > 1024) { |
1363 | /* Set cnt to 3 to indicate more than 1024 cq entries */ | 1363 | /* Set cnt to 3 to indicate more than 1024 cq entries */ |
@@ -1393,6 +1393,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | |||
1393 | cq->phase_change = true; | 1393 | cq->phase_change = true; |
1394 | } | 1394 | } |
1395 | 1395 | ||
1396 | cmd->cmd.pd_id = pd_id; /* valid only for v3 */ | ||
1396 | ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); | 1397 | ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); |
1397 | status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); | 1398 | status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); |
1398 | if (status) | 1399 | if (status) |
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index cc90ac3b6d42..044db74e780a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h | |||
@@ -78,6 +78,11 @@ static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len) | |||
78 | #endif | 78 | #endif |
79 | } | 79 | } |
80 | 80 | ||
81 | static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid) | ||
82 | { | ||
83 | return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size); | ||
84 | } | ||
85 | |||
81 | int ocrdma_init_hw(struct ocrdma_dev *); | 86 | int ocrdma_init_hw(struct ocrdma_dev *); |
82 | void ocrdma_cleanup_hw(struct ocrdma_dev *); | 87 | void ocrdma_cleanup_hw(struct ocrdma_dev *); |
83 | 88 | ||
@@ -100,7 +105,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey); | |||
100 | int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, | 105 | int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, |
101 | u32 pd_id, int acc); | 106 | u32 pd_id, int acc); |
102 | int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, | 107 | int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, |
103 | int entries, int dpp_cq); | 108 | int entries, int dpp_cq, u16 pd_id); |
104 | int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); | 109 | int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); |
105 | 110 | ||
106 | int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, | 111 | int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, |
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 4eeea56f7b31..7d43ba924bfb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c | |||
@@ -326,6 +326,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) | |||
326 | dev->ibdev.req_notify_cq = ocrdma_arm_cq; | 326 | dev->ibdev.req_notify_cq = ocrdma_arm_cq; |
327 | 327 | ||
328 | dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; | 328 | dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; |
329 | dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr; | ||
329 | dev->ibdev.dereg_mr = ocrdma_dereg_mr; | 330 | dev->ibdev.dereg_mr = ocrdma_dereg_mr; |
330 | dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; | 331 | dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; |
331 | 332 | ||
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index d1a9fb72a4bd..1e2992fee4be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h | |||
@@ -547,6 +547,7 @@ enum { | |||
547 | 547 | ||
548 | enum { | 548 | enum { |
549 | OCRDMA_CREATE_CQ_VER2 = 2, | 549 | OCRDMA_CREATE_CQ_VER2 = 2, |
550 | OCRDMA_CREATE_CQ_VER3 = 3, | ||
550 | 551 | ||
551 | OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, | 552 | OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, |
552 | OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, | 553 | OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, |
@@ -580,7 +581,8 @@ struct ocrdma_create_cq_cmd { | |||
580 | u32 pgsz_pgcnt; | 581 | u32 pgsz_pgcnt; |
581 | u32 ev_cnt_flags; | 582 | u32 ev_cnt_flags; |
582 | u32 eqn; | 583 | u32 eqn; |
583 | u32 cqe_count; | 584 | u16 cqe_count; |
585 | u16 pd_id; | ||
584 | u32 rsvd6; | 586 | u32 rsvd6; |
585 | struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; | 587 | struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; |
586 | }; | 588 | }; |
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 3e80f65f42a3..e554fc258a68 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | |||
@@ -215,6 +215,108 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, | |||
215 | return found; | 215 | return found; |
216 | } | 216 | } |
217 | 217 | ||
218 | static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, | ||
219 | struct ocrdma_ucontext *uctx, | ||
220 | struct ib_udata *udata) | ||
221 | { | ||
222 | struct ocrdma_pd *pd = NULL; | ||
223 | int status = 0; | ||
224 | |||
225 | pd = kzalloc(sizeof(*pd), GFP_KERNEL); | ||
226 | if (!pd) | ||
227 | return ERR_PTR(-ENOMEM); | ||
228 | |||
229 | if (udata && uctx) { | ||
230 | pd->dpp_enabled = | ||
231 | dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY; | ||
232 | pd->num_dpp_qp = | ||
233 | pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; | ||
234 | } | ||
235 | |||
236 | retry: | ||
237 | status = ocrdma_mbx_alloc_pd(dev, pd); | ||
238 | if (status) { | ||
239 | if (pd->dpp_enabled) { | ||
240 | pd->dpp_enabled = false; | ||
241 | pd->num_dpp_qp = 0; | ||
242 | goto retry; | ||
243 | } else { | ||
244 | kfree(pd); | ||
245 | return ERR_PTR(status); | ||
246 | } | ||
247 | } | ||
248 | |||
249 | return pd; | ||
250 | } | ||
251 | |||
252 | static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, | ||
253 | struct ocrdma_pd *pd) | ||
254 | { | ||
255 | return (uctx->cntxt_pd == pd ? true : false); | ||
256 | } | ||
257 | |||
258 | static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, | ||
259 | struct ocrdma_pd *pd) | ||
260 | { | ||
261 | int status = 0; | ||
262 | |||
263 | status = ocrdma_mbx_dealloc_pd(dev, pd); | ||
264 | kfree(pd); | ||
265 | return status; | ||
266 | } | ||
267 | |||
268 | static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, | ||
269 | struct ocrdma_ucontext *uctx, | ||
270 | struct ib_udata *udata) | ||
271 | { | ||
272 | int status = 0; | ||
273 | |||
274 | uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); | ||
275 | if (IS_ERR(uctx->cntxt_pd)) { | ||
276 | status = PTR_ERR(uctx->cntxt_pd); | ||
277 | uctx->cntxt_pd = NULL; | ||
278 | goto err; | ||
279 | } | ||
280 | |||
281 | uctx->cntxt_pd->uctx = uctx; | ||
282 | uctx->cntxt_pd->ibpd.device = &dev->ibdev; | ||
283 | err: | ||
284 | return status; | ||
285 | } | ||
286 | |||
287 | static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) | ||
288 | { | ||
289 | int status = 0; | ||
290 | struct ocrdma_pd *pd = uctx->cntxt_pd; | ||
291 | struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); | ||
292 | |||
293 | BUG_ON(uctx->pd_in_use); | ||
294 | uctx->cntxt_pd = NULL; | ||
295 | status = _ocrdma_dealloc_pd(dev, pd); | ||
296 | return status; | ||
297 | } | ||
298 | |||
299 | static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) | ||
300 | { | ||
301 | struct ocrdma_pd *pd = NULL; | ||
302 | |||
303 | mutex_lock(&uctx->mm_list_lock); | ||
304 | if (!uctx->pd_in_use) { | ||
305 | uctx->pd_in_use = true; | ||
306 | pd = uctx->cntxt_pd; | ||
307 | } | ||
308 | mutex_unlock(&uctx->mm_list_lock); | ||
309 | |||
310 | return pd; | ||
311 | } | ||
312 | |||
313 | static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx) | ||
314 | { | ||
315 | mutex_lock(&uctx->mm_list_lock); | ||
316 | uctx->pd_in_use = false; | ||
317 | mutex_unlock(&uctx->mm_list_lock); | ||
318 | } | ||
319 | |||
218 | struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, | 320 | struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, |
219 | struct ib_udata *udata) | 321 | struct ib_udata *udata) |
220 | { | 322 | { |
@@ -249,6 +351,11 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, | |||
249 | status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); | 351 | status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); |
250 | if (status) | 352 | if (status) |
251 | goto map_err; | 353 | goto map_err; |
354 | |||
355 | status = ocrdma_alloc_ucontext_pd(dev, ctx, udata); | ||
356 | if (status) | ||
357 | goto pd_err; | ||
358 | |||
252 | resp.dev_id = dev->id; | 359 | resp.dev_id = dev->id; |
253 | resp.max_inline_data = dev->attr.max_inline_data; | 360 | resp.max_inline_data = dev->attr.max_inline_data; |
254 | resp.wqe_size = dev->attr.wqe_size; | 361 | resp.wqe_size = dev->attr.wqe_size; |
@@ -262,6 +369,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, | |||
262 | return &ctx->ibucontext; | 369 | return &ctx->ibucontext; |
263 | 370 | ||
264 | cpy_err: | 371 | cpy_err: |
372 | pd_err: | ||
265 | ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); | 373 | ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); |
266 | map_err: | 374 | map_err: |
267 | dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, | 375 | dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, |
@@ -272,11 +380,14 @@ map_err: | |||
272 | 380 | ||
273 | int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) | 381 | int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) |
274 | { | 382 | { |
383 | int status = 0; | ||
275 | struct ocrdma_mm *mm, *tmp; | 384 | struct ocrdma_mm *mm, *tmp; |
276 | struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); | 385 | struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); |
277 | struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); | 386 | struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); |
278 | struct pci_dev *pdev = dev->nic_info.pdev; | 387 | struct pci_dev *pdev = dev->nic_info.pdev; |
279 | 388 | ||
389 | status = ocrdma_dealloc_ucontext_pd(uctx); | ||
390 | |||
280 | ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); | 391 | ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); |
281 | dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, | 392 | dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, |
282 | uctx->ah_tbl.pa); | 393 | uctx->ah_tbl.pa); |
@@ -286,7 +397,7 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) | |||
286 | kfree(mm); | 397 | kfree(mm); |
287 | } | 398 | } |
288 | kfree(uctx); | 399 | kfree(uctx); |
289 | return 0; | 400 | return status; |
290 | } | 401 | } |
291 | 402 | ||
292 | int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) | 403 | int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) |
@@ -346,8 +457,7 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, | |||
346 | memset(&rsp, 0, sizeof(rsp)); | 457 | memset(&rsp, 0, sizeof(rsp)); |
347 | rsp.id = pd->id; | 458 | rsp.id = pd->id; |
348 | rsp.dpp_enabled = pd->dpp_enabled; | 459 | rsp.dpp_enabled = pd->dpp_enabled; |
349 | db_page_addr = dev->nic_info.unmapped_db + | 460 | db_page_addr = ocrdma_get_db_addr(dev, pd->id); |
350 | (pd->id * dev->nic_info.db_page_size); | ||
351 | db_page_size = dev->nic_info.db_page_size; | 461 | db_page_size = dev->nic_info.db_page_size; |
352 | 462 | ||
353 | status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); | 463 | status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); |
@@ -386,31 +496,26 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, | |||
386 | { | 496 | { |
387 | struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); | 497 | struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); |
388 | struct ocrdma_pd *pd; | 498 | struct ocrdma_pd *pd; |
499 | struct ocrdma_ucontext *uctx = NULL; | ||
389 | int status; | 500 | int status; |
501 | u8 is_uctx_pd = false; | ||
390 | 502 | ||
391 | pd = kzalloc(sizeof(*pd), GFP_KERNEL); | ||
392 | if (!pd) | ||
393 | return ERR_PTR(-ENOMEM); | ||
394 | if (udata && context) { | 503 | if (udata && context) { |
395 | pd->dpp_enabled = | 504 | uctx = get_ocrdma_ucontext(context); |
396 | (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY); | 505 | pd = ocrdma_get_ucontext_pd(uctx); |
397 | pd->num_dpp_qp = | 506 | if (pd) { |
398 | pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; | 507 | is_uctx_pd = true; |
399 | } | 508 | goto pd_mapping; |
400 | retry: | ||
401 | status = ocrdma_mbx_alloc_pd(dev, pd); | ||
402 | if (status) { | ||
403 | /* try for pd with out dpp */ | ||
404 | if (pd->dpp_enabled) { | ||
405 | pd->dpp_enabled = false; | ||
406 | pd->num_dpp_qp = 0; | ||
407 | goto retry; | ||
408 | } else { | ||
409 | kfree(pd); | ||
410 | return ERR_PTR(status); | ||
411 | } | 509 | } |
412 | } | 510 | } |
413 | 511 | ||
512 | pd = _ocrdma_alloc_pd(dev, uctx, udata); | ||
513 | if (IS_ERR(pd)) { | ||
514 | status = PTR_ERR(pd); | ||
515 | goto exit; | ||
516 | } | ||
517 | |||
518 | pd_mapping: | ||
414 | if (udata && context) { | 519 | if (udata && context) { |
415 | status = ocrdma_copy_pd_uresp(dev, pd, context, udata); | 520 | status = ocrdma_copy_pd_uresp(dev, pd, context, udata); |
416 | if (status) | 521 | if (status) |
@@ -419,8 +524,13 @@ retry: | |||
419 | return &pd->ibpd; | 524 | return &pd->ibpd; |
420 | 525 | ||
421 | err: | 526 | err: |
422 | status = ocrdma_mbx_dealloc_pd(dev, pd); | 527 | if (is_uctx_pd) { |
423 | kfree(pd); | 528 | ocrdma_release_ucontext_pd(uctx); |
529 | } else { | ||
530 | status = ocrdma_mbx_dealloc_pd(dev, pd); | ||
531 | kfree(pd); | ||
532 | } | ||
533 | exit: | ||
424 | return ERR_PTR(status); | 534 | return ERR_PTR(status); |
425 | } | 535 | } |
426 | 536 | ||
@@ -428,20 +538,25 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) | |||
428 | { | 538 | { |
429 | struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); | 539 | struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); |
430 | struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); | 540 | struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); |
431 | int status; | 541 | struct ocrdma_ucontext *uctx = NULL; |
542 | int status = 0; | ||
432 | u64 usr_db; | 543 | u64 usr_db; |
433 | 544 | ||
434 | status = ocrdma_mbx_dealloc_pd(dev, pd); | 545 | uctx = pd->uctx; |
435 | if (pd->uctx) { | 546 | if (uctx) { |
436 | u64 dpp_db = dev->nic_info.dpp_unmapped_addr + | 547 | u64 dpp_db = dev->nic_info.dpp_unmapped_addr + |
437 | (pd->id * PAGE_SIZE); | 548 | (pd->id * PAGE_SIZE); |
438 | if (pd->dpp_enabled) | 549 | if (pd->dpp_enabled) |
439 | ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); | 550 | ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); |
440 | usr_db = dev->nic_info.unmapped_db + | 551 | usr_db = ocrdma_get_db_addr(dev, pd->id); |
441 | (pd->id * dev->nic_info.db_page_size); | ||
442 | ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); | 552 | ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); |
553 | |||
554 | if (is_ucontext_pd(uctx, pd)) { | ||
555 | ocrdma_release_ucontext_pd(uctx); | ||
556 | return status; | ||
557 | } | ||
443 | } | 558 | } |
444 | kfree(pd); | 559 | status = _ocrdma_dealloc_pd(dev, pd); |
445 | return status; | 560 | return status; |
446 | } | 561 | } |
447 | 562 | ||
@@ -701,7 +816,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | |||
701 | struct ib_ucontext *ib_ctx) | 816 | struct ib_ucontext *ib_ctx) |
702 | { | 817 | { |
703 | int status; | 818 | int status; |
704 | struct ocrdma_ucontext *uctx; | 819 | struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); |
705 | struct ocrdma_create_cq_uresp uresp; | 820 | struct ocrdma_create_cq_uresp uresp; |
706 | 821 | ||
707 | memset(&uresp, 0, sizeof(uresp)); | 822 | memset(&uresp, 0, sizeof(uresp)); |
@@ -710,7 +825,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | |||
710 | uresp.num_pages = 1; | 825 | uresp.num_pages = 1; |
711 | uresp.max_hw_cqe = cq->max_hw_cqe; | 826 | uresp.max_hw_cqe = cq->max_hw_cqe; |
712 | uresp.page_addr[0] = cq->pa; | 827 | uresp.page_addr[0] = cq->pa; |
713 | uresp.db_page_addr = dev->nic_info.unmapped_db; | 828 | uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); |
714 | uresp.db_page_size = dev->nic_info.db_page_size; | 829 | uresp.db_page_size = dev->nic_info.db_page_size; |
715 | uresp.phase_change = cq->phase_change ? 1 : 0; | 830 | uresp.phase_change = cq->phase_change ? 1 : 0; |
716 | status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); | 831 | status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); |
@@ -719,7 +834,6 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, | |||
719 | __func__, dev->id, cq->id); | 834 | __func__, dev->id, cq->id); |
720 | goto err; | 835 | goto err; |
721 | } | 836 | } |
722 | uctx = get_ocrdma_ucontext(ib_ctx); | ||
723 | status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); | 837 | status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); |
724 | if (status) | 838 | if (status) |
725 | goto err; | 839 | goto err; |
@@ -739,6 +853,8 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, | |||
739 | { | 853 | { |
740 | struct ocrdma_cq *cq; | 854 | struct ocrdma_cq *cq; |
741 | struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); | 855 | struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); |
856 | struct ocrdma_ucontext *uctx = NULL; | ||
857 | u16 pd_id = 0; | ||
742 | int status; | 858 | int status; |
743 | struct ocrdma_create_cq_ureq ureq; | 859 | struct ocrdma_create_cq_ureq ureq; |
744 | 860 | ||
@@ -756,7 +872,12 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, | |||
756 | INIT_LIST_HEAD(&cq->sq_head); | 872 | INIT_LIST_HEAD(&cq->sq_head); |
757 | INIT_LIST_HEAD(&cq->rq_head); | 873 | INIT_LIST_HEAD(&cq->rq_head); |
758 | 874 | ||
759 | status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq); | 875 | if (ib_ctx) { |
876 | uctx = get_ocrdma_ucontext(ib_ctx); | ||
877 | pd_id = uctx->cntxt_pd->id; | ||
878 | } | ||
879 | |||
880 | status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); | ||
760 | if (status) { | 881 | if (status) { |
761 | kfree(cq); | 882 | kfree(cq); |
762 | return ERR_PTR(status); | 883 | return ERR_PTR(status); |
@@ -797,13 +918,16 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) | |||
797 | int status; | 918 | int status; |
798 | struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); | 919 | struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); |
799 | struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); | 920 | struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); |
921 | int pdid = 0; | ||
800 | 922 | ||
801 | status = ocrdma_mbx_destroy_cq(dev, cq); | 923 | status = ocrdma_mbx_destroy_cq(dev, cq); |
802 | 924 | ||
803 | if (cq->ucontext) { | 925 | if (cq->ucontext) { |
926 | pdid = cq->ucontext->cntxt_pd->id; | ||
804 | ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, | 927 | ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, |
805 | PAGE_ALIGN(cq->len)); | 928 | PAGE_ALIGN(cq->len)); |
806 | ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db, | 929 | ocrdma_del_mmap(cq->ucontext, |
930 | ocrdma_get_db_addr(dev, pdid), | ||
807 | dev->nic_info.db_page_size); | 931 | dev->nic_info.db_page_size); |
808 | } | 932 | } |
809 | dev->cq_tbl[cq->id] = NULL; | 933 | dev->cq_tbl[cq->id] = NULL; |
@@ -2686,3 +2810,166 @@ void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) | |||
2686 | { | 2810 | { |
2687 | kfree(page_list); | 2811 | kfree(page_list); |
2688 | } | 2812 | } |
2813 | |||
2814 | #define MAX_KERNEL_PBE_SIZE 65536 | ||
2815 | static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, | ||
2816 | int buf_cnt, u32 *pbe_size) | ||
2817 | { | ||
2818 | u64 total_size = 0; | ||
2819 | u64 buf_size = 0; | ||
2820 | int i; | ||
2821 | *pbe_size = roundup(buf_list[0].size, PAGE_SIZE); | ||
2822 | *pbe_size = roundup_pow_of_two(*pbe_size); | ||
2823 | |||
2824 | /* find the smallest PBE size that we can have */ | ||
2825 | for (i = 0; i < buf_cnt; i++) { | ||
2826 | /* first addr may not be page aligned, so ignore checking */ | ||
2827 | if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) || | ||
2828 | (buf_list[i].size & ~PAGE_MASK))) { | ||
2829 | return 0; | ||
2830 | } | ||
2831 | |||
2832 | /* if configured PBE size is greater then the chosen one, | ||
2833 | * reduce the PBE size. | ||
2834 | */ | ||
2835 | buf_size = roundup(buf_list[i].size, PAGE_SIZE); | ||
2836 | /* pbe_size has to be even multiple of 4K 1,2,4,8...*/ | ||
2837 | buf_size = roundup_pow_of_two(buf_size); | ||
2838 | if (*pbe_size > buf_size) | ||
2839 | *pbe_size = buf_size; | ||
2840 | |||
2841 | total_size += buf_size; | ||
2842 | } | ||
2843 | *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ? | ||
2844 | (MAX_KERNEL_PBE_SIZE) : (*pbe_size); | ||
2845 | |||
2846 | /* num_pbes = total_size / (*pbe_size); this is implemented below. */ | ||
2847 | |||
2848 | return total_size >> ilog2(*pbe_size); | ||
2849 | } | ||
2850 | |||
2851 | static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt, | ||
2852 | u32 pbe_size, struct ocrdma_pbl *pbl_tbl, | ||
2853 | struct ocrdma_hw_mr *hwmr) | ||
2854 | { | ||
2855 | int i; | ||
2856 | int idx; | ||
2857 | int pbes_per_buf = 0; | ||
2858 | u64 buf_addr = 0; | ||
2859 | int num_pbes; | ||
2860 | struct ocrdma_pbe *pbe; | ||
2861 | int total_num_pbes = 0; | ||
2862 | |||
2863 | if (!hwmr->num_pbes) | ||
2864 | return; | ||
2865 | |||
2866 | pbe = (struct ocrdma_pbe *)pbl_tbl->va; | ||
2867 | num_pbes = 0; | ||
2868 | |||
2869 | /* go through the OS phy regions & fill hw pbe entries into pbls. */ | ||
2870 | for (i = 0; i < ib_buf_cnt; i++) { | ||
2871 | buf_addr = buf_list[i].addr; | ||
2872 | pbes_per_buf = | ||
2873 | roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) / | ||
2874 | pbe_size; | ||
2875 | hwmr->len += buf_list[i].size; | ||
2876 | /* number of pbes can be more for one OS buf, when | ||
2877 | * buffers are of different sizes. | ||
2878 | * split the ib_buf to one or more pbes. | ||
2879 | */ | ||
2880 | for (idx = 0; idx < pbes_per_buf; idx++) { | ||
2881 | /* we program always page aligned addresses, | ||
2882 | * first unaligned address is taken care by fbo. | ||
2883 | */ | ||
2884 | if (i == 0) { | ||
2885 | /* for non zero fbo, assign the | ||
2886 | * start of the page. | ||
2887 | */ | ||
2888 | pbe->pa_lo = | ||
2889 | cpu_to_le32((u32) (buf_addr & PAGE_MASK)); | ||
2890 | pbe->pa_hi = | ||
2891 | cpu_to_le32((u32) upper_32_bits(buf_addr)); | ||
2892 | } else { | ||
2893 | pbe->pa_lo = | ||
2894 | cpu_to_le32((u32) (buf_addr & 0xffffffff)); | ||
2895 | pbe->pa_hi = | ||
2896 | cpu_to_le32((u32) upper_32_bits(buf_addr)); | ||
2897 | } | ||
2898 | buf_addr += pbe_size; | ||
2899 | num_pbes += 1; | ||
2900 | total_num_pbes += 1; | ||
2901 | pbe++; | ||
2902 | |||
2903 | if (total_num_pbes == hwmr->num_pbes) | ||
2904 | goto mr_tbl_done; | ||
2905 | /* if the pbl is full storing the pbes, | ||
2906 | * move to next pbl. | ||
2907 | */ | ||
2908 | if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { | ||
2909 | pbl_tbl++; | ||
2910 | pbe = (struct ocrdma_pbe *)pbl_tbl->va; | ||
2911 | num_pbes = 0; | ||
2912 | } | ||
2913 | } | ||
2914 | } | ||
2915 | mr_tbl_done: | ||
2916 | return; | ||
2917 | } | ||
2918 | |||
2919 | struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd, | ||
2920 | struct ib_phys_buf *buf_list, | ||
2921 | int buf_cnt, int acc, u64 *iova_start) | ||
2922 | { | ||
2923 | int status = -ENOMEM; | ||
2924 | struct ocrdma_mr *mr; | ||
2925 | struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); | ||
2926 | struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); | ||
2927 | u32 num_pbes; | ||
2928 | u32 pbe_size = 0; | ||
2929 | |||
2930 | if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE)) | ||
2931 | return ERR_PTR(-EINVAL); | ||
2932 | |||
2933 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | ||
2934 | if (!mr) | ||
2935 | return ERR_PTR(status); | ||
2936 | |||
2937 | num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size); | ||
2938 | if (num_pbes == 0) { | ||
2939 | status = -EINVAL; | ||
2940 | goto pbl_err; | ||
2941 | } | ||
2942 | status = ocrdma_get_pbl_info(dev, mr, num_pbes); | ||
2943 | if (status) | ||
2944 | goto pbl_err; | ||
2945 | |||
2946 | mr->hwmr.pbe_size = pbe_size; | ||
2947 | mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK); | ||
2948 | mr->hwmr.va = *iova_start; | ||
2949 | mr->hwmr.local_rd = 1; | ||
2950 | mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; | ||
2951 | mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; | ||
2952 | mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; | ||
2953 | mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; | ||
2954 | mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0; | ||
2955 | |||
2956 | status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); | ||
2957 | if (status) | ||
2958 | goto pbl_err; | ||
2959 | build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table, | ||
2960 | &mr->hwmr); | ||
2961 | status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); | ||
2962 | if (status) | ||
2963 | goto mbx_err; | ||
2964 | |||
2965 | mr->ibmr.lkey = mr->hwmr.lkey; | ||
2966 | if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) | ||
2967 | mr->ibmr.rkey = mr->hwmr.lkey; | ||
2968 | return &mr->ibmr; | ||
2969 | |||
2970 | mbx_err: | ||
2971 | ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); | ||
2972 | pbl_err: | ||
2973 | kfree(mr); | ||
2974 | return ERR_PTR(status); | ||
2975 | } | ||