aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/qib/qib_mr.c
diff options
context:
space:
mode:
author Mike Marciniszyn <mike.marciniszyn@qlogic.com> 2011-01-10 20:42:22 -0500
committer Roland Dreier <rolandd@cisco.com> 2011-01-10 20:42:22 -0500
commit 2a600f14d25fda341b5633c75cc50a7574fc1007 (patch)
tree 02d0af0c74dc1e7ef99d2798d4b5e0a375b4deb8 /drivers/infiniband/hw/qib/qib_mr.c
parent 7c3edd3ff3098093e594dbcbc9dbeeae09b1b4a0 (diff)
IB/qib: RDMA lkey/rkey validation is inefficient for large MRs
The current code loops during rkey/lkey validation to isolate the MR for the RDMA, which is expensive when the current operation is inside a very large memory region. This fix optimizes rkey/lkey validation routines for user memory regions and fast memory regions. The MR entry can be isolated by shifts/mods instead of looping. The existing loop is preserved for phys memory regions for now. Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_mr.c')
-rw-r--r-- drivers/infiniband/hw/qib/qib_mr.c 8
1 files changed, 5 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 5f95f0f6385d..08944e2ee334 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
39/* Fast memory region */ 39/* Fast memory region */
40struct qib_fmr { 40struct qib_fmr {
41 struct ib_fmr ibfmr; 41 struct ib_fmr ibfmr;
42 u8 page_shift;
43 struct qib_mregion mr; /* must be last */ 42 struct qib_mregion mr; /* must be last */
44}; 43};
45 44
@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
107 goto bail; 106 goto bail;
108 } 107 }
109 mr->mr.mapsz = m; 108 mr->mr.mapsz = m;
109 mr->mr.page_shift = 0;
110 mr->mr.max_segs = count; 110 mr->mr.max_segs = count;
111 111
112 /* 112 /*
@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
231 mr->mr.access_flags = mr_access_flags; 231 mr->mr.access_flags = mr_access_flags;
232 mr->umem = umem; 232 mr->umem = umem;
233 233
234 if (is_power_of_2(umem->page_size))
235 mr->mr.page_shift = ilog2(umem->page_size);
234 m = 0; 236 m = 0;
235 n = 0; 237 n = 0;
236 list_for_each_entry(chunk, &umem->chunk_list, list) { 238 list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
390 fmr->mr.offset = 0; 392 fmr->mr.offset = 0;
391 fmr->mr.access_flags = mr_access_flags; 393 fmr->mr.access_flags = mr_access_flags;
392 fmr->mr.max_segs = fmr_attr->max_pages; 394 fmr->mr.max_segs = fmr_attr->max_pages;
393 fmr->page_shift = fmr_attr->page_shift; 395 fmr->mr.page_shift = fmr_attr->page_shift;
394 396
395 atomic_set(&fmr->mr.refcount, 0); 397 atomic_set(&fmr->mr.refcount, 0);
396 ret = &fmr->ibfmr; 398 ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
437 spin_lock_irqsave(&rkt->lock, flags); 439 spin_lock_irqsave(&rkt->lock, flags);
438 fmr->mr.user_base = iova; 440 fmr->mr.user_base = iova;
439 fmr->mr.iova = iova; 441 fmr->mr.iova = iova;
440 ps = 1 << fmr->page_shift; 442 ps = 1 << fmr->mr.page_shift;
441 fmr->mr.length = list_len * ps; 443 fmr->mr.length = list_len * ps;
442 m = 0; 444 m = 0;
443 n = 0; 445 n = 0;