aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@qlogic.com>2011-01-10 20:42:22 -0500
committerRoland Dreier <rolandd@cisco.com>2011-01-10 20:42:22 -0500
commit2a600f14d25fda341b5633c75cc50a7574fc1007 (patch)
tree02d0af0c74dc1e7ef99d2798d4b5e0a375b4deb8 /drivers
parent7c3edd3ff3098093e594dbcbc9dbeeae09b1b4a0 (diff)
IB/qib: RDMA lkey/rkey validation is inefficient for large MRs
The current code loops during rkey/lkey validation to isolate the MR for the RDMA, which is expensive when the current operation is inside a very large memory region. This fix optimizes rkey/lkey validation routines for user memory regions and fast memory regions. The MR entry can be isolated by shifts/mods instead of looping. The existing loop is preserved for phys memory regions for now. Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c74
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h1
3 files changed, 60 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 4b80eb153d57..756d16098e73 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
158 isge->sge_length = sge->length; 158 isge->sge_length = sge->length;
159 isge->m = 0; 159 isge->m = 0;
160 isge->n = 0; 160 isge->n = 0;
161 spin_unlock_irqrestore(&rkt->lock, flags);
161 goto ok; 162 goto ok;
162 } 163 }
163 mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; 164 mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
164 if (unlikely(mr == NULL || mr->lkey != sge->lkey || 165 if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
165 mr->pd != &pd->ibpd)) 166 mr->pd != &pd->ibpd))
166 goto bail; 167 goto bail;
168 atomic_inc(&mr->refcount);
169 spin_unlock_irqrestore(&rkt->lock, flags);
167 170
168 off = sge->addr - mr->user_base; 171 off = sge->addr - mr->user_base;
169 if (unlikely(sge->addr < mr->user_base || 172 if (unlikely(sge->addr < mr->user_base ||
170 off + sge->length > mr->length || 173 off + sge->length > mr->length ||
171 (mr->access_flags & acc) != acc)) 174 (mr->access_flags & acc) != acc))
172 goto bail; 175 return ret;
173 176
174 off += mr->offset; 177 off += mr->offset;
175 m = 0; 178 if (mr->page_shift) {
176 n = 0; 179 /*
177 while (off >= mr->map[m]->segs[n].length) { 180 page sizes are uniform power of 2 so no loop is necessary
178 off -= mr->map[m]->segs[n].length; 181 entries_spanned_by_off is the number of times the loop below
179 n++; 182 would have executed.
180 if (n >= QIB_SEGSZ) { 183 */
181 m++; 184 size_t entries_spanned_by_off;
182 n = 0; 185
186 entries_spanned_by_off = off >> mr->page_shift;
187 off -= (entries_spanned_by_off << mr->page_shift);
188 m = entries_spanned_by_off/QIB_SEGSZ;
189 n = entries_spanned_by_off%QIB_SEGSZ;
190 } else {
191 m = 0;
192 n = 0;
193 while (off >= mr->map[m]->segs[n].length) {
194 off -= mr->map[m]->segs[n].length;
195 n++;
196 if (n >= QIB_SEGSZ) {
197 m++;
198 n = 0;
199 }
183 } 200 }
184 } 201 }
185 atomic_inc(&mr->refcount);
186 isge->mr = mr; 202 isge->mr = mr;
187 isge->vaddr = mr->map[m]->segs[n].vaddr + off; 203 isge->vaddr = mr->map[m]->segs[n].vaddr + off;
188 isge->length = mr->map[m]->segs[n].length - off; 204 isge->length = mr->map[m]->segs[n].length - off;
@@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
191 isge->n = n; 207 isge->n = n;
192ok: 208ok:
193 ret = 1; 209 ret = 1;
210 return ret;
194bail: 211bail:
195 spin_unlock_irqrestore(&rkt->lock, flags); 212 spin_unlock_irqrestore(&rkt->lock, flags);
196 return ret; 213 return ret;
@@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
237 sge->sge_length = len; 254 sge->sge_length = len;
238 sge->m = 0; 255 sge->m = 0;
239 sge->n = 0; 256 sge->n = 0;
257 spin_unlock_irqrestore(&rkt->lock, flags);
240 goto ok; 258 goto ok;
241 } 259 }
242 260
243 mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; 261 mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
244 if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 262 if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
245 goto bail; 263 goto bail;
264 atomic_inc(&mr->refcount);
265 spin_unlock_irqrestore(&rkt->lock, flags);
246 266
247 off = vaddr - mr->iova; 267 off = vaddr - mr->iova;
248 if (unlikely(vaddr < mr->iova || off + len > mr->length || 268 if (unlikely(vaddr < mr->iova || off + len > mr->length ||
249 (mr->access_flags & acc) == 0)) 269 (mr->access_flags & acc) == 0))
250 goto bail; 270 return ret;
251 271
252 off += mr->offset; 272 off += mr->offset;
253 m = 0; 273 if (mr->page_shift) {
254 n = 0; 274 /*
255 while (off >= mr->map[m]->segs[n].length) { 275 page sizes are uniform power of 2 so no loop is necessary
256 off -= mr->map[m]->segs[n].length; 276 entries_spanned_by_off is the number of times the loop below
257 n++; 277 would have executed.
258 if (n >= QIB_SEGSZ) { 278 */
259 m++; 279 size_t entries_spanned_by_off;
260 n = 0; 280
281 entries_spanned_by_off = off >> mr->page_shift;
282 off -= (entries_spanned_by_off << mr->page_shift);
283 m = entries_spanned_by_off/QIB_SEGSZ;
284 n = entries_spanned_by_off%QIB_SEGSZ;
285 } else {
286 m = 0;
287 n = 0;
288 while (off >= mr->map[m]->segs[n].length) {
289 off -= mr->map[m]->segs[n].length;
290 n++;
291 if (n >= QIB_SEGSZ) {
292 m++;
293 n = 0;
294 }
261 } 295 }
262 } 296 }
263 atomic_inc(&mr->refcount);
264 sge->mr = mr; 297 sge->mr = mr;
265 sge->vaddr = mr->map[m]->segs[n].vaddr + off; 298 sge->vaddr = mr->map[m]->segs[n].vaddr + off;
266 sge->length = mr->map[m]->segs[n].length - off; 299 sge->length = mr->map[m]->segs[n].length - off;
@@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
269 sge->n = n; 302 sge->n = n;
270ok: 303ok:
271 ret = 1; 304 ret = 1;
305 return ret;
272bail: 306bail:
273 spin_unlock_irqrestore(&rkt->lock, flags); 307 spin_unlock_irqrestore(&rkt->lock, flags);
274 return ret; 308 return ret;
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 5f95f0f6385d..08944e2ee334 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
39/* Fast memory region */ 39/* Fast memory region */
40struct qib_fmr { 40struct qib_fmr {
41 struct ib_fmr ibfmr; 41 struct ib_fmr ibfmr;
42 u8 page_shift;
43 struct qib_mregion mr; /* must be last */ 42 struct qib_mregion mr; /* must be last */
44}; 43};
45 44
@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
107 goto bail; 106 goto bail;
108 } 107 }
109 mr->mr.mapsz = m; 108 mr->mr.mapsz = m;
109 mr->mr.page_shift = 0;
110 mr->mr.max_segs = count; 110 mr->mr.max_segs = count;
111 111
112 /* 112 /*
@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
231 mr->mr.access_flags = mr_access_flags; 231 mr->mr.access_flags = mr_access_flags;
232 mr->umem = umem; 232 mr->umem = umem;
233 233
234 if (is_power_of_2(umem->page_size))
235 mr->mr.page_shift = ilog2(umem->page_size);
234 m = 0; 236 m = 0;
235 n = 0; 237 n = 0;
236 list_for_each_entry(chunk, &umem->chunk_list, list) { 238 list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
390 fmr->mr.offset = 0; 392 fmr->mr.offset = 0;
391 fmr->mr.access_flags = mr_access_flags; 393 fmr->mr.access_flags = mr_access_flags;
392 fmr->mr.max_segs = fmr_attr->max_pages; 394 fmr->mr.max_segs = fmr_attr->max_pages;
393 fmr->page_shift = fmr_attr->page_shift; 395 fmr->mr.page_shift = fmr_attr->page_shift;
394 396
395 atomic_set(&fmr->mr.refcount, 0); 397 atomic_set(&fmr->mr.refcount, 0);
396 ret = &fmr->ibfmr; 398 ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
437 spin_lock_irqsave(&rkt->lock, flags); 439 spin_lock_irqsave(&rkt->lock, flags);
438 fmr->mr.user_base = iova; 440 fmr->mr.user_base = iova;
439 fmr->mr.iova = iova; 441 fmr->mr.iova = iova;
440 ps = 1 << fmr->page_shift; 442 ps = 1 << fmr->mr.page_shift;
441 fmr->mr.length = list_len * ps; 443 fmr->mr.length = list_len * ps;
442 m = 0; 444 m = 0;
443 n = 0; 445 n = 0;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a08ceab510e1..63b22a9a7feb 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -301,6 +301,7 @@ struct qib_mregion {
301 int access_flags; 301 int access_flags;
302 u32 max_segs; /* number of qib_segs in all the arrays */ 302 u32 max_segs; /* number of qib_segs in all the arrays */
303 u32 mapsz; /* size of the map array */ 303 u32 mapsz; /* size of the map array */
304 u8 page_shift; /* 0 - non unform/non powerof2 sizes */
304 atomic_t refcount; 305 atomic_t refcount;
305 struct qib_segarray *map[0]; /* the segments */ 306 struct qib_segarray *map[0]; /* the segments */
306}; 307};