diff options
| author | Mike Marciniszyn <mike.marciniszyn@qlogic.com> | 2011-01-10 20:42:22 -0500 |
|---|---|---|
| committer | Roland Dreier <rolandd@cisco.com> | 2011-01-10 20:42:22 -0500 |
| commit | 2a600f14d25fda341b5633c75cc50a7574fc1007 (patch) | |
| tree | 02d0af0c74dc1e7ef99d2798d4b5e0a375b4deb8 | |
| parent | 7c3edd3ff3098093e594dbcbc9dbeeae09b1b4a0 (diff) | |
IB/qib: RDMA lkey/rkey validation is inefficient for large MRs
The current code loops during rkey/lkey validation to isolate the MR
for the RDMA, which is expensive when the current operation is inside
a very large memory region.
This fix optimizes rkey/lkey validation routines for user memory
regions and fast memory regions. The MR entry can be isolated by
shifts/mods instead of looping. The existing loop is preserved for
phys memory regions for now.
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
| -rw-r--r-- | drivers/infiniband/hw/qib/qib_keys.c | 74 | ||||
| -rw-r--r-- | drivers/infiniband/hw/qib/qib_mr.c | 8 | ||||
| -rw-r--r-- | drivers/infiniband/hw/qib/qib_verbs.h | 1 |
3 files changed, 60 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 4b80eb153d57..756d16098e73 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c | |||
| @@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, | |||
| 158 | isge->sge_length = sge->length; | 158 | isge->sge_length = sge->length; |
| 159 | isge->m = 0; | 159 | isge->m = 0; |
| 160 | isge->n = 0; | 160 | isge->n = 0; |
| 161 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
| 161 | goto ok; | 162 | goto ok; |
| 162 | } | 163 | } |
| 163 | mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; | 164 | mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; |
| 164 | if (unlikely(mr == NULL || mr->lkey != sge->lkey || | 165 | if (unlikely(mr == NULL || mr->lkey != sge->lkey || |
| 165 | mr->pd != &pd->ibpd)) | 166 | mr->pd != &pd->ibpd)) |
| 166 | goto bail; | 167 | goto bail; |
| 168 | atomic_inc(&mr->refcount); | ||
| 169 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
| 167 | 170 | ||
| 168 | off = sge->addr - mr->user_base; | 171 | off = sge->addr - mr->user_base; |
| 169 | if (unlikely(sge->addr < mr->user_base || | 172 | if (unlikely(sge->addr < mr->user_base || |
| 170 | off + sge->length > mr->length || | 173 | off + sge->length > mr->length || |
| 171 | (mr->access_flags & acc) != acc)) | 174 | (mr->access_flags & acc) != acc)) |
| 172 | goto bail; | 175 | return ret; |
| 173 | 176 | ||
| 174 | off += mr->offset; | 177 | off += mr->offset; |
| 175 | m = 0; | 178 | if (mr->page_shift) { |
| 176 | n = 0; | 179 | /* |
| 177 | while (off >= mr->map[m]->segs[n].length) { | 180 | page sizes are uniform power of 2 so no loop is necessary |
| 178 | off -= mr->map[m]->segs[n].length; | 181 | entries_spanned_by_off is the number of times the loop below |
| 179 | n++; | 182 | would have executed. |
| 180 | if (n >= QIB_SEGSZ) { | 183 | */ |
| 181 | m++; | 184 | size_t entries_spanned_by_off; |
| 182 | n = 0; | 185 | |
| 186 | entries_spanned_by_off = off >> mr->page_shift; | ||
| 187 | off -= (entries_spanned_by_off << mr->page_shift); | ||
| 188 | m = entries_spanned_by_off/QIB_SEGSZ; | ||
| 189 | n = entries_spanned_by_off%QIB_SEGSZ; | ||
| 190 | } else { | ||
| 191 | m = 0; | ||
| 192 | n = 0; | ||
| 193 | while (off >= mr->map[m]->segs[n].length) { | ||
| 194 | off -= mr->map[m]->segs[n].length; | ||
| 195 | n++; | ||
| 196 | if (n >= QIB_SEGSZ) { | ||
| 197 | m++; | ||
| 198 | n = 0; | ||
| 199 | } | ||
| 183 | } | 200 | } |
| 184 | } | 201 | } |
| 185 | atomic_inc(&mr->refcount); | ||
| 186 | isge->mr = mr; | 202 | isge->mr = mr; |
| 187 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; | 203 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; |
| 188 | isge->length = mr->map[m]->segs[n].length - off; | 204 | isge->length = mr->map[m]->segs[n].length - off; |
| @@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, | |||
| 191 | isge->n = n; | 207 | isge->n = n; |
| 192 | ok: | 208 | ok: |
| 193 | ret = 1; | 209 | ret = 1; |
| 210 | return ret; | ||
| 194 | bail: | 211 | bail: |
| 195 | spin_unlock_irqrestore(&rkt->lock, flags); | 212 | spin_unlock_irqrestore(&rkt->lock, flags); |
| 196 | return ret; | 213 | return ret; |
| @@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, | |||
| 237 | sge->sge_length = len; | 254 | sge->sge_length = len; |
| 238 | sge->m = 0; | 255 | sge->m = 0; |
| 239 | sge->n = 0; | 256 | sge->n = 0; |
| 257 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
| 240 | goto ok; | 258 | goto ok; |
| 241 | } | 259 | } |
| 242 | 260 | ||
| 243 | mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; | 261 | mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; |
| 244 | if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) | 262 | if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) |
| 245 | goto bail; | 263 | goto bail; |
| 264 | atomic_inc(&mr->refcount); | ||
| 265 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
| 246 | 266 | ||
| 247 | off = vaddr - mr->iova; | 267 | off = vaddr - mr->iova; |
| 248 | if (unlikely(vaddr < mr->iova || off + len > mr->length || | 268 | if (unlikely(vaddr < mr->iova || off + len > mr->length || |
| 249 | (mr->access_flags & acc) == 0)) | 269 | (mr->access_flags & acc) == 0)) |
| 250 | goto bail; | 270 | return ret; |
| 251 | 271 | ||
| 252 | off += mr->offset; | 272 | off += mr->offset; |
| 253 | m = 0; | 273 | if (mr->page_shift) { |
| 254 | n = 0; | 274 | /* |
| 255 | while (off >= mr->map[m]->segs[n].length) { | 275 | page sizes are uniform power of 2 so no loop is necessary |
| 256 | off -= mr->map[m]->segs[n].length; | 276 | entries_spanned_by_off is the number of times the loop below |
| 257 | n++; | 277 | would have executed. |
| 258 | if (n >= QIB_SEGSZ) { | 278 | */ |
| 259 | m++; | 279 | size_t entries_spanned_by_off; |
| 260 | n = 0; | 280 | |
| 281 | entries_spanned_by_off = off >> mr->page_shift; | ||
| 282 | off -= (entries_spanned_by_off << mr->page_shift); | ||
| 283 | m = entries_spanned_by_off/QIB_SEGSZ; | ||
| 284 | n = entries_spanned_by_off%QIB_SEGSZ; | ||
| 285 | } else { | ||
| 286 | m = 0; | ||
| 287 | n = 0; | ||
| 288 | while (off >= mr->map[m]->segs[n].length) { | ||
| 289 | off -= mr->map[m]->segs[n].length; | ||
| 290 | n++; | ||
| 291 | if (n >= QIB_SEGSZ) { | ||
| 292 | m++; | ||
| 293 | n = 0; | ||
| 294 | } | ||
| 261 | } | 295 | } |
| 262 | } | 296 | } |
| 263 | atomic_inc(&mr->refcount); | ||
| 264 | sge->mr = mr; | 297 | sge->mr = mr; |
| 265 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; | 298 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; |
| 266 | sge->length = mr->map[m]->segs[n].length - off; | 299 | sge->length = mr->map[m]->segs[n].length - off; |
| @@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, | |||
| 269 | sge->n = n; | 302 | sge->n = n; |
| 270 | ok: | 303 | ok: |
| 271 | ret = 1; | 304 | ret = 1; |
| 305 | return ret; | ||
| 272 | bail: | 306 | bail: |
| 273 | spin_unlock_irqrestore(&rkt->lock, flags); | 307 | spin_unlock_irqrestore(&rkt->lock, flags); |
| 274 | return ret; | 308 | return ret; |
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 5f95f0f6385d..08944e2ee334 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c | |||
| @@ -39,7 +39,6 @@ | |||
| 39 | /* Fast memory region */ | 39 | /* Fast memory region */ |
| 40 | struct qib_fmr { | 40 | struct qib_fmr { |
| 41 | struct ib_fmr ibfmr; | 41 | struct ib_fmr ibfmr; |
| 42 | u8 page_shift; | ||
| 43 | struct qib_mregion mr; /* must be last */ | 42 | struct qib_mregion mr; /* must be last */ |
| 44 | }; | 43 | }; |
| 45 | 44 | ||
| @@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) | |||
| 107 | goto bail; | 106 | goto bail; |
| 108 | } | 107 | } |
| 109 | mr->mr.mapsz = m; | 108 | mr->mr.mapsz = m; |
| 109 | mr->mr.page_shift = 0; | ||
| 110 | mr->mr.max_segs = count; | 110 | mr->mr.max_segs = count; |
| 111 | 111 | ||
| 112 | /* | 112 | /* |
| @@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
| 231 | mr->mr.access_flags = mr_access_flags; | 231 | mr->mr.access_flags = mr_access_flags; |
| 232 | mr->umem = umem; | 232 | mr->umem = umem; |
| 233 | 233 | ||
| 234 | if (is_power_of_2(umem->page_size)) | ||
| 235 | mr->mr.page_shift = ilog2(umem->page_size); | ||
| 234 | m = 0; | 236 | m = 0; |
| 235 | n = 0; | 237 | n = 0; |
| 236 | list_for_each_entry(chunk, &umem->chunk_list, list) { | 238 | list_for_each_entry(chunk, &umem->chunk_list, list) { |
| @@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, | |||
| 390 | fmr->mr.offset = 0; | 392 | fmr->mr.offset = 0; |
| 391 | fmr->mr.access_flags = mr_access_flags; | 393 | fmr->mr.access_flags = mr_access_flags; |
| 392 | fmr->mr.max_segs = fmr_attr->max_pages; | 394 | fmr->mr.max_segs = fmr_attr->max_pages; |
| 393 | fmr->page_shift = fmr_attr->page_shift; | 395 | fmr->mr.page_shift = fmr_attr->page_shift; |
| 394 | 396 | ||
| 395 | atomic_set(&fmr->mr.refcount, 0); | 397 | atomic_set(&fmr->mr.refcount, 0); |
| 396 | ret = &fmr->ibfmr; | 398 | ret = &fmr->ibfmr; |
| @@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, | |||
| 437 | spin_lock_irqsave(&rkt->lock, flags); | 439 | spin_lock_irqsave(&rkt->lock, flags); |
| 438 | fmr->mr.user_base = iova; | 440 | fmr->mr.user_base = iova; |
| 439 | fmr->mr.iova = iova; | 441 | fmr->mr.iova = iova; |
| 440 | ps = 1 << fmr->page_shift; | 442 | ps = 1 << fmr->mr.page_shift; |
| 441 | fmr->mr.length = list_len * ps; | 443 | fmr->mr.length = list_len * ps; |
| 442 | m = 0; | 444 | m = 0; |
| 443 | n = 0; | 445 | n = 0; |
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a08ceab510e1..63b22a9a7feb 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h | |||
| @@ -301,6 +301,7 @@ struct qib_mregion { | |||
| 301 | int access_flags; | 301 | int access_flags; |
| 302 | u32 max_segs; /* number of qib_segs in all the arrays */ | 302 | u32 max_segs; /* number of qib_segs in all the arrays */ |
| 303 | u32 mapsz; /* size of the map array */ | 303 | u32 mapsz; /* size of the map array */ |
| 304 | u8 page_shift; /* 0 - non unform/non powerof2 sizes */ | ||
| 304 | atomic_t refcount; | 305 | atomic_t refcount; |
| 305 | struct qib_segarray *map[0]; /* the segments */ | 306 | struct qib_segarray *map[0]; /* the segments */ |
| 306 | }; | 307 | }; |
