diff options
author | Mike Marciniszyn <mike.marciniszyn@qlogic.com> | 2011-01-10 20:42:22 -0500 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2011-01-10 20:42:22 -0500 |
commit | 2a600f14d25fda341b5633c75cc50a7574fc1007 (patch) | |
tree | 02d0af0c74dc1e7ef99d2798d4b5e0a375b4deb8 /drivers/infiniband/hw/qib/qib_keys.c | |
parent | 7c3edd3ff3098093e594dbcbc9dbeeae09b1b4a0 (diff) |
IB/qib: RDMA lkey/rkey validation is inefficient for large MRs
The current code loops during rkey/lkey validation to isolate the MR
for the RDMA, which is expensive when the current operation is inside
a very large memory region.
This fix optimizes rkey/lkey validation routines for user memory
regions and fast memory regions. The MR entry can be isolated by
shifts/mods instead of looping. The existing loop is preserved for
phys memory regions for now.
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_keys.c')
-rw-r--r-- | drivers/infiniband/hw/qib/qib_keys.c | 74 |
1 files changed, 54 insertions, 20 deletions
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 4b80eb153d57..756d16098e73 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c | |||
@@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, | |||
158 | isge->sge_length = sge->length; | 158 | isge->sge_length = sge->length; |
159 | isge->m = 0; | 159 | isge->m = 0; |
160 | isge->n = 0; | 160 | isge->n = 0; |
161 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
161 | goto ok; | 162 | goto ok; |
162 | } | 163 | } |
163 | mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; | 164 | mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; |
164 | if (unlikely(mr == NULL || mr->lkey != sge->lkey || | 165 | if (unlikely(mr == NULL || mr->lkey != sge->lkey || |
165 | mr->pd != &pd->ibpd)) | 166 | mr->pd != &pd->ibpd)) |
166 | goto bail; | 167 | goto bail; |
168 | atomic_inc(&mr->refcount); | ||
169 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
167 | 170 | ||
168 | off = sge->addr - mr->user_base; | 171 | off = sge->addr - mr->user_base; |
169 | if (unlikely(sge->addr < mr->user_base || | 172 | if (unlikely(sge->addr < mr->user_base || |
170 | off + sge->length > mr->length || | 173 | off + sge->length > mr->length || |
171 | (mr->access_flags & acc) != acc)) | 174 | (mr->access_flags & acc) != acc)) |
172 | goto bail; | 175 | return ret; |
173 | 176 | ||
174 | off += mr->offset; | 177 | off += mr->offset; |
175 | m = 0; | 178 | if (mr->page_shift) { |
176 | n = 0; | 179 | /* |
177 | while (off >= mr->map[m]->segs[n].length) { | 180 | page sizes are uniform power of 2 so no loop is necessary |
178 | off -= mr->map[m]->segs[n].length; | 181 | entries_spanned_by_off is the number of times the loop below |
179 | n++; | 182 | would have executed. |
180 | if (n >= QIB_SEGSZ) { | 183 | */ |
181 | m++; | 184 | size_t entries_spanned_by_off; |
182 | n = 0; | 185 | |
186 | entries_spanned_by_off = off >> mr->page_shift; | ||
187 | off -= (entries_spanned_by_off << mr->page_shift); | ||
188 | m = entries_spanned_by_off/QIB_SEGSZ; | ||
189 | n = entries_spanned_by_off%QIB_SEGSZ; | ||
190 | } else { | ||
191 | m = 0; | ||
192 | n = 0; | ||
193 | while (off >= mr->map[m]->segs[n].length) { | ||
194 | off -= mr->map[m]->segs[n].length; | ||
195 | n++; | ||
196 | if (n >= QIB_SEGSZ) { | ||
197 | m++; | ||
198 | n = 0; | ||
199 | } | ||
183 | } | 200 | } |
184 | } | 201 | } |
185 | atomic_inc(&mr->refcount); | ||
186 | isge->mr = mr; | 202 | isge->mr = mr; |
187 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; | 203 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; |
188 | isge->length = mr->map[m]->segs[n].length - off; | 204 | isge->length = mr->map[m]->segs[n].length - off; |
@@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, | |||
191 | isge->n = n; | 207 | isge->n = n; |
192 | ok: | 208 | ok: |
193 | ret = 1; | 209 | ret = 1; |
210 | return ret; | ||
194 | bail: | 211 | bail: |
195 | spin_unlock_irqrestore(&rkt->lock, flags); | 212 | spin_unlock_irqrestore(&rkt->lock, flags); |
196 | return ret; | 213 | return ret; |
@@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, | |||
237 | sge->sge_length = len; | 254 | sge->sge_length = len; |
238 | sge->m = 0; | 255 | sge->m = 0; |
239 | sge->n = 0; | 256 | sge->n = 0; |
257 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
240 | goto ok; | 258 | goto ok; |
241 | } | 259 | } |
242 | 260 | ||
243 | mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; | 261 | mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; |
244 | if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) | 262 | if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) |
245 | goto bail; | 263 | goto bail; |
264 | atomic_inc(&mr->refcount); | ||
265 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
246 | 266 | ||
247 | off = vaddr - mr->iova; | 267 | off = vaddr - mr->iova; |
248 | if (unlikely(vaddr < mr->iova || off + len > mr->length || | 268 | if (unlikely(vaddr < mr->iova || off + len > mr->length || |
249 | (mr->access_flags & acc) == 0)) | 269 | (mr->access_flags & acc) == 0)) |
250 | goto bail; | 270 | return ret; |
251 | 271 | ||
252 | off += mr->offset; | 272 | off += mr->offset; |
253 | m = 0; | 273 | if (mr->page_shift) { |
254 | n = 0; | 274 | /* |
255 | while (off >= mr->map[m]->segs[n].length) { | 275 | page sizes are uniform power of 2 so no loop is necessary |
256 | off -= mr->map[m]->segs[n].length; | 276 | entries_spanned_by_off is the number of times the loop below |
257 | n++; | 277 | would have executed. |
258 | if (n >= QIB_SEGSZ) { | 278 | */ |
259 | m++; | 279 | size_t entries_spanned_by_off; |
260 | n = 0; | 280 | |
281 | entries_spanned_by_off = off >> mr->page_shift; | ||
282 | off -= (entries_spanned_by_off << mr->page_shift); | ||
283 | m = entries_spanned_by_off/QIB_SEGSZ; | ||
284 | n = entries_spanned_by_off%QIB_SEGSZ; | ||
285 | } else { | ||
286 | m = 0; | ||
287 | n = 0; | ||
288 | while (off >= mr->map[m]->segs[n].length) { | ||
289 | off -= mr->map[m]->segs[n].length; | ||
290 | n++; | ||
291 | if (n >= QIB_SEGSZ) { | ||
292 | m++; | ||
293 | n = 0; | ||
294 | } | ||
261 | } | 295 | } |
262 | } | 296 | } |
263 | atomic_inc(&mr->refcount); | ||
264 | sge->mr = mr; | 297 | sge->mr = mr; |
265 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; | 298 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; |
266 | sge->length = mr->map[m]->segs[n].length - off; | 299 | sge->length = mr->map[m]->segs[n].length - off; |
@@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, | |||
269 | sge->n = n; | 302 | sge->n = n; |
270 | ok: | 303 | ok: |
271 | ret = 1; | 304 | ret = 1; |
305 | return ret; | ||
272 | bail: | 306 | bail: |
273 | spin_unlock_irqrestore(&rkt->lock, flags); | 307 | spin_unlock_irqrestore(&rkt->lock, flags); |
274 | return ret; | 308 | return ret; |