author     Mike Marciniszyn <mike.marciniszyn@intel.com>   2012-06-27 18:33:19 -0400
committer  Roland Dreier <roland@purestorage.com>          2012-07-08 21:05:19 -0400
commit     8aac4cc3a9d7d7c2f203b7a8db521b604cfb5dc9
tree       f9e98f1fd7df79a1577c77d74b7242fa16b9a74d
parent     6a82649f217023863d6b1740017e6c3dd6685327
IB/qib: RCU locking for MR validation
Profiling indicates that MR validation locking is expensive. The MR
table is largely read-only and is a suitable candidate for RCU locking.
This patch uses RCU locking during validation, eliminating one
lock/unlock pair from that path.
Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c   | 98
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c     |  7
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  |  4
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h  |  7
4 files changed, 66 insertions(+), 50 deletions(-)
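
The heart of the change is on the read side: in the qib_keys.c diff below, qib_lkey_ok() and qib_rkey_ok() validate a memory region inside an RCU read-side critical section and take their reference with atomic_inc_not_zero(), instead of holding the lkey table spinlock for the whole check. The sketch below is a minimal, self-contained illustration of that pattern built on the generic kernel RCU/atomic APIs; the names (demo_mregion, demo_table, demo_lookup) are invented for illustration and are not qib symbols.

```c
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/completion.h>

struct demo_mregion {
	u32 lkey;
	atomic_t refcount;
	struct completion comp;	/* signalled once the last reference is gone */
	struct rcu_head rcu;	/* used by the release sketch further down */
};

/* RCU-protected table slots; writers publish/unpublish under their own lock. */
static struct demo_mregion __rcu *demo_table[256];

/*
 * Lock-free validation: the RCU read-side critical section keeps the object
 * from being freed while it is examined, and atomic_inc_not_zero() refuses
 * to take a reference on an object whose last reference is already gone.
 */
static struct demo_mregion *demo_lookup(u32 lkey)
{
	struct demo_mregion *mr;

	rcu_read_lock();
	mr = rcu_dereference(demo_table[lkey & 255]);
	if (!mr || mr->lkey != lkey)
		goto bail;
	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
		goto bail;
	rcu_read_unlock();
	return mr;	/* caller now owns a reference */
bail:
	rcu_read_unlock();
	return NULL;
}
```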
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 8b5ee3aa8e3d..970165b027f7 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 
         /* special case for dma_mr lkey == 0 */
         if (dma_region) {
-                /* should the dma_mr be relative to the pd? */
-                if (!dev->dma_mr) {
+                struct qib_mregion *tmr;
+
+                tmr = rcu_dereference(dev->dma_mr);
+                if (!tmr) {
                         qib_get_mr(mr);
-                        dev->dma_mr = mr;
+                        rcu_assign_pointer(dev->dma_mr, mr);
                         mr->lkey_published = 1;
                 }
                 goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
                 rkt->gen++;
         }
         qib_get_mr(mr);
-        rkt->table[r] = mr;
+        rcu_assign_pointer(rkt->table[r], mr);
         mr->lkey_published = 1;
 success:
         spin_unlock_irqrestore(&rkt->lock, flags);
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
         spin_lock_irqsave(&rkt->lock, flags);
         if (!mr->lkey_published)
                 goto out;
-        mr->lkey_published = 0;
-
-
-        spin_lock_irqsave(&dev->lk_table.lock, flags);
-        if (lkey == 0) {
-                if (dev->dma_mr && dev->dma_mr == mr) {
-                        qib_put_mr(dev->dma_mr);
-                        dev->dma_mr = NULL;
-                }
-        } else {
+        if (lkey == 0)
+                rcu_assign_pointer(dev->dma_mr, NULL);
+        else {
                 r = lkey >> (32 - ib_qib_lkey_table_size);
-                qib_put_mr(dev->dma_mr);
-                rkt->table[r] = NULL;
+                rcu_assign_pointer(rkt->table[r], NULL);
         }
+        qib_put_mr(mr);
+        mr->lkey_published = 0;
 out:
-        spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+        spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use LKEY == zero for kernel virtual addresses
          * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (sge->lkey == 0) {
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
+                        goto bail;
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                rcu_read_unlock();
 
-                isge->mr = dev->dma_mr;
+                isge->mr = mr;
                 isge->vaddr = (void *) sge->addr;
                 isge->length = sge->length;
                 isge->sge_length = sge->length;
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
                 isge->n = 0;
                 goto ok;
         }
-        mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                     mr->pd != &pd->ibpd))
+        mr = rcu_dereference(
+                rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                 goto bail;
 
         off = sge->addr - mr->user_base;
-        if (unlikely(sge->addr < mr->user_base ||
-                     off + sge->length > mr->length ||
-                     (mr->access_flags & acc) != acc))
+        if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+                     (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
  * @vaddr: virtual address to place data
  * @rkey: rkey to check
  * @acc: access flags
  *
  * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
  */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use RKEY == zero for kernel virtual addresses
          * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (rkey == 0) {
                 struct qib_pd *pd = to_ipd(qp->ibqp.pd);
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                        goto bail;
+                rcu_read_unlock();
 
-                sge->mr = dev->dma_mr;
+                sge->mr = mr;
                 sge->vaddr = (void *) vaddr;
                 sge->length = len;
                 sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 goto ok;
         }
 
-        mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+        mr = rcu_dereference(
+                rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                 goto bail;
 
         off = vaddr - mr->iova;
         if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                      (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
 
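
On the write side the patch keeps the existing lkey-table spinlock in qib_alloc_lkey()/qib_free_lkey(), but publishes and clears entries with rcu_assign_pointer() so a concurrent RCU reader sees either the old pointer or a fully initialized new one. Continuing the illustrative sketch from above (the demo_* names remain hypothetical), publication and removal might look like this:

```c
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_table_lock);	/* serializes writers only */

static void demo_put(struct demo_mregion *mr);	/* defined in the release sketch below */

/* Publish: take the table's reference, then make the slot visible. */
static void demo_publish(struct demo_mregion *mr)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_table_lock, flags);
	atomic_inc(&mr->refcount);
	rcu_assign_pointer(demo_table[mr->lkey & 255], mr);
	spin_unlock_irqrestore(&demo_table_lock, flags);
}

/*
 * Unpublish: hide the slot, then drop the table's reference. Readers that
 * already fetched the pointer may still be using it, so the memory itself
 * is only freed after a grace period (see the release sketch below).
 */
static void demo_unpublish(struct demo_mregion *mr)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_table_lock, flags);
	rcu_assign_pointer(demo_table[mr->lkey & 255], NULL);
	spin_unlock_irqrestore(&demo_table_lock, flags);
	demo_put(mr);
}
```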
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 6a2028a56e3d..e6687ded8210 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
         return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+        struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+        complete(&mr->comp);
+}
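
mr_rcu_callback() is the other half of the hand-off: when the last reference is dropped, qib_put_mr() (see the qib_verbs.h hunk below) queues this callback with call_rcu(), so the region's completion ("complete when refcount goes to zero") is only signalled after a grace period, at which point no RCU reader can still hold the pointer. Below is a sketch of the same release path in the demo code, assuming the region was created with one initial reference and an init_completion()'d comp field; demo_destroy() is a hypothetical stand-in for a deregistration path.

```c
#include <linux/completion.h>
#include <linux/slab.h>

static void demo_rcu_callback(struct rcu_head *head)
{
	struct demo_mregion *mr = container_of(head, struct demo_mregion, rcu);

	/* runs after a grace period: no RCU reader can still hold 'mr' */
	complete(&mr->comp);
}

static void demo_put(struct demo_mregion *mr)
{
	if (atomic_dec_and_test(&mr->refcount))
		call_rcu(&mr->rcu, demo_rcu_callback);
}

/* Teardown path: unpublish, drop our reference, wait out readers, free. */
static void demo_destroy(struct demo_mregion *mr)
{
	demo_unpublish(mr);		/* also drops the table's reference */
	demo_put(mr);			/* drop the creator's reference */
	wait_for_completion(&mr->comp);	/* last put + grace period have passed */
	kfree(mr);
}
```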
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 76d7ce8a8c6e..59cdea345a82 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
                 ret = -ENOMEM;
                 goto err_lk;
         }
-        memset(dev->lk_table.table, 0, lk_tab_size);
+        RCU_INIT_POINTER(dev->dma_mr, NULL);
+        for (i = 0; i < dev->lk_table.max; i++)
+                RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
         INIT_LIST_HEAD(&dev->pending_mmaps);
         spin_lock_init(&dev->pending_lock);
         dev->mmap_offset = PAGE_SIZE;
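
Device registration now seeds the dma_mr pointer and every lkey table slot with RCU_INIT_POINTER() rather than a memset(). RCU_INIT_POINTER() is the lighter-weight choice when the pointer is NULL or not yet reachable by any reader, since it omits the publish barrier that rcu_assign_pointer() would insert. The same idea in the demo sketch:

```c
#include <linux/kernel.h>
#include <linux/rcupdate.h>

/* One-time setup, before the table is reachable by any RCU reader. */
static void demo_table_init(void)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(demo_table); i++)
		RCU_INIT_POINTER(demo_table[i], NULL);
}
```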
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4a2277bc059e..85751fd74371 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,8 +303,9 @@ struct qib_mregion {
         u32 max_segs;           /* number of qib_segs in all the arrays */
         u32 mapsz;              /* size of the map array */
         u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
         u8  lkey_published;     /* in global table */
         struct completion comp; /* complete when refcount goes to zero */
+        struct rcu_head list;
         atomic_t refcount;
         struct qib_segarray *map[0];    /* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
         atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
         if (unlikely(atomic_dec_and_test(&mr->refcount)))
-                complete(&mr->comp);
+                call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
