author    | Mike Marciniszyn <mike.marciniszyn@intel.com> | 2012-06-27 18:33:19 -0400
committer | Roland Dreier <roland@purestorage.com> | 2012-07-08 21:05:19 -0400
commit    | 8aac4cc3a9d7d7c2f203b7a8db521b604cfb5dc9 (patch)
tree      | f9e98f1fd7df79a1577c77d74b7242fa16b9a74d /drivers/infiniband
parent    | 6a82649f217023863d6b1740017e6c3dd6685327 (diff)
IB/qib: RCU locking for MR validation
Profiling indicates that MR validation locking is expensive. The MR
table is largely read-only and is a suitable candidate for RCU locking.
The patch uses RCU read-side locking during validation, which eliminates one
lock/unlock pair from the validation path.
Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/qib/qib_keys.c  | 98
-rw-r--r-- | drivers/infiniband/hw/qib/qib_mr.c    | 7
-rw-r--r-- | drivers/infiniband/hw/qib/qib_verbs.c | 4
-rw-r--r-- | drivers/infiniband/hw/qib/qib_verbs.h | 7
4 files changed, 66 insertions, 50 deletions
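
The heart of the change is the read path in qib_lkey_ok() and qib_rkey_ok(): instead of taking the lkey table spinlock around every SGE validation, the lookup now runs under rcu_read_lock() and pins the MR with atomic_inc_not_zero(), which fails harmlessly if the MR is concurrently being unpublished and torn down. The following is a minimal, self-contained sketch of that reader pattern; the names (my_entry, my_table, lookup_and_hold) are illustrative, not the driver's actual types.

```c
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/rcupdate.h>

/* Illustrative stand-ins for qib_mregion and the lkey table. */
struct my_entry {
	u32 key;
	atomic_t refcount;
	struct rcu_head rcu;
};

struct my_table {
	struct my_entry __rcu *slots[256];
};

/* Look up an entry without taking the table lock.  On success the
 * caller holds a reference and must drop it when done. */
static struct my_entry *lookup_and_hold(struct my_table *tbl, u32 key)
{
	struct my_entry *e;

	rcu_read_lock();
	e = rcu_dereference(tbl->slots[key & 0xff]);
	if (!e || e->key != key)
		goto bail;
	/* The entry may already be on its way out; only keep it if
	 * its refcount has not yet dropped to zero. */
	if (unlikely(!atomic_inc_not_zero(&e->refcount)))
		goto bail;
	rcu_read_unlock();
	return e;
bail:
	rcu_read_unlock();
	return NULL;
}
```

Once atomic_inc_not_zero() succeeds, the reference (not the RCU read-side critical section) is what keeps the entry alive, so the reader can exit the critical section immediately, exactly as the new qib_lkey_ok()/qib_rkey_ok() below do.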
```diff
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 8b5ee3aa8e3d..970165b027f7 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		/* should the dma_mr be relative to the pd? */
-		if (!dev->dma_mr) {
+		struct qib_mregion *tmr;
+
+		tmr = rcu_dereference(dev->dma_mr);
+		if (!tmr) {
 			qib_get_mr(mr);
-			dev->dma_mr = mr;
+			rcu_assign_pointer(dev->dma_mr, mr);
 			mr->lkey_published = 1;
 		}
 		goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 		rkt->gen++;
 	}
 	qib_get_mr(mr);
-	rkt->table[r] = mr;
+	rcu_assign_pointer(rkt->table[r], mr);
 	mr->lkey_published = 1;
 success:
 	spin_unlock_irqrestore(&rkt->lock, flags);
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
 		goto out;
-	mr->lkey_published = 0;
-
-
-	spin_lock_irqsave(&dev->lk_table.lock, flags);
-	if (lkey == 0) {
-		if (dev->dma_mr && dev->dma_mr == mr) {
-			qib_put_mr(dev->dma_mr);
-			dev->dma_mr = NULL;
-		}
-	} else {
+	if (lkey == 0)
+		rcu_assign_pointer(dev->dma_mr, NULL);
+	else {
 		r = lkey >> (32 - ib_qib_lkey_table_size);
-		qib_put_mr(dev->dma_mr);
-		rkt->table[r] = NULL;
+		rcu_assign_pointer(rkt->table[r], NULL);
 	}
+	qib_put_mr(mr);
+	mr->lkey_published = 0;
 out:
-	spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+	spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use LKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (sge->lkey == 0) {
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
+			goto bail;
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		rcu_read_unlock();
 
-		isge->mr = dev->dma_mr;
+		isge->mr = mr;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
 		isge->sge_length = sge->length;
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		isge->n = 0;
 		goto ok;
 	}
-	mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-		     mr->pd != &pd->ibpd))
+	mr = rcu_dereference(
+		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
 		goto bail;
 
 	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
+	if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
  * @vaddr: virtual address to place data
  * @rkey: rkey to check
  * @acc: access flags
  *
  * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
  */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use RKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (rkey == 0) {
 		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+			goto bail;
+		rcu_read_unlock();
 
-		sge->mr = dev->dma_mr;
+		sge->mr = mr;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
 		sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		goto ok;
 	}
 
-	mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+	mr = rcu_dereference(
+		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
 	off = vaddr - mr->iova;
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
 
```
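
The update side, shown above in qib_alloc_lkey() and qib_free_lkey(), still serializes writers on rkt->lock; only the pointer stores change to rcu_assign_pointer(), so a concurrent reader observes either NULL or a fully initialized MR. Below is a hedged sketch of that publish/unpublish pairing, reusing the illustrative my_entry type from the earlier note (the lock and function names are stand-ins, not the driver's code).

```c
#include <linux/spinlock.h>
#include <linux/rcupdate.h>

struct my_entry;	/* see the reader-side sketch above */

static DEFINE_SPINLOCK(my_table_lock);	/* stands in for rkt->lock */

/* Publish: rcu_assign_pointer() orders the entry's initialization
 * before the pointer store, so readers never see a half-built entry. */
static void publish_entry(struct my_entry __rcu **slot, struct my_entry *e)
{
	unsigned long flags;

	spin_lock_irqsave(&my_table_lock, flags);
	rcu_assign_pointer(*slot, e);
	spin_unlock_irqrestore(&my_table_lock, flags);
}

/* Unpublish: clear the slot under the writer lock, then drop the
 * table's reference.  Readers that already passed
 * atomic_inc_not_zero() keep the entry alive until they put it. */
static void unpublish_entry(struct my_entry __rcu **slot)
{
	unsigned long flags;

	spin_lock_irqsave(&my_table_lock, flags);
	rcu_assign_pointer(*slot, NULL);
	spin_unlock_irqrestore(&my_table_lock, flags);
	/* a my_entry_put() would follow here; freeing is deferred via RCU */
}
```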
```diff
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 6a2028a56e3d..e6687ded8210 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
 	return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+	complete(&mr->comp);
+}
```
```diff
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 76d7ce8a8c6e..59cdea345a82 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
 		ret = -ENOMEM;
 		goto err_lk;
 	}
-	memset(dev->lk_table.table, 0, lk_tab_size);
+	RCU_INIT_POINTER(dev->dma_mr, NULL);
+	for (i = 0; i < dev->lk_table.max; i++)
+		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
 	INIT_LIST_HEAD(&dev->pending_mmaps);
 	spin_lock_init(&dev->pending_lock);
 	dev->mmap_offset = PAGE_SIZE;
```
```diff
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4a2277bc059e..85751fd74371 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,8 +303,9 @@ struct qib_mregion {
 	u32 max_segs;		/* number of qib_segs in all the arrays */
 	u32 mapsz;		/* size of the map array */
 	u8  page_shift;		/* 0 - non unform/non powerof2 sizes */
 	u8  lkey_published;	/* in global table */
 	struct completion comp;	/* complete when refcount goes to zero */
+	struct rcu_head list;
 	atomic_t refcount;
 	struct qib_segarray *map[0];	/* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
 	atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
 	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
+		call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
```
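
The last piece is object lifetime: qib_put_mr() no longer completes mr->comp directly when the final reference drops; it defers through call_rcu(), so the completion fires only after every CPU has left its RCU read-side critical section. The teardown path can then sleep on the completion before freeing the region (that wait is not part of this hunk; it is assumed to live in the existing deregistration path, which is why qib_mregion already carries a completion). A small sketch of the same lifetime pattern, again with a hypothetical my_obj type:

```c
#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical object; comp is assumed to be initialized with
 * init_completion() and refcount set to 1 when the object is created. */
struct my_obj {
	atomic_t refcount;
	struct rcu_head rcu;
	struct completion comp;
};

static void my_obj_rcu_cb(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu);

	complete(&obj->comp);	/* wakes the thread tearing the object down */
}

static void my_obj_put(struct my_obj *obj)
{
	/* Defer the completion until current RCU readers are done. */
	if (atomic_dec_and_test(&obj->refcount))
		call_rcu(&obj->rcu, my_obj_rcu_cb);
}

/* Teardown: drop the creator's reference, then block until readers
 * and the RCU grace period have drained before freeing. */
static void my_obj_teardown(struct my_obj *obj)
{
	my_obj_put(obj);
	wait_for_completion(&obj->comp);
	kfree(obj);
}
```

This mirrors qib_put_mr() plus mr_rcu_callback() above: the refcount guards normal use, call_rcu() bridges the RCU grace period, and the completion tells the sleeping deregistration path when it is finally safe to free the region.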