author     Mike Marciniszyn <mike.marciniszyn@intel.com>   2012-06-27 18:33:19 -0400
committer  Roland Dreier <roland@purestorage.com>          2012-07-08 21:05:19 -0400
commit     8aac4cc3a9d7d7c2f203b7a8db521b604cfb5dc9 (patch)
tree       f9e98f1fd7df79a1577c77d74b7242fa16b9a74d /drivers/infiniband
parent     6a82649f217023863d6b1740017e6c3dd6685327 (diff)
IB/qib: RCU locking for MR validation
Profiling indicates that MR validation locking is expensive.  The MR table
is largely read-only and is a suitable candidate for RCU locking.

The patch uses RCU locking during validation to eliminate one lock/unlock
during that validation.

Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
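The read-side pattern the validation routines switch to looks roughly like this (an illustrative sketch only; rkt, idx and sge stand in for the real lookup and bounds checks done in qib_lkey_ok()/qib_rkey_ok() in the diff below):

    rcu_read_lock();                        /* replaces spin_lock_irqsave(&rkt->lock, ...) */
    mr = rcu_dereference(rkt->table[idx]);  /* lock-free lookup of the published MR */
    if (!mr || mr->lkey != sge->lkey)       /* stale or mismatched entry */
            goto bail;
    if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
            goto bail;                      /* MR is being torn down; treat as invalid */
    rcu_read_unlock();                      /* the reference now pins the MR */

Publication and removal still happen under rkt->lock but store through rcu_assign_pointer(), and the final qib_put_mr() defers through call_rcu(), so a reader that loses the atomic_inc_not_zero() race simply fails validation instead of touching a freed qib_mregion.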
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c   | 98
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c     |  7
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  |  4
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h  |  7
4 files changed, 66 insertions(+), 50 deletions(-)
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 8b5ee3aa8e3d..970165b027f7 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 
         /* special case for dma_mr lkey == 0 */
         if (dma_region) {
-                /* should the dma_mr be relative to the pd? */
-                if (!dev->dma_mr) {
+                struct qib_mregion *tmr;
+
+                tmr = rcu_dereference(dev->dma_mr);
+                if (!tmr) {
                         qib_get_mr(mr);
-                        dev->dma_mr = mr;
+                        rcu_assign_pointer(dev->dma_mr, mr);
                         mr->lkey_published = 1;
                 }
                 goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
                 rkt->gen++;
         }
         qib_get_mr(mr);
-        rkt->table[r] = mr;
+        rcu_assign_pointer(rkt->table[r], mr);
         mr->lkey_published = 1;
 success:
         spin_unlock_irqrestore(&rkt->lock, flags);
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
         spin_lock_irqsave(&rkt->lock, flags);
         if (!mr->lkey_published)
                 goto out;
-        mr->lkey_published = 0;
-
-
-        spin_lock_irqsave(&dev->lk_table.lock, flags);
-        if (lkey == 0) {
-                if (dev->dma_mr && dev->dma_mr == mr) {
-                        qib_put_mr(dev->dma_mr);
-                        dev->dma_mr = NULL;
-                }
-        } else {
+        if (lkey == 0)
+                rcu_assign_pointer(dev->dma_mr, NULL);
+        else {
                 r = lkey >> (32 - ib_qib_lkey_table_size);
-                qib_put_mr(dev->dma_mr);
-                rkt->table[r] = NULL;
+                rcu_assign_pointer(rkt->table[r], NULL);
         }
+        qib_put_mr(mr);
+        mr->lkey_published = 0;
 out:
-        spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+        spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use LKEY == zero for kernel virtual addresses
          * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (sge->lkey == 0) {
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
+                        goto bail;
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                rcu_read_unlock();
 
-                isge->mr = dev->dma_mr;
+                isge->mr = mr;
                 isge->vaddr = (void *) sge->addr;
                 isge->length = sge->length;
                 isge->sge_length = sge->length;
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
                 isge->n = 0;
                 goto ok;
         }
-        mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                     mr->pd != &pd->ibpd))
+        mr = rcu_dereference(
+                rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                 goto bail;
 
         off = sge->addr - mr->user_base;
-        if (unlikely(sge->addr < mr->user_base ||
-                     off + sge->length > mr->length ||
-                     (mr->access_flags & acc) != acc))
+        if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+                     (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
  * @vaddr: virtual address to place data
  * @rkey: rkey to check
  * @acc: access flags
  *
  * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
  */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use RKEY == zero for kernel virtual addresses
          * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (rkey == 0) {
                 struct qib_pd *pd = to_ipd(qp->ibqp.pd);
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                        goto bail;
+                rcu_read_unlock();
 
-                sge->mr = dev->dma_mr;
+                sge->mr = mr;
                 sge->vaddr = (void *) vaddr;
                 sge->length = len;
                 sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 goto ok;
         }
 
-        mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+        mr = rcu_dereference(
+                rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                 goto bail;
 
         off = vaddr - mr->iova;
         if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                      (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 6a2028a56e3d..e6687ded8210 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
         return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+        struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+        complete(&mr->comp);
+}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 76d7ce8a8c6e..59cdea345a82 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
                 ret = -ENOMEM;
                 goto err_lk;
         }
-        memset(dev->lk_table.table, 0, lk_tab_size);
+        RCU_INIT_POINTER(dev->dma_mr, NULL);
+        for (i = 0; i < dev->lk_table.max; i++)
+                RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
         INIT_LIST_HEAD(&dev->pending_mmaps);
         spin_lock_init(&dev->pending_lock);
         dev->mmap_offset = PAGE_SIZE;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4a2277bc059e..85751fd74371 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,8 +303,9 @@ struct qib_mregion {
         u32 max_segs;           /* number of qib_segs in all the arrays */
         u32 mapsz;              /* size of the map array */
         u8 page_shift;          /* 0 - non unform/non powerof2 sizes */
         u8 lkey_published;      /* in global table */
         struct completion comp; /* complete when refcount goes to zero */
+        struct rcu_head list;
         atomic_t refcount;
         struct qib_segarray *map[0];    /* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
         atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
         if (unlikely(atomic_dec_and_test(&mr->refcount)))
-                complete(&mr->comp);
+                call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
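For completeness, the teardown ordering these pieces provide can be sketched as follows (illustrative only; example_dereg() is a placeholder, assuming a deregistration path in qib_mr.c that waits on mr->comp, which is what the "complete when refcount goes to zero" field above is for):

    static void example_dereg(struct qib_mregion *mr)
    {
            qib_free_lkey(mr);              /* unpublish: rcu_assign_pointer(..., NULL), drop table ref */
            qib_put_mr(mr);                 /* drop the creator's reference */
            wait_for_completion(&mr->comp); /* last put -> call_rcu() -> mr_rcu_callback() -> complete() */
            /* no RCU reader can still hold a pointer here; mr may be freed */
    }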