author     Ralph Campbell <ralph.campbell@qlogic.com>   2006-09-22 18:22:26 -0400
committer  Roland Dreier <rolandd@cisco.com>            2006-09-22 18:22:26 -0400
commit     373d9915803aebbbf7fd3841efd9dac31c32e148 (patch)
tree       bd0b05d5e78eab4471bc4d623a880013693077ea /drivers/infiniband/hw/ipath/ipath_cq.c
parent     9bc57e2d19db4da81c1150120658cc3658a99ed4 (diff)
IB/ipath: Performance improvements via mmap of queues
Improve performance of userspace post receive, post SRQ receive, and poll CQ operations for ipath by allowing userspace to directly mmap() receive queues and completion queues. This eliminates the copying between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_cq.c')
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c | 176
1 file changed, 133 insertions(+), 43 deletions(-)
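For reference while reading the diff: the patch keeps the head index, the tail index, and the completion entries together in one vmalloc_user() allocation so the whole object can be mmap()ed by the consumer. The ipath_cq_wc structure it relies on is introduced elsewhere in this series (in ipath_verbs.h, not in this file), so the layout below is only a sketch for orientation, not the exact definition:

/*
 * Sketch (for orientation only) of the shared completion-queue layout
 * the code below relies on; the real definition lives in ipath_verbs.h.
 */
struct ipath_cq_wc {
        u32 head;               /* index of the next entry the producer fills */
        u32 tail;               /* index of the next entry the consumer reads */
        struct ib_wc queue[1];  /* actually ibcq.cqe + 1 entries follow */
};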
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee341c9bc..3c4c198a4514 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicitated entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+        struct ipath_cq_wc *wc = cq->queue;
         unsigned long flags;
+        u32 head;
         u32 next;
 
         spin_lock_irqsave(&cq->lock, flags);
 
-        if (cq->head == cq->ibcq.cqe)
+        /*
+         * Note that the head pointer might be writable by user processes.
+         * Take care to verify it is a sane value.
+         */
+        head = wc->head;
+        if (head >= (unsigned) cq->ibcq.cqe) {
+                head = cq->ibcq.cqe;
                 next = 0;
-        else
-                next = cq->head + 1;
-        if (unlikely(next == cq->tail)) {
+        } else
+                next = head + 1;
+        if (unlikely(next == wc->tail)) {
                 spin_unlock_irqrestore(&cq->lock, flags);
                 if (cq->ibcq.event_handler) {
                         struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
                 }
                 return;
         }
-        cq->queue[cq->head] = *entry;
-        cq->head = next;
+        wc->queue[head] = *entry;
+        wc->head = next;
 
         if (cq->notify == IB_CQ_NEXT_COMP ||
             (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
         struct ipath_cq *cq = to_icq(ibcq);
+        struct ipath_cq_wc *wc = cq->queue;
         unsigned long flags;
         int npolled;
 
         spin_lock_irqsave(&cq->lock, flags);
 
         for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-                if (cq->tail == cq->head)
+                if (wc->tail == wc->head)
                         break;
-                *entry = cq->queue[cq->tail];
-                if (cq->tail == cq->ibcq.cqe)
-                        cq->tail = 0;
+                *entry = wc->queue[wc->tail];
+                if (wc->tail >= cq->ibcq.cqe)
+                        wc->tail = 0;
                 else
-                        cq->tail++;
+                        wc->tail++;
         }
 
         spin_unlock_irqrestore(&cq->lock, flags);
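Because the queue and its indices are mapped into the process, a userspace library can poll completions without entering the kernel at all. Purely as an illustrative sketch (the real consumer is the ipath userspace verbs provider, which is not part of this patch and also has to deal with memory ordering), the user-side poll mirrors the kernel loop above:

/* Illustrative only: user-side poll of the mmap()ed queue, mirroring the
 * kernel loop above.  All names here are hypothetical. */
static int user_poll_one(struct ipath_cq_wc *wc, u32 cqe, struct ib_wc *entry)
{
        u32 tail = wc->tail;

        if (tail > cqe)                 /* clamp a corrupted index */
                tail = cqe;
        if (tail == wc->head)           /* nothing to report */
                return 0;
        *entry = wc->queue[tail];       /* copy the completion out */
        wc->tail = (tail >= cqe) ? 0 : tail + 1;
        return 1;
}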
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 {
         struct ipath_ibdev *dev = to_idev(ibdev);
         struct ipath_cq *cq;
-        struct ib_wc *wc;
+        struct ipath_cq_wc *wc;
         struct ib_cq *ret;
 
         if (entries > ib_ipath_max_cqes) {
                 ret = ERR_PTR(-EINVAL);
-                goto bail;
+                goto done;
         }
 
         if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
                 ret = ERR_PTR(-ENOMEM);
-                goto bail;
+                goto done;
         }
 
-        /*
-         * Need to use vmalloc() if we want to support large #s of
-         * entries.
-         */
+        /* Allocate the completion queue structure. */
         cq = kmalloc(sizeof(*cq), GFP_KERNEL);
         if (!cq) {
                 ret = ERR_PTR(-ENOMEM);
-                goto bail;
+                goto done;
         }
 
         /*
-         * Need to use vmalloc() if we want to support large #s of entries.
+         * Allocate the completion queue entries and head/tail pointers.
+         * This is allocated separately so that it can be resized and
+         * also mapped into user space.
+         * We need to use vmalloc() in order to support mmap and large
+         * numbers of entries.
          */
-        wc = vmalloc(sizeof(*wc) * (entries + 1));
+        wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
         if (!wc) {
-                kfree(cq);
                 ret = ERR_PTR(-ENOMEM);
-                goto bail;
+                goto bail_cq;
         }
+
+        /*
+         * Return the address of the WC as the offset to mmap.
+         * See ipath_mmap() for details.
+         */
+        if (udata && udata->outlen >= sizeof(__u64)) {
+                struct ipath_mmap_info *ip;
+                __u64 offset = (__u64) wc;
+                int err;
+
+                err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+                if (err) {
+                        ret = ERR_PTR(err);
+                        goto bail_wc;
+                }
+
+                /* Allocate info for ipath_mmap(). */
+                ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+                if (!ip) {
+                        ret = ERR_PTR(-ENOMEM);
+                        goto bail_wc;
+                }
+                cq->ip = ip;
+                ip->context = context;
+                ip->obj = wc;
+                kref_init(&ip->ref);
+                ip->mmap_cnt = 0;
+                ip->size = PAGE_ALIGN(sizeof(*wc) +
+                                      sizeof(struct ib_wc) * entries);
+                spin_lock_irq(&dev->pending_lock);
+                ip->next = dev->pending_mmaps;
+                dev->pending_mmaps = ip;
+                spin_unlock_irq(&dev->pending_lock);
+        } else
+                cq->ip = NULL;
+
         /*
          * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
          * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
         cq->triggered = 0;
         spin_lock_init(&cq->lock);
         tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-        cq->head = 0;
-        cq->tail = 0;
+        wc->head = 0;
+        wc->tail = 0;
         cq->queue = wc;
 
         ret = &cq->ibcq;
 
         dev->n_cqs_allocated++;
+        goto done;
 
-bail:
+bail_wc:
+        vfree(wc);
+
+bail_cq:
+        kfree(cq);
+
+done:
         return ret;
 }
 
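With ipath_create_cq() now reporting the queue's address through udata as the value to pass to mmap(), a userspace provider would map the queue roughly like this (hypothetical names only; the offset is interpreted by ipath_mmap(), which is added elsewhere in this series):

/* Hypothetical userspace side: map the queue using the offset returned in
 * the create-CQ response on the uverbs file descriptor. */
size_t size = sizeof(struct ipath_cq_wc) + sizeof(struct ib_wc) * cqe;
struct ipath_cq_wc *queue = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                 MAP_SHARED, uverbs_fd, resp.offset);
if (queue == MAP_FAILED)
        return errno;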
@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
 
         tasklet_kill(&cq->comptask);
         dev->n_cqs_allocated--;
-        vfree(cq->queue);
+        if (cq->ip)
+                kref_put(&cq->ip->ref, ipath_release_mmap_info);
+        else
+                vfree(cq->queue);
         kfree(cq);
 
         return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
         spin_lock_irqsave(&cq->lock, flags);
         /*
          * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-         * any other transitions.
+         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
          */
         if (cq->notify != IB_CQ_NEXT_COMP)
                 cq->notify = notify;
@@ -264,46 +319,81 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
         struct ipath_cq *cq = to_icq(ibcq);
-        struct ib_wc *wc, *old_wc;
-        u32 n;
+        struct ipath_cq_wc *old_wc = cq->queue;
+        struct ipath_cq_wc *wc;
+        u32 head, tail, n;
         int ret;
 
         /*
          * Need to use vmalloc() if we want to support large #s of entries.
          */
-        wc = vmalloc(sizeof(*wc) * (cqe + 1));
+        wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
         if (!wc) {
                 ret = -ENOMEM;
                 goto bail;
         }
 
+        /*
+         * Return the address of the WC as the offset to mmap.
+         * See ipath_mmap() for details.
+         */
+        if (udata && udata->outlen >= sizeof(__u64)) {
+                __u64 offset = (__u64) wc;
+
+                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+                if (ret)
+                        goto bail;
+        }
+
         spin_lock_irq(&cq->lock);
-        if (cq->head < cq->tail)
-                n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+        /*
+         * Make sure head and tail are sane since they
+         * might be user writable.
+         */
+        head = old_wc->head;
+        if (head > (u32) cq->ibcq.cqe)
+                head = (u32) cq->ibcq.cqe;
+        tail = old_wc->tail;
+        if (tail > (u32) cq->ibcq.cqe)
+                tail = (u32) cq->ibcq.cqe;
+        if (head < tail)
+                n = cq->ibcq.cqe + 1 + head - tail;
         else
-                n = cq->head - cq->tail;
+                n = head - tail;
         if (unlikely((u32)cqe < n)) {
                 spin_unlock_irq(&cq->lock);
                 vfree(wc);
                 ret = -EOVERFLOW;
                 goto bail;
         }
-        for (n = 0; cq->tail != cq->head; n++) {
-                wc[n] = cq->queue[cq->tail];
-                if (cq->tail == cq->ibcq.cqe)
-                        cq->tail = 0;
+        for (n = 0; tail != head; n++) {
+                wc->queue[n] = old_wc->queue[tail];
+                if (tail == (u32) cq->ibcq.cqe)
+                        tail = 0;
                 else
-                        cq->tail++;
+                        tail++;
         }
         cq->ibcq.cqe = cqe;
-        cq->head = n;
-        cq->tail = 0;
-        old_wc = cq->queue;
+        wc->head = n;
+        wc->tail = 0;
         cq->queue = wc;
         spin_unlock_irq(&cq->lock);
 
         vfree(old_wc);
 
+        if (cq->ip) {
+                struct ipath_ibdev *dev = to_idev(ibcq->device);
+                struct ipath_mmap_info *ip = cq->ip;
+
+                ip->obj = wc;
+                ip->size = PAGE_ALIGN(sizeof(*wc) +
+                                      sizeof(struct ib_wc) * cqe);
+                spin_lock_irq(&dev->pending_lock);
+                ip->next = dev->pending_mmaps;
+                dev->pending_mmaps = ip;
+                spin_unlock_irq(&dev->pending_lock);
+        }
+
         ret = 0;
 
 bail: