Diffstat (limited to 'drivers/infiniband/hw/qib/qib_cq.c')
-rw-r--r--   drivers/infiniband/hw/qib/qib_cq.c   484
1 files changed, 484 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
new file mode 100644
index 000000000000..a86cbf880f98
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -0,0 +1,484 @@
/*
 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "qib_verbs.h"

/**
 * qib_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
{
	struct qib_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
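	/*
	 * The ring holds cq->ibcq.cqe + 1 slots, so head == tail means the
	 * queue is empty.  If advancing head would run into tail, the CQ
	 * has overflowed and a CQ error event is reported instead.
	 */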
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
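	/*
	 * A CQ mapped into user space (cq->ip != NULL) stores entries in
	 * the user ABI format (struct ib_uverbs_wc); a kernel CQ stores
	 * struct ib_wc directly.
	 */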
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data =
			(__u32 __force)entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;

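	/*
	 * Notification requests are one-shot: once a matching completion
	 * arrives the CQ is disarmed (IB_CQ_NONE) until the consumer
	 * rearms it with qib_req_notify_cq().
	 */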
	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
		cq->notify = IB_CQ_NONE;
		cq->triggered++;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		queue_work(qib_cq_wq, &cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);
}

/**
 * qib_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip) {
		npolled = -EINVAL;
		goto bail;
	}

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
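	/* Copy out entries until the ring drains or num_entries is reached. */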
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

bail:
	return npolled;
}

static void send_complete(struct work_struct *work)
{
	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, queue_work()
	 * won't call us again until we return, so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ.  We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}

/**
 * qib_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @comp_vector: unused by the QLogic_IB driver
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
			    int comp_vector, struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibdev);
	struct qib_cq *cq;
	struct qib_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;

	if (entries < 1 || entries > ib_qib_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
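	/*
	 * One extra slot is allocated so that a full ring (head just
	 * behind tail) can be distinguished from an empty one
	 * (head == tail); see the overflow check in qib_cq_enter().
	 */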
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;

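	/* Enforce the per-device limit on the number of CQs. */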
	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or we
	 * return an error.
	 */
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	INIT_WORK(&cq->comptask, send_complete);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}

/**
 * qib_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int qib_destroy_cq(struct ib_cq *ibcq)
{
	struct qib_ibdev *dev = to_idev(ibcq->device);
	struct qib_cq *cq = to_icq(ibcq);

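	/* Wait for any deferred completion callback to finish running. */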
	flush_work(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, qib_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}

/**
 * qib_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct qib_cq *cq = to_icq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

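	/*
	 * With IB_CQ_REPORT_MISSED_EVENTS, return 1 if completions are
	 * already queued so the caller knows to poll before waiting.
	 */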
	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
	    cq->queue->head != cq->queue->tail)
		ret = 1;

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}

/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new size of the completion queue
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *old_wc;
	struct qib_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;

	if (cqe < 1 || cqe > ib_qib_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
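	/*
	 * n is the number of completions currently queued, computed modulo
	 * the old ring size; the new ring must be able to hold all of them.
	 */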
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
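	/* Entries were copied to the start of the new ring, so head = n, tail = 0. */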
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct qib_ibdev *dev = to_idev(ibcq->device);
		struct qib_mmap_info *ip = cq->ip;

		qib_update_mmap_info(dev, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See qib_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				goto bail;
		}

		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}