author     Ralph Campbell <ralph.campbell@qlogic.com>  2006-09-22 18:22:26 -0400
committer  Roland Dreier <rolandd@cisco.com>           2006-09-22 18:22:26 -0400
commit     373d9915803aebbbf7fd3841efd9dac31c32e148 (patch)
tree       bd0b05d5e78eab4471bc4d623a880013693077ea /drivers/infiniband/hw/ipath/ipath_verbs.h
parent     9bc57e2d19db4da81c1150120658cc3658a99ed4 (diff)
IB/ipath: Performance improvements via mmap of queues
Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues. This eliminates the copying
between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
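As an illustration of the intent (not part of the patch or of any particular userspace library), the sketch below shows how a verbs provider could map the exported completion queue and poll it without a system call. The cmd_fd, cq_offset, and cq_size values and the entry layout are assumptions standing in for whatever the driver's create-CQ response actually returns under ABI version 2.

/* Hedged userspace sketch: zero-copy CQ polling over an mmap()ed queue. */
#include <stdint.h>
#include <stddef.h>
#include <sys/types.h>
#include <sys/mman.h>

struct cq_entry {			/* placeholder for the kernel's struct ib_wc */
	uint64_t wr_id;
	uint32_t status;
	uint32_t byte_len;
};

struct cq_shared {			/* mirrors struct ipath_cq_wc in this patch */
	uint32_t head;			/* index of next entry the kernel fills */
	uint32_t tail;			/* index of next entry userspace polls */
	struct cq_entry queue[1];	/* really cqe + 1 entries follow */
};

/* Map the CQ memory the driver exported; cq_offset/cq_size are assumed
 * to come from the driver's create-CQ response. */
static struct cq_shared *map_cq(int cmd_fd, off_t cq_offset, size_t cq_size)
{
	void *p = mmap(NULL, cq_size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, cmd_fd, cq_offset);

	return p == MAP_FAILED ? NULL : p;
}

/* Consume one completion if available; returns 1 if *wc was filled. */
static int poll_one(struct cq_shared *cq, uint32_t num_entries,
		    struct cq_entry *wc)
{
	uint32_t tail = cq->tail;

	if (tail == cq->head)		/* no new completions */
		return 0;
	*wc = cq->queue[tail];
	cq->tail = (tail + 1 == num_entries) ? 0 : tail + 1;
	return 1;
}

A real provider would also need memory barriers and the driver's actual ib_wc layout; this only illustrates the copy-free poll path the commit message describes.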
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_verbs.h')
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 115
1 file changed, 59 insertions, 56 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 698396778f00..7d2ba72609f7 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,6 +38,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
@@ -50,7 +51,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION 1
+#define IPATH_UVERBS_ABI_VERSION 2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -178,58 +179,41 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table. Each
- * struct ipath_cq/qp also has its own lock. An individual qp lock
- * may be taken inside of an individual cq lock. Both cqs attached to
- * a qp may be locked, with the send cq locked first. No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count. The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed. Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
  */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;
+	struct ib_ucontext *context;
+	void *obj;
+	struct kref ref;
+	unsigned size;
+	unsigned mmap_cnt;
+};
 
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_wc queue[1];	/* this is actually size ibcq.cqe + 1 */
+};
+
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
 };
 
 /*
@@ -248,28 +232,40 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-	spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
 	u32 head;		/* new work requests posted to the head */
 	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+	struct ipath_rwq *wq;
+	spinlock_t lock;
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
@@ -293,6 +289,7 @@ struct ipath_qp {
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	/* current RDMA read send data */
@@ -345,7 +342,8 @@ struct ipath_qp {
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
 	struct ipath_rq r_rq;		/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
 /*
@@ -369,15 +367,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq. This function does the array index computation.
+ * struct ipath_rwq.wq. This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -417,6 +415,7 @@ struct ipath_ibdev {
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -681,6 +680,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
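The new ipath_release_mmap_info() and ipath_mmap() declarations lean on the kref embedded in struct ipath_mmap_info. Below is a plausible sketch of the release side and of vma hooks that would keep a mapping alive; it is an assumption based on the fields in this header (obj, ref, mmap_cnt), not code taken from the patch, and the sketch_vma_* names are hypothetical.

/* Sketch only: assumes the ipath_verbs.h above and vmalloc()'ed queue memory. */
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include "ipath_verbs.h"

void ipath_release_mmap_info(struct kref *ref)
{
	struct ipath_mmap_info *ip =
		container_of(ref, struct ipath_mmap_info, ref);

	vfree(ip->obj);		/* the queue memory that was handed to mmap() */
	kfree(ip);
}

/*
 * Hypothetical vm_operations hooks: hold a reference while a mapping
 * exists so the queue memory cannot vanish under a live mmap().
 */
static void sketch_vma_open(struct vm_area_struct *vma)
{
	struct ipath_mmap_info *ip = vma->vm_private_data;

	kref_get(&ip->ref);
	ip->mmap_cnt++;
}

static void sketch_vma_close(struct vm_area_struct *vma)
{
	struct ipath_mmap_info *ip = vma->vm_private_data;

	ip->mmap_cnt--;
	kref_put(&ip->ref, ipath_release_mmap_info);
}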