author		Ralph Campbell <ralph.campbell@qlogic.com>	2006-09-22 18:22:26 -0400
committer	Roland Dreier <rolandd@cisco.com>	2006-09-22 18:22:26 -0400
commit		373d9915803aebbbf7fd3841efd9dac31c32e148
tree		bd0b05d5e78eab4471bc4d623a880013693077ea /drivers/infiniband/hw/ipath/ipath_verbs.h
parent		9bc57e2d19db4da81c1150120658cc3658a99ed4
IB/ipath: Performance improvements via mmap of queues
Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues. This eliminates the copying
between userspace and the kernel in the data path.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_verbs.h')
 -rw-r--r--	drivers/infiniband/hw/ipath/ipath_verbs.h	115
 1 file changed, 59 insertions(+), 56 deletions(-)
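The gain described in the commit message comes from userspace reading completions straight out of a ring it shares with the kernel, instead of calling into ib_uverbs for every poll. The sketch below is illustrative only, not the actual libipathverbs code: it mirrors the struct ipath_cq_wc layout added by this patch, and the fd, offset, cqe count, and the struct/function names (ipath_wc_entry, ipath_cq_shared, drain_cq) are hypothetical stand-ins for whatever the create-CQ uverbs response actually hands back. A real consumer would also keep the mapping for the CQ's lifetime rather than remapping it on every poll.

#include <stdint.h>
#include <stddef.h>
#include <sys/mman.h>

/* Placeholder for one completion entry; a real userspace library must
 * use a layout that matches the kernel's struct ib_wc exactly. */
struct ipath_wc_entry {
	uint64_t wr_id;
	uint32_t status;
	uint32_t opcode;
	uint32_t byte_len;
	/* ... remaining ib_wc fields ... */
};

/* Userspace mirror of the kernel's struct ipath_cq_wc: shared head and
 * tail indices followed by the completion entries themselves. */
struct ipath_cq_shared {
	uint32_t head;			/* next entry the driver fills */
	uint32_t tail;			/* next entry the consumer reads */
	struct ipath_wc_entry queue[1];	/* really cqe + 1 entries */
};

/*
 * Map the CQ ring exported by the kernel and drain pending completions.
 * 'fd', 'offset', and 'cqe' are assumed to come from the device file
 * descriptor and the create-CQ response; the exact ABI is defined by
 * the driver, not by this sketch.
 */
static int drain_cq(int fd, off_t offset, uint32_t cqe,
		    struct ipath_wc_entry *out, int max)
{
	size_t size = offsetof(struct ipath_cq_shared, queue) +
		      (cqe + 1) * sizeof(struct ipath_wc_entry);
	struct ipath_cq_shared *cq;
	int n = 0;

	cq = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
	if (cq == MAP_FAILED)
		return -1;

	/* The consumer owns 'tail'; the kernel advances 'head'. */
	while (cq->tail != cq->head && n < max) {
		out[n++] = cq->queue[cq->tail];
		cq->tail = (cq->tail + 1 > cqe) ? 0 : cq->tail + 1;
	}

	munmap(cq, size);
	return n;
}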
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 698396778f00..7d2ba72609f7 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,6 +38,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
@@ -50,7 +51,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION 1
+#define IPATH_UVERBS_ABI_VERSION 2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -178,58 +179,41 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
  */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;
+	struct ib_ucontext *context;
+	void *obj;
+	struct kref ref;
+	unsigned size;
+	unsigned mmap_cnt;
+};
 
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_wc queue[1];	/* this is actually size ibcq.cqe + 1 */
+};
+
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
 };
 
 /*
@@ -248,28 +232,40 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-	spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
 	u32 head;		/* new work requests posted to the head */
 	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+	struct ipath_rwq *wq;
+	spinlock_t lock;
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
@@ -293,6 +289,7 @@ struct ipath_qp {
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	/* current RDMA read send data */
@@ -345,7 +342,8 @@
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
 	struct ipath_rq r_rq;	/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
 /*
@@ -369,15 +367,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -417,6 +415,7 @@ struct ipath_ibdev {
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -681,6 +680,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
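Posting a receive works the same way in the other direction: userspace writes ipath_rwqe entries directly into the mmap'ed struct ipath_rwq and advances head. Because each RWQE carries max_sge scatter/gather elements inline, the N'th entry has to be located with the same stride arithmetic as the kernel's get_rwqe_ptr() above. The following is a minimal userspace sketch under assumed names (ipath_sge_entry, ipath_rwqe_hdr, ipath_rwq_shared); real code must mirror the kernel's field layout and padding exactly.

#include <stdint.h>
#include <stddef.h>

/* Standard {addr, length, lkey} scatter/gather element, as in ib_sge. */
struct ipath_sge_entry {
	uint64_t addr;
	uint32_t length;
	uint32_t lkey;
};

/* Fixed header of one RWQE; max_sge ipath_sge_entry elements follow. */
struct ipath_rwqe_hdr {
	uint64_t wr_id;
	uint8_t  num_sge;
};

/* Shared head/tail header of the mmap'ed receive queue; the
 * variable-length RWQE array follows it in the same allocation. */
struct ipath_rwq_shared {
	uint32_t head;		/* producer (post_recv) index */
	uint32_t tail;		/* consumer (driver) index */
};

/*
 * Same computation as the kernel's get_rwqe_ptr(): because each RWQE
 * carries max_sge scatter/gather entries inline, the n'th element
 * cannot be reached by plain array indexing.
 */
static inline struct ipath_rwqe_hdr *
get_rwqe_ptr(void *wq_base, uint32_t max_sge, uint32_t n)
{
	size_t stride = sizeof(struct ipath_rwqe_hdr) +
			max_sge * sizeof(struct ipath_sge_entry);

	return (struct ipath_rwqe_hdr *)
		((char *)wq_base + sizeof(struct ipath_rwq_shared) + stride * n);
}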