author    Steve Wise <swise@opengridcomputing.com>   2010-06-10 15:03:00 -0400
committer Roland Dreier <rolandd@cisco.com>          2010-07-21 14:16:20 -0400
commit    d37ac31ddc24c1a0beed134278bc074c98812210 (patch)
tree      20b61b408fb31cd4b16d50c73d0445784a1255cd /drivers/infiniband/hw/cxgb4/t4.h
parent    d3c814e8b2a094dc3bcbe6a0d93ec4824b26e86a (diff)
RDMA/cxgb4: Support variable sized work requests
T4 EQ entries are in multiples of 64 bytes. Currently the RDMA SQ and
RQ use fixed-size entries composed of 4 EQ entries for the SQ and 2
EQ entries for the RQ. For optimal latency with small I/O, we need to
change this so the HW only needs to DMA the EQ entries actually used
by a given work request.
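
For reference, the per-WR slot count follows directly from the 64-byte EQ
entry size and the WR's len16 field (its length in 16-byte units). The
snippet below is only an illustrative, self-contained sketch of that
arithmetic (the standalone program and its numbers are not driver code):

/*
 * Illustrative sketch: number of 64-byte EQ entries a work request of
 * a given len16 (length in 16-byte units) occupies, compared with the
 * old fixed 4-slot SQ footprint.
 */
#include <stdio.h>

#define T4_EQ_ENTRY_SIZE 64     /* one EQ entry, in bytes   */
#define T4_SQ_NUM_SLOTS  4      /* old fixed SQ footprint   */

static int eq_slots_for_wr(unsigned int len16)
{
        unsigned int bytes = len16 * 16;

        return (bytes + T4_EQ_ENTRY_SIZE - 1) / T4_EQ_ENTRY_SIZE;
}

int main(void)
{
        /*
         * A small SEND WR of 48 bytes has len16 = 3: it fits in one
         * EQ entry, so the HW DMAs 64 bytes instead of the fixed
         * 4 * 64 = 256 bytes it fetched before this change.
         */
        printf("slots used: %d of %d\n", eq_slots_for_wr(3), T4_SQ_NUM_SLOTS);
        return 0;
}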
Implementation:
- add a wq_pidx counter to track where we are in the EQ. cidx/pidx are
still used for the SW SQ/RQ tracking and flow control.
- the variable part of a work request is the SGL. Add new functions to
build the SGL and/or immediate data directly in the EQ memory,
wrapping when needed (see the sketch after this list).
- adjust the min burst size for the EQ contexts to 64B.
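
The new SGL/immediate-data builders live in the WR-posting path rather
than in t4.h, so they are not part of this diff. The sketch below only
illustrates the wrap-around idea; copy_to_eq_wrap and its parameters are
assumptions for illustration, not the driver's actual helpers:

/*
 * Illustrative only: copy a variable-sized payload into EQ ring memory
 * starting at a byte offset, wrapping to the start of the queue when
 * the data runs past the end -- the wrap-around the new SGL/immediate
 * data builders have to handle.
 */
#include <string.h>

static void copy_to_eq_wrap(unsigned char *eq_base, unsigned int eq_bytes,
                            unsigned int offset, const void *src,
                            unsigned int len)
{
        unsigned int to_end = eq_bytes - offset;

        if (len <= to_end) {
                memcpy(eq_base + offset, src, len);
        } else {
                /* fill to the end of the ring, then continue at slot 0 */
                memcpy(eq_base + offset, src, to_end);
                memcpy(eq_base, (const unsigned char *)src + to_end,
                       len - to_end);
        }
}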
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/t4.h')
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 32
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 9cf8d85bfcff..aef55f42bea4 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -65,10 +65,10 @@ struct t4_status_page {
 	u8 db_off;
 };
 
-#define T4_EQ_SIZE 64
+#define T4_EQ_ENTRY_SIZE 64
 
 #define T4_SQ_NUM_SLOTS 4
-#define T4_SQ_NUM_BYTES (T4_EQ_SIZE * T4_SQ_NUM_SLOTS)
+#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
 #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
 			sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
 #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
@@ -84,7 +84,7 @@ struct t4_status_page {
 #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
 
 #define T4_RQ_NUM_SLOTS 2
-#define T4_RQ_NUM_BYTES (T4_EQ_SIZE * T4_RQ_NUM_SLOTS)
+#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
 #define T4_MAX_RECV_SGE 4
 
 union t4_wr {
@@ -97,20 +97,18 @@ union t4_wr {
 	struct fw_ri_fr_nsmr_wr fr;
 	struct fw_ri_inv_lstag_wr inv;
 	struct t4_status_page status;
-	__be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
+	__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
 };
 
 union t4_recv_wr {
 	struct fw_ri_recv_wr recv;
 	struct t4_status_page status;
-	__be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
+	__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
 };
 
 static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
 			       enum fw_wr_opcodes opcode, u8 flags, u8 len16)
 {
-	int slots_used;
-
 	wqe->send.opcode = (u8)opcode;
 	wqe->send.flags = flags;
 	wqe->send.wrid = wrid;
@@ -118,12 +116,6 @@ static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
 	wqe->send.r1[1] = 0;
 	wqe->send.r1[2] = 0;
 	wqe->send.len16 = len16;
-
-	slots_used = DIV_ROUND_UP(len16*16, T4_EQ_SIZE);
-	while (slots_used < T4_SQ_NUM_SLOTS) {
-		wqe->flits[slots_used * T4_EQ_SIZE / sizeof(__be64)] = 0;
-		slots_used++;
-	}
 }
 
 /* CQE/AE status codes */
@@ -289,6 +281,7 @@ struct t4_sq {
 	u16 size;
 	u16 cidx;
 	u16 pidx;
+	u16 wq_pidx;
 };
 
 struct t4_swrqe {
@@ -310,6 +303,7 @@ struct t4_rq {
 	u16 size;
 	u16 cidx;
 	u16 pidx;
+	u16 wq_pidx;
 };
 
 struct t4_wq {
@@ -340,11 +334,14 @@ static inline u32 t4_rq_avail(struct t4_wq *wq)
 	return wq->rq.size - 1 - wq->rq.in_use;
 }
 
-static inline void t4_rq_produce(struct t4_wq *wq)
+static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 {
 	wq->rq.in_use++;
 	if (++wq->rq.pidx == wq->rq.size)
 		wq->rq.pidx = 0;
+	wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS)
+		wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS;
 }
 
 static inline void t4_rq_consume(struct t4_wq *wq)
@@ -370,11 +367,14 @@ static inline u32 t4_sq_avail(struct t4_wq *wq)
 	return wq->sq.size - 1 - wq->sq.in_use;
 }
 
-static inline void t4_sq_produce(struct t4_wq *wq)
+static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
 {
 	wq->sq.in_use++;
 	if (++wq->sq.pidx == wq->sq.size)
 		wq->sq.pidx = 0;
+	wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS)
+		wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
 static inline void t4_sq_consume(struct t4_wq *wq)
@@ -386,14 +386,12 @@ static inline void t4_sq_consume(struct t4_wq *wq)
 
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
 {
-	inc *= T4_SQ_NUM_SLOTS;
 	wmb();
 	writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
 static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
 {
-	inc *= T4_RQ_NUM_SLOTS;
 	wmb();
 	writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }
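
With the fixed inc *= T4_SQ_NUM_SLOTS / inc *= T4_RQ_NUM_SLOTS scaling
removed, the increment passed to t4_ring_sq_db()/t4_ring_rq_db() must
already be in EQ-entry units, i.e. the same units by which wq_pidx
advances. A minimal sketch of a posting path under that assumption
(post_one_sq_wr is an illustrative helper, not code from this patch;
the real accounting lives in the driver's post_send/post_recv paths):

/*
 * Illustrative only: post one SQ work request with the new
 * variable-size accounting. 'len16' is the WR length in 16-byte
 * units; the doorbell increment is the number of 64-byte EQ entries
 * the WR consumed, not a fixed T4_SQ_NUM_SLOTS.
 */
static void post_one_sq_wr(struct t4_wq *wq, u8 len16)
{
	u16 idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);

	/* ... build the WR header and SGL/immediate data in the EQ ... */

	t4_sq_produce(wq, len16);	/* advances pidx and wq_pidx */
	t4_ring_sq_db(wq, idx);		/* PIDX increment in EQ entries */
}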