path: root/drivers/infiniband/hw/cxgb4/t4.h
author    Steve Wise <swise@opengridcomputing.com>  2010-06-10 15:03:00 -0400
committer Roland Dreier <rolandd@cisco.com>         2010-07-21 14:16:20 -0400
commit    d37ac31ddc24c1a0beed134278bc074c98812210 (patch)
tree      20b61b408fb31cd4b16d50c73d0445784a1255cd /drivers/infiniband/hw/cxgb4/t4.h
parent    d3c814e8b2a094dc3bcbe6a0d93ec4824b26e86a (diff)
RDMA/cxgb4: Support variable sized work requests
T4 EQ entries are in multiples of 64 bytes.  Currently the RDMA SQ and
RQ use fixed sized entries composed of 4 EQ entries for the SQ and 2 EQ
entries for the RQ.  For optimal latency with small IO, we need to
change this so the HW only needs to DMA the EQ entries actually used by
a given work request.

Implementation:

- Add a wq_pidx counter to track where we are in the EQ.  cidx/pidx are
  used for the sw sq/rq tracking and flow control.

- The variable part of work requests is the SGL.  Add new functions to
  build the SGL and/or immediate data directly in the EQ memory,
  wrapping when needed.

- Adjust the min burst size for the EQ contexts to 64B.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
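Below is a minimal, standalone userspace sketch (not part of the patch) of the
slot arithmetic the new wq_pidx counter relies on: a work request of len16
16-byte units occupies DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE) 64-byte EQ
slots, and the index wraps at size * NUM_SLOTS.  The queue size and len16
values below are made-up examples, not values from the driver.

/*
 * Standalone sketch of the EQ slot arithmetic behind wq_pidx.
 * Example values only; DIV_ROUND_UP is redefined here so the
 * program compiles outside the kernel.
 */
#include <stdio.h>

#define T4_EQ_ENTRY_SIZE 64
#define T4_SQ_NUM_SLOTS  4
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int sq_size = 16;		/* SW SQ entries (example) */
	unsigned int eq_slots = sq_size * T4_SQ_NUM_SLOTS;
	unsigned int wq_pidx = 0;
	unsigned int len16 = 5;			/* 80-byte work request */

	/* An 80-byte WR needs 2 of the 64-byte EQ entries, not a fixed 4. */
	wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
	if (wq_pidx >= eq_slots)
		wq_pidx %= eq_slots;

	printf("WR of %u bytes -> %u EQ slot(s), wq_pidx now %u of %u\n",
	       len16 * 16, DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE),
	       wq_pidx, eq_slots);
	return 0;
}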
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/t4.h')
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 32
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 9cf8d85bfcff..aef55f42bea4 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -65,10 +65,10 @@ struct t4_status_page {
 	u8 db_off;
 };
 
-#define T4_EQ_SIZE 64
+#define T4_EQ_ENTRY_SIZE 64
 
 #define T4_SQ_NUM_SLOTS 4
-#define T4_SQ_NUM_BYTES (T4_EQ_SIZE * T4_SQ_NUM_SLOTS)
+#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
 #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
 			sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
 #define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
@@ -84,7 +84,7 @@ struct t4_status_page {
 #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
 
 #define T4_RQ_NUM_SLOTS 2
-#define T4_RQ_NUM_BYTES (T4_EQ_SIZE * T4_RQ_NUM_SLOTS)
+#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
 #define T4_MAX_RECV_SGE 4
 
 union t4_wr {
@@ -97,20 +97,18 @@ union t4_wr {
 	struct fw_ri_fr_nsmr_wr fr;
 	struct fw_ri_inv_lstag_wr inv;
 	struct t4_status_page status;
-	__be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
+	__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
 };
 
 union t4_recv_wr {
 	struct fw_ri_recv_wr recv;
 	struct t4_status_page status;
-	__be64 flits[T4_EQ_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
+	__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
 };
 
 static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
 				enum fw_wr_opcodes opcode, u8 flags, u8 len16)
 {
-	int slots_used;
-
 	wqe->send.opcode = (u8)opcode;
 	wqe->send.flags = flags;
 	wqe->send.wrid = wrid;
@@ -118,12 +116,6 @@ static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
 	wqe->send.r1[1] = 0;
 	wqe->send.r1[2] = 0;
 	wqe->send.len16 = len16;
-
-	slots_used = DIV_ROUND_UP(len16*16, T4_EQ_SIZE);
-	while (slots_used < T4_SQ_NUM_SLOTS) {
-		wqe->flits[slots_used * T4_EQ_SIZE / sizeof(__be64)] = 0;
-		slots_used++;
-	}
 }
 
 /* CQE/AE status codes */
@@ -289,6 +281,7 @@ struct t4_sq {
 	u16 size;
 	u16 cidx;
 	u16 pidx;
+	u16 wq_pidx;
 };
 
 struct t4_swrqe {
@@ -310,6 +303,7 @@ struct t4_rq {
 	u16 size;
 	u16 cidx;
 	u16 pidx;
+	u16 wq_pidx;
 };
 
 struct t4_wq {
@@ -340,11 +334,14 @@ static inline u32 t4_rq_avail(struct t4_wq *wq)
 	return wq->rq.size - 1 - wq->rq.in_use;
 }
 
-static inline void t4_rq_produce(struct t4_wq *wq)
+static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 {
 	wq->rq.in_use++;
 	if (++wq->rq.pidx == wq->rq.size)
 		wq->rq.pidx = 0;
+	wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS)
+		wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS;
 }
 
 static inline void t4_rq_consume(struct t4_wq *wq)
@@ -370,11 +367,14 @@ static inline u32 t4_sq_avail(struct t4_wq *wq)
 	return wq->sq.size - 1 - wq->sq.in_use;
 }
 
-static inline void t4_sq_produce(struct t4_wq *wq)
+static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
 {
 	wq->sq.in_use++;
 	if (++wq->sq.pidx == wq->sq.size)
 		wq->sq.pidx = 0;
+	wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS)
+		wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
 static inline void t4_sq_consume(struct t4_wq *wq)
@@ -386,14 +386,12 @@ static inline void t4_sq_consume(struct t4_wq *wq)
 
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
 {
-	inc *= T4_SQ_NUM_SLOTS;
 	wmb();
 	writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
 static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
 {
-	inc *= T4_RQ_NUM_SLOTS;
 	wmb();
 	writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }
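For context, a hedged sketch of how a caller might drive the reworked helpers
after this change (the corresponding qp.c changes are elsewhere in the series
and not shown here): t4_sq_produce() now takes the work request's len16, and
the doorbell is rung with the number of EQ slots that WR actually consumed
rather than a fixed T4_SQ_NUM_SLOTS multiple.  post_one_sq_wr() is a
hypothetical helper name, not a function from the driver.

static void post_one_sq_wr(struct t4_wq *wq, u8 len16)
{
	/* 64-byte EQ slots consumed by this work request */
	u16 idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);

	t4_sq_produce(wq, len16);	/* advance sw pidx and wq_pidx */
	t4_ring_sq_db(wq, idx);		/* PIDX increment = slots used */
}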