author     Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-08 18:34:26 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-08 18:34:26 -0500
commit     f0e2dcffae8701f00b34bce90e762eb798dea5b1 (patch)
tree       f648533a633a2d065b3c9c569c4e9e3c6b2c2ea8
parent     04a94babd68952a4e3cdd54ebf8ce8891f9b0f2e (diff)
parent     5128bdc97a1018aacac2550cf73bda61041cc3b8 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
IB/core: Remove unused struct ib_device.flags member
IB/core: Add IP checksum offload support
IPoIB: Add send gather support
IPoIB: Add high DMA feature flag
IB/mlx4: Use multiple WQ blocks to post smaller send WQEs
mlx4_core: Clean up struct mlx4_buf
mlx4_core: For 64-bit systems, vmap() kernel queue buffers
IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c              20
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h          2
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c             216
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c              8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h          4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c      10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c      89
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c     4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c   10
-rw-r--r--  drivers/net/mlx4/alloc.c                     48
-rw-r--r--  drivers/net/mlx4/mr.c                         4
-rw-r--r--  include/linux/mlx4/device.h                  19
-rw-r--r--  include/linux/mlx4/qp.h                       4
-rw-r--r--  include/rdma/ib_verbs.h                      16
14 files changed, 342 insertions, 112 deletions
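
One orientation note before the per-file hunks: the mlx4_buf cleanup merged here replaces the open-coded direct-buffer/page-list lookups in cq.c, qp.c and srq.c with the new mlx4_buf_offset() helper added to include/linux/mlx4/device.h (see the hunks below). A minimal caller-side sketch of the resulting pattern follows; example_get_entry() is an illustrative name, not a function from this merge:

    #include <linux/mlx4/device.h>

    /*
     * Illustrative only: index an entry in a queue buffer by byte offset.
     * mlx4_buf_offset() hides whether the buffer is one DMA-coherent chunk,
     * a vmap()ed page list (64-bit kernels), or a hand-walked page list.
     */
    static void *example_get_entry(struct mlx4_buf *buf, int index, int entry_shift)
    {
    	return mlx4_buf_offset(buf, index << entry_shift);
    }
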
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7950aa6e8184..7360bbafbe84 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -64,13 +64,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	int offset = n * sizeof (struct mlx4_cqe);
-
-	if (buf->buf.nbufs == 1)
-		return buf->buf.u.direct.buf + offset;
-	else
-		return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -332,6 +326,12 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
+	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
+		     is_send)) {
+		printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+		return -EINVAL;
+	}
+
 	if (!*cur_qp ||
 	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
 		/*
@@ -354,8 +354,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
-		wqe_ctr = be16_to_cpu(cqe->wqe_index);
-		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
+		if (!(*cur_qp)->sq_signal_bits) {
+			wqe_ctr = be16_to_cpu(cqe->wqe_index);
+			wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
+		}
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
 	} else if ((*cur_qp)->ibqp.srq) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 28697653a370..3726e451a327 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -120,6 +120,8 @@ struct mlx4_ib_qp {
 
 	u32			doorbell_qpn;
 	__be32			sq_signal_bits;
+	unsigned		sq_next_wqe;
+	int			sq_max_wqes_per_wr;
 	int			sq_spare_wqes;
 	struct mlx4_ib_wq	sq;
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8cba9c532e64..958e205b6d7c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -30,6 +30,8 @@
  * SOFTWARE.
  */
 
+#include <linux/log2.h>
+
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
 
@@ -96,11 +98,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 
 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
 {
-	if (qp->buf.nbufs == 1)
-		return qp->buf.u.direct.buf + offset;
-	else
-		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&qp->buf, offset);
 }
 
 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
@@ -115,16 +113,87 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
 
 /*
  * Stamp a SQ WQE so that it is invalid if prefetched by marking the
- * first four bytes of every 64 byte chunk with 0xffffffff, except for
- * the very first chunk of the WQE.
+ * first four bytes of every 64 byte chunk with
+ * 0x7FFFFFF | (invalid_ownership_value << 31).
+ *
+ * When the max work request size is less than or equal to the WQE
+ * basic block size, as an optimization, we can stamp all WQEs with
+ * 0xffffffff, and skip the very first chunk of each WQE.
  */
-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 {
-	u32 *wqe = get_send_wqe(qp, n);
+	u32 *wqe;
 	int i;
+	int s;
+	int ind;
+	void *buf;
+	__be32 stamp;
+
+	s = roundup(size, 1U << qp->sq.wqe_shift);
+	if (qp->sq_max_wqes_per_wr > 1) {
+		for (i = 0; i < s; i += 64) {
+			ind = (i >> qp->sq.wqe_shift) + n;
+			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
+						       cpu_to_be32(0xffffffff);
+			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
+			*wqe = stamp;
+		}
+	} else {
+		buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+		for (i = 64; i < s; i += 64) {
+			wqe = buf + i;
+			*wqe = 0xffffffff;
+		}
+	}
+}
+
+static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
+{
+	struct mlx4_wqe_ctrl_seg *ctrl;
+	struct mlx4_wqe_inline_seg *inl;
+	void *wqe;
+	int s;
+
+	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+	s = sizeof(struct mlx4_wqe_ctrl_seg);
 
-	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
-		wqe[i] = 0xffffffff;
+	if (qp->ibqp.qp_type == IB_QPT_UD) {
+		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
+		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
+		memset(dgram, 0, sizeof *dgram);
+		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
+		s += sizeof(struct mlx4_wqe_datagram_seg);
+	}
+
+	/* Pad the remainder of the WQE with an inline data segment. */
+	if (size > s) {
+		inl = wqe + s;
+		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
+	}
+	ctrl->srcrb_flags = 0;
+	ctrl->fence_size = size / 16;
+	/*
+	 * Make sure descriptor is fully written before setting ownership bit
+	 * (because HW can start executing as soon as we do).
+	 */
+	wmb();
+
+	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
+		(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+
+	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
+}
+
+/* Post NOP WQE to prevent wrap-around in the middle of WR */
+static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
+{
+	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
+	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
+		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
+		ind += s;
+	}
+	return ind;
 }
 
 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
@@ -241,6 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
 {
+	int s;
+
 	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr  > dev->dev->caps.max_wqes  ||
 	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
@@ -256,20 +327,74 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
 		return -EINVAL;
 
-	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
-							sizeof (struct mlx4_wqe_data_seg),
-							cap->max_inline_data +
-							sizeof (struct mlx4_wqe_inline_seg)) +
-							send_wqe_overhead(type)));
-	qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
-		sizeof (struct mlx4_wqe_data_seg);
+	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
+		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
+		send_wqe_overhead(type);
 
 	/*
-	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-	 * allow HW to prefetch.
+	 * Hermon supports shrinking WQEs, such that a single work
+	 * request can include multiple units of 1 << wqe_shift. This
+	 * way, work requests can differ in size, and do not have to
+	 * be a power of 2 in size, saving memory and speeding up send
+	 * WR posting. Unfortunately, if we do this then the
+	 * wqe_index field in CQEs can't be used to look up the WR ID
+	 * anymore, so we do this only if selective signaling is off.
+	 *
+	 * Further, on 32-bit platforms, we can't use vmap() to make
+	 * the QP buffer virtually contigious. Thus we have to use
+	 * constant-sized WRs to make sure a WR is always fully within
+	 * a single page-sized chunk.
+	 *
+	 * Finally, we use NOP work requests to pad the end of the
+	 * work queue, to avoid wrap-around in the middle of WR. We
+	 * set NEC bit to avoid getting completions with error for
+	 * these NOP WRs, but since NEC is only supported starting
+	 * with firmware 2.2.232, we use constant-sized WRs for older
+	 * firmware.
+	 *
+	 * And, since MLX QPs only support SEND, we use constant-sized
+	 * WRs in this case.
+	 *
+	 * We look for the smallest value of wqe_shift such that the
+	 * resulting number of wqes does not exceed device
+	 * capabilities.
+	 *
+	 * We set WQE size to at least 64 bytes, this way stamping
+	 * invalidates each WQE.
 	 */
-	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-	qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);
+	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
+	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
+	    type != IB_QPT_SMI && type != IB_QPT_GSI)
+		qp->sq.wqe_shift = ilog2(64);
+	else
+		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+
+	for (;;) {
+		if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz)
+			return -EINVAL;
+
+		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
+
+		/*
+		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
+		 * allow HW to prefetch.
+		 */
+		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
+		qp->sq.wqe_cnt	  = roundup_pow_of_two(cap->max_send_wr *
+						       qp->sq_max_wqes_per_wr +
+						       qp->sq_spare_wqes);
+
+		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
+			break;
+
+		if (qp->sq_max_wqes_per_wr <= 1)
+			return -EINVAL;
+
+		++qp->sq.wqe_shift;
+	}
+
+	qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
+			 send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg);
 
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
@@ -281,7 +406,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 		qp->sq.offset = 0;
 	}
 
-	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
+	cap->max_send_wr  = qp->sq.max_post =
+		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
 	cap->max_send_sge = qp->sq.max_gs;
 	/* We don't support inline sends for kernel QPs (yet) */
 	cap->max_inline_data = 0;
@@ -327,6 +453,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	qp->rq.tail = 0;
 	qp->sq.head = 0;
 	qp->sq.tail = 0;
+	qp->sq_next_wqe = 0;
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+	else
+		qp->sq_signal_bits = 0;
 
 	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
 	if (err)
@@ -417,11 +549,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	 */
 	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
 
-	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
-		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-	else
-		qp->sq_signal_bits = 0;
-
 	qp->mqp.event = mlx4_ib_qp_event;
 
 	return 0;
@@ -916,7 +1043,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 			ctrl = get_send_wqe(qp, i);
 			ctrl->owner_opcode = cpu_to_be32(1 << 31);
 
-			stamp_send_wqe(qp, i);
+			stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
 		}
 	}
 
@@ -969,6 +1096,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		qp->rq.tail = 0;
 		qp->sq.head = 0;
 		qp->sq.tail = 0;
+		qp->sq_next_wqe = 0;
 		if (!ibqp->srq)
 			*qp->db.db = 0;
 	}
@@ -1278,13 +1406,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	unsigned long flags;
 	int nreq;
 	int err = 0;
-	int ind;
-	int size;
+	unsigned ind;
+	int uninitialized_var(stamp);
+	int uninitialized_var(size);
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	ind = qp->sq.head;
+	ind = qp->sq_next_wqe;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -1300,7 +1429,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		}
 
 		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
+		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
 
 		ctrl->srcrb_flags =
 			(wr->send_flags & IB_SEND_SIGNALED ?
@@ -1413,16 +1542,23 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
 			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
 
+		stamp = ind + qp->sq_spare_wqes;
+		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
+
 		/*
 		 * We can improve latency by not stamping the last
 		 * send queue WQE until after ringing the doorbell, so
 		 * only stamp here if there are still more WQEs to post.
+		 *
+		 * Same optimization applies to padding with NOP wqe
+		 * in case of WQE shrinking (used to prevent wrap-around
+		 * in the middle of WR).
 		 */
-		if (wr->next)
-			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
-				       (qp->sq.wqe_cnt - 1));
+		if (wr->next) {
+			stamp_send_wqe(qp, stamp, size * 16);
+			ind = pad_wraparound(qp, ind);
+		}
 
-		++ind;
 	}
 
 out:
@@ -1444,8 +1580,10 @@ out:
 		 */
 		mmiowb();
 
-		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
-			       (qp->sq.wqe_cnt - 1));
+		stamp_send_wqe(qp, stamp, size * 16);
+
+		ind = pad_wraparound(qp, ind);
+		qp->sq_next_wqe = ind;
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
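
A worked example of the new sizing loop in set_kernel_sq_size() above, under assumed inputs (not taken from the patch): a UD QP created with IB_SIGNAL_ALL_WR (so sq_signal_bits is set) on a 64-bit kernel with NEC-capable firmware, whose largest WR needs s = 200 bytes including send_wqe_overhead(), and cap->max_send_wr = 100. The shrinking path is taken, so wqe_shift starts at ilog2(64) = 6; sq_max_wqes_per_wr = DIV_ROUND_UP(200, 64) = 4 basic blocks per WR; sq_spare_wqes = (2048 >> 6) + 4 = 36; and wqe_cnt = roundup_pow_of_two(100 * 4 + 36) = 512. If 512 exceeded max_wqes, the loop would retry with 128-byte blocks (sq_max_wqes_per_wr = 2), and so on. The max_send_wr reported back to the caller is (512 - 36) / 4 = 119.
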
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index e7e9a3d0dac3..beaa3b06cf58 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -38,13 +38,7 @@
 
 static void *get_wqe(struct mlx4_ib_srq *srq, int n)
 {
-	int offset = n << srq->msrq.wqe_shift;
-
-	if (srq->buf.nbufs == 1)
-		return srq->buf.u.direct.buf + offset;
-	else
-		return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
-			(offset & (PAGE_SIZE - 1));
+	return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
 }
 
 static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index fe250c60607d..f9b7caa54143 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -143,7 +143,7 @@ struct ipoib_rx_buf {
 
 struct ipoib_tx_buf {
 	struct sk_buff *skb;
-	u64		mapping;
+	u64		mapping[MAX_SKB_FRAGS + 1];
 };
 
 struct ib_cm_id;
@@ -296,7 +296,7 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
-	struct ib_sge	     tx_sge;
+	struct ib_sge	     tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_send_wr    tx_wr;
 	unsigned	     tx_outstanding;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1818f958c250..7dd2ec473d24 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -634,8 +634,8 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 {
 	struct ib_send_wr *bad_wr;
 
-	priv->tx_sge.addr    = addr;
-	priv->tx_sge.length  = len;
+	priv->tx_sge[0].addr   = addr;
+	priv->tx_sge[0].length = len;
 
 	priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
 
@@ -676,7 +676,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 		return;
 	}
 
-	tx_req->mapping = addr;
+	tx_req->mapping[0] = addr;
 
 	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
 			       addr, skb->len))) {
@@ -715,7 +715,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+	ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++dev->stats.tx_packets;
@@ -1110,7 +1110,7 @@ timeout:
 
 	while ((int) p->tx_tail - (int) p->tx_head < 0) {
 		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+		ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
 				    DMA_TO_DEVICE);
 		dev_kfree_skb_any(tx_req->skb);
 		++p->tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 52bc2bd5799a..9d3e778dc56d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -239,6 +239,54 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
+static int ipoib_dma_map_tx(struct ib_device *ca,
+			    struct ipoib_tx_buf *tx_req)
+{
+	struct sk_buff *skb = tx_req->skb;
+	u64 *mapping = tx_req->mapping;
+	int i;
+
+	mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+				       DMA_TO_DEVICE);
+	if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+		return -EIO;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+						 frag->page_offset, frag->size,
+						 DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+			goto partial_error;
+	}
+	return 0;
+
+partial_error:
+	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+	for (; i > 0; --i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+		ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+	}
+	return -EIO;
+}
+
+static void ipoib_dma_unmap_tx(struct ib_device *ca,
+			       struct ipoib_tx_buf *tx_req)
+{
+	struct sk_buff *skb = tx_req->skb;
+	u64 *mapping = tx_req->mapping;
+	int i;
+
+	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+				  DMA_TO_DEVICE);
+	}
+}
+
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -257,8 +305,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	tx_req = &priv->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping,
-			    tx_req->skb->len, DMA_TO_DEVICE);
+	ipoib_dma_unmap_tx(priv->ca, tx_req);
 
 	++dev->stats.tx_packets;
 	dev->stats.tx_bytes += tx_req->skb->len;
@@ -341,16 +388,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 addr, int len)
+			    u64 *mapping, int headlen,
+			    skb_frag_t *frags,
+			    int nr_frags)
 {
 	struct ib_send_wr *bad_wr;
+	int i;
 
-	priv->tx_sge.addr             = addr;
-	priv->tx_sge.length           = len;
-
-	priv->tx_wr.wr_id 	      = wr_id;
-	priv->tx_wr.wr.ud.remote_qpn  = qpn;
-	priv->tx_wr.wr.ud.ah 	      = address;
+	priv->tx_sge[0].addr          = mapping[0];
+	priv->tx_sge[0].length        = headlen;
+	for (i = 0; i < nr_frags; ++i) {
+		priv->tx_sge[i + 1].addr = mapping[i + 1];
+		priv->tx_sge[i + 1].length = frags[i].size;
+	}
+	priv->tx_wr.num_sge	      = nr_frags + 1;
+	priv->tx_wr.wr_id 	      = wr_id;
+	priv->tx_wr.wr.ud.remote_qpn  = qpn;
+	priv->tx_wr.wr.ud.ah 	      = address;
 
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
@@ -360,7 +414,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-	u64 addr;
 
 	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -383,20 +436,19 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
-				 DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++dev->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 		return;
 	}
-	tx_req->mapping = addr;
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-			       address->ah, qpn, addr, skb->len))) {
+			       address->ah, qpn,
+			       tx_req->mapping, skb_headlen(skb),
+			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->stats.tx_errors;
-		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
 	} else {
 		dev->trans_start = jiffies;
@@ -615,10 +667,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 	while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 		tx_req = &priv->tx_ring[priv->tx_tail &
 					(ipoib_sendq_size - 1)];
-		ib_dma_unmap_single(priv->ca,
-				    tx_req->mapping,
-				    tx_req->skb->len,
-				    DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(tx_req->skb);
 		++priv->tx_tail;
 		--priv->tx_outstanding;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 09f5371137a1..f96477a8ca5a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -965,7 +965,9 @@ static void ipoib_setup(struct net_device *dev)
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
 	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
-	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+	dev->features            = (NETIF_F_VLAN_CHALLENGED |
+				    NETIF_F_LLTX |
+				    NETIF_F_HIGHDMA);
 
 	/* MTU will be reset when mcast join happens */
 	dev->mtu 		 = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 433e99ac227b..a3aeb911f024 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -157,6 +157,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	};
 
 	int ret, size;
+	int i;
 
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
@@ -191,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq;
 
+	if (dev->features & NETIF_F_SG)
+		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
@@ -201,11 +205,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
 	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
 
-	priv->tx_sge.lkey 	= priv->mr->lkey;
+	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+		priv->tx_sge[i].lkey = priv->mr->lkey;
 
 	priv->tx_wr.opcode 	= IB_WR_SEND;
-	priv->tx_wr.sg_list 	= &priv->tx_sge;
-	priv->tx_wr.num_sge 	= 1;
+	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 
 	return 0;
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index b226e019bc8b..521dc0322ee4 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -116,40 +116,53 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->nbufs        = 1;
 		buf->npages       = 1;
 		buf->page_shift   = get_order(size) + PAGE_SHIFT;
-		buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+		buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
 						       size, &t, GFP_KERNEL);
-		if (!buf->u.direct.buf)
+		if (!buf->direct.buf)
 			return -ENOMEM;
 
-		buf->u.direct.map = t;
+		buf->direct.map = t;
 
 		while (t & ((1 << buf->page_shift) - 1)) {
 			--buf->page_shift;
 			buf->npages *= 2;
 		}
 
-		memset(buf->u.direct.buf, 0, size);
+		memset(buf->direct.buf, 0, size);
 	} else {
 		int i;
 
 		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
-		buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list,
+		buf->page_list   = kzalloc(buf->nbufs * sizeof *buf->page_list,
 					   GFP_KERNEL);
-		if (!buf->u.page_list)
+		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
-			buf->u.page_list[i].buf =
+			buf->page_list[i].buf =
 				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
 						   &t, GFP_KERNEL);
-			if (!buf->u.page_list[i].buf)
+			if (!buf->page_list[i].buf)
 				goto err_free;
 
-			buf->u.page_list[i].map = t;
+			buf->page_list[i].map = t;
 
-			memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
+			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+		}
+
+		if (BITS_PER_LONG == 64) {
+			struct page **pages;
+			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+			if (!pages)
+				goto err_free;
+			for (i = 0; i < buf->nbufs; ++i)
+				pages[i] = virt_to_page(buf->page_list[i].buf);
+			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+			kfree(pages);
+			if (!buf->direct.buf)
+				goto err_free;
 		}
 	}
 
@@ -167,15 +180,18 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 	int i;
 
 	if (buf->nbufs == 1)
-		dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
-				  buf->u.direct.map);
+		dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+				  buf->direct.map);
 	else {
+		if (BITS_PER_LONG == 64)
+			vunmap(buf->direct.buf);
+
 		for (i = 0; i < buf->nbufs; ++i)
-			if (buf->u.page_list[i].buf)
+			if (buf->page_list[i].buf)
 				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
-						  buf->u.page_list[i].buf,
-						  buf->u.page_list[i].map);
-		kfree(buf->u.page_list);
+						  buf->page_list[i].buf,
+						  buf->page_list[i].map);
+		kfree(buf->page_list);
 	}
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 9c9e308d0917..679dfdb6807f 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -419,9 +419,9 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	for (i = 0; i < buf->npages; ++i)
 		if (buf->nbufs == 1)
-			page_list[i] = buf->u.direct.map + (i << buf->page_shift);
+			page_list[i] = buf->direct.map + (i << buf->page_shift);
 		else
-			page_list[i] = buf->u.page_list[i].map;
+			page_list[i] = buf->page_list[i].map;
 
 	err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 222815d91c40..6cdf813cd478 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -133,6 +133,11 @@ enum {
 	MLX4_STAT_RATE_OFFSET	= 5
 };
 
+static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
+{
+	return (major << 32) | (minor << 16) | subminor;
+}
+
 struct mlx4_caps {
 	u64			fw_ver;
 	int			num_ports;
@@ -189,10 +194,8 @@ struct mlx4_buf_list {
 };
 
 struct mlx4_buf {
-	union {
-		struct mlx4_buf_list	direct;
-		struct mlx4_buf_list	*page_list;
-	} u;
+	struct mlx4_buf_list	direct;
+	struct mlx4_buf_list   *page_list;
 	int			nbufs;
 	int			npages;
 	int			page_shift;
@@ -308,6 +311,14 @@ struct mlx4_init_port_param {
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
+static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
+{
+	if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+		return buf->direct.buf + offset;
+	else
+		return buf->page_list[offset >> PAGE_SHIFT].buf +
+			(offset & (PAGE_SIZE - 1));
+}
 
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 3968b943259a..09a2230923f2 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -154,7 +154,11 @@ struct mlx4_qp_context {
 	u32			reserved5[10];
 };
 
+/* Which firmware version adds support for NEC (NoErrorCompletion) bit */
+#define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
+
 enum {
+	MLX4_WQE_CTRL_NEC	= 1 << 29,
 	MLX4_WQE_CTRL_FENCE	= 1 << 6,
 	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
 	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index cfbd38fe2998..701e7b40560a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -95,7 +95,15 @@ enum ib_device_cap_flags {
 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14),
 	IB_DEVICE_ZERO_STAG		= (1<<15),
 	IB_DEVICE_SEND_W_INV		= (1<<16),
-	IB_DEVICE_MEM_WINDOW		= (1<<17)
+	IB_DEVICE_MEM_WINDOW		= (1<<17),
+	/*
+	 * Devices should set IB_DEVICE_UD_IP_SUM if they support
+	 * insertion of UDP and TCP checksum on outgoing UD IPoIB
+	 * messages and can verify the validity of checksum for
+	 * incoming messages. Setting this flag implies that the
+	 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
+	 */
+	IB_DEVICE_UD_IP_CSUM		= (1<<18),
 };
 
 enum ib_atomic_cap {
@@ -431,6 +439,7 @@ struct ib_wc {
 	u8			sl;
 	u8			dlid_path_bits;
 	u8			port_num;	/* valid only for DR SMPs on switches */
+	int			csum_ok;
 };
 
 enum ib_cq_notify_flags {
@@ -615,7 +624,8 @@ enum ib_send_flags {
 	IB_SEND_FENCE		= 1,
 	IB_SEND_SIGNALED	= (1<<1),
 	IB_SEND_SOLICITED	= (1<<2),
-	IB_SEND_INLINE		= (1<<3)
+	IB_SEND_INLINE		= (1<<3),
+	IB_SEND_IP_CSUM		= (1<<4)
 };
 
 struct ib_sge {
@@ -890,8 +900,6 @@ struct ib_device {
 	int			     *pkey_tbl_len;
 	int			     *gid_tbl_len;
 
-	u32			      flags;
-
 	int			      num_comp_vectors;
 
 	struct iw_cm_verbs	     *iwcm;
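
Closing note on the ib_verbs.h additions above: checksum offload is advertised by the low-level driver through IB_DEVICE_UD_IP_CSUM, requested per work request with IB_SEND_IP_CSUM, and reported on receive through wc->csum_ok. The consumer-side sketch below is illustrative usage only; the IPoIB code that actually wires this up is part of the merged series but is not shown in this diff:

    #include <rdma/ib_verbs.h>

    /* Illustrative ULP usage of the new checksum-offload flags. */
    static int example_post_csum_send(struct ib_qp *qp, struct ib_send_wr *wr,
    				      struct ib_device_attr *attr)
    {
    	struct ib_send_wr *bad_wr;

    	if (attr->device_cap_flags & IB_DEVICE_UD_IP_CSUM)
    		wr->send_flags |= IB_SEND_IP_CSUM;	/* HW inserts UDP/TCP checksum */

    	return ib_post_send(qp, wr, &bad_wr);
    }

    /* On the receive side, trust the packet checksum when csum_ok is set. */
    static int example_rx_csum_ok(struct ib_wc *wc)
    {
    	return wc->csum_ok;
    }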