author		Eli Cohen <eli@mellanox.co.il>		2008-01-30 11:30:53 -0500
committer	Roland Dreier <rolandd@cisco.com>	2008-02-08 17:32:37 -0500
commit		7143740d26098aca84ecc7376ccfe2c58fd0412e
tree		c29029045ebdbe197f94ead3e1d6610f498f1880
parent		eb14032f9eb595621270f3269f40094adb3144e8
IPoIB: Add send gather support
This patch acts as a preparation for using checksum offload for IB
devices capable of inserting/verifying checksum in IP packets. The
patch does not actually turn on NETIF_F_SG - we defer that to the
patches adding checksum offload capabilities. We only add support for
send gathers for datagram mode, since existing HW does not support
checksum offload on connected QPs.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
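[Editor's note] In short, "send gather" means posting a single send work
request whose scatter/gather list points at the skb's linear head plus each
of its page fragments, rather than first copying the packet into one
contiguous buffer. A minimal sketch of building such a gather list follows;
build_gather_list is a hypothetical helper for illustration only, not part
of this patch, and it assumes the 2.6.24-era skb_frag_t fields (page,
page_offset, size) that the patch itself uses.

/* Illustrative sketch only -- not part of the patch. Assumes the DMA
 * mappings were already created (as ipoib_dma_map_tx below does) and
 * that sge[] has room for MAX_SKB_FRAGS + 1 entries with lkey set. */
static int build_gather_list(struct ib_sge *sge, const u64 *mapping,
			     struct sk_buff *skb)
{
	int i;

	/* Entry 0 always covers the linear part of the skb. */
	sge[0].addr   = mapping[0];
	sge[0].length = skb_headlen(skb);

	/* One further entry per page fragment (at most MAX_SKB_FRAGS). */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		sge[i + 1].addr   = mapping[i + 1];
		sge[i + 1].length = skb_shinfo(skb)->frags[i].size;
	}

	return skb_shinfo(skb)->nr_frags + 1;	/* becomes tx_wr.num_sge */
}

This mirrors what the reworked post_send() in ipoib_ib.c does; note the
patch avoids touching lkey per packet by setting it once on every tx_sge
entry in ipoib_transport_dev_init().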
 drivers/infiniband/ulp/ipoib/ipoib.h       |  4
 drivers/infiniband/ulp/ipoib/ipoib_cm.c    | 10
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    | 89
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 10
 4 files changed, 83 insertions(+), 30 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index fe250c60607d..f9b7caa54143 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -143,7 +143,7 @@ struct ipoib_rx_buf {
 
 struct ipoib_tx_buf {
 	struct sk_buff *skb;
-	u64		mapping;
+	u64		mapping[MAX_SKB_FRAGS + 1];
 };
 
 struct ib_cm_id;
@@ -296,7 +296,7 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
-	struct ib_sge	     tx_sge;
+	struct ib_sge	     tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_send_wr    tx_wr;
 	unsigned	     tx_outstanding;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1818f958c250..7dd2ec473d24 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -634,8 +634,8 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 {
 	struct ib_send_wr *bad_wr;
 
-	priv->tx_sge.addr	= addr;
-	priv->tx_sge.length	= len;
+	priv->tx_sge[0].addr	= addr;
+	priv->tx_sge[0].length	= len;
 
 	priv->tx_wr.wr_id	= wr_id | IPOIB_OP_CM;
 
@@ -676,7 +676,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 		return;
 	}
 
-	tx_req->mapping = addr;
+	tx_req->mapping[0] = addr;
 
 	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
 			       addr, skb->len))) {
@@ -715,7 +715,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+	ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++dev->stats.tx_packets;
@@ -1110,7 +1110,7 @@ timeout:
 
 	while ((int) p->tx_tail - (int) p->tx_head < 0) {
 		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+		ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
 				    DMA_TO_DEVICE);
 		dev_kfree_skb_any(tx_req->skb);
 		++p->tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 52bc2bd5799a..9d3e778dc56d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -239,6 +239,54 @@ repost:
239 "for buf %d\n", wr_id); 239 "for buf %d\n", wr_id);
240} 240}
241 241
242static int ipoib_dma_map_tx(struct ib_device *ca,
243 struct ipoib_tx_buf *tx_req)
244{
245 struct sk_buff *skb = tx_req->skb;
246 u64 *mapping = tx_req->mapping;
247 int i;
248
249 mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
250 DMA_TO_DEVICE);
251 if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
252 return -EIO;
253
254 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
255 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
256 mapping[i + 1] = ib_dma_map_page(ca, frag->page,
257 frag->page_offset, frag->size,
258 DMA_TO_DEVICE);
259 if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
260 goto partial_error;
261 }
262 return 0;
263
264partial_error:
265 ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
266
267 for (; i > 0; --i) {
268 skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
269 ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
270 }
271 return -EIO;
272}
273
274static void ipoib_dma_unmap_tx(struct ib_device *ca,
275 struct ipoib_tx_buf *tx_req)
276{
277 struct sk_buff *skb = tx_req->skb;
278 u64 *mapping = tx_req->mapping;
279 int i;
280
281 ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
282
283 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
284 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
285 ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
286 DMA_TO_DEVICE);
287 }
288}
289
242static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 290static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
243{ 291{
244 struct ipoib_dev_priv *priv = netdev_priv(dev); 292 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -257,8 +305,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	tx_req = &priv->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping,
-			    tx_req->skb->len, DMA_TO_DEVICE);
+	ipoib_dma_unmap_tx(priv->ca, tx_req);
 
 	++dev->stats.tx_packets;
 	dev->stats.tx_bytes += tx_req->skb->len;
@@ -341,16 +388,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 addr, int len)
+			    u64 *mapping, int headlen,
+			    skb_frag_t *frags,
+			    int nr_frags)
 {
 	struct ib_send_wr *bad_wr;
+	int i;
 
-	priv->tx_sge.addr	     = addr;
-	priv->tx_sge.length	     = len;
-
-	priv->tx_wr.wr_id	     = wr_id;
-	priv->tx_wr.wr.ud.remote_qpn = qpn;
-	priv->tx_wr.wr.ud.ah	     = address;
+	priv->tx_sge[0].addr	     = mapping[0];
+	priv->tx_sge[0].length	     = headlen;
+	for (i = 0; i < nr_frags; ++i) {
+		priv->tx_sge[i + 1].addr   = mapping[i + 1];
+		priv->tx_sge[i + 1].length = frags[i].size;
+	}
+	priv->tx_wr.num_sge	     = nr_frags + 1;
+	priv->tx_wr.wr_id	     = wr_id;
+	priv->tx_wr.wr.ud.remote_qpn = qpn;
+	priv->tx_wr.wr.ud.ah	     = address;
 
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
@@ -360,7 +414,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-	u64 addr;
 
 	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -383,20 +436,19 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
-				 DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++dev->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 		return;
 	}
-	tx_req->mapping = addr;
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-			       address->ah, qpn, addr, skb->len))) {
+			       address->ah, qpn,
+			       tx_req->mapping, skb_headlen(skb),
+			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->stats.tx_errors;
-		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
 	} else {
 		dev->trans_start = jiffies;
@@ -615,10 +667,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 	while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 		tx_req = &priv->tx_ring[priv->tx_tail &
 					(ipoib_sendq_size - 1)];
-		ib_dma_unmap_single(priv->ca,
-				    tx_req->mapping,
-				    tx_req->skb->len,
-				    DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(tx_req->skb);
 		++priv->tx_tail;
 		--priv->tx_outstanding;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 433e99ac227b..a3aeb911f024 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -157,6 +157,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	};
 
 	int ret, size;
+	int i;
 
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
@@ -191,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq;
 
+	if (dev->features & NETIF_F_SG)
+		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
@@ -201,11 +205,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
 	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
 
-	priv->tx_sge.lkey	= priv->mr->lkey;
+	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+		priv->tx_sge[i].lkey = priv->mr->lkey;
 
 	priv->tx_wr.opcode	= IB_WR_SEND;
-	priv->tx_wr.sg_list	= &priv->tx_sge;
-	priv->tx_wr.num_sge	= 1;
+	priv->tx_wr.sg_list	= priv->tx_sge;
 	priv->tx_wr.send_flags	= IB_SEND_SIGNALED;
 
 	return 0;