aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp/ipoib
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-07-18 05:39:39 -0400
committerDavid S. Miller <davem@davemloft.net>2008-07-18 05:39:39 -0400
commit49997d75152b3d23c53b0fa730599f2f74c92c65 (patch)
tree46e93126170d02cfec9505172e545732c1b69656 /drivers/infiniband/ulp/ipoib
parenta0c80b80e0fb48129e4e9d6a9ede914f9ff1850d (diff)
parent5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: Documentation/powerpc/booting-without-of.txt drivers/atm/Makefile drivers/net/fs_enet/fs_enet-main.c drivers/pci/pci-acpi.c net/8021q/vlan.c net/iucv/iucv.c
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h48
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c104
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c46
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c52
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c115
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c69
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
10 files changed, 342 insertions, 124 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad020f3..691525cf394a 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
1config INFINIBAND_IPOIB 1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand" 2 tristate "IP-over-InfiniBand"
3 depends on NETDEVICES && INET && (IPV6 || IPV6=n) 3 depends on NETDEVICES && INET && (IPV6 || IPV6=n)
4 select INET_LRO
4 ---help--- 5 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This 6 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB 7 transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca126fc2b853..b0ffc9abe8c0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $
35 */ 33 */
36 34
37#ifndef _IPOIB_H 35#ifndef _IPOIB_H
@@ -52,9 +50,16 @@
52#include <rdma/ib_verbs.h> 50#include <rdma/ib_verbs.h>
53#include <rdma/ib_pack.h> 51#include <rdma/ib_pack.h>
54#include <rdma/ib_sa.h> 52#include <rdma/ib_sa.h>
53#include <linux/inet_lro.h>
55 54
56/* constants */ 55/* constants */
57 56
57enum ipoib_flush_level {
58 IPOIB_FLUSH_LIGHT,
59 IPOIB_FLUSH_NORMAL,
60 IPOIB_FLUSH_HEAVY
61};
62
58enum { 63enum {
59 IPOIB_ENCAP_LEN = 4, 64 IPOIB_ENCAP_LEN = 4,
60 65
@@ -65,8 +70,8 @@ enum {
65 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, 70 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
66 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, 71 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
67 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, 72 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
68 IPOIB_RX_RING_SIZE = 128, 73 IPOIB_RX_RING_SIZE = 256,
69 IPOIB_TX_RING_SIZE = 64, 74 IPOIB_TX_RING_SIZE = 128,
70 IPOIB_MAX_QUEUE_SIZE = 8192, 75 IPOIB_MAX_QUEUE_SIZE = 8192,
71 IPOIB_MIN_QUEUE_SIZE = 2, 76 IPOIB_MIN_QUEUE_SIZE = 2,
72 IPOIB_CM_MAX_CONN_QP = 4096, 77 IPOIB_CM_MAX_CONN_QP = 4096,
@@ -84,7 +89,6 @@ enum {
84 IPOIB_FLAG_SUBINTERFACE = 5, 89 IPOIB_FLAG_SUBINTERFACE = 5,
85 IPOIB_MCAST_RUN = 6, 90 IPOIB_MCAST_RUN = 6,
86 IPOIB_STOP_REAPER = 7, 91 IPOIB_STOP_REAPER = 7,
87 IPOIB_MCAST_STARTED = 8,
88 IPOIB_FLAG_ADMIN_CM = 9, 92 IPOIB_FLAG_ADMIN_CM = 9,
89 IPOIB_FLAG_UMCAST = 10, 93 IPOIB_FLAG_UMCAST = 10,
90 IPOIB_FLAG_CSUM = 11, 94 IPOIB_FLAG_CSUM = 11,
@@ -96,7 +100,11 @@ enum {
96 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 100 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
97 IPOIB_MCAST_FLAG_ATTACHED = 3, 101 IPOIB_MCAST_FLAG_ATTACHED = 3,
98 102
103 IPOIB_MAX_LRO_DESCRIPTORS = 8,
104 IPOIB_LRO_MAX_AGGR = 64,
105
99 MAX_SEND_CQE = 16, 106 MAX_SEND_CQE = 16,
107 IPOIB_CM_COPYBREAK = 256,
100}; 108};
101 109
102#define IPOIB_OP_RECV (1ul << 31) 110#define IPOIB_OP_RECV (1ul << 31)
@@ -149,6 +157,11 @@ struct ipoib_tx_buf {
149 u64 mapping[MAX_SKB_FRAGS + 1]; 157 u64 mapping[MAX_SKB_FRAGS + 1];
150}; 158};
151 159
160struct ipoib_cm_tx_buf {
161 struct sk_buff *skb;
162 u64 mapping;
163};
164
152struct ib_cm_id; 165struct ib_cm_id;
153 166
154struct ipoib_cm_data { 167struct ipoib_cm_data {
@@ -207,7 +220,7 @@ struct ipoib_cm_tx {
207 struct net_device *dev; 220 struct net_device *dev;
208 struct ipoib_neigh *neigh; 221 struct ipoib_neigh *neigh;
209 struct ipoib_path *path; 222 struct ipoib_path *path;
210 struct ipoib_tx_buf *tx_ring; 223 struct ipoib_cm_tx_buf *tx_ring;
211 unsigned tx_head; 224 unsigned tx_head;
212 unsigned tx_tail; 225 unsigned tx_tail;
213 unsigned long flags; 226 unsigned long flags;
@@ -249,6 +262,11 @@ struct ipoib_ethtool_st {
249 u16 max_coalesced_frames; 262 u16 max_coalesced_frames;
250}; 263};
251 264
265struct ipoib_lro {
266 struct net_lro_mgr lro_mgr;
267 struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
268};
269
252/* 270/*
253 * Device private locking: tx_lock protects members used in TX fast 271 * Device private locking: tx_lock protects members used in TX fast
254 * path (and we use LLTX so upper layers don't do extra locking). 272 * path (and we use LLTX so upper layers don't do extra locking).
@@ -264,7 +282,6 @@ struct ipoib_dev_priv {
264 282
265 unsigned long flags; 283 unsigned long flags;
266 284
267 struct mutex mcast_mutex;
268 struct mutex vlan_mutex; 285 struct mutex vlan_mutex;
269 286
270 struct rb_root path_tree; 287 struct rb_root path_tree;
@@ -276,10 +293,11 @@ struct ipoib_dev_priv {
276 293
277 struct delayed_work pkey_poll_task; 294 struct delayed_work pkey_poll_task;
278 struct delayed_work mcast_task; 295 struct delayed_work mcast_task;
279 struct work_struct flush_task; 296 struct work_struct flush_light;
297 struct work_struct flush_normal;
298 struct work_struct flush_heavy;
280 struct work_struct restart_task; 299 struct work_struct restart_task;
281 struct delayed_work ah_reap_task; 300 struct delayed_work ah_reap_task;
282 struct work_struct pkey_event_task;
283 301
284 struct ib_device *ca; 302 struct ib_device *ca;
285 u8 port; 303 u8 port;
@@ -335,6 +353,8 @@ struct ipoib_dev_priv {
335 int hca_caps; 353 int hca_caps;
336 struct ipoib_ethtool_st ethtool; 354 struct ipoib_ethtool_st ethtool;
337 struct timer_list poll_timer; 355 struct timer_list poll_timer;
356
357 struct ipoib_lro lro;
338}; 358};
339 359
340struct ipoib_ah { 360struct ipoib_ah {
@@ -359,6 +379,7 @@ struct ipoib_path {
359 379
360 struct rb_node rb_node; 380 struct rb_node rb_node;
361 struct list_head list; 381 struct list_head list;
382 int valid;
362}; 383};
363 384
364struct ipoib_neigh { 385struct ipoib_neigh {
@@ -423,11 +444,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
423 struct ipoib_ah *address, u32 qpn); 444 struct ipoib_ah *address, u32 qpn);
424void ipoib_reap_ah(struct work_struct *work); 445void ipoib_reap_ah(struct work_struct *work);
425 446
447void ipoib_mark_paths_invalid(struct net_device *dev);
426void ipoib_flush_paths(struct net_device *dev); 448void ipoib_flush_paths(struct net_device *dev);
427struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); 449struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
428 450
429int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 451int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
430void ipoib_ib_dev_flush(struct work_struct *work); 452void ipoib_ib_dev_flush_light(struct work_struct *work);
453void ipoib_ib_dev_flush_normal(struct work_struct *work);
454void ipoib_ib_dev_flush_heavy(struct work_struct *work);
431void ipoib_pkey_event(struct work_struct *work); 455void ipoib_pkey_event(struct work_struct *work);
432void ipoib_ib_dev_cleanup(struct net_device *dev); 456void ipoib_ib_dev_cleanup(struct net_device *dev);
433 457
@@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
466#endif 490#endif
467 491
468int ipoib_mcast_attach(struct net_device *dev, u16 mlid, 492int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
469 union ib_gid *mgid); 493 union ib_gid *mgid, int set_qkey);
470int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
471 union ib_gid *mgid);
472 494
473int ipoib_init_qp(struct net_device *dev); 495int ipoib_init_qp(struct net_device *dev);
474int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); 496int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 97e67d36378f..0f2d3045061a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id$
33 */ 31 */
34 32
35#include <rdma/ib_cm.h> 33#include <rdma/ib_cm.h>
@@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
113} 111}
114 112
115static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, 113static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
116 struct ipoib_cm_rx *rx, int id) 114 struct ipoib_cm_rx *rx,
115 struct ib_recv_wr *wr,
116 struct ib_sge *sge, int id)
117{ 117{
118 struct ipoib_dev_priv *priv = netdev_priv(dev); 118 struct ipoib_dev_priv *priv = netdev_priv(dev);
119 struct ib_recv_wr *bad_wr; 119 struct ib_recv_wr *bad_wr;
120 int i, ret; 120 int i, ret;
121 121
122 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 122 wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
123 123
124 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 124 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
125 priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; 125 sge[i].addr = rx->rx_ring[id].mapping[i];
126 126
127 ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); 127 ret = ib_post_recv(rx->qp, wr, &bad_wr);
128 if (unlikely(ret)) { 128 if (unlikely(ret)) {
129 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 129 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
130 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 130 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -322,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
322 return 0; 322 return 0;
323} 323}
324 324
325static void ipoib_cm_init_rx_wr(struct net_device *dev,
326 struct ib_recv_wr *wr,
327 struct ib_sge *sge)
328{
329 struct ipoib_dev_priv *priv = netdev_priv(dev);
330 int i;
331
332 for (i = 0; i < priv->cm.num_frags; ++i)
333 sge[i].lkey = priv->mr->lkey;
334
335 sge[0].length = IPOIB_CM_HEAD_SIZE;
336 for (i = 1; i < priv->cm.num_frags; ++i)
337 sge[i].length = PAGE_SIZE;
338
339 wr->next = NULL;
340 wr->sg_list = priv->cm.rx_sge;
341 wr->num_sge = priv->cm.num_frags;
342}
343
325static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, 344static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
326 struct ipoib_cm_rx *rx) 345 struct ipoib_cm_rx *rx)
327{ 346{
328 struct ipoib_dev_priv *priv = netdev_priv(dev); 347 struct ipoib_dev_priv *priv = netdev_priv(dev);
348 struct {
349 struct ib_recv_wr wr;
350 struct ib_sge sge[IPOIB_CM_RX_SG];
351 } *t;
329 int ret; 352 int ret;
330 int i; 353 int i;
331 354
@@ -333,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
333 if (!rx->rx_ring) 356 if (!rx->rx_ring)
334 return -ENOMEM; 357 return -ENOMEM;
335 358
359 t = kmalloc(sizeof *t, GFP_KERNEL);
360 if (!t) {
361 ret = -ENOMEM;
362 goto err_free;
363 }
364
365 ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
366
336 spin_lock_irq(&priv->lock); 367 spin_lock_irq(&priv->lock);
337 368
338 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 369 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
@@ -351,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
351 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 382 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
352 ret = -ENOMEM; 383 ret = -ENOMEM;
353 goto err_count; 384 goto err_count;
354 } 385 }
355 ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); 386 ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
356 if (ret) { 387 if (ret) {
357 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 388 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
358 "failed for buf %d\n", i); 389 "failed for buf %d\n", i);
@@ -363,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
363 394
364 rx->recv_count = ipoib_recvq_size; 395 rx->recv_count = ipoib_recvq_size;
365 396
397 kfree(t);
398
366 return 0; 399 return 0;
367 400
368err_count: 401err_count:
@@ -371,6 +404,7 @@ err_count:
371 spin_unlock_irq(&priv->lock); 404 spin_unlock_irq(&priv->lock);
372 405
373err_free: 406err_free:
407 kfree(t);
374 ipoib_cm_free_rx_ring(dev, rx->rx_ring); 408 ipoib_cm_free_rx_ring(dev, rx->rx_ring);
375 409
376 return ret; 410 return ret;
@@ -525,6 +559,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
525 u64 mapping[IPOIB_CM_RX_SG]; 559 u64 mapping[IPOIB_CM_RX_SG];
526 int frags; 560 int frags;
527 int has_srq; 561 int has_srq;
562 struct sk_buff *small_skb;
528 563
529 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 564 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
530 wr_id, wc->status); 565 wr_id, wc->status);
@@ -579,6 +614,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
579 } 614 }
580 } 615 }
581 616
617 if (wc->byte_len < IPOIB_CM_COPYBREAK) {
618 int dlen = wc->byte_len;
619
620 small_skb = dev_alloc_skb(dlen + 12);
621 if (small_skb) {
622 skb_reserve(small_skb, 12);
623 ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
624 dlen, DMA_FROM_DEVICE);
625 skb_copy_from_linear_data(skb, small_skb->data, dlen);
626 ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
627 dlen, DMA_FROM_DEVICE);
628 skb_put(small_skb, dlen);
629 skb = small_skb;
630 goto copied;
631 }
632 }
633
582 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 634 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
583 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 635 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
584 636
@@ -601,6 +653,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
601 653
602 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); 654 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
603 655
656copied:
604 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 657 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
605 skb_reset_mac_header(skb); 658 skb_reset_mac_header(skb);
606 skb_pull(skb, IPOIB_ENCAP_LEN); 659 skb_pull(skb, IPOIB_ENCAP_LEN);
@@ -620,7 +673,10 @@ repost:
620 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 673 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
621 "for buf %d\n", wr_id); 674 "for buf %d\n", wr_id);
622 } else { 675 } else {
623 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { 676 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
677 &priv->cm.rx_wr,
678 priv->cm.rx_sge,
679 wr_id))) {
624 --p->recv_count; 680 --p->recv_count;
625 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 681 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
626 "for buf %d\n", wr_id); 682 "for buf %d\n", wr_id);
@@ -647,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
647void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 703void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
648{ 704{
649 struct ipoib_dev_priv *priv = netdev_priv(dev); 705 struct ipoib_dev_priv *priv = netdev_priv(dev);
650 struct ipoib_tx_buf *tx_req; 706 struct ipoib_cm_tx_buf *tx_req;
651 u64 addr; 707 u64 addr;
652 708
653 if (unlikely(skb->len > tx->mtu)) { 709 if (unlikely(skb->len > tx->mtu)) {
@@ -678,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
678 return; 734 return;
679 } 735 }
680 736
681 tx_req->mapping[0] = addr; 737 tx_req->mapping = addr;
682 738
683 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), 739 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
684 addr, skb->len))) { 740 addr, skb->len))) {
@@ -703,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
703 struct ipoib_dev_priv *priv = netdev_priv(dev); 759 struct ipoib_dev_priv *priv = netdev_priv(dev);
704 struct ipoib_cm_tx *tx = wc->qp->qp_context; 760 struct ipoib_cm_tx *tx = wc->qp->qp_context;
705 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 761 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
706 struct ipoib_tx_buf *tx_req; 762 struct ipoib_cm_tx_buf *tx_req;
707 unsigned long flags; 763 unsigned long flags;
708 764
709 ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", 765 ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -717,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
717 773
718 tx_req = &tx->tx_ring[wr_id]; 774 tx_req = &tx->tx_ring[wr_id];
719 775
720 ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); 776 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
721 777
722 /* FIXME: is this right? Shouldn't we only increment on success? */ 778 /* FIXME: is this right? Shouldn't we only increment on success? */
723 ++dev->stats.tx_packets; 779 ++dev->stats.tx_packets;
@@ -1087,7 +1143,7 @@ err_tx:
1087static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 1143static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
1088{ 1144{
1089 struct ipoib_dev_priv *priv = netdev_priv(p->dev); 1145 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
1090 struct ipoib_tx_buf *tx_req; 1146 struct ipoib_cm_tx_buf *tx_req;
1091 unsigned long flags; 1147 unsigned long flags;
1092 unsigned long begin; 1148 unsigned long begin;
1093 1149
@@ -1115,7 +1171,7 @@ timeout:
1115 1171
1116 while ((int) p->tx_tail - (int) p->tx_head < 0) { 1172 while ((int) p->tx_tail - (int) p->tx_head < 0) {
1117 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 1173 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
1118 ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, 1174 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
1119 DMA_TO_DEVICE); 1175 DMA_TO_DEVICE);
1120 dev_kfree_skb_any(tx_req->skb); 1176 dev_kfree_skb_any(tx_req->skb);
1121 ++p->tx_tail; 1177 ++p->tx_tail;
@@ -1384,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1384 ipoib_warn(priv, "enabling connected mode " 1440 ipoib_warn(priv, "enabling connected mode "
1385 "will cause multicast packet drops\n"); 1441 "will cause multicast packet drops\n");
1386 1442
1443 rtnl_lock();
1387 dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); 1444 dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
1445 rtnl_unlock();
1388 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 1446 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
1389 1447
1390 ipoib_flush_paths(dev); 1448 ipoib_flush_paths(dev);
@@ -1393,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1393 1451
1394 if (!strcmp(buf, "datagram\n")) { 1452 if (!strcmp(buf, "datagram\n")) {
1395 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 1453 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1396 dev->mtu = min(priv->mcast_mtu, dev->mtu);
1397 ipoib_flush_paths(dev);
1398 1454
1455 rtnl_lock();
1399 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { 1456 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
1400 dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; 1457 dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
1401 if (priv->hca_caps & IB_DEVICE_UD_TSO) 1458 if (priv->hca_caps & IB_DEVICE_UD_TSO)
1402 dev->features |= NETIF_F_TSO; 1459 dev->features |= NETIF_F_TSO;
1403 } 1460 }
1461 dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
1462 rtnl_unlock();
1463 ipoib_flush_paths(dev);
1404 1464
1405 return count; 1465 return count;
1406 } 1466 }
@@ -1485,15 +1545,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
1485 priv->cm.num_frags = IPOIB_CM_RX_SG; 1545 priv->cm.num_frags = IPOIB_CM_RX_SG;
1486 } 1546 }
1487 1547
1488 for (i = 0; i < priv->cm.num_frags; ++i) 1548 ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
1489 priv->cm.rx_sge[i].lkey = priv->mr->lkey;
1490
1491 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
1492 for (i = 1; i < priv->cm.num_frags; ++i)
1493 priv->cm.rx_sge[i].length = PAGE_SIZE;
1494 priv->cm.rx_wr.next = NULL;
1495 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1496 priv->cm.rx_wr.num_sge = priv->cm.num_frags;
1497 1549
1498 if (ipoib_cm_has_srq(dev)) { 1550 if (ipoib_cm_has_srq(dev)) {
1499 for (i = 0; i < ipoib_recvq_size; ++i) { 1551 for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b79c44d..66af5c1a76e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
86 return 0; 86 return 0;
87} 87}
88 88
89static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
90 "LRO aggregated", "LRO flushed",
91 "LRO avg aggr", "LRO no desc"
92};
93
94static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
95{
96 switch (stringset) {
97 case ETH_SS_STATS:
98 memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys));
99 break;
100 }
101}
102
103static int ipoib_get_sset_count(struct net_device *dev, int sset)
104{
105 switch (sset) {
106 case ETH_SS_STATS:
107 return ARRAY_SIZE(ipoib_stats_keys);
108 default:
109 return -EOPNOTSUPP;
110 }
111}
112
113static void ipoib_get_ethtool_stats(struct net_device *dev,
114 struct ethtool_stats *stats, uint64_t *data)
115{
116 struct ipoib_dev_priv *priv = netdev_priv(dev);
117 int index = 0;
118
119 /* Get LRO statistics */
120 data[index++] = priv->lro.lro_mgr.stats.aggregated;
121 data[index++] = priv->lro.lro_mgr.stats.flushed;
122 if (priv->lro.lro_mgr.stats.flushed)
123 data[index++] = priv->lro.lro_mgr.stats.aggregated /
124 priv->lro.lro_mgr.stats.flushed;
125 else
126 data[index++] = 0;
127 data[index++] = priv->lro.lro_mgr.stats.no_desc;
128}
129
89static const struct ethtool_ops ipoib_ethtool_ops = { 130static const struct ethtool_ops ipoib_ethtool_ops = {
90 .get_drvinfo = ipoib_get_drvinfo, 131 .get_drvinfo = ipoib_get_drvinfo,
91 .get_tso = ethtool_op_get_tso, 132 .get_tso = ethtool_op_get_tso,
92 .get_coalesce = ipoib_get_coalesce, 133 .get_coalesce = ipoib_get_coalesce,
93 .set_coalesce = ipoib_set_coalesce, 134 .set_coalesce = ipoib_set_coalesce,
135 .get_flags = ethtool_op_get_flags,
136 .set_flags = ethtool_op_set_flags,
137 .get_strings = ipoib_get_strings,
138 .get_sset_count = ipoib_get_sset_count,
139 .get_ethtool_stats = ipoib_get_ethtool_stats,
94}; 140};
95 141
96void ipoib_set_ethtool_ops(struct net_device *dev) 142void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 8b882bbd1d05..961c585da216 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $
33 */ 31 */
34 32
35#include <linux/err.h> 33#include <linux/err.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f429bce24c20..66cafa20c246 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
36 */ 34 */
37 35
38#include <linux/delay.h> 36#include <linux/delay.h>
@@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
290 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) 288 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
291 skb->ip_summed = CHECKSUM_UNNECESSARY; 289 skb->ip_summed = CHECKSUM_UNNECESSARY;
292 290
293 netif_receive_skb(skb); 291 if (dev->features & NETIF_F_LRO)
292 lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
293 else
294 netif_receive_skb(skb);
294 295
295repost: 296repost:
296 if (unlikely(ipoib_ib_post_receive(dev, wr_id))) 297 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -442,6 +443,9 @@ poll_more:
442 } 443 }
443 444
444 if (done < budget) { 445 if (done < budget) {
446 if (dev->features & NETIF_F_LRO)
447 lro_flush_all(&priv->lro.lro_mgr);
448
445 netif_rx_complete(dev, napi); 449 netif_rx_complete(dev, napi);
446 if (unlikely(ib_req_notify_cq(priv->recv_cq, 450 if (unlikely(ib_req_notify_cq(priv->recv_cq,
447 IB_CQ_NEXT_COMP | 451 IB_CQ_NEXT_COMP |
@@ -898,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
898 return 0; 902 return 0;
899} 903}
900 904
901static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) 905static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
906 enum ipoib_flush_level level)
902{ 907{
903 struct ipoib_dev_priv *cpriv; 908 struct ipoib_dev_priv *cpriv;
904 struct net_device *dev = priv->dev; 909 struct net_device *dev = priv->dev;
@@ -911,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
911 * the parent is down. 916 * the parent is down.
912 */ 917 */
913 list_for_each_entry(cpriv, &priv->child_intfs, list) 918 list_for_each_entry(cpriv, &priv->child_intfs, list)
914 __ipoib_ib_dev_flush(cpriv, pkey_event); 919 __ipoib_ib_dev_flush(cpriv, level);
915 920
916 mutex_unlock(&priv->vlan_mutex); 921 mutex_unlock(&priv->vlan_mutex);
917 922
@@ -925,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
925 return; 930 return;
926 } 931 }
927 932
928 if (pkey_event) { 933 if (level == IPOIB_FLUSH_HEAVY) {
929 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { 934 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
930 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 935 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
931 ipoib_ib_dev_down(dev, 0); 936 ipoib_ib_dev_down(dev, 0);
@@ -943,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
943 priv->pkey_index = new_index; 948 priv->pkey_index = new_index;
944 } 949 }
945 950
946 ipoib_dbg(priv, "flushing\n"); 951 if (level == IPOIB_FLUSH_LIGHT) {
952 ipoib_mark_paths_invalid(dev);
953 ipoib_mcast_dev_flush(dev);
954 }
947 955
948 ipoib_ib_dev_down(dev, 0); 956 if (level >= IPOIB_FLUSH_NORMAL)
957 ipoib_ib_dev_down(dev, 0);
949 958
950 if (pkey_event) { 959 if (level == IPOIB_FLUSH_HEAVY) {
951 ipoib_ib_dev_stop(dev, 0); 960 ipoib_ib_dev_stop(dev, 0);
952 ipoib_ib_dev_open(dev); 961 ipoib_ib_dev_open(dev);
953 } 962 }
@@ -957,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
957 * we get here, don't bring it back up if it's not configured up 966 * we get here, don't bring it back up if it's not configured up
958 */ 967 */
959 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { 968 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
960 ipoib_ib_dev_up(dev); 969 if (level >= IPOIB_FLUSH_NORMAL)
970 ipoib_ib_dev_up(dev);
961 ipoib_mcast_restart_task(&priv->restart_task); 971 ipoib_mcast_restart_task(&priv->restart_task);
962 } 972 }
963} 973}
964 974
965void ipoib_ib_dev_flush(struct work_struct *work) 975void ipoib_ib_dev_flush_light(struct work_struct *work)
976{
977 struct ipoib_dev_priv *priv =
978 container_of(work, struct ipoib_dev_priv, flush_light);
979
980 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
981}
982
983void ipoib_ib_dev_flush_normal(struct work_struct *work)
966{ 984{
967 struct ipoib_dev_priv *priv = 985 struct ipoib_dev_priv *priv =
968 container_of(work, struct ipoib_dev_priv, flush_task); 986 container_of(work, struct ipoib_dev_priv, flush_normal);
969 987
970 ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); 988 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
971 __ipoib_ib_dev_flush(priv, 0);
972} 989}
973 990
974void ipoib_pkey_event(struct work_struct *work) 991void ipoib_ib_dev_flush_heavy(struct work_struct *work)
975{ 992{
976 struct ipoib_dev_priv *priv = 993 struct ipoib_dev_priv *priv =
977 container_of(work, struct ipoib_dev_priv, pkey_event_task); 994 container_of(work, struct ipoib_dev_priv, flush_heavy);
978 995
979 ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); 996 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
980 __ipoib_ib_dev_flush(priv, 1);
981} 997}
982 998
983void ipoib_ib_dev_cleanup(struct net_device *dev) 999void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2442090ac8d1..8be9ea0436e6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
35 */ 33 */
36 34
37#include "ipoib.h" 35#include "ipoib.h"
@@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
62module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 60module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
63MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 61MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
64 62
63static int lro;
64module_param(lro, bool, 0444);
65MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
66
67static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
68module_param(lro_max_aggr, int, 0644);
69MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
70 "(default = 64)");
71
65#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 72#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
66int ipoib_debug_level; 73int ipoib_debug_level;
67 74
@@ -350,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
350 357
351#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 358#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
352 359
360void ipoib_mark_paths_invalid(struct net_device *dev)
361{
362 struct ipoib_dev_priv *priv = netdev_priv(dev);
363 struct ipoib_path *path, *tp;
364
365 spin_lock_irq(&priv->lock);
366
367 list_for_each_entry_safe(path, tp, &priv->path_list, list) {
368 ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
369 be16_to_cpu(path->pathrec.dlid),
370 IPOIB_GID_ARG(path->pathrec.dgid));
371 path->valid = 0;
372 }
373
374 spin_unlock_irq(&priv->lock);
375}
376
353void ipoib_flush_paths(struct net_device *dev) 377void ipoib_flush_paths(struct net_device *dev)
354{ 378{
355 struct ipoib_dev_priv *priv = netdev_priv(dev); 379 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -386,6 +410,7 @@ static void path_rec_completion(int status,
386 struct net_device *dev = path->dev; 410 struct net_device *dev = path->dev;
387 struct ipoib_dev_priv *priv = netdev_priv(dev); 411 struct ipoib_dev_priv *priv = netdev_priv(dev);
388 struct ipoib_ah *ah = NULL; 412 struct ipoib_ah *ah = NULL;
413 struct ipoib_ah *old_ah;
389 struct ipoib_neigh *neigh, *tn; 414 struct ipoib_neigh *neigh, *tn;
390 struct sk_buff_head skqueue; 415 struct sk_buff_head skqueue;
391 struct sk_buff *skb; 416 struct sk_buff *skb;
@@ -409,6 +434,7 @@ static void path_rec_completion(int status,
409 434
410 spin_lock_irqsave(&priv->lock, flags); 435 spin_lock_irqsave(&priv->lock, flags);
411 436
437 old_ah = path->ah;
412 path->ah = ah; 438 path->ah = ah;
413 439
414 if (ah) { 440 if (ah) {
@@ -421,6 +447,17 @@ static void path_rec_completion(int status,
421 __skb_queue_tail(&skqueue, skb); 447 __skb_queue_tail(&skqueue, skb);
422 448
423 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { 449 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
450 if (neigh->ah) {
451 WARN_ON(neigh->ah != old_ah);
452 /*
453 * Dropping the ah reference inside
454 * priv->lock is safe here, because we
455 * will hold one more reference from
456 * the original value of path->ah (ie
457 * old_ah).
458 */
459 ipoib_put_ah(neigh->ah);
460 }
424 kref_get(&path->ah->ref); 461 kref_get(&path->ah->ref);
425 neigh->ah = path->ah; 462 neigh->ah = path->ah;
426 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, 463 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -443,6 +480,7 @@ static void path_rec_completion(int status,
443 while ((skb = __skb_dequeue(&neigh->queue))) 480 while ((skb = __skb_dequeue(&neigh->queue)))
444 __skb_queue_tail(&skqueue, skb); 481 __skb_queue_tail(&skqueue, skb);
445 } 482 }
483 path->valid = 1;
446 } 484 }
447 485
448 path->query = NULL; 486 path->query = NULL;
@@ -450,6 +488,9 @@ static void path_rec_completion(int status,
450 488
451 spin_unlock_irqrestore(&priv->lock, flags); 489 spin_unlock_irqrestore(&priv->lock, flags);
452 490
491 if (old_ah)
492 ipoib_put_ah(old_ah);
493
453 while ((skb = __skb_dequeue(&skqueue))) { 494 while ((skb = __skb_dequeue(&skqueue))) {
454 skb->dev = dev; 495 skb->dev = dev;
455 if (dev_queue_xmit(skb)) 496 if (dev_queue_xmit(skb))
@@ -623,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
623 spin_lock(&priv->lock); 664 spin_lock(&priv->lock);
624 665
625 path = __path_find(dev, phdr->hwaddr + 4); 666 path = __path_find(dev, phdr->hwaddr + 4);
626 if (!path) { 667 if (!path || !path->valid) {
627 path = path_rec_create(dev, phdr->hwaddr + 4); 668 if (!path)
669 path = path_rec_create(dev, phdr->hwaddr + 4);
628 if (path) { 670 if (path) {
629 /* put pseudoheader back on for next time */ 671 /* put pseudoheader back on for next time */
630 skb_push(skb, sizeof *phdr); 672 skb_push(skb, sizeof *phdr);
@@ -938,6 +980,54 @@ static const struct header_ops ipoib_header_ops = {
938 .create = ipoib_hard_header, 980 .create = ipoib_hard_header,
939}; 981};
940 982
983static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
984 void **tcph, u64 *hdr_flags, void *priv)
985{
986 unsigned int ip_len;
987 struct iphdr *iph;
988
989 if (unlikely(skb->protocol != htons(ETH_P_IP)))
990 return -1;
991
992 /*
993 * In the future we may add an else clause that verifies the
994 * checksum and allows devices which do not calculate checksum
995 * to use LRO.
996 */
997 if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
998 return -1;
999
1000 /* Check for non-TCP packet */
1001 skb_reset_network_header(skb);
1002 iph = ip_hdr(skb);
1003 if (iph->protocol != IPPROTO_TCP)
1004 return -1;
1005
1006 ip_len = ip_hdrlen(skb);
1007 skb_set_transport_header(skb, ip_len);
1008 *tcph = tcp_hdr(skb);
1009
1010 /* check if IP header and TCP header are complete */
1011 if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
1012 return -1;
1013
1014 *hdr_flags = LRO_IPV4 | LRO_TCP;
1015 *iphdr = iph;
1016
1017 return 0;
1018}
1019
1020static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
1021{
1022 priv->lro.lro_mgr.max_aggr = lro_max_aggr;
1023 priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
1024 priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
1025 priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
1026 priv->lro.lro_mgr.features = LRO_F_NAPI;
1027 priv->lro.lro_mgr.dev = priv->dev;
1028 priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
1029}
1030
941static void ipoib_setup(struct net_device *dev) 1031static void ipoib_setup(struct net_device *dev)
942{ 1032{
943 struct ipoib_dev_priv *priv = netdev_priv(dev); 1033 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -977,10 +1067,11 @@ static void ipoib_setup(struct net_device *dev)
977 1067
978 priv->dev = dev; 1068 priv->dev = dev;
979 1069
1070 ipoib_lro_setup(priv);
1071
980 spin_lock_init(&priv->lock); 1072 spin_lock_init(&priv->lock);
981 spin_lock_init(&priv->tx_lock); 1073 spin_lock_init(&priv->tx_lock);
982 1074
983 mutex_init(&priv->mcast_mutex);
984 mutex_init(&priv->vlan_mutex); 1075 mutex_init(&priv->vlan_mutex);
985 1076
986 INIT_LIST_HEAD(&priv->path_list); 1077 INIT_LIST_HEAD(&priv->path_list);
@@ -989,9 +1080,10 @@ static void ipoib_setup(struct net_device *dev)
989 INIT_LIST_HEAD(&priv->multicast_list); 1080 INIT_LIST_HEAD(&priv->multicast_list);
990 1081
991 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); 1082 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
992 INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
993 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 1083 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
994 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); 1084 INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
1085 INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
1086 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
995 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 1087 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
996 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 1088 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
997} 1089}
@@ -1154,6 +1246,9 @@ static struct net_device *ipoib_add_port(const char *format,
1154 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; 1246 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
1155 } 1247 }
1156 1248
1249 if (lro)
1250 priv->dev->features |= NETIF_F_LRO;
1251
1157 /* 1252 /*
1158 * Set the full membership bit, so that we join the right 1253 * Set the full membership bit, so that we join the right
1159 * broadcast group, etc. 1254 * broadcast group, etc.
@@ -1304,6 +1399,12 @@ static int __init ipoib_init_module(void)
1304 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1399 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1305#endif 1400#endif
1306 1401
1402 /*
1403 * When copying small received packets, we only copy from the
1404 * linear data part of the SKB, so we rely on this condition.
1405 */
1406 BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
1407
1307 ret = ipoib_register_debugfs(); 1408 ret = ipoib_register_debugfs();
1308 if (ret) 1409 if (ret)
1309 return ret; 1410 return ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cd2fb955040f..8950e9546f4e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
35 */ 33 */
36 34
37#include <linux/skbuff.h> 35#include <linux/skbuff.h>
@@ -188,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
188 struct ipoib_dev_priv *priv = netdev_priv(dev); 186 struct ipoib_dev_priv *priv = netdev_priv(dev);
189 struct ipoib_ah *ah; 187 struct ipoib_ah *ah;
190 int ret; 188 int ret;
189 int set_qkey = 0;
191 190
192 mcast->mcmember = *mcmember; 191 mcast->mcmember = *mcmember;
193 192
@@ -202,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
202 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 201 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
203 spin_unlock_irq(&priv->lock); 202 spin_unlock_irq(&priv->lock);
204 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 203 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
204 set_qkey = 1;
205 } 205 }
206 206
207 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 207 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -214,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
214 } 214 }
215 215
216 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), 216 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
217 &mcast->mcmember.mgid); 217 &mcast->mcmember.mgid, set_qkey);
218 if (ret < 0) { 218 if (ret < 0) {
219 ipoib_warn(priv, "couldn't attach QP to multicast group " 219 ipoib_warn(priv, "couldn't attach QP to multicast group "
220 IPOIB_GID_FMT "\n", 220 IPOIB_GID_FMT "\n",
@@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
575 575
576 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); 576 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
577 577
578 if (!ipoib_cm_admin_enabled(dev)) 578 if (!ipoib_cm_admin_enabled(dev)) {
579 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 579 rtnl_lock();
580 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
581 rtnl_unlock();
582 }
580 583
581 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 584 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
582 585
@@ -594,10 +597,6 @@ int ipoib_mcast_start_thread(struct net_device *dev)
594 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); 597 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
595 mutex_unlock(&mcast_mutex); 598 mutex_unlock(&mcast_mutex);
596 599
597 spin_lock_irq(&priv->lock);
598 set_bit(IPOIB_MCAST_STARTED, &priv->flags);
599 spin_unlock_irq(&priv->lock);
600
601 return 0; 600 return 0;
602} 601}
603 602
@@ -607,10 +606,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
607 606
608 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 607 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
609 608
610 spin_lock_irq(&priv->lock);
611 clear_bit(IPOIB_MCAST_STARTED, &priv->flags);
612 spin_unlock_irq(&priv->lock);
613
614 mutex_lock(&mcast_mutex); 609 mutex_lock(&mcast_mutex);
615 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 610 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
616 cancel_delayed_work(&priv->mcast_task); 611 cancel_delayed_work(&priv->mcast_task);
@@ -635,10 +630,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
635 IPOIB_GID_ARG(mcast->mcmember.mgid)); 630 IPOIB_GID_ARG(mcast->mcmember.mgid));
636 631
637 /* Remove ourselves from the multicast group */ 632 /* Remove ourselves from the multicast group */
638 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 633 ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
639 &mcast->mcmember.mgid); 634 be16_to_cpu(mcast->mcmember.mlid));
640 if (ret) 635 if (ret)
641 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 636 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
642 } 637 }
643 638
644 return 0; 639 return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 8766d29ce3b7..68325119f740 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -29,24 +29,17 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include "ipoib.h" 34#include "ipoib.h"
37 35
38int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) 36int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
39{ 37{
40 struct ipoib_dev_priv *priv = netdev_priv(dev); 38 struct ipoib_dev_priv *priv = netdev_priv(dev);
41 struct ib_qp_attr *qp_attr; 39 struct ib_qp_attr *qp_attr = NULL;
42 int ret; 40 int ret;
43 u16 pkey_index; 41 u16 pkey_index;
44 42
45 ret = -ENOMEM;
46 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
47 if (!qp_attr)
48 goto out;
49
50 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { 43 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
51 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 44 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
52 ret = -ENXIO; 45 ret = -ENXIO;
@@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
54 } 47 }
55 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 48 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
56 49
57 /* set correct QKey for QP */ 50 if (set_qkey) {
58 qp_attr->qkey = priv->qkey; 51 ret = -ENOMEM;
59 ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); 52 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
60 if (ret) { 53 if (!qp_attr)
61 ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); 54 goto out;
62 goto out; 55
56 /* set correct QKey for QP */
57 qp_attr->qkey = priv->qkey;
58 ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
59 if (ret) {
60 ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
61 goto out;
62 }
63 } 63 }
64 64
65 /* attach QP to multicast group */ 65 /* attach QP to multicast group */
66 mutex_lock(&priv->mcast_mutex);
67 ret = ib_attach_mcast(priv->qp, mgid, mlid); 66 ret = ib_attach_mcast(priv->qp, mgid, mlid);
68 mutex_unlock(&priv->mcast_mutex);
69 if (ret) 67 if (ret)
70 ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); 68 ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
71 69
@@ -74,20 +72,6 @@ out:
74 return ret; 72 return ret;
75} 73}
76 74
77int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
78{
79 struct ipoib_dev_priv *priv = netdev_priv(dev);
80 int ret;
81
82 mutex_lock(&priv->mcast_mutex);
83 ret = ib_detach_mcast(priv->qp, mgid, mlid);
84 mutex_unlock(&priv->mcast_mutex);
85 if (ret)
86 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
87
88 return ret;
89}
90
91int ipoib_init_qp(struct net_device *dev) 75int ipoib_init_qp(struct net_device *dev)
92{ 76{
93 struct ipoib_dev_priv *priv = netdev_priv(dev); 77 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
201 init_attr.recv_cq = priv->recv_cq; 185 init_attr.recv_cq = priv->recv_cq;
202 186
203 if (priv->hca_caps & IB_DEVICE_UD_TSO) 187 if (priv->hca_caps & IB_DEVICE_UD_TSO)
204 init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; 188 init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
189
190 if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
191 init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
205 192
206 if (dev->features & NETIF_F_SG) 193 if (dev->features & NETIF_F_SG)
207 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; 194 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
@@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler,
289 if (record->element.port_num != priv->port) 276 if (record->element.port_num != priv->port)
290 return; 277 return;
291 278
292 if (record->event == IB_EVENT_PORT_ERR || 279 ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
293 record->event == IB_EVENT_PORT_ACTIVE || 280 record->device->name, record->element.port_num);
294 record->event == IB_EVENT_LID_CHANGE || 281
295 record->event == IB_EVENT_SM_CHANGE || 282 if (record->event == IB_EVENT_SM_CHANGE ||
296 record->event == IB_EVENT_CLIENT_REREGISTER) { 283 record->event == IB_EVENT_CLIENT_REREGISTER) {
297 ipoib_dbg(priv, "Port state change event\n"); 284 queue_work(ipoib_workqueue, &priv->flush_light);
298 queue_work(ipoib_workqueue, &priv->flush_task); 285 } else if (record->event == IB_EVENT_PORT_ERR ||
286 record->event == IB_EVENT_PORT_ACTIVE ||
287 record->event == IB_EVENT_LID_CHANGE) {
288 queue_work(ipoib_workqueue, &priv->flush_normal);
299 } else if (record->event == IB_EVENT_PKEY_CHANGE) { 289 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
300 ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); 290 queue_work(ipoib_workqueue, &priv->flush_heavy);
301 queue_work(ipoib_workqueue, &priv->pkey_event_task);
302 } 291 }
303} 292}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 1cdb5cfb0ff1..b08eb56196d3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#include <linux/module.h> 33#include <linux/module.h>