-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h           |  56
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c        |  16
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c      | 646
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  57
4 files changed, 539 insertions(+), 236 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 86df632ea612..ca43901ed861 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,8 @@ enum {
 	IPOIB_STOP_REAPER	  = 7,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
+	IPOIB_STOP_NEIGH_GC	  = 11,
+	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -260,6 +262,20 @@ struct ipoib_ethtool_st {
 	u16 max_coalesced_frames;
 };
 
+struct ipoib_neigh_hash {
+	struct ipoib_neigh __rcu      **buckets;
+	struct rcu_head			rcu;
+	u32				mask;
+	u32				size;
+};
+
+struct ipoib_neigh_table {
+	struct ipoib_neigh_hash __rcu  *htbl;
+	rwlock_t			rwlock;
+	atomic_t			entries;
+	struct completion		flushed;
+};
+
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -279,6 +295,8 @@ struct ipoib_dev_priv {
 	struct rb_root  path_tree;
 	struct list_head path_list;
 
+	struct ipoib_neigh_table ntbl;
+
 	struct ipoib_mcast *broadcast;
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
@@ -291,7 +309,7 @@ struct ipoib_dev_priv {
 	struct work_struct flush_heavy;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
-
+	struct delayed_work neigh_reap_task;
 	struct ib_device *ca;
 	u8		  port;
 	u16		  pkey;
@@ -377,13 +395,16 @@ struct ipoib_neigh {
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	struct ipoib_cm_tx *cm;
 #endif
-	union ib_gid	    dgid;
+	u8     daddr[INFINIBAND_ALEN];
 	struct sk_buff_head queue;
 
-	struct neighbour   *neighbour;
 	struct net_device *dev;
 
 	struct list_head    list;
+	struct ipoib_neigh __rcu *hnext;
+	struct rcu_head     rcu;
+	atomic_t	    refcnt;
+	unsigned long       alive;
 };
 
 #define IPOIB_UD_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
@@ -394,21 +415,17 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
 	return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
 }
 
-/*
- * We stash a pointer to our private neighbour information after our
- * hardware address in neigh->ha.  The ALIGN() expression here makes
- * sure that this pointer is stored aligned so that an unaligned
- * load is not needed to dereference it.
- */
-static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
+void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
+static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
 {
-	return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
-				     INFINIBAND_ALEN, sizeof(void *));
+	if (atomic_dec_and_test(&neigh->refcnt))
+		ipoib_neigh_dtor(neigh);
 }
-
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr);
+struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
 				      struct net_device *dev);
-void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
+void ipoib_neigh_free(struct ipoib_neigh *neigh);
+void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
 
 extern struct workqueue_struct *ipoib_workqueue;
 
@@ -425,7 +442,6 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 {
 	kref_put(&ah->ref, ipoib_free_ah);
 }
-
 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
@@ -455,7 +471,7 @@ void ipoib_dev_cleanup(struct net_device *dev);
 
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
-void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -517,10 +533,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
 
-static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	return IPOIB_CM_SUPPORTED(n->ha) &&
+	return IPOIB_CM_SUPPORTED(hwaddr) &&
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
 
@@ -575,7 +591,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 {
 	return 0;
 }
-static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
 
 {
 	return 0;
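The ipoib.h changes above replace the ipoib_neigh pointer that used to be stashed behind neighbour->ha with an explicitly reference-counted object: a lookup only succeeds if it can take a reference with atomic_inc_not_zero(), and ipoib_neigh_put() runs the destructor when the count drops to zero. The following standalone C sketch mirrors that lifetime rule with C11 atomics; it is only an illustration of the pattern (all names are invented for the demo), not driver code.

/* Userspace analogue of the ipoib_neigh refcount rule introduced above:
 * lookups take a reference only via an "inc if not zero" operation, and
 * the destructor runs when the last reference is dropped. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct neigh {
	atomic_int refcnt;	/* starts at 1: the hash table's reference */
	char daddr[20];
};

static struct neigh *neigh_get(struct neigh *n)
{
	int old = atomic_load(&n->refcnt);

	/* analogue of atomic_inc_not_zero(): never resurrect a dying entry */
	do {
		if (old == 0)
			return NULL;
	} while (!atomic_compare_exchange_weak(&n->refcnt, &old, old + 1));
	return n;
}

static void neigh_put(struct neigh *n)
{
	/* analogue of atomic_dec_and_test() followed by ipoib_neigh_dtor() */
	if (atomic_fetch_sub(&n->refcnt, 1) == 1) {
		printf("last reference dropped, freeing entry\n");
		free(n);
	}
}

int main(void)
{
	struct neigh *n = calloc(1, sizeof(*n));

	if (!n)
		return 1;
	atomic_init(&n->refcnt, 1);	/* the "table" holds one reference */
	if (neigh_get(n))		/* a "TX path" takes another ... */
		neigh_put(n);		/* ... and drops it after sending */
	neigh_put(n);			/* table drops its reference: freed */
	return 0;
}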
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6d66ab0dd92a..95ecf4eadf5f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -811,9 +811,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 		if (neigh) {
 			neigh->cm = NULL;
 			list_del(&neigh->list);
-			if (neigh->ah)
-				ipoib_put_ah(neigh->ah);
-			ipoib_neigh_free(dev, neigh);
+			ipoib_neigh_free(neigh);
 
 			tx->neigh = NULL;
 		}
@@ -1230,9 +1228,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 		if (neigh) {
 			neigh->cm = NULL;
 			list_del(&neigh->list);
-			if (neigh->ah)
-				ipoib_put_ah(neigh->ah);
-			ipoib_neigh_free(dev, neigh);
+			ipoib_neigh_free(neigh);
 
 			tx->neigh = NULL;
 		}
@@ -1279,7 +1275,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 		list_move(&tx->list, &priv->cm.reap_list);
 		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
-			  tx->neigh->dgid.raw);
+			  tx->neigh->daddr + 4);
 		tx->neigh = NULL;
 	}
 }
@@ -1304,7 +1300,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
 		list_del_init(&p->list);
 		neigh = p->neigh;
-		qpn = IPOIB_QPN(neigh->neighbour->ha);
+		qpn = IPOIB_QPN(neigh->daddr);
 		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1320,9 +1316,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 		if (neigh) {
 			neigh->cm = NULL;
 			list_del(&neigh->list);
-			if (neigh->ah)
-				ipoib_put_ah(neigh->ah);
-			ipoib_neigh_free(dev, neigh);
+			ipoib_neigh_free(neigh);
 		}
 		list_del(&p->list);
 		kfree(p);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bbee4b2d7a13..97920b77a5d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -46,7 +46,8 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 
-#include <net/dst.h>
+#include <linux/jhash.h>
+#include <net/arp.h>
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
@@ -84,6 +85,7 @@ struct ib_sa_client ipoib_sa_client;
 
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
+static void ipoib_neigh_reclaim(struct rcu_head *rp);
 
 static struct ib_client ipoib_client = {
 	.name   = "ipoib",
@@ -264,30 +266,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path)
 
 static void path_free(struct net_device *dev, struct ipoib_path *path)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_neigh *neigh, *tn;
 	struct sk_buff *skb;
-	unsigned long flags;
 
 	while ((skb = __skb_dequeue(&path->queue)))
 		dev_kfree_skb_irq(skb);
 
-	spin_lock_irqsave(&priv->lock, flags);
-
-	list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
-		/*
-		 * It's safe to call ipoib_put_ah() inside priv->lock
-		 * here, because we know that path->ah will always
-		 * hold one more reference, so ipoib_put_ah() will
-		 * never do more than decrement the ref count.
-		 */
-		if (neigh->ah)
-			ipoib_put_ah(neigh->ah);
-
-		ipoib_neigh_free(dev, neigh);
-	}
+	ipoib_dbg(netdev_priv(dev), "path_free\n");
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+	/* remove all neigh connected to this path */
+	ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
 
 	if (path->ah)
 		ipoib_put_ah(path->ah);
@@ -458,19 +445,15 @@ static void path_rec_completion(int status,
 			}
 			kref_get(&path->ah->ref);
 			neigh->ah = path->ah;
-			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
-			       sizeof(union ib_gid));
 
-			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+			if (ipoib_cm_enabled(dev, neigh->daddr)) {
 				if (!ipoib_cm_get(neigh))
 					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
 									       path,
 									       neigh));
 				if (!ipoib_cm_get(neigh)) {
 					list_del(&neigh->list);
-					if (neigh->ah)
-						ipoib_put_ah(neigh->ah);
-					ipoib_neigh_free(dev, neigh);
+					ipoib_neigh_free(neigh);
 					continue;
 				}
 			}
@@ -555,15 +538,15 @@ static int path_rec_start(struct net_device *dev,
 	return 0;
 }
 
-/* called with rcu_read_lock */
-static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev)
+static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
+			   struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	neigh = ipoib_neigh_alloc(n, skb->dev);
+	neigh = ipoib_neigh_alloc(daddr, dev);
 	if (!neigh) {
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
@@ -572,9 +555,9 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	path = __path_find(dev, n->ha + 4);
+	path = __path_find(dev, daddr + 4);
 	if (!path) {
-		path = path_rec_create(dev, n->ha + 4);
+		path = path_rec_create(dev, daddr + 4);
 		if (!path)
 			goto err_path;
 
@@ -586,17 +569,13 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
 	if (path->ah) {
 		kref_get(&path->ah->ref);
 		neigh->ah = path->ah;
-		memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
-		       sizeof(union ib_gid));
 
-		if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+		if (ipoib_cm_enabled(dev, neigh->daddr)) {
 			if (!ipoib_cm_get(neigh))
 				ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
 			if (!ipoib_cm_get(neigh)) {
 				list_del(&neigh->list);
-				if (neigh->ah)
-					ipoib_put_ah(neigh->ah);
-				ipoib_neigh_free(dev, neigh);
+				ipoib_neigh_free(neigh);
 				goto err_drop;
 			}
 			if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -608,7 +587,8 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
 		}
 	} else {
 		spin_unlock_irqrestore(&priv->lock, flags);
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+		ipoib_neigh_put(neigh);
 		return;
 	}
 	} else {
@@ -621,35 +601,20 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
+	ipoib_neigh_put(neigh);
 	return;
 
 err_list:
 	list_del(&neigh->list);
 
 err_path:
-	ipoib_neigh_free(dev, neigh);
+	ipoib_neigh_free(neigh);
 err_drop:
 	++dev->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
 	spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-/* called with rcu_read_lock */
-static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
-
-	/* Look up path record for unicasts */
-	if (n->ha[4] != 0xff) {
-		neigh_add_path(skb, n, dev);
-		return;
-	}
-
-	/* Add in the P_Key for multicasts */
-	n->ha[8] = (priv->pkey >> 8) & 0xff;
-	n->ha[9] = priv->pkey & 0xff;
-	ipoib_mcast_send(dev, n->ha + 4, skb);
+	ipoib_neigh_put(neigh);
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -710,96 +675,80 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_neigh *neigh;
-	struct neighbour *n = NULL;
+	struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+	struct ipoib_header *header;
 	unsigned long flags;
 
-	rcu_read_lock();
-	if (likely(skb_dst(skb))) {
-		n = dst_neigh_lookup_skb(skb_dst(skb), skb);
-		if (!n) {
+	header = (struct ipoib_header *) skb->data;
+
+	if (unlikely(cb->hwaddr[4] == 0xff)) {
+		/* multicast, arrange "if" according to probability */
+		if ((header->proto != htons(ETH_P_IP)) &&
+		    (header->proto != htons(ETH_P_IPV6)) &&
+		    (header->proto != htons(ETH_P_ARP)) &&
+		    (header->proto != htons(ETH_P_RARP))) {
+			/* ethertype not supported by IPoIB */
 			++dev->stats.tx_dropped;
 			dev_kfree_skb_any(skb);
-			goto unlock;
+			return NETDEV_TX_OK;
 		}
+		/* Add in the P_Key for multicast*/
+		cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+		cb->hwaddr[9] = priv->pkey & 0xff;
+
+		neigh = ipoib_neigh_get(dev, cb->hwaddr);
+		if (likely(neigh))
+			goto send_using_neigh;
+		ipoib_mcast_send(dev, cb->hwaddr, skb);
+		return NETDEV_TX_OK;
 	}
-	if (likely(n)) {
-		if (unlikely(!*to_ipoib_neigh(n))) {
-			ipoib_path_lookup(skb, n, dev);
-			goto unlock;
-		}
-
-		neigh = *to_ipoib_neigh(n);
 
-		if (unlikely((memcmp(&neigh->dgid.raw,
-				     n->ha + 4,
-				     sizeof(union ib_gid))) ||
-			     (neigh->dev != dev))) {
-			spin_lock_irqsave(&priv->lock, flags);
-			/*
-			 * It's safe to call ipoib_put_ah() inside
-			 * priv->lock here, because we know that
-			 * path->ah will always hold one more reference,
-			 * so ipoib_put_ah() will never do more than
-			 * decrement the ref count.
-			 */
-			if (neigh->ah)
-				ipoib_put_ah(neigh->ah);
-			list_del(&neigh->list);
-			ipoib_neigh_free(dev, neigh);
-			spin_unlock_irqrestore(&priv->lock, flags);
-			ipoib_path_lookup(skb, n, dev);
-			goto unlock;
+	/* unicast, arrange "switch" according to probability */
+	switch (header->proto) {
+	case htons(ETH_P_IP):
+	case htons(ETH_P_IPV6):
+		neigh = ipoib_neigh_get(dev, cb->hwaddr);
+		if (unlikely(!neigh)) {
+			neigh_add_path(skb, cb->hwaddr, dev);
+			return NETDEV_TX_OK;
 		}
+		break;
+	case htons(ETH_P_ARP):
+	case htons(ETH_P_RARP):
+		/* for unicast ARP and RARP should always perform path find */
+		unicast_arp_send(skb, dev, cb);
+		return NETDEV_TX_OK;
+	default:
+		/* ethertype not supported by IPoIB */
+		++dev->stats.tx_dropped;
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
 
-		if (ipoib_cm_get(neigh)) {
-			if (ipoib_cm_up(neigh)) {
-				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
-				goto unlock;
-			}
-		} else if (neigh->ah) {
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha));
-			goto unlock;
+send_using_neigh:
+	/* note we now hold a ref to neigh */
+	if (ipoib_cm_get(neigh)) {
+		if (ipoib_cm_up(neigh)) {
+			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+			goto unref;
 		}
+	} else if (neigh->ah) {
+		ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
+		goto unref;
+	}
 
-		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-			spin_lock_irqsave(&priv->lock, flags);
-			__skb_queue_tail(&neigh->queue, skb);
-			spin_unlock_irqrestore(&priv->lock, flags);
-		} else {
-			++dev->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-		}
+	if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+		spin_lock_irqsave(&priv->lock, flags);
+		__skb_queue_tail(&neigh->queue, skb);
+		spin_unlock_irqrestore(&priv->lock, flags);
 	} else {
-		struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
-
-		if (cb->hwaddr[4] == 0xff) {
-			/* Add in the P_Key for multicast*/
-			cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
-			cb->hwaddr[9] = priv->pkey & 0xff;
+		++dev->stats.tx_dropped;
+		dev_kfree_skb_any(skb);
+	}
 
-			ipoib_mcast_send(dev, cb->hwaddr + 4, skb);
-		} else {
-			/* unicast GID -- should be ARP or RARP reply */
-
-			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
-			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
-				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
-					   skb_dst(skb) ? "neigh" : "dst",
-					   be16_to_cpup((__be16 *) skb->data),
-					   IPOIB_QPN(cb->hwaddr),
-					   cb->hwaddr + 4);
-				dev_kfree_skb_any(skb);
-				++dev->stats.tx_dropped;
-				goto unlock;
-			}
+unref:
+	ipoib_neigh_put(neigh);
 
-			unicast_arp_send(skb, dev, cb);
-		}
-	}
-unlock:
-	if (n)
-		neigh_release(n);
-	rcu_read_unlock();
 	return NETDEV_TX_OK;
 }
 
@@ -821,6 +770,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
 			     const void *daddr, const void *saddr, unsigned len)
 {
 	struct ipoib_header *header;
+	struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
 
 	header = (struct ipoib_header *) skb_push(skb, sizeof *header);
 
@@ -828,14 +778,11 @@ static int ipoib_hard_header(struct sk_buff *skb,
 	header->reserved = 0;
 
 	/*
-	 * If we don't have a dst_entry structure, stuff the
+	 * we don't rely on dst_entry structure, always stuff the
 	 * destination address into skb->cb so we can figure out where
 	 * to send the packet later.
 	 */
-	if (!skb_dst(skb)) {
-		struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
-		memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
-	}
+	memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
 
 	return 0;
 }
@@ -852,86 +799,438 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 	queue_work(ipoib_workqueue, &priv->restart_task);
 }
 
-static void ipoib_neigh_cleanup(struct neighbour *n)
+static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
 {
-	struct ipoib_neigh *neigh;
-	struct ipoib_dev_priv *priv = netdev_priv(n->dev);
+	/*
+	 * Use only the address parts that contributes to spreading
+	 * The subnet prefix is not used as one can not connect to
+	 * same remote port (GUID) using the same remote QPN via two
+	 * different subnets.
+	 */
+	 /* qpn octets[1:4) & port GUID octets[12:20) */
+	u32 *daddr_32 = (u32 *) daddr;
+	u32 hv;
+
+	hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0);
+	return hv & htbl->mask;
+}
+
+struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	struct ipoib_neigh *neigh = NULL;
+	u32 hash_val;
+
+	rcu_read_lock_bh();
+
+	htbl = rcu_dereference_bh(ntbl->htbl);
+
+	if (!htbl)
+		goto out_unlock;
+
+	hash_val = ipoib_addr_hash(htbl, daddr);
+	for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
+	     neigh != NULL;
+	     neigh = rcu_dereference_bh(neigh->hnext)) {
+		if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+			/* found, take one ref on behalf of the caller */
+			if (!atomic_inc_not_zero(&neigh->refcnt)) {
+				/* deleted */
+				neigh = NULL;
+				goto out_unlock;
+			}
+			neigh->alive = jiffies;
+			goto out_unlock;
+		}
+	}
+
+out_unlock:
+	rcu_read_unlock_bh();
+	return neigh;
+}
+
+static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
+{
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	unsigned long neigh_obsolete;
+	unsigned long dt;
 	unsigned long flags;
-	struct ipoib_ah *ah = NULL;
+	int i;
 
-	neigh = *to_ipoib_neigh(n);
-	if (neigh)
-		priv = netdev_priv(neigh->dev);
-	else
+	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
 		return;
-	ipoib_dbg(priv,
-		  "neigh_cleanup for %06x %pI6\n",
-		  IPOIB_QPN(n->ha),
-		  n->ha + 4);
 
-	spin_lock_irqsave(&priv->lock, flags);
+	write_lock_bh(&ntbl->rwlock);
+
+	htbl = rcu_dereference_protected(ntbl->htbl,
+					 lockdep_is_held(&ntbl->rwlock));
+
+	if (!htbl)
+		goto out_unlock;
+
+	/* neigh is obsolete if it was idle for two GC periods */
+	dt = 2 * arp_tbl.gc_interval;
+	neigh_obsolete = jiffies - dt;
+	/* handle possible race condition */
+	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+		goto out_unlock;
+
+	for (i = 0; i < htbl->size; i++) {
+		struct ipoib_neigh *neigh;
+		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+		while ((neigh = rcu_dereference_protected(*np,
+							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+			/* was the neigh idle for two GC periods */
+			if (time_after(neigh_obsolete, neigh->alive)) {
+				rcu_assign_pointer(*np,
+						   rcu_dereference_protected(neigh->hnext,
+									     lockdep_is_held(&ntbl->rwlock)));
+				/* remove from path/mc list */
+				spin_lock_irqsave(&priv->lock, flags);
+				list_del(&neigh->list);
+				spin_unlock_irqrestore(&priv->lock, flags);
+				call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+			} else {
+				np = &neigh->hnext;
+			}
 
-	if (neigh->ah)
-		ah = neigh->ah;
-	list_del(&neigh->list);
-	ipoib_neigh_free(n->dev, neigh);
+		}
+	}
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+out_unlock:
+	write_unlock_bh(&ntbl->rwlock);
+}
 
-	if (ah)
-		ipoib_put_ah(ah);
+static void ipoib_reap_neigh(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);
+
+	__ipoib_reap_neigh(priv);
+
+	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+				   arp_tbl.gc_interval);
 }
 
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+
+static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
 				      struct net_device *dev)
 {
 	struct ipoib_neigh *neigh;
 
-	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+	neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
 	if (!neigh)
 		return NULL;
 
-	neigh->neighbour = neighbour;
 	neigh->dev = dev;
-	memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
-	*to_ipoib_neigh(neighbour) = neigh;
+	memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr));
 	skb_queue_head_init(&neigh->queue);
+	INIT_LIST_HEAD(&neigh->list);
 	ipoib_cm_set(neigh, NULL);
+	/* one ref on behalf of the caller */
+	atomic_set(&neigh->refcnt, 1);
+
+	return neigh;
+}
+
+struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
+				      struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	struct ipoib_neigh *neigh;
+	u32 hash_val;
+
+	write_lock_bh(&ntbl->rwlock);
+
+	htbl = rcu_dereference_protected(ntbl->htbl,
+					 lockdep_is_held(&ntbl->rwlock));
+	if (!htbl) {
+		neigh = NULL;
+		goto out_unlock;
+	}
+
+	/* need to add a new neigh, but maybe some other thread succeeded?
+	 * recalc hash, maybe hash resize took place so we do a search
+	 */
+	hash_val = ipoib_addr_hash(htbl, daddr);
+	for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
+					       lockdep_is_held(&ntbl->rwlock));
+	     neigh != NULL;
+	     neigh = rcu_dereference_protected(neigh->hnext,
+					       lockdep_is_held(&ntbl->rwlock))) {
+		if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+			/* found, take one ref on behalf of the caller */
+			if (!atomic_inc_not_zero(&neigh->refcnt)) {
+				/* deleted */
+				neigh = NULL;
+				break;
+			}
+			neigh->alive = jiffies;
+			goto out_unlock;
+		}
+	}
+
+	neigh = ipoib_neigh_ctor(daddr, dev);
+	if (!neigh)
+		goto out_unlock;
+
+	/* one ref on behalf of the hash table */
+	atomic_inc(&neigh->refcnt);
+	neigh->alive = jiffies;
+	/* put in hash */
+	rcu_assign_pointer(neigh->hnext,
+			   rcu_dereference_protected(htbl->buckets[hash_val],
+						     lockdep_is_held(&ntbl->rwlock)));
+	rcu_assign_pointer(htbl->buckets[hash_val], neigh);
+	atomic_inc(&ntbl->entries);
+
+out_unlock:
+	write_unlock_bh(&ntbl->rwlock);
 
 	return neigh;
 }
 
-void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
+void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
 {
+	/* neigh reference count was dropprd to zero */
+	struct net_device *dev = neigh->dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
-	*to_ipoib_neigh(neigh->neighbour) = NULL;
+	if (neigh->ah)
+		ipoib_put_ah(neigh->ah);
 	while ((skb = __skb_dequeue(&neigh->queue))) {
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 	}
 	if (ipoib_cm_get(neigh))
 		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
+	ipoib_dbg(netdev_priv(dev),
+		  "neigh free for %06x %pI6\n",
+		  IPOIB_QPN(neigh->daddr),
+		  neigh->daddr + 4);
 	kfree(neigh);
+	if (atomic_dec_and_test(&priv->ntbl.entries)) {
+		if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
+			complete(&priv->ntbl.flushed);
+	}
+}
+
+static void ipoib_neigh_reclaim(struct rcu_head *rp)
+{
+	/* Called as a result of removal from hash table */
+	struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);
+	/* note TX context may hold another ref */
+	ipoib_neigh_put(neigh);
 }
 
-static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
+void ipoib_neigh_free(struct ipoib_neigh *neigh)
 {
-	parms->neigh_cleanup = ipoib_neigh_cleanup;
+	struct net_device *dev = neigh->dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	struct ipoib_neigh __rcu **np;
+	struct ipoib_neigh *n;
+	u32 hash_val;
+
+	write_lock_bh(&ntbl->rwlock);
+
+	htbl = rcu_dereference_protected(ntbl->htbl,
+					 lockdep_is_held(&ntbl->rwlock));
+	if (!htbl)
+		goto out_unlock;
+
+	hash_val = ipoib_addr_hash(htbl, neigh->daddr);
+	np = &htbl->buckets[hash_val];
+	for (n = rcu_dereference_protected(*np,
+					   lockdep_is_held(&ntbl->rwlock));
+	     n != NULL;
+	     n = rcu_dereference_protected(neigh->hnext,
+					   lockdep_is_held(&ntbl->rwlock))) {
+		if (n == neigh) {
+			/* found */
+			rcu_assign_pointer(*np,
+					   rcu_dereference_protected(neigh->hnext,
+								     lockdep_is_held(&ntbl->rwlock)));
+			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+			goto out_unlock;
+		} else {
+			np = &n->hnext;
+		}
+	}
+
+out_unlock:
+	write_unlock_bh(&ntbl->rwlock);
+
+}
+
+static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
+{
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	struct ipoib_neigh **buckets;
+	u32 size;
+
+	clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+	ntbl->htbl = NULL;
+	rwlock_init(&ntbl->rwlock);
+	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+	if (!htbl)
+		return -ENOMEM;
+	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+	size = roundup_pow_of_two(arp_tbl.gc_thresh3);
+	buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL);
+	if (!buckets) {
+		kfree(htbl);
+		return -ENOMEM;
+	}
+	htbl->size = size;
+	htbl->mask = (size - 1);
+	htbl->buckets = buckets;
+	ntbl->htbl = htbl;
+	atomic_set(&ntbl->entries, 0);
+
+	/* start garbage collection */
+	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+	queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+			   arp_tbl.gc_interval);
 
 	return 0;
 }
 
+static void neigh_hash_free_rcu(struct rcu_head *head)
+{
+	struct ipoib_neigh_hash *htbl = container_of(head,
+						     struct ipoib_neigh_hash,
+						     rcu);
+	struct ipoib_neigh __rcu **buckets = htbl->buckets;
+
+	kfree(buckets);
+	kfree(htbl);
+}
+
+void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	unsigned long flags;
+	int i;
+
+	/* remove all neigh connected to a given path or mcast */
+	write_lock_bh(&ntbl->rwlock);
+
+	htbl = rcu_dereference_protected(ntbl->htbl,
+					 lockdep_is_held(&ntbl->rwlock));
+
+	if (!htbl)
+		goto out_unlock;
+
+	for (i = 0; i < htbl->size; i++) {
+		struct ipoib_neigh *neigh;
+		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+		while ((neigh = rcu_dereference_protected(*np,
+							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+			/* delete neighs belong to this parent */
+			if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
+				rcu_assign_pointer(*np,
+						   rcu_dereference_protected(neigh->hnext,
+									     lockdep_is_held(&ntbl->rwlock)));
+				/* remove from parent list */
+				spin_lock_irqsave(&priv->lock, flags);
+				list_del(&neigh->list);
+				spin_unlock_irqrestore(&priv->lock, flags);
+				call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+			} else {
+				np = &neigh->hnext;
+			}
+
+		}
+	}
+out_unlock:
+	write_unlock_bh(&ntbl->rwlock);
+}
+
+static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
+{
+	struct ipoib_neigh_table *ntbl = &priv->ntbl;
+	struct ipoib_neigh_hash *htbl;
+	unsigned long flags;
+	int i;
+
+	write_lock_bh(&ntbl->rwlock);
+
+	htbl = rcu_dereference_protected(ntbl->htbl,
+					 lockdep_is_held(&ntbl->rwlock));
+	if (!htbl)
+		goto out_unlock;
+
+	for (i = 0; i < htbl->size; i++) {
+		struct ipoib_neigh *neigh;
+		struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+		while ((neigh = rcu_dereference_protected(*np,
+							  lockdep_is_held(&ntbl->rwlock))) != NULL) {
+			rcu_assign_pointer(*np,
+					   rcu_dereference_protected(neigh->hnext,
+								     lockdep_is_held(&ntbl->rwlock)));
+			/* remove from path/mc list */
+			spin_lock_irqsave(&priv->lock, flags);
+			list_del(&neigh->list);
+			spin_unlock_irqrestore(&priv->lock, flags);
+			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+		}
+	}
+
+	rcu_assign_pointer(ntbl->htbl, NULL);
+	call_rcu(&htbl->rcu, neigh_hash_free_rcu);
+
+out_unlock:
+	write_unlock_bh(&ntbl->rwlock);
+}
+
+static void ipoib_neigh_hash_uninit(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int stopped;
+
+	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
+	init_completion(&priv->ntbl.flushed);
+	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+
+	/* Stop GC if called at init fail need to cancel work */
+	stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+	if (!stopped)
+		cancel_delayed_work(&priv->neigh_reap_task);
+
+	if (atomic_read(&priv->ntbl.entries)) {
+		ipoib_flush_neighs(priv);
+		wait_for_completion(&priv->ntbl.flushed);
+	}
+}
+
+
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	if (ipoib_neigh_hash_init(priv) < 0)
+		goto out;
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
-		goto out;
+		goto out_neigh_hash_cleanup;
 	}
 
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -954,6 +1253,8 @@ out_tx_ring_cleanup:
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
+out_neigh_hash_cleanup:
+	ipoib_neigh_hash_uninit(dev);
 out:
 	return -ENOMEM;
 }
@@ -966,6 +1267,9 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
 	/* Delete any child interfaces first */
 	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+		/* Stop GC on child */
+		set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
+		cancel_delayed_work(&cpriv->neigh_reap_task);
 		unregister_netdev(cpriv->dev);
 		ipoib_dev_cleanup(cpriv->dev);
 		free_netdev(cpriv->dev);
@@ -978,6 +1282,8 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
+
+	ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -992,7 +1298,6 @@ static const struct net_device_ops ipoib_netdev_ops = {
 	.ndo_start_xmit		 = ipoib_start_xmit,
 	.ndo_tx_timeout		 = ipoib_timeout,
 	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
-	.ndo_neigh_setup	 = ipoib_neigh_setup_dev,
 };
 
 static void ipoib_setup(struct net_device *dev)
@@ -1041,6 +1346,7 @@ static void ipoib_setup(struct net_device *dev)
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
+	INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1281,6 +1587,9 @@ sysfs_failed:
 
 register_failed:
 	ib_unregister_event_handler(&priv->event_handler);
+	/* Stop GC if started before flush */
+	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+	cancel_delayed_work(&priv->neigh_reap_task);
 	flush_workqueue(ipoib_workqueue);
 
 event_failed:
@@ -1347,6 +1656,9 @@ static void ipoib_remove_one(struct ib_device *device)
 		dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
 		rtnl_unlock();
 
+		/* Stop GC */
+		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+		cancel_delayed_work(&priv->neigh_reap_task);
 		flush_workqueue(ipoib_workqueue);
 
 		unregister_netdev(priv->dev);
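The new ipoib_addr_hash() above feeds only the QPN octets and the port-GUID half of the 20-byte IPoIB hardware address into jhash_3words() and masks the result into a power-of-two bucket array sized from arp_tbl.gc_thresh3 (the subnet prefix is skipped because it cannot distinguish peers). The short standalone C program below shows the same bucket-selection idea; mix32() is only a stand-in for the kernel's jhash_3words(), and the sample address is invented for the demo.

/* Sketch of the bucket selection done by ipoib_addr_hash(): hash the QPN
 * bytes and the port-GUID half of the hardware address, then mask into a
 * power-of-two table. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = a * 0x9e3779b1u;		/* stand-in mixer, not jhash */

	h ^= b + 0x85ebca6bu + (h << 6) + (h >> 2);
	h ^= c + 0xc2b2ae35u + (h << 6) + (h >> 2);
	return h;
}

static uint32_t addr_hash(const uint8_t *daddr, uint32_t mask)
{
	uint32_t w[5];	/* the 20-byte hardware address as five 32-bit words */

	memcpy(w, daddr, sizeof(w));
	/* words 3-4: port GUID; low 24 bits of word 0: QPN octets */
	return mix32(w[3], w[4], 0xFFFFFF & w[0]) & mask;
}

int main(void)
{
	uint8_t daddr[20] = { 0x00, 0x12, 0x34, 0x56 };	/* made-up address */
	uint32_t size = 1024;				/* power of two */

	printf("bucket = %u\n", addr_hash(daddr, size - 1));
	return 0;
}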
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 7cecb16d3d48..13f4aa7593c8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -69,28 +69,13 @@ struct ipoib_mcast_iter {
 static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
 	struct net_device *dev = mcast->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_neigh *neigh, *tmp;
 	int tx_dropped = 0;
 
 	ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
 			mcast->mcmember.mgid.raw);
 
-	spin_lock_irq(&priv->lock);
-
-	list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
-		/*
-		 * It's safe to call ipoib_put_ah() inside priv->lock
-		 * here, because we know that mcast->ah will always
-		 * hold one more reference, so ipoib_put_ah() will
-		 * never do more than decrement the ref count.
-		 */
-		if (neigh->ah)
-			ipoib_put_ah(neigh->ah);
-		ipoib_neigh_free(dev, neigh);
-	}
-
-	spin_unlock_irq(&priv->lock);
+	/* remove all neigh connected to this mcast */
+	ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw);
 
 	if (mcast->ah)
 		ipoib_put_ah(mcast->ah);
@@ -655,17 +640,12 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	return 0;
 }
 
-void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct dst_entry *dst = skb_dst(skb);
 	struct ipoib_mcast *mcast;
-	struct neighbour *n;
 	unsigned long flags;
-
-	n = NULL;
-	if (dst)
-		n = dst_neigh_lookup_skb(dst, skb);
+	void *mgid = daddr + 4;
 
 	spin_lock_irqsave(&priv->lock, flags);
 
@@ -721,28 +701,29 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 
 out:
 	if (mcast && mcast->ah) {
-		if (n) {
-			if (!*to_ipoib_neigh(n)) {
-				struct ipoib_neigh *neigh;
-
-				neigh = ipoib_neigh_alloc(n, skb->dev);
-				if (neigh) {
-					kref_get(&mcast->ah->ref);
-					neigh->ah	= mcast->ah;
-					list_add_tail(&neigh->list,
-						      &mcast->neigh_list);
-				}
+		struct ipoib_neigh *neigh;
+
+		spin_unlock_irqrestore(&priv->lock, flags);
+		neigh = ipoib_neigh_get(dev, daddr);
+		spin_lock_irqsave(&priv->lock, flags);
+		if (!neigh) {
+			spin_unlock_irqrestore(&priv->lock, flags);
+			neigh = ipoib_neigh_alloc(daddr, dev);
+			spin_lock_irqsave(&priv->lock, flags);
+			if (neigh) {
+				kref_get(&mcast->ah->ref);
+				neigh->ah	= mcast->ah;
+				list_add_tail(&neigh->list, &mcast->neigh_list);
 			}
-			neigh_release(n);
 		}
 		spin_unlock_irqrestore(&priv->lock, flags);
 		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		if (neigh)
+			ipoib_neigh_put(neigh);
 		return;
 	}
 
 unlock:
-	if (n)
-		neigh_release(n);
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
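In the new ipoib_mcast_send() the neighbour is first looked up by hardware address and only allocated on a miss; ipoib_neigh_alloc() repeats the bucket search under the table's write lock, so two senders racing on the same daddr converge on a single entry. The toy program below sketches that "search again under the write lock" discipline with one bucket and a pthread rwlock; it is an illustration only, not the driver's data structures.

/* Sketch of the get-or-create discipline used by ipoib_neigh_alloc():
 * read-side lookup first, then re-check under the writer lock before
 * inserting, so racing callers share one entry. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	struct entry *next;
	char addr[20];
};

static struct entry *bucket;
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

static struct entry *find(const char *addr)
{
	struct entry *e;

	for (e = bucket; e; e = e->next)
		if (!memcmp(e->addr, addr, sizeof(e->addr)))
			return e;
	return NULL;
}

static struct entry *get_or_create(const char *addr)
{
	struct entry *e;

	pthread_rwlock_rdlock(&lock);		/* fast path: read-side lookup */
	e = find(addr);
	pthread_rwlock_unlock(&lock);
	if (e)
		return e;

	pthread_rwlock_wrlock(&lock);		/* slow path: re-check, then insert */
	e = find(addr);
	if (!e) {
		e = calloc(1, sizeof(*e));
		if (e) {
			memcpy(e->addr, addr, sizeof(e->addr));
			e->next = bucket;
			bucket = e;
		}
	}
	pthread_rwlock_unlock(&lock);
	return e;
}

int main(void)
{
	char a[20] = "demo-address";

	printf("%s\n", get_or_create(a) == get_or_create(a) ? "shared" : "duplicated");
	return 0;
}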