diff options
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 56 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 646 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 57 |
4 files changed, 539 insertions, 236 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 86df632ea612..ca43901ed861 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
@@ -92,6 +92,8 @@ enum { | |||
92 | IPOIB_STOP_REAPER = 7, | 92 | IPOIB_STOP_REAPER = 7, |
93 | IPOIB_FLAG_ADMIN_CM = 9, | 93 | IPOIB_FLAG_ADMIN_CM = 9, |
94 | IPOIB_FLAG_UMCAST = 10, | 94 | IPOIB_FLAG_UMCAST = 10, |
95 | IPOIB_STOP_NEIGH_GC = 11, | ||
96 | IPOIB_NEIGH_TBL_FLUSH = 12, | ||
95 | 97 | ||
96 | IPOIB_MAX_BACKOFF_SECONDS = 16, | 98 | IPOIB_MAX_BACKOFF_SECONDS = 16, |
97 | 99 | ||
@@ -260,6 +262,20 @@ struct ipoib_ethtool_st { | |||
260 | u16 max_coalesced_frames; | 262 | u16 max_coalesced_frames; |
261 | }; | 263 | }; |
262 | 264 | ||
265 | struct ipoib_neigh_hash { | ||
266 | struct ipoib_neigh __rcu **buckets; | ||
267 | struct rcu_head rcu; | ||
268 | u32 mask; | ||
269 | u32 size; | ||
270 | }; | ||
271 | |||
272 | struct ipoib_neigh_table { | ||
273 | struct ipoib_neigh_hash __rcu *htbl; | ||
274 | rwlock_t rwlock; | ||
275 | atomic_t entries; | ||
276 | struct completion flushed; | ||
277 | }; | ||
278 | |||
263 | /* | 279 | /* |
264 | * Device private locking: network stack tx_lock protects members used | 280 | * Device private locking: network stack tx_lock protects members used |
265 | * in TX fast path, lock protects everything else. lock nests inside | 281 | * in TX fast path, lock protects everything else. lock nests inside |
@@ -279,6 +295,8 @@ struct ipoib_dev_priv { | |||
279 | struct rb_root path_tree; | 295 | struct rb_root path_tree; |
280 | struct list_head path_list; | 296 | struct list_head path_list; |
281 | 297 | ||
298 | struct ipoib_neigh_table ntbl; | ||
299 | |||
282 | struct ipoib_mcast *broadcast; | 300 | struct ipoib_mcast *broadcast; |
283 | struct list_head multicast_list; | 301 | struct list_head multicast_list; |
284 | struct rb_root multicast_tree; | 302 | struct rb_root multicast_tree; |
@@ -291,7 +309,7 @@ struct ipoib_dev_priv { | |||
291 | struct work_struct flush_heavy; | 309 | struct work_struct flush_heavy; |
292 | struct work_struct restart_task; | 310 | struct work_struct restart_task; |
293 | struct delayed_work ah_reap_task; | 311 | struct delayed_work ah_reap_task; |
294 | 312 | struct delayed_work neigh_reap_task; | |
295 | struct ib_device *ca; | 313 | struct ib_device *ca; |
296 | u8 port; | 314 | u8 port; |
297 | u16 pkey; | 315 | u16 pkey; |
@@ -377,13 +395,16 @@ struct ipoib_neigh { | |||
377 | #ifdef CONFIG_INFINIBAND_IPOIB_CM | 395 | #ifdef CONFIG_INFINIBAND_IPOIB_CM |
378 | struct ipoib_cm_tx *cm; | 396 | struct ipoib_cm_tx *cm; |
379 | #endif | 397 | #endif |
380 | union ib_gid dgid; | 398 | u8 daddr[INFINIBAND_ALEN]; |
381 | struct sk_buff_head queue; | 399 | struct sk_buff_head queue; |
382 | 400 | ||
383 | struct neighbour *neighbour; | ||
384 | struct net_device *dev; | 401 | struct net_device *dev; |
385 | 402 | ||
386 | struct list_head list; | 403 | struct list_head list; |
404 | struct ipoib_neigh __rcu *hnext; | ||
405 | struct rcu_head rcu; | ||
406 | atomic_t refcnt; | ||
407 | unsigned long alive; | ||
387 | }; | 408 | }; |
388 | 409 | ||
389 | #define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) | 410 | #define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) |
@@ -394,21 +415,17 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu) | |||
394 | return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; | 415 | return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; |
395 | } | 416 | } |
396 | 417 | ||
397 | /* | 418 | void ipoib_neigh_dtor(struct ipoib_neigh *neigh); |
398 | * We stash a pointer to our private neighbour information after our | 419 | static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) |
399 | * hardware address in neigh->ha. The ALIGN() expression here makes | ||
400 | * sure that this pointer is stored aligned so that an unaligned | ||
401 | * load is not needed to dereference it. | ||
402 | */ | ||
403 | static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) | ||
404 | { | 420 | { |
405 | return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) + | 421 | if (atomic_dec_and_test(&neigh->refcnt)) |
406 | INFINIBAND_ALEN, sizeof(void *)); | 422 | ipoib_neigh_dtor(neigh); |
407 | } | 423 | } |
408 | 424 | struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr); | |
409 | struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, | 425 | struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, |
410 | struct net_device *dev); | 426 | struct net_device *dev); |
411 | void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); | 427 | void ipoib_neigh_free(struct ipoib_neigh *neigh); |
428 | void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid); | ||
412 | 429 | ||
413 | extern struct workqueue_struct *ipoib_workqueue; | 430 | extern struct workqueue_struct *ipoib_workqueue; |
414 | 431 | ||
@@ -425,7 +442,6 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) | |||
425 | { | 442 | { |
426 | kref_put(&ah->ref, ipoib_free_ah); | 443 | kref_put(&ah->ref, ipoib_free_ah); |
427 | } | 444 | } |
428 | |||
429 | int ipoib_open(struct net_device *dev); | 445 | int ipoib_open(struct net_device *dev); |
430 | int ipoib_add_pkey_attr(struct net_device *dev); | 446 | int ipoib_add_pkey_attr(struct net_device *dev); |
431 | int ipoib_add_umcast_attr(struct net_device *dev); | 447 | int ipoib_add_umcast_attr(struct net_device *dev); |
@@ -455,7 +471,7 @@ void ipoib_dev_cleanup(struct net_device *dev); | |||
455 | 471 | ||
456 | void ipoib_mcast_join_task(struct work_struct *work); | 472 | void ipoib_mcast_join_task(struct work_struct *work); |
457 | void ipoib_mcast_carrier_on_task(struct work_struct *work); | 473 | void ipoib_mcast_carrier_on_task(struct work_struct *work); |
458 | void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); | 474 | void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); |
459 | 475 | ||
460 | void ipoib_mcast_restart_task(struct work_struct *work); | 476 | void ipoib_mcast_restart_task(struct work_struct *work); |
461 | int ipoib_mcast_start_thread(struct net_device *dev); | 477 | int ipoib_mcast_start_thread(struct net_device *dev); |
@@ -517,10 +533,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) | |||
517 | test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); | 533 | test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); |
518 | } | 534 | } |
519 | 535 | ||
520 | static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) | 536 | static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) |
521 | { | 537 | { |
522 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 538 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
523 | return IPOIB_CM_SUPPORTED(n->ha) && | 539 | return IPOIB_CM_SUPPORTED(hwaddr) && |
524 | test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); | 540 | test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); |
525 | } | 541 | } |
526 | 542 | ||
@@ -575,7 +591,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) | |||
575 | { | 591 | { |
576 | return 0; | 592 | return 0; |
577 | } | 593 | } |
578 | static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) | 594 | static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) |
579 | 595 | ||
580 | { | 596 | { |
581 | return 0; | 597 | return 0; |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6d66ab0dd92a..95ecf4eadf5f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c | |||
@@ -811,9 +811,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) | |||
811 | if (neigh) { | 811 | if (neigh) { |
812 | neigh->cm = NULL; | 812 | neigh->cm = NULL; |
813 | list_del(&neigh->list); | 813 | list_del(&neigh->list); |
814 | if (neigh->ah) | 814 | ipoib_neigh_free(neigh); |
815 | ipoib_put_ah(neigh->ah); | ||
816 | ipoib_neigh_free(dev, neigh); | ||
817 | 815 | ||
818 | tx->neigh = NULL; | 816 | tx->neigh = NULL; |
819 | } | 817 | } |
@@ -1230,9 +1228,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, | |||
1230 | if (neigh) { | 1228 | if (neigh) { |
1231 | neigh->cm = NULL; | 1229 | neigh->cm = NULL; |
1232 | list_del(&neigh->list); | 1230 | list_del(&neigh->list); |
1233 | if (neigh->ah) | 1231 | ipoib_neigh_free(neigh); |
1234 | ipoib_put_ah(neigh->ah); | ||
1235 | ipoib_neigh_free(dev, neigh); | ||
1236 | 1232 | ||
1237 | tx->neigh = NULL; | 1233 | tx->neigh = NULL; |
1238 | } | 1234 | } |
@@ -1279,7 +1275,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) | |||
1279 | list_move(&tx->list, &priv->cm.reap_list); | 1275 | list_move(&tx->list, &priv->cm.reap_list); |
1280 | queue_work(ipoib_workqueue, &priv->cm.reap_task); | 1276 | queue_work(ipoib_workqueue, &priv->cm.reap_task); |
1281 | ipoib_dbg(priv, "Reap connection for gid %pI6\n", | 1277 | ipoib_dbg(priv, "Reap connection for gid %pI6\n", |
1282 | tx->neigh->dgid.raw); | 1278 | tx->neigh->daddr + 4); |
1283 | tx->neigh = NULL; | 1279 | tx->neigh = NULL; |
1284 | } | 1280 | } |
1285 | } | 1281 | } |
@@ -1304,7 +1300,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) | |||
1304 | p = list_entry(priv->cm.start_list.next, typeof(*p), list); | 1300 | p = list_entry(priv->cm.start_list.next, typeof(*p), list); |
1305 | list_del_init(&p->list); | 1301 | list_del_init(&p->list); |
1306 | neigh = p->neigh; | 1302 | neigh = p->neigh; |
1307 | qpn = IPOIB_QPN(neigh->neighbour->ha); | 1303 | qpn = IPOIB_QPN(neigh->daddr); |
1308 | memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); | 1304 | memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); |
1309 | 1305 | ||
1310 | spin_unlock_irqrestore(&priv->lock, flags); | 1306 | spin_unlock_irqrestore(&priv->lock, flags); |
@@ -1320,9 +1316,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) | |||
1320 | if (neigh) { | 1316 | if (neigh) { |
1321 | neigh->cm = NULL; | 1317 | neigh->cm = NULL; |
1322 | list_del(&neigh->list); | 1318 | list_del(&neigh->list); |
1323 | if (neigh->ah) | 1319 | ipoib_neigh_free(neigh); |
1324 | ipoib_put_ah(neigh->ah); | ||
1325 | ipoib_neigh_free(dev, neigh); | ||
1326 | } | 1320 | } |
1327 | list_del(&p->list); | 1321 | list_del(&p->list); |
1328 | kfree(p); | 1322 | kfree(p); |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bbee4b2d7a13..97920b77a5d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -46,7 +46,8 @@ | |||
46 | #include <linux/ip.h> | 46 | #include <linux/ip.h> |
47 | #include <linux/in.h> | 47 | #include <linux/in.h> |
48 | 48 | ||
49 | #include <net/dst.h> | 49 | #include <linux/jhash.h> |
50 | #include <net/arp.h> | ||
50 | 51 | ||
51 | MODULE_AUTHOR("Roland Dreier"); | 52 | MODULE_AUTHOR("Roland Dreier"); |
52 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); | 53 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); |
@@ -84,6 +85,7 @@ struct ib_sa_client ipoib_sa_client; | |||
84 | 85 | ||
85 | static void ipoib_add_one(struct ib_device *device); | 86 | static void ipoib_add_one(struct ib_device *device); |
86 | static void ipoib_remove_one(struct ib_device *device); | 87 | static void ipoib_remove_one(struct ib_device *device); |
88 | static void ipoib_neigh_reclaim(struct rcu_head *rp); | ||
87 | 89 | ||
88 | static struct ib_client ipoib_client = { | 90 | static struct ib_client ipoib_client = { |
89 | .name = "ipoib", | 91 | .name = "ipoib", |
@@ -264,30 +266,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path) | |||
264 | 266 | ||
265 | static void path_free(struct net_device *dev, struct ipoib_path *path) | 267 | static void path_free(struct net_device *dev, struct ipoib_path *path) |
266 | { | 268 | { |
267 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
268 | struct ipoib_neigh *neigh, *tn; | ||
269 | struct sk_buff *skb; | 269 | struct sk_buff *skb; |
270 | unsigned long flags; | ||
271 | 270 | ||
272 | while ((skb = __skb_dequeue(&path->queue))) | 271 | while ((skb = __skb_dequeue(&path->queue))) |
273 | dev_kfree_skb_irq(skb); | 272 | dev_kfree_skb_irq(skb); |
274 | 273 | ||
275 | spin_lock_irqsave(&priv->lock, flags); | 274 | ipoib_dbg(netdev_priv(dev), "path_free\n"); |
276 | |||
277 | list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { | ||
278 | /* | ||
279 | * It's safe to call ipoib_put_ah() inside priv->lock | ||
280 | * here, because we know that path->ah will always | ||
281 | * hold one more reference, so ipoib_put_ah() will | ||
282 | * never do more than decrement the ref count. | ||
283 | */ | ||
284 | if (neigh->ah) | ||
285 | ipoib_put_ah(neigh->ah); | ||
286 | |||
287 | ipoib_neigh_free(dev, neigh); | ||
288 | } | ||
289 | 275 | ||
290 | spin_unlock_irqrestore(&priv->lock, flags); | 276 | /* remove all neigh connected to this path */ |
277 | ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); | ||
291 | 278 | ||
292 | if (path->ah) | 279 | if (path->ah) |
293 | ipoib_put_ah(path->ah); | 280 | ipoib_put_ah(path->ah); |
@@ -458,19 +445,15 @@ static void path_rec_completion(int status, | |||
458 | } | 445 | } |
459 | kref_get(&path->ah->ref); | 446 | kref_get(&path->ah->ref); |
460 | neigh->ah = path->ah; | 447 | neigh->ah = path->ah; |
461 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, | ||
462 | sizeof(union ib_gid)); | ||
463 | 448 | ||
464 | if (ipoib_cm_enabled(dev, neigh->neighbour)) { | 449 | if (ipoib_cm_enabled(dev, neigh->daddr)) { |
465 | if (!ipoib_cm_get(neigh)) | 450 | if (!ipoib_cm_get(neigh)) |
466 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, | 451 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, |
467 | path, | 452 | path, |
468 | neigh)); | 453 | neigh)); |
469 | if (!ipoib_cm_get(neigh)) { | 454 | if (!ipoib_cm_get(neigh)) { |
470 | list_del(&neigh->list); | 455 | list_del(&neigh->list); |
471 | if (neigh->ah) | 456 | ipoib_neigh_free(neigh); |
472 | ipoib_put_ah(neigh->ah); | ||
473 | ipoib_neigh_free(dev, neigh); | ||
474 | continue; | 457 | continue; |
475 | } | 458 | } |
476 | } | 459 | } |
@@ -555,15 +538,15 @@ static int path_rec_start(struct net_device *dev, | |||
555 | return 0; | 538 | return 0; |
556 | } | 539 | } |
557 | 540 | ||
558 | /* called with rcu_read_lock */ | 541 | static void neigh_add_path(struct sk_buff *skb, u8 *daddr, |
559 | static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) | 542 | struct net_device *dev) |
560 | { | 543 | { |
561 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 544 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
562 | struct ipoib_path *path; | 545 | struct ipoib_path *path; |
563 | struct ipoib_neigh *neigh; | 546 | struct ipoib_neigh *neigh; |
564 | unsigned long flags; | 547 | unsigned long flags; |
565 | 548 | ||
566 | neigh = ipoib_neigh_alloc(n, skb->dev); | 549 | neigh = ipoib_neigh_alloc(daddr, dev); |
567 | if (!neigh) { | 550 | if (!neigh) { |
568 | ++dev->stats.tx_dropped; | 551 | ++dev->stats.tx_dropped; |
569 | dev_kfree_skb_any(skb); | 552 | dev_kfree_skb_any(skb); |
@@ -572,9 +555,9 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ | |||
572 | 555 | ||
573 | spin_lock_irqsave(&priv->lock, flags); | 556 | spin_lock_irqsave(&priv->lock, flags); |
574 | 557 | ||
575 | path = __path_find(dev, n->ha + 4); | 558 | path = __path_find(dev, daddr + 4); |
576 | if (!path) { | 559 | if (!path) { |
577 | path = path_rec_create(dev, n->ha + 4); | 560 | path = path_rec_create(dev, daddr + 4); |
578 | if (!path) | 561 | if (!path) |
579 | goto err_path; | 562 | goto err_path; |
580 | 563 | ||
@@ -586,17 +569,13 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ | |||
586 | if (path->ah) { | 569 | if (path->ah) { |
587 | kref_get(&path->ah->ref); | 570 | kref_get(&path->ah->ref); |
588 | neigh->ah = path->ah; | 571 | neigh->ah = path->ah; |
589 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, | ||
590 | sizeof(union ib_gid)); | ||
591 | 572 | ||
592 | if (ipoib_cm_enabled(dev, neigh->neighbour)) { | 573 | if (ipoib_cm_enabled(dev, neigh->daddr)) { |
593 | if (!ipoib_cm_get(neigh)) | 574 | if (!ipoib_cm_get(neigh)) |
594 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); | 575 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); |
595 | if (!ipoib_cm_get(neigh)) { | 576 | if (!ipoib_cm_get(neigh)) { |
596 | list_del(&neigh->list); | 577 | list_del(&neigh->list); |
597 | if (neigh->ah) | 578 | ipoib_neigh_free(neigh); |
598 | ipoib_put_ah(neigh->ah); | ||
599 | ipoib_neigh_free(dev, neigh); | ||
600 | goto err_drop; | 579 | goto err_drop; |
601 | } | 580 | } |
602 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) | 581 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) |
@@ -608,7 +587,8 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ | |||
608 | } | 587 | } |
609 | } else { | 588 | } else { |
610 | spin_unlock_irqrestore(&priv->lock, flags); | 589 | spin_unlock_irqrestore(&priv->lock, flags); |
611 | ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); | 590 | ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); |
591 | ipoib_neigh_put(neigh); | ||
612 | return; | 592 | return; |
613 | } | 593 | } |
614 | } else { | 594 | } else { |
@@ -621,35 +601,20 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ | |||
621 | } | 601 | } |
622 | 602 | ||
623 | spin_unlock_irqrestore(&priv->lock, flags); | 603 | spin_unlock_irqrestore(&priv->lock, flags); |
604 | ipoib_neigh_put(neigh); | ||
624 | return; | 605 | return; |
625 | 606 | ||
626 | err_list: | 607 | err_list: |
627 | list_del(&neigh->list); | 608 | list_del(&neigh->list); |
628 | 609 | ||
629 | err_path: | 610 | err_path: |
630 | ipoib_neigh_free(dev, neigh); | 611 | ipoib_neigh_free(neigh); |
631 | err_drop: | 612 | err_drop: |
632 | ++dev->stats.tx_dropped; | 613 | ++dev->stats.tx_dropped; |
633 | dev_kfree_skb_any(skb); | 614 | dev_kfree_skb_any(skb); |
634 | 615 | ||
635 | spin_unlock_irqrestore(&priv->lock, flags); | 616 | spin_unlock_irqrestore(&priv->lock, flags); |
636 | } | 617 | ipoib_neigh_put(neigh); |
637 | |||
638 | /* called with rcu_read_lock */ | ||
639 | static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) | ||
640 | { | ||
641 | struct ipoib_dev_priv *priv = netdev_priv(skb->dev); | ||
642 | |||
643 | /* Look up path record for unicasts */ | ||
644 | if (n->ha[4] != 0xff) { | ||
645 | neigh_add_path(skb, n, dev); | ||
646 | return; | ||
647 | } | ||
648 | |||
649 | /* Add in the P_Key for multicasts */ | ||
650 | n->ha[8] = (priv->pkey >> 8) & 0xff; | ||
651 | n->ha[9] = priv->pkey & 0xff; | ||
652 | ipoib_mcast_send(dev, n->ha + 4, skb); | ||
653 | } | 618 | } |
654 | 619 | ||
655 | static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, | 620 | static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, |
@@ -710,96 +675,80 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
710 | { | 675 | { |
711 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 676 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
712 | struct ipoib_neigh *neigh; | 677 | struct ipoib_neigh *neigh; |
713 | struct neighbour *n = NULL; | 678 | struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; |
679 | struct ipoib_header *header; | ||
714 | unsigned long flags; | 680 | unsigned long flags; |
715 | 681 | ||
716 | rcu_read_lock(); | 682 | header = (struct ipoib_header *) skb->data; |
717 | if (likely(skb_dst(skb))) { | 683 | |
718 | n = dst_neigh_lookup_skb(skb_dst(skb), skb); | 684 | if (unlikely(cb->hwaddr[4] == 0xff)) { |
719 | if (!n) { | 685 | /* multicast, arrange "if" according to probability */ |
686 | if ((header->proto != htons(ETH_P_IP)) && | ||
687 | (header->proto != htons(ETH_P_IPV6)) && | ||
688 | (header->proto != htons(ETH_P_ARP)) && | ||
689 | (header->proto != htons(ETH_P_RARP))) { | ||
690 | /* ethertype not supported by IPoIB */ | ||
720 | ++dev->stats.tx_dropped; | 691 | ++dev->stats.tx_dropped; |
721 | dev_kfree_skb_any(skb); | 692 | dev_kfree_skb_any(skb); |
722 | goto unlock; | 693 | return NETDEV_TX_OK; |
723 | } | 694 | } |
695 | /* Add in the P_Key for multicast*/ | ||
696 | cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; | ||
697 | cb->hwaddr[9] = priv->pkey & 0xff; | ||
698 | |||
699 | neigh = ipoib_neigh_get(dev, cb->hwaddr); | ||
700 | if (likely(neigh)) | ||
701 | goto send_using_neigh; | ||
702 | ipoib_mcast_send(dev, cb->hwaddr, skb); | ||
703 | return NETDEV_TX_OK; | ||
724 | } | 704 | } |
725 | if (likely(n)) { | ||
726 | if (unlikely(!*to_ipoib_neigh(n))) { | ||
727 | ipoib_path_lookup(skb, n, dev); | ||
728 | goto unlock; | ||
729 | } | ||
730 | |||
731 | neigh = *to_ipoib_neigh(n); | ||
732 | 705 | ||
733 | if (unlikely((memcmp(&neigh->dgid.raw, | 706 | /* unicast, arrange "switch" according to probability */ |
734 | n->ha + 4, | 707 | switch (header->proto) { |
735 | sizeof(union ib_gid))) || | 708 | case htons(ETH_P_IP): |
736 | (neigh->dev != dev))) { | 709 | case htons(ETH_P_IPV6): |
737 | spin_lock_irqsave(&priv->lock, flags); | 710 | neigh = ipoib_neigh_get(dev, cb->hwaddr); |
738 | /* | 711 | if (unlikely(!neigh)) { |
739 | * It's safe to call ipoib_put_ah() inside | 712 | neigh_add_path(skb, cb->hwaddr, dev); |
740 | * priv->lock here, because we know that | 713 | return NETDEV_TX_OK; |
741 | * path->ah will always hold one more reference, | ||
742 | * so ipoib_put_ah() will never do more than | ||
743 | * decrement the ref count. | ||
744 | */ | ||
745 | if (neigh->ah) | ||
746 | ipoib_put_ah(neigh->ah); | ||
747 | list_del(&neigh->list); | ||
748 | ipoib_neigh_free(dev, neigh); | ||
749 | spin_unlock_irqrestore(&priv->lock, flags); | ||
750 | ipoib_path_lookup(skb, n, dev); | ||
751 | goto unlock; | ||
752 | } | 714 | } |
715 | break; | ||
716 | case htons(ETH_P_ARP): | ||
717 | case htons(ETH_P_RARP): | ||
718 | /* for unicast ARP and RARP should always perform path find */ | ||
719 | unicast_arp_send(skb, dev, cb); | ||
720 | return NETDEV_TX_OK; | ||
721 | default: | ||
722 | /* ethertype not supported by IPoIB */ | ||
723 | ++dev->stats.tx_dropped; | ||
724 | dev_kfree_skb_any(skb); | ||
725 | return NETDEV_TX_OK; | ||
726 | } | ||
753 | 727 | ||
754 | if (ipoib_cm_get(neigh)) { | 728 | send_using_neigh: |
755 | if (ipoib_cm_up(neigh)) { | 729 | /* note we now hold a ref to neigh */ |
756 | ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); | 730 | if (ipoib_cm_get(neigh)) { |
757 | goto unlock; | 731 | if (ipoib_cm_up(neigh)) { |
758 | } | 732 | ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); |
759 | } else if (neigh->ah) { | 733 | goto unref; |
760 | ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); | ||
761 | goto unlock; | ||
762 | } | 734 | } |
735 | } else if (neigh->ah) { | ||
736 | ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); | ||
737 | goto unref; | ||
738 | } | ||
763 | 739 | ||
764 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { | 740 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { |
765 | spin_lock_irqsave(&priv->lock, flags); | 741 | spin_lock_irqsave(&priv->lock, flags); |
766 | __skb_queue_tail(&neigh->queue, skb); | 742 | __skb_queue_tail(&neigh->queue, skb); |
767 | spin_unlock_irqrestore(&priv->lock, flags); | 743 | spin_unlock_irqrestore(&priv->lock, flags); |
768 | } else { | ||
769 | ++dev->stats.tx_dropped; | ||
770 | dev_kfree_skb_any(skb); | ||
771 | } | ||
772 | } else { | 744 | } else { |
773 | struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; | 745 | ++dev->stats.tx_dropped; |
774 | 746 | dev_kfree_skb_any(skb); | |
775 | if (cb->hwaddr[4] == 0xff) { | 747 | } |
776 | /* Add in the P_Key for multicast*/ | ||
777 | cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; | ||
778 | cb->hwaddr[9] = priv->pkey & 0xff; | ||
779 | 748 | ||
780 | ipoib_mcast_send(dev, cb->hwaddr + 4, skb); | 749 | unref: |
781 | } else { | 750 | ipoib_neigh_put(neigh); |
782 | /* unicast GID -- should be ARP or RARP reply */ | ||
783 | |||
784 | if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && | ||
785 | (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { | ||
786 | ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", | ||
787 | skb_dst(skb) ? "neigh" : "dst", | ||
788 | be16_to_cpup((__be16 *) skb->data), | ||
789 | IPOIB_QPN(cb->hwaddr), | ||
790 | cb->hwaddr + 4); | ||
791 | dev_kfree_skb_any(skb); | ||
792 | ++dev->stats.tx_dropped; | ||
793 | goto unlock; | ||
794 | } | ||
795 | 751 | ||
796 | unicast_arp_send(skb, dev, cb); | ||
797 | } | ||
798 | } | ||
799 | unlock: | ||
800 | if (n) | ||
801 | neigh_release(n); | ||
802 | rcu_read_unlock(); | ||
803 | return NETDEV_TX_OK; | 752 | return NETDEV_TX_OK; |
804 | } | 753 | } |
805 | 754 | ||
@@ -821,6 +770,7 @@ static int ipoib_hard_header(struct sk_buff *skb, | |||
821 | const void *daddr, const void *saddr, unsigned len) | 770 | const void *daddr, const void *saddr, unsigned len) |
822 | { | 771 | { |
823 | struct ipoib_header *header; | 772 | struct ipoib_header *header; |
773 | struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; | ||
824 | 774 | ||
825 | header = (struct ipoib_header *) skb_push(skb, sizeof *header); | 775 | header = (struct ipoib_header *) skb_push(skb, sizeof *header); |
826 | 776 | ||
@@ -828,14 +778,11 @@ static int ipoib_hard_header(struct sk_buff *skb, | |||
828 | header->reserved = 0; | 778 | header->reserved = 0; |
829 | 779 | ||
830 | /* | 780 | /* |
831 | * If we don't have a dst_entry structure, stuff the | 781 | * we don't rely on dst_entry structure, always stuff the |
832 | * destination address into skb->cb so we can figure out where | 782 | * destination address into skb->cb so we can figure out where |
833 | * to send the packet later. | 783 | * to send the packet later. |
834 | */ | 784 | */ |
835 | if (!skb_dst(skb)) { | 785 | memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); |
836 | struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; | ||
837 | memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); | ||
838 | } | ||
839 | 786 | ||
840 | return 0; | 787 | return 0; |
841 | } | 788 | } |
@@ -852,86 +799,438 @@ static void ipoib_set_mcast_list(struct net_device *dev) | |||
852 | queue_work(ipoib_workqueue, &priv->restart_task); | 799 | queue_work(ipoib_workqueue, &priv->restart_task); |
853 | } | 800 | } |
854 | 801 | ||
855 | static void ipoib_neigh_cleanup(struct neighbour *n) | 802 | static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) |
856 | { | 803 | { |
857 | struct ipoib_neigh *neigh; | 804 | /* |
858 | struct ipoib_dev_priv *priv = netdev_priv(n->dev); | 805 | * Use only the address parts that contributes to spreading |
806 | * The subnet prefix is not used as one can not connect to | ||
807 | * same remote port (GUID) using the same remote QPN via two | ||
808 | * different subnets. | ||
809 | */ | ||
810 | /* qpn octets[1:4) & port GUID octets[12:20) */ | ||
811 | u32 *daddr_32 = (u32 *) daddr; | ||
812 | u32 hv; | ||
813 | |||
814 | hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); | ||
815 | return hv & htbl->mask; | ||
816 | } | ||
817 | |||
818 | struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) | ||
819 | { | ||
820 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
821 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
822 | struct ipoib_neigh_hash *htbl; | ||
823 | struct ipoib_neigh *neigh = NULL; | ||
824 | u32 hash_val; | ||
825 | |||
826 | rcu_read_lock_bh(); | ||
827 | |||
828 | htbl = rcu_dereference_bh(ntbl->htbl); | ||
829 | |||
830 | if (!htbl) | ||
831 | goto out_unlock; | ||
832 | |||
833 | hash_val = ipoib_addr_hash(htbl, daddr); | ||
834 | for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]); | ||
835 | neigh != NULL; | ||
836 | neigh = rcu_dereference_bh(neigh->hnext)) { | ||
837 | if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { | ||
838 | /* found, take one ref on behalf of the caller */ | ||
839 | if (!atomic_inc_not_zero(&neigh->refcnt)) { | ||
840 | /* deleted */ | ||
841 | neigh = NULL; | ||
842 | goto out_unlock; | ||
843 | } | ||
844 | neigh->alive = jiffies; | ||
845 | goto out_unlock; | ||
846 | } | ||
847 | } | ||
848 | |||
849 | out_unlock: | ||
850 | rcu_read_unlock_bh(); | ||
851 | return neigh; | ||
852 | } | ||
853 | |||
854 | static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) | ||
855 | { | ||
856 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
857 | struct ipoib_neigh_hash *htbl; | ||
858 | unsigned long neigh_obsolete; | ||
859 | unsigned long dt; | ||
859 | unsigned long flags; | 860 | unsigned long flags; |
860 | struct ipoib_ah *ah = NULL; | 861 | int i; |
861 | 862 | ||
862 | neigh = *to_ipoib_neigh(n); | 863 | if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) |
863 | if (neigh) | ||
864 | priv = netdev_priv(neigh->dev); | ||
865 | else | ||
866 | return; | 864 | return; |
867 | ipoib_dbg(priv, | ||
868 | "neigh_cleanup for %06x %pI6\n", | ||
869 | IPOIB_QPN(n->ha), | ||
870 | n->ha + 4); | ||
871 | 865 | ||
872 | spin_lock_irqsave(&priv->lock, flags); | 866 | write_lock_bh(&ntbl->rwlock); |
867 | |||
868 | htbl = rcu_dereference_protected(ntbl->htbl, | ||
869 | lockdep_is_held(&ntbl->rwlock)); | ||
870 | |||
871 | if (!htbl) | ||
872 | goto out_unlock; | ||
873 | |||
874 | /* neigh is obsolete if it was idle for two GC periods */ | ||
875 | dt = 2 * arp_tbl.gc_interval; | ||
876 | neigh_obsolete = jiffies - dt; | ||
877 | /* handle possible race condition */ | ||
878 | if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) | ||
879 | goto out_unlock; | ||
880 | |||
881 | for (i = 0; i < htbl->size; i++) { | ||
882 | struct ipoib_neigh *neigh; | ||
883 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | ||
884 | |||
885 | while ((neigh = rcu_dereference_protected(*np, | ||
886 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | ||
887 | /* was the neigh idle for two GC periods */ | ||
888 | if (time_after(neigh_obsolete, neigh->alive)) { | ||
889 | rcu_assign_pointer(*np, | ||
890 | rcu_dereference_protected(neigh->hnext, | ||
891 | lockdep_is_held(&ntbl->rwlock))); | ||
892 | /* remove from path/mc list */ | ||
893 | spin_lock_irqsave(&priv->lock, flags); | ||
894 | list_del(&neigh->list); | ||
895 | spin_unlock_irqrestore(&priv->lock, flags); | ||
896 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | ||
897 | } else { | ||
898 | np = &neigh->hnext; | ||
899 | } | ||
873 | 900 | ||
874 | if (neigh->ah) | 901 | } |
875 | ah = neigh->ah; | 902 | } |
876 | list_del(&neigh->list); | ||
877 | ipoib_neigh_free(n->dev, neigh); | ||
878 | 903 | ||
879 | spin_unlock_irqrestore(&priv->lock, flags); | 904 | out_unlock: |
905 | write_unlock_bh(&ntbl->rwlock); | ||
906 | } | ||
880 | 907 | ||
881 | if (ah) | 908 | static void ipoib_reap_neigh(struct work_struct *work) |
882 | ipoib_put_ah(ah); | 909 | { |
910 | struct ipoib_dev_priv *priv = | ||
911 | container_of(work, struct ipoib_dev_priv, neigh_reap_task.work); | ||
912 | |||
913 | __ipoib_reap_neigh(priv); | ||
914 | |||
915 | if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) | ||
916 | queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, | ||
917 | arp_tbl.gc_interval); | ||
883 | } | 918 | } |
884 | 919 | ||
885 | struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, | 920 | |
921 | static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, | ||
886 | struct net_device *dev) | 922 | struct net_device *dev) |
887 | { | 923 | { |
888 | struct ipoib_neigh *neigh; | 924 | struct ipoib_neigh *neigh; |
889 | 925 | ||
890 | neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | 926 | neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); |
891 | if (!neigh) | 927 | if (!neigh) |
892 | return NULL; | 928 | return NULL; |
893 | 929 | ||
894 | neigh->neighbour = neighbour; | ||
895 | neigh->dev = dev; | 930 | neigh->dev = dev; |
896 | memset(&neigh->dgid.raw, 0, sizeof (union ib_gid)); | 931 | memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); |
897 | *to_ipoib_neigh(neighbour) = neigh; | ||
898 | skb_queue_head_init(&neigh->queue); | 932 | skb_queue_head_init(&neigh->queue); |
933 | INIT_LIST_HEAD(&neigh->list); /* NOTE(review): presumably so the unconditional list_del() in the removal paths is safe on a neigh never linked to a path/mcast list - confirm */ | ||
899 | ipoib_cm_set(neigh, NULL); | 934 | ipoib_cm_set(neigh, NULL); |
935 | /* the refcount starts at 1: this single reference belongs to the caller */ | ||
936 | atomic_set(&neigh->refcnt, 1); | ||
937 | |||
938 | return neigh; /* NULL was already returned above if the allocation failed */ | ||
939 | } | ||
940 | |||
941 | struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, | ||
942 | struct net_device *dev) | ||
943 | { | ||
944 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
945 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
946 | struct ipoib_neigh_hash *htbl; | ||
947 | struct ipoib_neigh *neigh; | ||
948 | u32 hash_val; | ||
949 | |||
950 | write_lock_bh(&ntbl->rwlock); /* lookup and insert both happen under the table write lock */ | ||
951 | |||
952 | htbl = rcu_dereference_protected(ntbl->htbl, | ||
953 | lockdep_is_held(&ntbl->rwlock)); | ||
954 | if (!htbl) { /* no hash table installed: nothing to insert into, return NULL */ | ||
955 | neigh = NULL; | ||
956 | goto out_unlock; | ||
957 | } | ||
958 | |||
959 | /* We want to add a new neigh, but another thread may have added one for | ||
960 | * this address already, and a hash resize may have taken place; so recompute the hash and search the bucket first | ||
961 | */ | ||
962 | hash_val = ipoib_addr_hash(htbl, daddr); | ||
963 | for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], | ||
964 | lockdep_is_held(&ntbl->rwlock)); | ||
965 | neigh != NULL; | ||
966 | neigh = rcu_dereference_protected(neigh->hnext, | ||
967 | lockdep_is_held(&ntbl->rwlock))) { | ||
968 | if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { | ||
969 | /* found an entry for this address: take one reference on behalf of the caller */ | ||
970 | if (!atomic_inc_not_zero(&neigh->refcnt)) { | ||
971 | /* refcount is already zero, i.e. the entry was deleted: stop searching and fall through to create a new one */ | ||
972 | neigh = NULL; | ||
973 | break; | ||
974 | } | ||
975 | neigh->alive = jiffies; /* refresh the timestamp the GC compares against */ | ||
976 | goto out_unlock; | ||
977 | } | ||
978 | } | ||
979 | |||
980 | neigh = ipoib_neigh_ctor(daddr, dev); | ||
981 | if (!neigh) | ||
982 | goto out_unlock; | ||
983 | |||
984 | /* second reference, held on behalf of the hash table (the first belongs to the caller) */ | ||
985 | atomic_inc(&neigh->refcnt); | ||
986 | neigh->alive = jiffies; | ||
987 | /* link at the head of the bucket chain: set hnext first, then publish the bucket pointer */ | ||
988 | rcu_assign_pointer(neigh->hnext, | ||
989 | rcu_dereference_protected(htbl->buckets[hash_val], | ||
990 | lockdep_is_held(&ntbl->rwlock))); | ||
991 | rcu_assign_pointer(htbl->buckets[hash_val], neigh); | ||
992 | atomic_inc(&ntbl->entries); | ||
993 | |||
994 | out_unlock: | ||
995 | write_unlock_bh(&ntbl->rwlock); | ||
900 | 996 | ||
901 | return neigh; | 997 | return neigh; |
902 | } | 998 | } |
903 | 999 | ||
904 | void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) | 1000 | void ipoib_neigh_dtor(struct ipoib_neigh *neigh) |
905 | { | 1001 | { |
1002 | /* Destructor: the neigh reference count has dropped to zero */ | ||
1003 | struct net_device *dev = neigh->dev; | ||
1004 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
906 | struct sk_buff *skb; | 1005 | struct sk_buff *skb; |
907 | *to_ipoib_neigh(neigh->neighbour) = NULL; | 1006 | if (neigh->ah) |
1007 | ipoib_put_ah(neigh->ah); /* release the neigh's address handle, if it has one */ | ||
908 | while ((skb = __skb_dequeue(&neigh->queue))) { | 1008 | while ((skb = __skb_dequeue(&neigh->queue))) { |
909 | ++dev->stats.tx_dropped; | 1009 | ++dev->stats.tx_dropped; |
910 | dev_kfree_skb_any(skb); | 1010 | dev_kfree_skb_any(skb); |
911 | } | 1011 | } |
912 | if (ipoib_cm_get(neigh)) | 1012 | if (ipoib_cm_get(neigh)) |
913 | ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); | 1013 | ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); |
1014 | ipoib_dbg(netdev_priv(dev), | ||
1015 | "neigh free for %06x %pI6\n", | ||
1016 | IPOIB_QPN(neigh->daddr), | ||
1017 | neigh->daddr + 4); | ||
914 | kfree(neigh); | 1018 | kfree(neigh); |
1019 | if (atomic_dec_and_test(&priv->ntbl.entries)) { /* neigh is freed; only priv is used from here on */ | ||
1020 | if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags)) /* last entry gone during a table flush: signal ntbl.flushed */ | ||
1021 | complete(&priv->ntbl.flushed); | ||
1022 | } | ||
1023 | } | ||
1024 | |||
1025 | static void ipoib_neigh_reclaim(struct rcu_head *rp) | ||
1026 | { | ||
1027 | /* RCU callback, queued with call_rcu() when a neigh is removed from the hash table */ | ||
1028 | struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu); | ||
1029 | /* drop one reference; note a TX context may still hold another */ | ||
1030 | ipoib_neigh_put(neigh); | ||
915 | } | 1031 | } |
916 | 1032 | ||
917 | static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) | 1033 | void ipoib_neigh_free(struct ipoib_neigh *neigh) |
918 | { | 1034 | { |
919 | parms->neigh_cleanup = ipoib_neigh_cleanup; | 1035 | struct net_device *dev = neigh->dev; |
1036 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1037 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
1038 | struct ipoib_neigh_hash *htbl; | ||
1039 | struct ipoib_neigh __rcu **np; | ||
1040 | struct ipoib_neigh *n; | ||
1041 | u32 hash_val; | ||
1042 | /* Unlink @neigh from its hash bucket, if it is still there, and queue it for release via RCU */ | ||
1043 | write_lock_bh(&ntbl->rwlock); | ||
1044 | |||
1045 | htbl = rcu_dereference_protected(ntbl->htbl, | ||
1046 | lockdep_is_held(&ntbl->rwlock)); | ||
1047 | if (!htbl) | ||
1048 | goto out_unlock; | ||
1049 | |||
1050 | hash_val = ipoib_addr_hash(htbl, neigh->daddr); | ||
1051 | np = &htbl->buckets[hash_val]; | ||
1052 | for (n = rcu_dereference_protected(*np, | ||
1053 | lockdep_is_held(&ntbl->rwlock)); | ||
1054 | n != NULL; | ||
1055 | n = rcu_dereference_protected(n->hnext, /* advance from the current entry n; stepping via neigh->hnext never walks the chain */ | ||
1056 | lockdep_is_held(&ntbl->rwlock))) { | ||
1057 | if (n == neigh) { | ||
1058 | /* found: np is the link that points at neigh, so redirect it to neigh's successor */ | ||
1059 | rcu_assign_pointer(*np, | ||
1060 | rcu_dereference_protected(neigh->hnext, | ||
1061 | lockdep_is_held(&ntbl->rwlock))); | ||
1062 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | ||
1063 | goto out_unlock; | ||
1064 | } else { | ||
1065 | np = &n->hnext; /* remember the link we are about to follow */ | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | out_unlock: | ||
1070 | write_unlock_bh(&ntbl->rwlock); | ||
1071 | |||
1072 | } | ||
1073 | |||
1074 | static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) | ||
1075 | { | ||
1076 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
1077 | struct ipoib_neigh_hash *htbl; | ||
1078 | struct ipoib_neigh **buckets; | ||
1079 | u32 size; | ||
1080 | |||
1081 | clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); /* start from a clean state: no flush in progress, no table installed */ | ||
1082 | ntbl->htbl = NULL; | ||
1083 | rwlock_init(&ntbl->rwlock); | ||
1084 | htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); | ||
1085 | if (!htbl) | ||
1086 | return -ENOMEM; | ||
1087 | set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); /* GC stays marked as stopped until it is started at the end of this function */ | ||
1088 | size = roundup_pow_of_two(arp_tbl.gc_thresh3); /* power-of-two bucket count, so mask can be size - 1 */ | ||
1089 | buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL); | ||
1090 | if (!buckets) { | ||
1091 | kfree(htbl); /* ntbl->htbl is still NULL here, so nothing else refers to htbl */ | ||
1092 | return -ENOMEM; | ||
1093 | } | ||
1094 | htbl->size = size; | ||
1095 | htbl->mask = (size - 1); | ||
1096 | htbl->buckets = buckets; | ||
1097 | ntbl->htbl = htbl; | ||
1098 | atomic_set(&ntbl->entries, 0); | ||
1099 | |||
1100 | /* start garbage collection: clear the stop flag and queue the first reap after arp_tbl.gc_interval */ | ||
1101 | clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); | ||
1102 | queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, | ||
1103 | arp_tbl.gc_interval); | ||
920 | 1104 | ||
921 | return 0; | 1105 | return 0; |
922 | } | 1106 | } |
923 | 1107 | ||
1108 | static void neigh_hash_free_rcu(struct rcu_head *head) | ||
1109 | { /* RCU callback: free a hash table descriptor together with its bucket array */ | ||
1110 | struct ipoib_neigh_hash *htbl = container_of(head, | ||
1111 | struct ipoib_neigh_hash, | ||
1112 | rcu); | ||
1113 | struct ipoib_neigh __rcu **buckets = htbl->buckets; | ||
1114 | |||
1115 | kfree(buckets); /* only the array is freed here, not the neighs it pointed to */ | ||
1116 | kfree(htbl); | ||
1117 | } | ||
1118 | |||
1119 | void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) | ||
1120 | { | ||
1121 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1122 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
1123 | struct ipoib_neigh_hash *htbl; | ||
1124 | unsigned long flags; | ||
1125 | int i; | ||
1126 | |||
1127 | /* Remove every neigh connected to a given path or mcast, i.e. whose GID equals @gid; scans the whole table under the write lock */ | ||
1128 | write_lock_bh(&ntbl->rwlock); | ||
1129 | |||
1130 | htbl = rcu_dereference_protected(ntbl->htbl, | ||
1131 | lockdep_is_held(&ntbl->rwlock)); | ||
1132 | |||
1133 | if (!htbl) | ||
1134 | goto out_unlock; | ||
1135 | |||
1136 | for (i = 0; i < htbl->size; i++) { | ||
1137 | struct ipoib_neigh *neigh; | ||
1138 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | ||
1139 | |||
1140 | while ((neigh = rcu_dereference_protected(*np, | ||
1141 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | ||
1142 | /* delete neighs that belong to this parent: the GID is compared against daddr starting at byte 4 */ | ||
1143 | if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { | ||
1144 | rcu_assign_pointer(*np, | ||
1145 | rcu_dereference_protected(neigh->hnext, | ||
1146 | lockdep_is_held(&ntbl->rwlock))); | ||
1147 | /* also remove it from the parent's list, under priv->lock */ | ||
1148 | spin_lock_irqsave(&priv->lock, flags); | ||
1149 | list_del(&neigh->list); | ||
1150 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1151 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); /* np is left unchanged: it now points at the successor */ | ||
1152 | } else { | ||
1153 | np = &neigh->hnext; /* no match: move on to the next link in the chain */ | ||
1154 | } | ||
1155 | |||
1156 | } | ||
1157 | } | ||
1158 | out_unlock: | ||
1159 | write_unlock_bh(&ntbl->rwlock); | ||
1160 | } | ||
1161 | |||
1162 | static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) | ||
1163 | { | ||
1164 | struct ipoib_neigh_table *ntbl = &priv->ntbl; | ||
1165 | struct ipoib_neigh_hash *htbl; | ||
1166 | unsigned long flags; | ||
1167 | int i; | ||
1168 | |||
1169 | write_lock_bh(&ntbl->rwlock); /* empty the whole table and retire it, all under the write lock */ | ||
1170 | |||
1171 | htbl = rcu_dereference_protected(ntbl->htbl, | ||
1172 | lockdep_is_held(&ntbl->rwlock)); | ||
1173 | if (!htbl) /* no table installed: nothing to flush */ | ||
1174 | goto out_unlock; | ||
1175 | |||
1176 | for (i = 0; i < htbl->size; i++) { | ||
1177 | struct ipoib_neigh *neigh; | ||
1178 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | ||
1179 | |||
1180 | while ((neigh = rcu_dereference_protected(*np, | ||
1181 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | ||
1182 | rcu_assign_pointer(*np, /* pop the head of the chain; the loop ends when the bucket is empty */ | ||
1183 | rcu_dereference_protected(neigh->hnext, | ||
1184 | lockdep_is_held(&ntbl->rwlock))); | ||
1185 | /* also remove it from the path/mc list, under priv->lock */ | ||
1186 | spin_lock_irqsave(&priv->lock, flags); | ||
1187 | list_del(&neigh->list); | ||
1188 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1189 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | ||
1190 | } | ||
1191 | } | ||
1192 | |||
1193 | rcu_assign_pointer(ntbl->htbl, NULL); /* uninstall the table, then free it from an RCU callback */ | ||
1194 | call_rcu(&htbl->rcu, neigh_hash_free_rcu); | ||
1195 | |||
1196 | out_unlock: | ||
1197 | write_unlock_bh(&ntbl->rwlock); | ||
1198 | } | ||
1199 | |||
1200 | static void ipoib_neigh_hash_uninit(struct net_device *dev) | ||
1201 | { | ||
1202 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1203 | int stopped, entries; | ||
1204 | |||
1205 | ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); | ||
1206 | init_completion(&priv->ntbl.flushed); /* arm the completion before the flag, so the destructor can signal it */ | ||
1207 | set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); | ||
1208 | |||
1209 | /* Stop GC; if we are called because init failed, the reap work still needs cancelling */ | ||
1210 | stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); | ||
1211 | if (!stopped) | ||
1212 | cancel_delayed_work(&priv->neigh_reap_task); | ||
1213 | /* Always flush: ipoib_flush_neighs() is what frees the hash table itself, so skipping it for an empty table leaks the table and its buckets */ | ||
1214 | entries = atomic_read(&priv->ntbl.entries); | ||
1215 | ipoib_flush_neighs(priv); | ||
1216 | if (entries) /* only wait when there were neighs whose destructor will signal ntbl.flushed */ | ||
1217 | wait_for_completion(&priv->ntbl.flushed); | ||
1218 | } | ||
1219 | |||
1220 | |||
924 | int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) | 1221 | int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) |
925 | { | 1222 | { |
926 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 1223 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
927 | 1224 | ||
1225 | if (ipoib_neigh_hash_init(priv) < 0) | ||
1226 | goto out; | ||
928 | /* Allocate RX/TX "rings" to hold queued skbs */ | 1227 | /* Allocate RX/TX "rings" to hold queued skbs */ |
929 | priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, | 1228 | priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, |
930 | GFP_KERNEL); | 1229 | GFP_KERNEL); |
931 | if (!priv->rx_ring) { | 1230 | if (!priv->rx_ring) { |
932 | printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", | 1231 | printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", |
933 | ca->name, ipoib_recvq_size); | 1232 | ca->name, ipoib_recvq_size); |
934 | goto out; | 1233 | goto out_neigh_hash_cleanup; |
935 | } | 1234 | } |
936 | 1235 | ||
937 | priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); | 1236 | priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); |
@@ -954,6 +1253,8 @@ out_tx_ring_cleanup: | |||
954 | out_rx_ring_cleanup: | 1253 | out_rx_ring_cleanup: |
955 | kfree(priv->rx_ring); | 1254 | kfree(priv->rx_ring); |
956 | 1255 | ||
1256 | out_neigh_hash_cleanup: | ||
1257 | ipoib_neigh_hash_uninit(dev); | ||
957 | out: | 1258 | out: |
958 | return -ENOMEM; | 1259 | return -ENOMEM; |
959 | } | 1260 | } |
@@ -966,6 +1267,9 @@ void ipoib_dev_cleanup(struct net_device *dev) | |||
966 | 1267 | ||
967 | /* Delete any child interfaces first */ | 1268 | /* Delete any child interfaces first */ |
968 | list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { | 1269 | list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { |
1270 | /* Stop GC on child */ | ||
1271 | set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); | ||
1272 | cancel_delayed_work(&cpriv->neigh_reap_task); | ||
969 | unregister_netdev(cpriv->dev); | 1273 | unregister_netdev(cpriv->dev); |
970 | ipoib_dev_cleanup(cpriv->dev); | 1274 | ipoib_dev_cleanup(cpriv->dev); |
971 | free_netdev(cpriv->dev); | 1275 | free_netdev(cpriv->dev); |
@@ -978,6 +1282,8 @@ void ipoib_dev_cleanup(struct net_device *dev) | |||
978 | 1282 | ||
979 | priv->rx_ring = NULL; | 1283 | priv->rx_ring = NULL; |
980 | priv->tx_ring = NULL; | 1284 | priv->tx_ring = NULL; |
1285 | |||
1286 | ipoib_neigh_hash_uninit(dev); | ||
981 | } | 1287 | } |
982 | 1288 | ||
983 | static const struct header_ops ipoib_header_ops = { | 1289 | static const struct header_ops ipoib_header_ops = { |
@@ -992,7 +1298,6 @@ static const struct net_device_ops ipoib_netdev_ops = { | |||
992 | .ndo_start_xmit = ipoib_start_xmit, | 1298 | .ndo_start_xmit = ipoib_start_xmit, |
993 | .ndo_tx_timeout = ipoib_timeout, | 1299 | .ndo_tx_timeout = ipoib_timeout, |
994 | .ndo_set_rx_mode = ipoib_set_mcast_list, | 1300 | .ndo_set_rx_mode = ipoib_set_mcast_list, |
995 | .ndo_neigh_setup = ipoib_neigh_setup_dev, | ||
996 | }; | 1301 | }; |
997 | 1302 | ||
998 | static void ipoib_setup(struct net_device *dev) | 1303 | static void ipoib_setup(struct net_device *dev) |
@@ -1041,6 +1346,7 @@ static void ipoib_setup(struct net_device *dev) | |||
1041 | INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); | 1346 | INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); |
1042 | INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); | 1347 | INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); |
1043 | INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); | 1348 | INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); |
1349 | INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); | ||
1044 | } | 1350 | } |
1045 | 1351 | ||
1046 | struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) | 1352 | struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) |
@@ -1281,6 +1587,9 @@ sysfs_failed: | |||
1281 | 1587 | ||
1282 | register_failed: | 1588 | register_failed: |
1283 | ib_unregister_event_handler(&priv->event_handler); | 1589 | ib_unregister_event_handler(&priv->event_handler); |
1590 | /* Stop GC if started before flush */ | ||
1591 | set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); | ||
1592 | cancel_delayed_work(&priv->neigh_reap_task); | ||
1284 | flush_workqueue(ipoib_workqueue); | 1593 | flush_workqueue(ipoib_workqueue); |
1285 | 1594 | ||
1286 | event_failed: | 1595 | event_failed: |
@@ -1347,6 +1656,9 @@ static void ipoib_remove_one(struct ib_device *device) | |||
1347 | dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); | 1656 | dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); |
1348 | rtnl_unlock(); | 1657 | rtnl_unlock(); |
1349 | 1658 | ||
1659 | /* Stop GC */ | ||
1660 | set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); | ||
1661 | cancel_delayed_work(&priv->neigh_reap_task); | ||
1350 | flush_workqueue(ipoib_workqueue); | 1662 | flush_workqueue(ipoib_workqueue); |
1351 | 1663 | ||
1352 | unregister_netdev(priv->dev); | 1664 | unregister_netdev(priv->dev); |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 7cecb16d3d48..13f4aa7593c8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -69,28 +69,13 @@ struct ipoib_mcast_iter { | |||
69 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) | 69 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) |
70 | { | 70 | { |
71 | struct net_device *dev = mcast->dev; | 71 | struct net_device *dev = mcast->dev; |
72 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
73 | struct ipoib_neigh *neigh, *tmp; | ||
74 | int tx_dropped = 0; | 72 | int tx_dropped = 0; |
75 | 73 | ||
76 | ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", | 74 | ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", |
77 | mcast->mcmember.mgid.raw); | 75 | mcast->mcmember.mgid.raw); |
78 | 76 | ||
79 | spin_lock_irq(&priv->lock); | 77 | /* remove all neigh connected to this mcast */ |
80 | 78 | ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); | |
81 | list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { | ||
82 | /* | ||
83 | * It's safe to call ipoib_put_ah() inside priv->lock | ||
84 | * here, because we know that mcast->ah will always | ||
85 | * hold one more reference, so ipoib_put_ah() will | ||
86 | * never do more than decrement the ref count. | ||
87 | */ | ||
88 | if (neigh->ah) | ||
89 | ipoib_put_ah(neigh->ah); | ||
90 | ipoib_neigh_free(dev, neigh); | ||
91 | } | ||
92 | |||
93 | spin_unlock_irq(&priv->lock); | ||
94 | 79 | ||
95 | if (mcast->ah) | 80 | if (mcast->ah) |
96 | ipoib_put_ah(mcast->ah); | 81 | ipoib_put_ah(mcast->ah); |
@@ -655,17 +640,12 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |||
655 | return 0; | 640 | return 0; |
656 | } | 641 | } |
657 | 642 | ||
658 | void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) | 643 | void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) |
659 | { | 644 | { |
660 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 645 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
661 | struct dst_entry *dst = skb_dst(skb); | ||
662 | struct ipoib_mcast *mcast; | 646 | struct ipoib_mcast *mcast; |
663 | struct neighbour *n; | ||
664 | unsigned long flags; | 647 | unsigned long flags; |
665 | 648 | void *mgid = daddr + 4; | |
666 | n = NULL; | ||
667 | if (dst) | ||
668 | n = dst_neigh_lookup_skb(dst, skb); | ||
669 | 649 | ||
670 | spin_lock_irqsave(&priv->lock, flags); | 650 | spin_lock_irqsave(&priv->lock, flags); |
671 | 651 | ||
@@ -721,28 +701,29 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) | |||
721 | 701 | ||
722 | out: | 702 | out: |
723 | if (mcast && mcast->ah) { | 703 | if (mcast && mcast->ah) { |
724 | if (n) { | 704 | struct ipoib_neigh *neigh; |
725 | if (!*to_ipoib_neigh(n)) { | 705 | |
726 | struct ipoib_neigh *neigh; | 706 | spin_unlock_irqrestore(&priv->lock, flags); |
727 | 707 | neigh = ipoib_neigh_get(dev, daddr); | |
728 | neigh = ipoib_neigh_alloc(n, skb->dev); | 708 | spin_lock_irqsave(&priv->lock, flags); |
729 | if (neigh) { | 709 | if (!neigh) { |
730 | kref_get(&mcast->ah->ref); | 710 | spin_unlock_irqrestore(&priv->lock, flags); |
731 | neigh->ah = mcast->ah; | 711 | neigh = ipoib_neigh_alloc(daddr, dev); |
732 | list_add_tail(&neigh->list, | 712 | spin_lock_irqsave(&priv->lock, flags); |
733 | &mcast->neigh_list); | 713 | if (neigh) { |
734 | } | 714 | kref_get(&mcast->ah->ref); |
715 | neigh->ah = mcast->ah; | ||
716 | list_add_tail(&neigh->list, &mcast->neigh_list); | ||
735 | } | 717 | } |
736 | neigh_release(n); | ||
737 | } | 718 | } |
738 | spin_unlock_irqrestore(&priv->lock, flags); | 719 | spin_unlock_irqrestore(&priv->lock, flags); |
739 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); | 720 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); |
721 | if (neigh) | ||
722 | ipoib_neigh_put(neigh); | ||
740 | return; | 723 | return; |
741 | } | 724 | } |
742 | 725 | ||
743 | unlock: | 726 | unlock: |
744 | if (n) | ||
745 | neigh_release(n); | ||
746 | spin_unlock_irqrestore(&priv->lock, flags); | 727 | spin_unlock_irqrestore(&priv->lock, flags); |
747 | } | 728 | } |
748 | 729 | ||