aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorErez Shitrit <erezsh@mellanox.com>2017-12-31 08:33:15 -0500
committerJason Gunthorpe <jgg@mellanox.com>2018-01-02 13:09:05 -0500
commit16ba3defb8bd01a9464ba4820a487f5b196b455b (patch)
tree9492fce56c1f5e95c6da8ecafa3739ec9fd9d598
parent5a371cf87e145b86efd32007e46146e78c1eff6d (diff)
IB/ipoib: Fix race condition in neigh creation
When using enhanced mode for IPoIB, two threads may execute xmit in parallel to two different TX queues while the target is the same. In this case, both of them will add the same neighbor to the path's neigh link list and we might see the following message: list_add double add: new=ffff88024767a348, prev=ffff88024767a348... WARNING: lib/list_debug.c:31__list_add_valid+0x4e/0x70 ipoib_start_xmit+0x477/0x680 [ib_ipoib] dev_hard_start_xmit+0xb9/0x3e0 sch_direct_xmit+0xf9/0x250 __qdisc_run+0x176/0x5d0 __dev_queue_xmit+0x1f5/0xb10 __dev_queue_xmit+0x55/0xb10 Analysis: Two SKB are scheduled to be transmitted from two cores. In ipoib_start_xmit, both gets NULL when calling ipoib_neigh_get. Two calls to neigh_add_path are made. One thread takes the spin-lock and calls ipoib_neigh_alloc which creates the neigh structure, then (after the __path_find) the neigh is added to the path's neigh link list. When the second thread enters the critical section it also calls ipoib_neigh_alloc but in this case it gets the already allocated ipoib_neigh structure, which is already linked to the path's neigh link list and adds it again to the list. Which beside of triggering the list, it creates a loop in the linked list. This loop leads to endless loop inside path_rec_completion. Solution: Check list_empty(&neigh->list) before adding to the list. Add a similar fix in "ipoib_multicast.c::ipoib_mcast_send" Fixes: b63b70d87741 ('IPoIB: Use a private hash table for path lookup in xmit path') Signed-off-by: Erez Shitrit <erezsh@mellanox.com> Reviewed-by: Alex Vesker <valex@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c25
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c5
2 files changed, 22 insertions, 8 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 12b7f911f0e5..8880351df179 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev,
902 return 0; 902 return 0;
903} 903}
904 904
905static void neigh_add_path(struct sk_buff *skb, u8 *daddr, 905static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
906 struct net_device *dev) 906 struct net_device *dev)
907{ 907{
908 struct ipoib_dev_priv *priv = ipoib_priv(dev); 908 struct ipoib_dev_priv *priv = ipoib_priv(dev);
909 struct rdma_netdev *rn = netdev_priv(dev); 909 struct rdma_netdev *rn = netdev_priv(dev);
@@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
917 spin_unlock_irqrestore(&priv->lock, flags); 917 spin_unlock_irqrestore(&priv->lock, flags);
918 ++dev->stats.tx_dropped; 918 ++dev->stats.tx_dropped;
919 dev_kfree_skb_any(skb); 919 dev_kfree_skb_any(skb);
920 return; 920 return NULL;
921 }
922
923 /* To avoid race condition, make sure that the
924 * neigh will be added only once.
925 */
926 if (unlikely(!list_empty(&neigh->list))) {
927 spin_unlock_irqrestore(&priv->lock, flags);
928 return neigh;
921 } 929 }
922 930
923 path = __path_find(dev, daddr + 4); 931 path = __path_find(dev, daddr + 4);
@@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
956 path->ah->last_send = rn->send(dev, skb, path->ah->ah, 964 path->ah->last_send = rn->send(dev, skb, path->ah->ah,
957 IPOIB_QPN(daddr)); 965 IPOIB_QPN(daddr));
958 ipoib_neigh_put(neigh); 966 ipoib_neigh_put(neigh);
959 return; 967 return NULL;
960 } 968 }
961 } else { 969 } else {
962 neigh->ah = NULL; 970 neigh->ah = NULL;
@@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
973 981
974 spin_unlock_irqrestore(&priv->lock, flags); 982 spin_unlock_irqrestore(&priv->lock, flags);
975 ipoib_neigh_put(neigh); 983 ipoib_neigh_put(neigh);
976 return; 984 return NULL;
977 985
978err_path: 986err_path:
979 ipoib_neigh_free(neigh); 987 ipoib_neigh_free(neigh);
@@ -983,6 +991,8 @@ err_drop:
983 991
984 spin_unlock_irqrestore(&priv->lock, flags); 992 spin_unlock_irqrestore(&priv->lock, flags);
985 ipoib_neigh_put(neigh); 993 ipoib_neigh_put(neigh);
994
995 return NULL;
986} 996}
987 997
988static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 998static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
1091 case htons(ETH_P_TIPC): 1101 case htons(ETH_P_TIPC):
1092 neigh = ipoib_neigh_get(dev, phdr->hwaddr); 1102 neigh = ipoib_neigh_get(dev, phdr->hwaddr);
1093 if (unlikely(!neigh)) { 1103 if (unlikely(!neigh)) {
1094 neigh_add_path(skb, phdr->hwaddr, dev); 1104 neigh = neigh_add_path(skb, phdr->hwaddr, dev);
1095 return NETDEV_TX_OK; 1105 if (likely(!neigh))
1106 return NETDEV_TX_OK;
1096 } 1107 }
1097 break; 1108 break;
1098 case htons(ETH_P_ARP): 1109 case htons(ETH_P_ARP):
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93e149efc1f5..9b3f47ae2016 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
816 spin_lock_irqsave(&priv->lock, flags); 816 spin_lock_irqsave(&priv->lock, flags);
817 if (!neigh) { 817 if (!neigh) {
818 neigh = ipoib_neigh_alloc(daddr, dev); 818 neigh = ipoib_neigh_alloc(daddr, dev);
819 if (neigh) { 819 /* Make sure that the neigh will be added only
820 * once to mcast list.
821 */
822 if (neigh && list_empty(&neigh->list)) {
820 kref_get(&mcast->ah->ref); 823 kref_get(&mcast->ah->ref);
821 neigh->ah = mcast->ah; 824 neigh->ah = mcast->ah;
822 list_add_tail(&neigh->list, &mcast->neigh_list); 825 list_add_tail(&neigh->list, &mcast->neigh_list);