diff options
author | Shlomo Pongratz <shlomop@mellanox.com> | 2012-08-29 11:14:34 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2012-09-12 12:21:45 -0400 |
commit | b5120a6e11e90d98d8a752545ac60bfa1ea95f1a (patch) | |
tree | f376dd84bf679fb2b3ed5ae18af967455a75a958 /drivers/infiniband/ulp/ipoib | |
parent | 66172c09938bfc4efdcf9b5e0246a85b9b76dd54 (diff) |
IPoIB: Fix AB-BA deadlock when deleting neighbours
Lockdep points out a circular locking dependency betwwen the ipoib
device priv spinlock (priv->lock) and the neighbour table rwlock
(ntbl->rwlock).
In the normal path, ie neigbour garbage collection task, the neigh
table rwlock is taken first and then if the neighbour needs to be
deleted, priv->lock is taken.
However in some error paths, such as in ipoib_cm_handle_tx_wc(),
priv->lock is taken first and then ipoib_neigh_free routine is called
which in turn takes the neighbour table ntbl->rwlock.
The solution is to get rid the neigh table rwlock completely and use
only priv->lock.
Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 70 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 |
3 files changed, 27 insertions, 46 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e6bbeae1c309..0af216d21f87 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
@@ -274,7 +274,6 @@ struct ipoib_neigh_hash { | |||
274 | 274 | ||
275 | struct ipoib_neigh_table { | 275 | struct ipoib_neigh_table { |
276 | struct ipoib_neigh_hash __rcu *htbl; | 276 | struct ipoib_neigh_hash __rcu *htbl; |
277 | rwlock_t rwlock; | ||
278 | atomic_t entries; | 277 | atomic_t entries; |
279 | struct completion flushed; | 278 | struct completion flushed; |
280 | struct completion deleted; | 279 | struct completion deleted; |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 72c1fc28f079..1e19b5ae7c47 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -546,15 +546,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, | |||
546 | struct ipoib_neigh *neigh; | 546 | struct ipoib_neigh *neigh; |
547 | unsigned long flags; | 547 | unsigned long flags; |
548 | 548 | ||
549 | spin_lock_irqsave(&priv->lock, flags); | ||
549 | neigh = ipoib_neigh_alloc(daddr, dev); | 550 | neigh = ipoib_neigh_alloc(daddr, dev); |
550 | if (!neigh) { | 551 | if (!neigh) { |
552 | spin_unlock_irqrestore(&priv->lock, flags); | ||
551 | ++dev->stats.tx_dropped; | 553 | ++dev->stats.tx_dropped; |
552 | dev_kfree_skb_any(skb); | 554 | dev_kfree_skb_any(skb); |
553 | return; | 555 | return; |
554 | } | 556 | } |
555 | 557 | ||
556 | spin_lock_irqsave(&priv->lock, flags); | ||
557 | |||
558 | path = __path_find(dev, daddr + 4); | 558 | path = __path_find(dev, daddr + 4); |
559 | if (!path) { | 559 | if (!path) { |
560 | path = path_rec_create(dev, daddr + 4); | 560 | path = path_rec_create(dev, daddr + 4); |
@@ -863,10 +863,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) | |||
863 | if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) | 863 | if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) |
864 | return; | 864 | return; |
865 | 865 | ||
866 | write_lock_bh(&ntbl->rwlock); | 866 | spin_lock_irqsave(&priv->lock, flags); |
867 | 867 | ||
868 | htbl = rcu_dereference_protected(ntbl->htbl, | 868 | htbl = rcu_dereference_protected(ntbl->htbl, |
869 | lockdep_is_held(&ntbl->rwlock)); | 869 | lockdep_is_held(&priv->lock)); |
870 | 870 | ||
871 | if (!htbl) | 871 | if (!htbl) |
872 | goto out_unlock; | 872 | goto out_unlock; |
@@ -883,16 +883,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) | |||
883 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | 883 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; |
884 | 884 | ||
885 | while ((neigh = rcu_dereference_protected(*np, | 885 | while ((neigh = rcu_dereference_protected(*np, |
886 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | 886 | lockdep_is_held(&priv->lock))) != NULL) { |
887 | /* was the neigh idle for two GC periods */ | 887 | /* was the neigh idle for two GC periods */ |
888 | if (time_after(neigh_obsolete, neigh->alive)) { | 888 | if (time_after(neigh_obsolete, neigh->alive)) { |
889 | rcu_assign_pointer(*np, | 889 | rcu_assign_pointer(*np, |
890 | rcu_dereference_protected(neigh->hnext, | 890 | rcu_dereference_protected(neigh->hnext, |
891 | lockdep_is_held(&ntbl->rwlock))); | 891 | lockdep_is_held(&priv->lock))); |
892 | /* remove from path/mc list */ | 892 | /* remove from path/mc list */ |
893 | spin_lock_irqsave(&priv->lock, flags); | ||
894 | list_del(&neigh->list); | 893 | list_del(&neigh->list); |
895 | spin_unlock_irqrestore(&priv->lock, flags); | ||
896 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | 894 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); |
897 | } else { | 895 | } else { |
898 | np = &neigh->hnext; | 896 | np = &neigh->hnext; |
@@ -902,7 +900,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) | |||
902 | } | 900 | } |
903 | 901 | ||
904 | out_unlock: | 902 | out_unlock: |
905 | write_unlock_bh(&ntbl->rwlock); | 903 | spin_unlock_irqrestore(&priv->lock, flags); |
906 | } | 904 | } |
907 | 905 | ||
908 | static void ipoib_reap_neigh(struct work_struct *work) | 906 | static void ipoib_reap_neigh(struct work_struct *work) |
@@ -947,10 +945,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, | |||
947 | struct ipoib_neigh *neigh; | 945 | struct ipoib_neigh *neigh; |
948 | u32 hash_val; | 946 | u32 hash_val; |
949 | 947 | ||
950 | write_lock_bh(&ntbl->rwlock); | ||
951 | |||
952 | htbl = rcu_dereference_protected(ntbl->htbl, | 948 | htbl = rcu_dereference_protected(ntbl->htbl, |
953 | lockdep_is_held(&ntbl->rwlock)); | 949 | lockdep_is_held(&priv->lock)); |
954 | if (!htbl) { | 950 | if (!htbl) { |
955 | neigh = NULL; | 951 | neigh = NULL; |
956 | goto out_unlock; | 952 | goto out_unlock; |
@@ -961,10 +957,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, | |||
961 | */ | 957 | */ |
962 | hash_val = ipoib_addr_hash(htbl, daddr); | 958 | hash_val = ipoib_addr_hash(htbl, daddr); |
963 | for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], | 959 | for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], |
964 | lockdep_is_held(&ntbl->rwlock)); | 960 | lockdep_is_held(&priv->lock)); |
965 | neigh != NULL; | 961 | neigh != NULL; |
966 | neigh = rcu_dereference_protected(neigh->hnext, | 962 | neigh = rcu_dereference_protected(neigh->hnext, |
967 | lockdep_is_held(&ntbl->rwlock))) { | 963 | lockdep_is_held(&priv->lock))) { |
968 | if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { | 964 | if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { |
969 | /* found, take one ref on behalf of the caller */ | 965 | /* found, take one ref on behalf of the caller */ |
970 | if (!atomic_inc_not_zero(&neigh->refcnt)) { | 966 | if (!atomic_inc_not_zero(&neigh->refcnt)) { |
@@ -987,12 +983,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, | |||
987 | /* put in hash */ | 983 | /* put in hash */ |
988 | rcu_assign_pointer(neigh->hnext, | 984 | rcu_assign_pointer(neigh->hnext, |
989 | rcu_dereference_protected(htbl->buckets[hash_val], | 985 | rcu_dereference_protected(htbl->buckets[hash_val], |
990 | lockdep_is_held(&ntbl->rwlock))); | 986 | lockdep_is_held(&priv->lock))); |
991 | rcu_assign_pointer(htbl->buckets[hash_val], neigh); | 987 | rcu_assign_pointer(htbl->buckets[hash_val], neigh); |
992 | atomic_inc(&ntbl->entries); | 988 | atomic_inc(&ntbl->entries); |
993 | 989 | ||
994 | out_unlock: | 990 | out_unlock: |
995 | write_unlock_bh(&ntbl->rwlock); | ||
996 | 991 | ||
997 | return neigh; | 992 | return neigh; |
998 | } | 993 | } |
@@ -1040,35 +1035,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) | |||
1040 | struct ipoib_neigh *n; | 1035 | struct ipoib_neigh *n; |
1041 | u32 hash_val; | 1036 | u32 hash_val; |
1042 | 1037 | ||
1043 | write_lock_bh(&ntbl->rwlock); | ||
1044 | |||
1045 | htbl = rcu_dereference_protected(ntbl->htbl, | 1038 | htbl = rcu_dereference_protected(ntbl->htbl, |
1046 | lockdep_is_held(&ntbl->rwlock)); | 1039 | lockdep_is_held(&priv->lock)); |
1047 | if (!htbl) | 1040 | if (!htbl) |
1048 | goto out_unlock; | 1041 | return; |
1049 | 1042 | ||
1050 | hash_val = ipoib_addr_hash(htbl, neigh->daddr); | 1043 | hash_val = ipoib_addr_hash(htbl, neigh->daddr); |
1051 | np = &htbl->buckets[hash_val]; | 1044 | np = &htbl->buckets[hash_val]; |
1052 | for (n = rcu_dereference_protected(*np, | 1045 | for (n = rcu_dereference_protected(*np, |
1053 | lockdep_is_held(&ntbl->rwlock)); | 1046 | lockdep_is_held(&priv->lock)); |
1054 | n != NULL; | 1047 | n != NULL; |
1055 | n = rcu_dereference_protected(*np, | 1048 | n = rcu_dereference_protected(*np, |
1056 | lockdep_is_held(&ntbl->rwlock))) { | 1049 | lockdep_is_held(&priv->lock))) { |
1057 | if (n == neigh) { | 1050 | if (n == neigh) { |
1058 | /* found */ | 1051 | /* found */ |
1059 | rcu_assign_pointer(*np, | 1052 | rcu_assign_pointer(*np, |
1060 | rcu_dereference_protected(neigh->hnext, | 1053 | rcu_dereference_protected(neigh->hnext, |
1061 | lockdep_is_held(&ntbl->rwlock))); | 1054 | lockdep_is_held(&priv->lock))); |
1062 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | 1055 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); |
1063 | goto out_unlock; | 1056 | return; |
1064 | } else { | 1057 | } else { |
1065 | np = &n->hnext; | 1058 | np = &n->hnext; |
1066 | } | 1059 | } |
1067 | } | 1060 | } |
1068 | |||
1069 | out_unlock: | ||
1070 | write_unlock_bh(&ntbl->rwlock); | ||
1071 | |||
1072 | } | 1061 | } |
1073 | 1062 | ||
1074 | static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) | 1063 | static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) |
@@ -1080,7 +1069,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) | |||
1080 | 1069 | ||
1081 | clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); | 1070 | clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); |
1082 | ntbl->htbl = NULL; | 1071 | ntbl->htbl = NULL; |
1083 | rwlock_init(&ntbl->rwlock); | ||
1084 | htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); | 1072 | htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); |
1085 | if (!htbl) | 1073 | if (!htbl) |
1086 | return -ENOMEM; | 1074 | return -ENOMEM; |
@@ -1128,10 +1116,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) | |||
1128 | int i; | 1116 | int i; |
1129 | 1117 | ||
1130 | /* remove all neigh connected to a given path or mcast */ | 1118 | /* remove all neigh connected to a given path or mcast */ |
1131 | write_lock_bh(&ntbl->rwlock); | 1119 | spin_lock_irqsave(&priv->lock, flags); |
1132 | 1120 | ||
1133 | htbl = rcu_dereference_protected(ntbl->htbl, | 1121 | htbl = rcu_dereference_protected(ntbl->htbl, |
1134 | lockdep_is_held(&ntbl->rwlock)); | 1122 | lockdep_is_held(&priv->lock)); |
1135 | 1123 | ||
1136 | if (!htbl) | 1124 | if (!htbl) |
1137 | goto out_unlock; | 1125 | goto out_unlock; |
@@ -1141,16 +1129,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) | |||
1141 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | 1129 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; |
1142 | 1130 | ||
1143 | while ((neigh = rcu_dereference_protected(*np, | 1131 | while ((neigh = rcu_dereference_protected(*np, |
1144 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | 1132 | lockdep_is_held(&priv->lock))) != NULL) { |
1145 | /* delete neighs belong to this parent */ | 1133 | /* delete neighs belong to this parent */ |
1146 | if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { | 1134 | if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { |
1147 | rcu_assign_pointer(*np, | 1135 | rcu_assign_pointer(*np, |
1148 | rcu_dereference_protected(neigh->hnext, | 1136 | rcu_dereference_protected(neigh->hnext, |
1149 | lockdep_is_held(&ntbl->rwlock))); | 1137 | lockdep_is_held(&priv->lock))); |
1150 | /* remove from parent list */ | 1138 | /* remove from parent list */ |
1151 | spin_lock_irqsave(&priv->lock, flags); | ||
1152 | list_del(&neigh->list); | 1139 | list_del(&neigh->list); |
1153 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1154 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | 1140 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); |
1155 | } else { | 1141 | } else { |
1156 | np = &neigh->hnext; | 1142 | np = &neigh->hnext; |
@@ -1159,7 +1145,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) | |||
1159 | } | 1145 | } |
1160 | } | 1146 | } |
1161 | out_unlock: | 1147 | out_unlock: |
1162 | write_unlock_bh(&ntbl->rwlock); | 1148 | spin_unlock_irqrestore(&priv->lock, flags); |
1163 | } | 1149 | } |
1164 | 1150 | ||
1165 | static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) | 1151 | static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) |
@@ -1171,10 +1157,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) | |||
1171 | 1157 | ||
1172 | init_completion(&priv->ntbl.flushed); | 1158 | init_completion(&priv->ntbl.flushed); |
1173 | 1159 | ||
1174 | write_lock_bh(&ntbl->rwlock); | 1160 | spin_lock_irqsave(&priv->lock, flags); |
1175 | 1161 | ||
1176 | htbl = rcu_dereference_protected(ntbl->htbl, | 1162 | htbl = rcu_dereference_protected(ntbl->htbl, |
1177 | lockdep_is_held(&ntbl->rwlock)); | 1163 | lockdep_is_held(&priv->lock)); |
1178 | if (!htbl) | 1164 | if (!htbl) |
1179 | goto out_unlock; | 1165 | goto out_unlock; |
1180 | 1166 | ||
@@ -1187,14 +1173,12 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) | |||
1187 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; | 1173 | struct ipoib_neigh __rcu **np = &htbl->buckets[i]; |
1188 | 1174 | ||
1189 | while ((neigh = rcu_dereference_protected(*np, | 1175 | while ((neigh = rcu_dereference_protected(*np, |
1190 | lockdep_is_held(&ntbl->rwlock))) != NULL) { | 1176 | lockdep_is_held(&priv->lock))) != NULL) { |
1191 | rcu_assign_pointer(*np, | 1177 | rcu_assign_pointer(*np, |
1192 | rcu_dereference_protected(neigh->hnext, | 1178 | rcu_dereference_protected(neigh->hnext, |
1193 | lockdep_is_held(&ntbl->rwlock))); | 1179 | lockdep_is_held(&priv->lock))); |
1194 | /* remove from path/mc list */ | 1180 | /* remove from path/mc list */ |
1195 | spin_lock_irqsave(&priv->lock, flags); | ||
1196 | list_del(&neigh->list); | 1181 | list_del(&neigh->list); |
1197 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1198 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); | 1182 | call_rcu(&neigh->rcu, ipoib_neigh_reclaim); |
1199 | } | 1183 | } |
1200 | } | 1184 | } |
@@ -1204,7 +1188,7 @@ free_htbl: | |||
1204 | call_rcu(&htbl->rcu, neigh_hash_free_rcu); | 1188 | call_rcu(&htbl->rcu, neigh_hash_free_rcu); |
1205 | 1189 | ||
1206 | out_unlock: | 1190 | out_unlock: |
1207 | write_unlock_bh(&ntbl->rwlock); | 1191 | spin_unlock_irqrestore(&priv->lock, flags); |
1208 | if (wait_flushed) | 1192 | if (wait_flushed) |
1209 | wait_for_completion(&priv->ntbl.flushed); | 1193 | wait_for_completion(&priv->ntbl.flushed); |
1210 | } | 1194 | } |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 13f4aa7593c8..75367249f447 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -707,9 +707,7 @@ out: | |||
707 | neigh = ipoib_neigh_get(dev, daddr); | 707 | neigh = ipoib_neigh_get(dev, daddr); |
708 | spin_lock_irqsave(&priv->lock, flags); | 708 | spin_lock_irqsave(&priv->lock, flags); |
709 | if (!neigh) { | 709 | if (!neigh) { |
710 | spin_unlock_irqrestore(&priv->lock, flags); | ||
711 | neigh = ipoib_neigh_alloc(daddr, dev); | 710 | neigh = ipoib_neigh_alloc(daddr, dev); |
712 | spin_lock_irqsave(&priv->lock, flags); | ||
713 | if (neigh) { | 711 | if (neigh) { |
714 | kref_get(&mcast->ah->ref); | 712 | kref_get(&mcast->ah->ref); |
715 | neigh->ah = mcast->ah; | 713 | neigh->ah = mcast->ah; |