diff options
author | Martin KaFai Lau <kafai@fb.com> | 2015-09-15 17:30:07 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-09-15 17:53:05 -0400 |
commit | cdf3464e6c6bd764277cbbe992cd12da735b92fb (patch) | |
tree | 53e75a7f94aba7ced57ae7f2b527bce0d463a629 /net | |
parent | f230d1e891ba1da5953460516960894154f265db (diff) |
ipv6: Fix dst_entry refcnt bugs in ip6_tunnel
Problems in the current dst_entry cache in the ip6_tunnel:
1. ip6_tnl_dst_set is racy. There is no lock to protect it:
- One major problem is that the dst refcnt gets messed up. For example,
the same dst_cache can be released multiple times, which then
triggers the infamous "dst refcnt < 0" warning message.
- Another issue is the inconsistency between dst_cache and
dst_cookie.
It can be reproduced by adding and removing the ip6gre tunnel
while running a super_netperf TCP_CRR test.
2. ip6_tnl_dst_get does not take the dst refcnt before returning
the dst.
This patch:
1. Create a percpu dst_entry cache in ip6_tnl
2. Use a spinlock to protect the dst_cache operations
3. Make ip6_tnl_dst_get always take the dst refcnt before returning
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv6/ip6_gre.c | 38 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 122 |
2 files changed, 114 insertions, 46 deletions
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 24f5dd8f76a8..646512488c28 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c | |||
@@ -637,17 +637,17 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |||
637 | dst = ip6_tnl_dst_get(tunnel); | 637 | dst = ip6_tnl_dst_get(tunnel); |
638 | 638 | ||
639 | if (!dst) { | 639 | if (!dst) { |
640 | ndst = ip6_route_output(net, NULL, fl6); | 640 | dst = ip6_route_output(net, NULL, fl6); |
641 | 641 | ||
642 | if (ndst->error) | 642 | if (dst->error) |
643 | goto tx_err_link_failure; | 643 | goto tx_err_link_failure; |
644 | ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); | 644 | dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); |
645 | if (IS_ERR(ndst)) { | 645 | if (IS_ERR(dst)) { |
646 | err = PTR_ERR(ndst); | 646 | err = PTR_ERR(dst); |
647 | ndst = NULL; | 647 | dst = NULL; |
648 | goto tx_err_link_failure; | 648 | goto tx_err_link_failure; |
649 | } | 649 | } |
650 | dst = ndst; | 650 | ndst = dst; |
651 | } | 651 | } |
652 | 652 | ||
653 | tdev = dst->dev; | 653 | tdev = dst->dev; |
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |||
702 | skb = new_skb; | 702 | skb = new_skb; |
703 | } | 703 | } |
704 | 704 | ||
705 | if (fl6->flowi6_mark) { | 705 | if (!fl6->flowi6_mark && ndst) |
706 | skb_dst_set(skb, dst); | 706 | ip6_tnl_dst_set(tunnel, ndst); |
707 | ndst = NULL; | 707 | skb_dst_set(skb, dst); |
708 | } else { | ||
709 | skb_dst_set_noref(skb, dst); | ||
710 | } | ||
711 | 708 | ||
712 | proto = NEXTHDR_GRE; | 709 | proto = NEXTHDR_GRE; |
713 | if (encap_limit >= 0) { | 710 | if (encap_limit >= 0) { |
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |||
762 | skb_set_inner_protocol(skb, protocol); | 759 | skb_set_inner_protocol(skb, protocol); |
763 | 760 | ||
764 | ip6tunnel_xmit(NULL, skb, dev); | 761 | ip6tunnel_xmit(NULL, skb, dev); |
765 | if (ndst) | ||
766 | ip6_tnl_dst_set(tunnel, ndst); | ||
767 | return 0; | 762 | return 0; |
768 | tx_err_link_failure: | 763 | tx_err_link_failure: |
769 | stats->tx_carrier_errors++; | 764 | stats->tx_carrier_errors++; |
770 | dst_link_failure(skb); | 765 | dst_link_failure(skb); |
771 | tx_err_dst_release: | 766 | tx_err_dst_release: |
772 | dst_release(ndst); | 767 | dst_release(dst); |
773 | return err; | 768 | return err; |
774 | } | 769 | } |
775 | 770 | ||
@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = { | |||
1223 | 1218 | ||
1224 | static void ip6gre_dev_free(struct net_device *dev) | 1219 | static void ip6gre_dev_free(struct net_device *dev) |
1225 | { | 1220 | { |
1221 | struct ip6_tnl *t = netdev_priv(dev); | ||
1222 | |||
1223 | ip6_tnl_dst_destroy(t); | ||
1226 | free_percpu(dev->tstats); | 1224 | free_percpu(dev->tstats); |
1227 | free_netdev(dev); | 1225 | free_netdev(dev); |
1228 | } | 1226 | } |
@@ -1248,6 +1246,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) | |||
1248 | static int ip6gre_tunnel_init_common(struct net_device *dev) | 1246 | static int ip6gre_tunnel_init_common(struct net_device *dev) |
1249 | { | 1247 | { |
1250 | struct ip6_tnl *tunnel; | 1248 | struct ip6_tnl *tunnel; |
1249 | int ret; | ||
1251 | 1250 | ||
1252 | tunnel = netdev_priv(dev); | 1251 | tunnel = netdev_priv(dev); |
1253 | 1252 | ||
@@ -1259,6 +1258,13 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) | |||
1259 | if (!dev->tstats) | 1258 | if (!dev->tstats) |
1260 | return -ENOMEM; | 1259 | return -ENOMEM; |
1261 | 1260 | ||
1261 | ret = ip6_tnl_dst_init(tunnel); | ||
1262 | if (ret) { | ||
1263 | free_percpu(dev->tstats); | ||
1264 | dev->tstats = NULL; | ||
1265 | return ret; | ||
1266 | } | ||
1267 | |||
1262 | return 0; | 1268 | return 0; |
1263 | } | 1269 | } |
1264 | 1270 | ||
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 599b0b419fbc..851cf6d1eb45 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c | |||
@@ -126,37 +126,90 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) | |||
126 | * Locking : hash tables are protected by RCU and RTNL | 126 | * Locking : hash tables are protected by RCU and RTNL |
127 | */ | 127 | */ |
128 | 128 | ||
129 | struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) | 129 | static void __ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, |
130 | struct dst_entry *dst) | ||
130 | { | 131 | { |
131 | struct dst_entry *dst = t->dst_cache; | 132 | dst_release(idst->dst); |
132 | 133 | if (dst) { | |
133 | if (dst && dst->obsolete && | 134 | dst_hold(dst); |
134 | !dst->ops->check(dst, t->dst_cookie)) { | 135 | idst->cookie = rt6_get_cookie((struct rt6_info *)dst); |
135 | t->dst_cache = NULL; | 136 | } else { |
136 | dst_release(dst); | 137 | idst->cookie = 0; |
137 | return NULL; | ||
138 | } | 138 | } |
139 | idst->dst = dst; | ||
140 | } | ||
141 | |||
142 | static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, | ||
143 | struct dst_entry *dst) | ||
144 | { | ||
139 | 145 | ||
146 | spin_lock_bh(&idst->lock); | ||
147 | __ip6_tnl_per_cpu_dst_set(idst, dst); | ||
148 | spin_unlock_bh(&idst->lock); | ||
149 | } | ||
150 | |||
151 | struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) | ||
152 | { | ||
153 | struct ip6_tnl_dst *idst; | ||
154 | struct dst_entry *dst; | ||
155 | |||
156 | idst = raw_cpu_ptr(t->dst_cache); | ||
157 | spin_lock_bh(&idst->lock); | ||
158 | dst = idst->dst; | ||
159 | if (dst) { | ||
160 | if (!dst->obsolete || dst->ops->check(dst, idst->cookie)) { | ||
161 | dst_hold(idst->dst); | ||
162 | } else { | ||
163 | __ip6_tnl_per_cpu_dst_set(idst, NULL); | ||
164 | dst = NULL; | ||
165 | } | ||
166 | } | ||
167 | spin_unlock_bh(&idst->lock); | ||
140 | return dst; | 168 | return dst; |
141 | } | 169 | } |
142 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); | 170 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); |
143 | 171 | ||
144 | void ip6_tnl_dst_reset(struct ip6_tnl *t) | 172 | void ip6_tnl_dst_reset(struct ip6_tnl *t) |
145 | { | 173 | { |
146 | dst_release(t->dst_cache); | 174 | int i; |
147 | t->dst_cache = NULL; | 175 | |
176 | for_each_possible_cpu(i) | ||
177 | ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); | ||
148 | } | 178 | } |
149 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); | 179 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); |
150 | 180 | ||
151 | void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) | 181 | void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) |
152 | { | 182 | { |
153 | struct rt6_info *rt = (struct rt6_info *) dst; | 183 | ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); |
154 | t->dst_cookie = rt6_get_cookie(rt); | 184 | |
155 | dst_release(t->dst_cache); | ||
156 | t->dst_cache = dst; | ||
157 | } | 185 | } |
158 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); | 186 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); |
159 | 187 | ||
188 | void ip6_tnl_dst_destroy(struct ip6_tnl *t) | ||
189 | { | ||
190 | if (!t->dst_cache) | ||
191 | return; | ||
192 | |||
193 | ip6_tnl_dst_reset(t); | ||
194 | free_percpu(t->dst_cache); | ||
195 | } | ||
196 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); | ||
197 | |||
198 | int ip6_tnl_dst_init(struct ip6_tnl *t) | ||
199 | { | ||
200 | int i; | ||
201 | |||
202 | t->dst_cache = alloc_percpu(struct ip6_tnl_dst); | ||
203 | if (!t->dst_cache) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | for_each_possible_cpu(i) | ||
207 | spin_lock_init(&per_cpu_ptr(t->dst_cache, i)->lock); | ||
208 | |||
209 | return 0; | ||
210 | } | ||
211 | EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); | ||
212 | |||
160 | /** | 213 | /** |
161 | * ip6_tnl_lookup - fetch tunnel matching the end-point addresses | 214 | * ip6_tnl_lookup - fetch tunnel matching the end-point addresses |
162 | * @remote: the address of the tunnel exit-point | 215 | * @remote: the address of the tunnel exit-point |
@@ -271,6 +324,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) | |||
271 | 324 | ||
272 | static void ip6_dev_free(struct net_device *dev) | 325 | static void ip6_dev_free(struct net_device *dev) |
273 | { | 326 | { |
327 | struct ip6_tnl *t = netdev_priv(dev); | ||
328 | |||
329 | ip6_tnl_dst_destroy(t); | ||
274 | free_percpu(dev->tstats); | 330 | free_percpu(dev->tstats); |
275 | free_netdev(dev); | 331 | free_netdev(dev); |
276 | } | 332 | } |
@@ -1016,17 +1072,17 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, | |||
1016 | goto tx_err_link_failure; | 1072 | goto tx_err_link_failure; |
1017 | 1073 | ||
1018 | if (!dst) { | 1074 | if (!dst) { |
1019 | ndst = ip6_route_output(net, NULL, fl6); | 1075 | dst = ip6_route_output(net, NULL, fl6); |
1020 | 1076 | ||
1021 | if (ndst->error) | 1077 | if (dst->error) |
1022 | goto tx_err_link_failure; | 1078 | goto tx_err_link_failure; |
1023 | ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); | 1079 | dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); |
1024 | if (IS_ERR(ndst)) { | 1080 | if (IS_ERR(dst)) { |
1025 | err = PTR_ERR(ndst); | 1081 | err = PTR_ERR(dst); |
1026 | ndst = NULL; | 1082 | dst = NULL; |
1027 | goto tx_err_link_failure; | 1083 | goto tx_err_link_failure; |
1028 | } | 1084 | } |
1029 | dst = ndst; | 1085 | ndst = dst; |
1030 | } | 1086 | } |
1031 | 1087 | ||
1032 | tdev = dst->dev; | 1088 | tdev = dst->dev; |
@@ -1072,12 +1128,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, | |||
1072 | consume_skb(skb); | 1128 | consume_skb(skb); |
1073 | skb = new_skb; | 1129 | skb = new_skb; |
1074 | } | 1130 | } |
1075 | if (fl6->flowi6_mark) { | 1131 | |
1076 | skb_dst_set(skb, dst); | 1132 | if (!fl6->flowi6_mark && ndst) |
1077 | ndst = NULL; | 1133 | ip6_tnl_dst_set(t, ndst); |
1078 | } else { | 1134 | skb_dst_set(skb, dst); |
1079 | skb_dst_set_noref(skb, dst); | 1135 | |
1080 | } | ||
1081 | skb->transport_header = skb->network_header; | 1136 | skb->transport_header = skb->network_header; |
1082 | 1137 | ||
1083 | proto = fl6->flowi6_proto; | 1138 | proto = fl6->flowi6_proto; |
@@ -1101,14 +1156,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, | |||
1101 | ipv6h->saddr = fl6->saddr; | 1156 | ipv6h->saddr = fl6->saddr; |
1102 | ipv6h->daddr = fl6->daddr; | 1157 | ipv6h->daddr = fl6->daddr; |
1103 | ip6tunnel_xmit(NULL, skb, dev); | 1158 | ip6tunnel_xmit(NULL, skb, dev); |
1104 | if (ndst) | ||
1105 | ip6_tnl_dst_set(t, ndst); | ||
1106 | return 0; | 1159 | return 0; |
1107 | tx_err_link_failure: | 1160 | tx_err_link_failure: |
1108 | stats->tx_carrier_errors++; | 1161 | stats->tx_carrier_errors++; |
1109 | dst_link_failure(skb); | 1162 | dst_link_failure(skb); |
1110 | tx_err_dst_release: | 1163 | tx_err_dst_release: |
1111 | dst_release(ndst); | 1164 | dst_release(dst); |
1112 | return err; | 1165 | return err; |
1113 | } | 1166 | } |
1114 | 1167 | ||
@@ -1573,12 +1626,21 @@ static inline int | |||
1573 | ip6_tnl_dev_init_gen(struct net_device *dev) | 1626 | ip6_tnl_dev_init_gen(struct net_device *dev) |
1574 | { | 1627 | { |
1575 | struct ip6_tnl *t = netdev_priv(dev); | 1628 | struct ip6_tnl *t = netdev_priv(dev); |
1629 | int ret; | ||
1576 | 1630 | ||
1577 | t->dev = dev; | 1631 | t->dev = dev; |
1578 | t->net = dev_net(dev); | 1632 | t->net = dev_net(dev); |
1579 | dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); | 1633 | dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); |
1580 | if (!dev->tstats) | 1634 | if (!dev->tstats) |
1581 | return -ENOMEM; | 1635 | return -ENOMEM; |
1636 | |||
1637 | ret = ip6_tnl_dst_init(t); | ||
1638 | if (ret) { | ||
1639 | free_percpu(dev->tstats); | ||
1640 | dev->tstats = NULL; | ||
1641 | return ret; | ||
1642 | } | ||
1643 | |||
1582 | return 0; | 1644 | return 0; |
1583 | } | 1645 | } |
1584 | 1646 | ||