author	Martin KaFai Lau <kafai@fb.com>	2015-09-15 17:30:07 -0400
committer	David S. Miller <davem@davemloft.net>	2015-09-15 17:53:05 -0400
commit	cdf3464e6c6bd764277cbbe992cd12da735b92fb (patch)
tree	53e75a7f94aba7ced57ae7f2b527bce0d463a629 /net
parent	f230d1e891ba1da5953460516960894154f265db (diff)
ipv6: Fix dst_entry refcnt bugs in ip6_tunnel
Problems in the current dst_entry cache in the ip6_tunnel:

1. ip6_tnl_dst_set is racy. There is no lock to protect it:
   - One major problem is that the dst refcnt gets messed up. F.e.
     the same dst_cache can be released multiple times and then
     triggering the infamous dst refcnt < 0 warning message.
   - Another issue is the inconsistency between dst_cache and
     dst_cookie.

   It can be reproduced by adding and removing the ip6gre tunnel
   while running a super_netperf TCP_CRR test.

2. ip6_tnl_dst_get does not take the dst refcnt before returning
   the dst.

This patch:
1. Create a percpu dst_entry cache in ip6_tnl
2. Use a spinlock to protect the dst_cache operations
3. ip6_tnl_dst_get always takes the dst refcnt before returning

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
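The crux of the fix is that the cached dst is now read, validated, and reference-counted under a per-CPU spinlock, and ip6_tnl_dst_get() always hands its caller a reference of its own. The userspace C sketch below models that rule with a pthread spinlock and an atomic refcount; it is only an illustration of the locking/refcount pattern, not kernel code, and the names (struct obj, struct cache_entry, obj_hold, obj_put, cache_get, cache_set) are invented for the example.

/*
 * Userspace model (not kernel code) of the rule the patch enforces:
 * read the cache entry and take the reference under the same lock,
 * so a concurrent set/reset can never drop the last reference while
 * a reader still holds the pointer.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {				/* stands in for struct dst_entry */
	atomic_int refcnt;
};

struct cache_entry {			/* stands in for struct ip6_tnl_dst */
	pthread_spinlock_t lock;
	struct obj *cached;
};

static void obj_hold(struct obj *o)	/* models dst_hold() */
{
	atomic_fetch_add(&o->refcnt, 1);
}

static void obj_put(struct obj *o)	/* models dst_release() */
{
	if (o && atomic_fetch_sub(&o->refcnt, 1) == 1)
		free(o);
}

/* Like the new ip6_tnl_dst_get(): take a reference before returning. */
static struct obj *cache_get(struct cache_entry *e)
{
	struct obj *o;

	pthread_spin_lock(&e->lock);
	o = e->cached;
	if (o)
		obj_hold(o);
	pthread_spin_unlock(&e->lock);
	return o;			/* caller owns this reference */
}

/* Like the new ip6_tnl_dst_set()/_reset(): swap the entry under the lock. */
static void cache_set(struct cache_entry *e, struct obj *o)
{
	if (o)
		obj_hold(o);		/* the cache keeps its own reference */
	pthread_spin_lock(&e->lock);
	obj_put(e->cached);
	e->cached = o;
	pthread_spin_unlock(&e->lock);
}

int main(void)
{
	struct cache_entry e = { .cached = NULL };
	struct obj *o = calloc(1, sizeof(*o)), *got;

	if (!o)
		return 1;
	atomic_init(&o->refcnt, 1);
	pthread_spin_init(&e.lock, PTHREAD_PROCESS_PRIVATE);

	cache_set(&e, o);		/* cache takes its own reference */
	obj_put(o);			/* drop the creation reference */

	got = cache_get(&e);		/* reference taken under the lock */
	printf("got %p\n", (void *)got);
	obj_put(got);

	cache_set(&e, NULL);		/* reset: cache drops its reference */
	pthread_spin_destroy(&e.lock);
	return 0;
}

Built with cc -pthread, the program runs one set/get/put cycle; the property that matters is that cache_get() never returns a pointer without first raising its refcount, which is what closes the "dst refcnt < 0" window described above.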
Diffstat (limited to 'net')
-rw-r--r--	net/ipv6/ip6_gre.c	38
-rw-r--r--	net/ipv6/ip6_tunnel.c	122
2 files changed, 114 insertions, 46 deletions
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 24f5dd8f76a8..646512488c28 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -637,17 +637,17 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	dst = ip6_tnl_dst_get(tunnel);
 
 	if (!dst) {
-		ndst = ip6_route_output(net, NULL, fl6);
+		dst = ip6_route_output(net, NULL, fl6);
 
-		if (ndst->error)
+		if (dst->error)
 			goto tx_err_link_failure;
-		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
-		if (IS_ERR(ndst)) {
-			err = PTR_ERR(ndst);
-			ndst = NULL;
+		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			dst = NULL;
 			goto tx_err_link_failure;
 		}
-		dst = ndst;
+		ndst = dst;
 	}
 
 	tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 		skb = new_skb;
 	}
 
-	if (fl6->flowi6_mark) {
-		skb_dst_set(skb, dst);
-		ndst = NULL;
-	} else {
-		skb_dst_set_noref(skb, dst);
-	}
+	if (!fl6->flowi6_mark && ndst)
+		ip6_tnl_dst_set(tunnel, ndst);
+	skb_dst_set(skb, dst);
 
 	proto = NEXTHDR_GRE;
 	if (encap_limit >= 0) {
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	skb_set_inner_protocol(skb, protocol);
 
 	ip6tunnel_xmit(NULL, skb, dev);
-	if (ndst)
-		ip6_tnl_dst_set(tunnel, ndst);
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
 	dst_link_failure(skb);
 tx_err_dst_release:
-	dst_release(ndst);
+	dst_release(dst);
 	return err;
 }
 
@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
 
 static void ip6gre_dev_free(struct net_device *dev)
 {
+	struct ip6_tnl *t = netdev_priv(dev);
+
+	ip6_tnl_dst_destroy(t);
 	free_percpu(dev->tstats);
 	free_netdev(dev);
 }
@@ -1248,6 +1246,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 static int ip6gre_tunnel_init_common(struct net_device *dev)
 {
 	struct ip6_tnl *tunnel;
+	int ret;
 
 	tunnel = netdev_priv(dev);
 
@@ -1259,6 +1258,13 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	ret = ip6_tnl_dst_init(tunnel);
+	if (ret) {
+		free_percpu(dev->tstats);
+		dev->tstats = NULL;
+		return ret;
+	}
+
 	return 0;
 }
 
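The ip6_gre.c hunks above also wire the new cache into the device lifecycle: ip6gre_tunnel_init_common() calls ip6_tnl_dst_init() after dev->tstats has been allocated and rolls that allocation back if the percpu cache cannot be created, while ip6gre_dev_free() calls ip6_tnl_dst_destroy() before freeing tstats and the netdev. Below is a minimal userspace sketch of that init/teardown ordering; the helper names (tnl_init, tnl_cache_init, tnl_cache_destroy) and the fixed NCPUS stand in for the kernel APIs and are not from the sources.

/*
 * Userspace model (not kernel code) of the allocate-late/free-early
 * ordering the patch adds: the per-CPU cache is the second allocation,
 * so a failure unwinds the first, and teardown releases the cache
 * before the stats it was allocated after.
 */
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

#define NCPUS 4				/* stands in for the possible CPUs */

struct cache_entry {
	pthread_spinlock_t lock;
	void *cached;			/* stands in for the cached dst */
};

struct tunnel {
	long *tstats;			/* stands in for dev->tstats */
	struct cache_entry *dst_cache;	/* stands in for the percpu cache */
};

static int tnl_cache_init(struct tunnel *t)	/* like ip6_tnl_dst_init() */
{
	int i;

	t->dst_cache = calloc(NCPUS, sizeof(*t->dst_cache));
	if (!t->dst_cache)
		return -ENOMEM;
	for (i = 0; i < NCPUS; i++)
		pthread_spin_init(&t->dst_cache[i].lock,
				  PTHREAD_PROCESS_PRIVATE);
	return 0;
}

static void tnl_cache_destroy(struct tunnel *t)	/* like ip6_tnl_dst_destroy() */
{
	int i;

	if (!t->dst_cache)
		return;
	for (i = 0; i < NCPUS; i++) {
		t->dst_cache[i].cached = NULL;	/* "reset" each entry */
		pthread_spin_destroy(&t->dst_cache[i].lock);
	}
	free(t->dst_cache);
	t->dst_cache = NULL;
}

/* Mirrors ip6gre_tunnel_init_common(): roll back tstats if init fails. */
static int tnl_init(struct tunnel *t)
{
	int ret;

	t->tstats = calloc(NCPUS, sizeof(*t->tstats));
	if (!t->tstats)
		return -ENOMEM;

	ret = tnl_cache_init(t);
	if (ret) {
		free(t->tstats);
		t->tstats = NULL;
		return ret;
	}
	return 0;
}

int main(void)
{
	struct tunnel t = { 0 };

	if (tnl_init(&t))
		return 1;
	tnl_cache_destroy(&t);		/* mirrors the ip6gre_dev_free() order */
	free(t.tstats);
	return 0;
}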
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 599b0b419fbc..851cf6d1eb45 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,37 +126,90 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
  * Locking : hash tables are protected by RCU and RTNL
  */
 
-struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
+static void __ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
+				      struct dst_entry *dst)
 {
-	struct dst_entry *dst = t->dst_cache;
-
-	if (dst && dst->obsolete &&
-	    !dst->ops->check(dst, t->dst_cookie)) {
-		t->dst_cache = NULL;
-		dst_release(dst);
-		return NULL;
+	dst_release(idst->dst);
+	if (dst) {
+		dst_hold(dst);
+		idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
+	} else {
+		idst->cookie = 0;
 	}
+	idst->dst = dst;
+}
+
+static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
+				    struct dst_entry *dst)
+{
 
+	spin_lock_bh(&idst->lock);
+	__ip6_tnl_per_cpu_dst_set(idst, dst);
+	spin_unlock_bh(&idst->lock);
+}
+
+struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
+{
+	struct ip6_tnl_dst *idst;
+	struct dst_entry *dst;
+
+	idst = raw_cpu_ptr(t->dst_cache);
+	spin_lock_bh(&idst->lock);
+	dst = idst->dst;
+	if (dst) {
+		if (!dst->obsolete || dst->ops->check(dst, idst->cookie)) {
+			dst_hold(idst->dst);
+		} else {
+			__ip6_tnl_per_cpu_dst_set(idst, NULL);
+			dst = NULL;
+		}
+	}
+	spin_unlock_bh(&idst->lock);
 	return dst;
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
 
 void ip6_tnl_dst_reset(struct ip6_tnl *t)
 {
-	dst_release(t->dst_cache);
-	t->dst_cache = NULL;
+	int i;
+
+	for_each_possible_cpu(i)
+		ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 
 void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
 {
-	struct rt6_info *rt = (struct rt6_info *) dst;
-	t->dst_cookie = rt6_get_cookie(rt);
-	dst_release(t->dst_cache);
-	t->dst_cache = dst;
+	ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
+
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
 
+void ip6_tnl_dst_destroy(struct ip6_tnl *t)
+{
+	if (!t->dst_cache)
+		return;
+
+	ip6_tnl_dst_reset(t);
+	free_percpu(t->dst_cache);
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
+
+int ip6_tnl_dst_init(struct ip6_tnl *t)
+{
+	int i;
+
+	t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
+	if (!t->dst_cache)
+		return -ENOMEM;
+
+	for_each_possible_cpu(i)
+		spin_lock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
+
 /**
  * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
  * @remote: the address of the tunnel exit-point
@@ -271,6 +324,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 
 static void ip6_dev_free(struct net_device *dev)
 {
+	struct ip6_tnl *t = netdev_priv(dev);
+
+	ip6_tnl_dst_destroy(t);
 	free_percpu(dev->tstats);
 	free_netdev(dev);
 }
@@ -1016,17 +1072,17 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 		goto tx_err_link_failure;
 
 	if (!dst) {
-		ndst = ip6_route_output(net, NULL, fl6);
+		dst = ip6_route_output(net, NULL, fl6);
 
-		if (ndst->error)
+		if (dst->error)
 			goto tx_err_link_failure;
-		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
-		if (IS_ERR(ndst)) {
-			err = PTR_ERR(ndst);
-			ndst = NULL;
+		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			dst = NULL;
 			goto tx_err_link_failure;
 		}
-		dst = ndst;
+		ndst = dst;
 	}
 
 	tdev = dst->dev;
@@ -1072,12 +1128,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 		consume_skb(skb);
 		skb = new_skb;
 	}
-	if (fl6->flowi6_mark) {
-		skb_dst_set(skb, dst);
-		ndst = NULL;
-	} else {
-		skb_dst_set_noref(skb, dst);
-	}
+
+	if (!fl6->flowi6_mark && ndst)
+		ip6_tnl_dst_set(t, ndst);
+	skb_dst_set(skb, dst);
+
 	skb->transport_header = skb->network_header;
 
 	proto = fl6->flowi6_proto;
@@ -1101,14 +1156,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
 	ip6tunnel_xmit(NULL, skb, dev);
-	if (ndst)
-		ip6_tnl_dst_set(t, ndst);
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
 	dst_link_failure(skb);
 tx_err_dst_release:
-	dst_release(ndst);
+	dst_release(dst);
 	return err;
 }
 
@@ -1573,12 +1626,21 @@ static inline int
 ip6_tnl_dev_init_gen(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
+	int ret;
 
 	t->dev = dev;
 	t->net = dev_net(dev);
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
+
+	ret = ip6_tnl_dst_init(t);
+	if (ret) {
+		free_percpu(dev->tstats);
+		dev->tstats = NULL;
+		return ret;
+	}
+
 	return 0;
 }
 
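One detail the per-CPU rewrite keeps from the old code is the cookie check: ip6_tnl_dst_get() reuses a cached entry only while dst->obsolete is clear or dst->ops->check(dst, idst->cookie) still passes, and otherwise drops it under the lock so the transmit path falls back to ip6_route_output(). The sketch below models that staleness test with a simple global generation counter; the counter, struct route, and the helper names are invented for illustration, not taken from the kernel.

/*
 * Userspace model (not kernel code) of the cookie-based staleness
 * check: the cookie is captured when the route is cached and the
 * cached pointer is only reused while the cookie still validates.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct route {
	int ifindex;			/* placeholder payload */
};

struct cache_entry {
	pthread_spinlock_t lock;
	struct route *cached;
	unsigned int cookie;		/* captured at cache_set() time */
};

static unsigned int current_genid;	/* bumped when "routing" changes */

static bool cookie_still_valid(unsigned int cookie)
{
	return cookie == current_genid;	/* models dst->ops->check(dst, cookie) */
}

/* Cache a route together with the cookie that lets us re-check it later. */
static void cache_set(struct cache_entry *e, struct route *rt)
{
	pthread_spin_lock(&e->lock);
	e->cached = rt;
	e->cookie = current_genid;
	pthread_spin_unlock(&e->lock);
}

/* Return the cached route only while its cookie still validates. */
static struct route *cache_get(struct cache_entry *e)
{
	struct route *rt;

	pthread_spin_lock(&e->lock);
	rt = e->cached;
	if (rt && !cookie_still_valid(e->cookie)) {
		e->cached = NULL;	/* stale: force a fresh lookup */
		rt = NULL;
	}
	pthread_spin_unlock(&e->lock);
	return rt;
}

int main(void)
{
	struct cache_entry e = { .cached = NULL, .cookie = 0 };
	struct route rt = { .ifindex = 1 };

	pthread_spin_init(&e.lock, PTHREAD_PROCESS_PRIVATE);

	cache_set(&e, &rt);
	printf("hit:  %p\n", (void *)cache_get(&e));

	current_genid++;		/* e.g. a route was added or removed */
	printf("miss: %p\n", (void *)cache_get(&e));

	pthread_spin_destroy(&e.lock);
	return 0;
}

Compiled with cc -pthread, the first lookup prints a non-null pointer and, after the generation counter is bumped, the second prints a null one: the same fall-back to a fresh route lookup the xmit hunks perform when the cached dst goes stale.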