diff options
-rw-r--r-- | include/net/ip_fib.h | 1 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 39 | ||||
-rw-r--r-- | net/ipv4/route.c | 103 |
3 files changed, 79 insertions, 64 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e331746029b4..926142ed8d7a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -55,6 +55,7 @@ struct fib_nh_exception { | |||
55 | u32 fnhe_pmtu; | 55 | u32 fnhe_pmtu; |
56 | __be32 fnhe_gw; | 56 | __be32 fnhe_gw; |
57 | unsigned long fnhe_expires; | 57 | unsigned long fnhe_expires; |
58 | struct rtable __rcu *fnhe_rth; | ||
58 | unsigned long fnhe_stamp; | 59 | unsigned long fnhe_stamp; |
59 | }; | 60 | }; |
60 | 61 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index fe2ca02a1979..da80dc14cc76 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { | |||
140 | }, | 140 | }, |
141 | }; | 141 | }; |
142 | 142 | ||
143 | static void rt_fibinfo_free(struct rtable __rcu **rtp) | ||
144 | { | ||
145 | struct rtable *rt = rcu_dereference_protected(*rtp, 1); | ||
146 | |||
147 | if (!rt) | ||
148 | return; | ||
149 | |||
150 | /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); | ||
151 | * because we waited an RCU grace period before calling | ||
152 | * free_fib_info_rcu() | ||
153 | */ | ||
154 | |||
155 | dst_free(&rt->dst); | ||
156 | } | ||
157 | |||
143 | static void free_nh_exceptions(struct fib_nh *nh) | 158 | static void free_nh_exceptions(struct fib_nh *nh) |
144 | { | 159 | { |
145 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; | 160 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; |
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) | |||
153 | struct fib_nh_exception *next; | 168 | struct fib_nh_exception *next; |
154 | 169 | ||
155 | next = rcu_dereference_protected(fnhe->fnhe_next, 1); | 170 | next = rcu_dereference_protected(fnhe->fnhe_next, 1); |
171 | |||
172 | rt_fibinfo_free(&fnhe->fnhe_rth); | ||
173 | |||
156 | kfree(fnhe); | 174 | kfree(fnhe); |
157 | 175 | ||
158 | fnhe = next; | 176 | fnhe = next; |
@@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh) | |||
161 | kfree(hash); | 179 | kfree(hash); |
162 | } | 180 | } |
163 | 181 | ||
164 | static void rt_nexthop_free(struct rtable __rcu **rtp) | 182 | static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) |
165 | { | ||
166 | struct rtable *rt = rcu_dereference_protected(*rtp, 1); | ||
167 | |||
168 | if (!rt) | ||
169 | return; | ||
170 | |||
171 | /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); | ||
172 | * because we waited an RCU grace period before calling | ||
173 | * free_fib_info_rcu() | ||
174 | */ | ||
175 | |||
176 | dst_free(&rt->dst); | ||
177 | } | ||
178 | |||
179 | static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp) | ||
180 | { | 183 | { |
181 | int cpu; | 184 | int cpu; |
182 | 185 | ||
@@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
203 | dev_put(nexthop_nh->nh_dev); | 206 | dev_put(nexthop_nh->nh_dev); |
204 | if (nexthop_nh->nh_exceptions) | 207 | if (nexthop_nh->nh_exceptions) |
205 | free_nh_exceptions(nexthop_nh); | 208 | free_nh_exceptions(nexthop_nh); |
206 | rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); | 209 | rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); |
207 | rt_nexthop_free(&nexthop_nh->nh_rth_input); | 210 | rt_fibinfo_free(&nexthop_nh->nh_rth_input); |
208 | } endfor_nexthops(fi); | 211 | } endfor_nexthops(fi); |
209 | 212 | ||
210 | release_net(fi->fib_net); | 213 | release_net(fi->fib_net); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4f6276ce0af3..b102eeb16e34 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, | |||
587 | build_sk_flow_key(fl4, sk); | 587 | build_sk_flow_key(fl4, sk); |
588 | } | 588 | } |
589 | 589 | ||
590 | static DEFINE_SEQLOCK(fnhe_seqlock); | 590 | static inline void rt_free(struct rtable *rt) |
591 | { | ||
592 | call_rcu(&rt->dst.rcu_head, dst_rcu_free); | ||
593 | } | ||
594 | |||
595 | static DEFINE_SPINLOCK(fnhe_lock); | ||
591 | 596 | ||
592 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | 597 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) |
593 | { | 598 | { |
594 | struct fib_nh_exception *fnhe, *oldest; | 599 | struct fib_nh_exception *fnhe, *oldest; |
600 | struct rtable *orig; | ||
595 | 601 | ||
596 | oldest = rcu_dereference(hash->chain); | 602 | oldest = rcu_dereference(hash->chain); |
597 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; | 603 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; |
@@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | |||
599 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) | 605 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) |
600 | oldest = fnhe; | 606 | oldest = fnhe; |
601 | } | 607 | } |
608 | orig = rcu_dereference(oldest->fnhe_rth); | ||
609 | if (orig) { | ||
610 | RCU_INIT_POINTER(oldest->fnhe_rth, NULL); | ||
611 | rt_free(orig); | ||
612 | } | ||
602 | return oldest; | 613 | return oldest; |
603 | } | 614 | } |
604 | 615 | ||
@@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
620 | int depth; | 631 | int depth; |
621 | u32 hval = fnhe_hashfun(daddr); | 632 | u32 hval = fnhe_hashfun(daddr); |
622 | 633 | ||
623 | write_seqlock_bh(&fnhe_seqlock); | 634 | spin_lock_bh(&fnhe_lock); |
624 | 635 | ||
625 | hash = nh->nh_exceptions; | 636 | hash = nh->nh_exceptions; |
626 | if (!hash) { | 637 | if (!hash) { |
@@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
667 | fnhe->fnhe_stamp = jiffies; | 678 | fnhe->fnhe_stamp = jiffies; |
668 | 679 | ||
669 | out_unlock: | 680 | out_unlock: |
670 | write_sequnlock_bh(&fnhe_seqlock); | 681 | spin_unlock_bh(&fnhe_lock); |
671 | return; | 682 | return; |
672 | } | 683 | } |
673 | 684 | ||
@@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) | |||
1167 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | 1178 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1168 | __be32 daddr) | 1179 | __be32 daddr) |
1169 | { | 1180 | { |
1170 | __be32 fnhe_daddr, gw; | 1181 | spin_lock_bh(&fnhe_lock); |
1171 | unsigned long expires; | ||
1172 | unsigned int seq; | ||
1173 | u32 pmtu; | ||
1174 | |||
1175 | restart: | ||
1176 | seq = read_seqbegin(&fnhe_seqlock); | ||
1177 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1178 | gw = fnhe->fnhe_gw; | ||
1179 | pmtu = fnhe->fnhe_pmtu; | ||
1180 | expires = fnhe->fnhe_expires; | ||
1181 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1182 | goto restart; | ||
1183 | |||
1184 | if (daddr != fnhe_daddr) | ||
1185 | return; | ||
1186 | 1182 | ||
1187 | if (pmtu) { | 1183 | if (daddr == fnhe->fnhe_daddr) { |
1188 | unsigned long diff = expires - jiffies; | 1184 | struct rtable *orig; |
1189 | 1185 | ||
1190 | if (time_before(jiffies, expires)) { | 1186 | if (fnhe->fnhe_pmtu) { |
1191 | rt->rt_pmtu = pmtu; | 1187 | unsigned long expires = fnhe->fnhe_expires; |
1192 | dst_set_expires(&rt->dst, diff); | 1188 | unsigned long diff = expires - jiffies; |
1189 | |||
1190 | if (time_before(jiffies, expires)) { | ||
1191 | rt->rt_pmtu = fnhe->fnhe_pmtu; | ||
1192 | dst_set_expires(&rt->dst, diff); | ||
1193 | } | ||
1194 | } | ||
1195 | if (fnhe->fnhe_gw) { | ||
1196 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1197 | rt->rt_gateway = fnhe->fnhe_gw; | ||
1193 | } | 1198 | } |
1194 | } | ||
1195 | if (gw) { | ||
1196 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1197 | rt->rt_gateway = gw; | ||
1198 | } | ||
1199 | fnhe->fnhe_stamp = jiffies; | ||
1200 | } | ||
1201 | 1199 | ||
1202 | static inline void rt_free(struct rtable *rt) | 1200 | orig = rcu_dereference(fnhe->fnhe_rth); |
1203 | { | 1201 | rcu_assign_pointer(fnhe->fnhe_rth, rt); |
1204 | call_rcu(&rt->dst.rcu_head, dst_rcu_free); | 1202 | if (orig) |
1203 | rt_free(orig); | ||
1204 | |||
1205 | fnhe->fnhe_stamp = jiffies; | ||
1206 | } else { | ||
1207 | /* Routes we intend to cache in nexthop exception have | ||
1208 | * the DST_NOCACHE bit clear. However, if we are | ||
1209 | * unsuccessful at storing this route into the cache | ||
1210 | * we really need to set it. | ||
1211 | */ | ||
1212 | rt->dst.flags |= DST_NOCACHE; | ||
1213 | } | ||
1214 | spin_unlock_bh(&fnhe_lock); | ||
1205 | } | 1215 | } |
1206 | 1216 | ||
1207 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | 1217 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
@@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1249 | 1259 | ||
1250 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1260 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) |
1251 | rt->rt_gateway = nh->nh_gw; | 1261 | rt->rt_gateway = nh->nh_gw; |
1252 | if (unlikely(fnhe)) | ||
1253 | rt_bind_exception(rt, fnhe, daddr); | ||
1254 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | 1262 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
1255 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1263 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1256 | rt->dst.tclassid = nh->nh_tclassid; | 1264 | rt->dst.tclassid = nh->nh_tclassid; |
1257 | #endif | 1265 | #endif |
1258 | if (!(rt->dst.flags & DST_NOCACHE)) | 1266 | if (unlikely(fnhe)) |
1267 | rt_bind_exception(rt, fnhe, daddr); | ||
1268 | else if (!(rt->dst.flags & DST_NOCACHE)) | ||
1259 | rt_cache_route(nh, rt); | 1269 | rt_cache_route(nh, rt); |
1260 | } | 1270 | } |
1261 | 1271 | ||
@@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1753 | 1763 | ||
1754 | fnhe = NULL; | 1764 | fnhe = NULL; |
1755 | if (fi) { | 1765 | if (fi) { |
1756 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | 1766 | struct rtable __rcu **prth; |
1757 | if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) { | ||
1758 | struct rtable __rcu **prth; | ||
1759 | 1767 | ||
1768 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | ||
1769 | if (fnhe) | ||
1770 | prth = &fnhe->fnhe_rth; | ||
1771 | else | ||
1760 | prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); | 1772 | prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); |
1761 | rth = rcu_dereference(*prth); | 1773 | rth = rcu_dereference(*prth); |
1762 | if (rt_cache_valid(rth)) { | 1774 | if (rt_cache_valid(rth)) { |
1763 | dst_hold(&rth->dst); | 1775 | dst_hold(&rth->dst); |
1764 | return rth; | 1776 | return rth; |
1765 | } | ||
1766 | } | 1777 | } |
1767 | } | 1778 | } |
1768 | rth = rt_dst_alloc(dev_out, | 1779 | rth = rt_dst_alloc(dev_out, |
1769 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1780 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
1770 | IN_DEV_CONF_GET(in_dev, NOXFRM), | 1781 | IN_DEV_CONF_GET(in_dev, NOXFRM), |
1771 | fi && !fnhe); | 1782 | fi); |
1772 | if (!rth) | 1783 | if (!rth) |
1773 | return ERR_PTR(-ENOBUFS); | 1784 | return ERR_PTR(-ENOBUFS); |
1774 | 1785 | ||