diff options
-rw-r--r-- | include/net/dst.h | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 75 |
2 files changed, 47 insertions, 30 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index a217c838ec0d..ffe9cb719c0e 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -95,7 +95,7 @@ struct dst_entry { | |||
95 | unsigned long lastuse; | 95 | unsigned long lastuse; |
96 | union { | 96 | union { |
97 | struct dst_entry *next; | 97 | struct dst_entry *next; |
98 | struct rtable *rt_next; | 98 | struct rtable __rcu *rt_next; |
99 | struct rt6_info *rt6_next; | 99 | struct rt6_info *rt6_next; |
100 | struct dn_route *dn_next; | 100 | struct dn_route *dn_next; |
101 | }; | 101 | }; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = { | |||
198 | */ | 198 | */ |
199 | 199 | ||
200 | struct rt_hash_bucket { | 200 | struct rt_hash_bucket { |
201 | struct rtable *chain; | 201 | struct rtable __rcu *chain; |
202 | }; | 202 | }; |
203 | 203 | ||
204 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | 204 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ |
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
280 | struct rtable *r = NULL; | 280 | struct rtable *r = NULL; |
281 | 281 | ||
282 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 282 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
283 | if (!rt_hash_table[st->bucket].chain) | 283 | if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) |
284 | continue; | 284 | continue; |
285 | rcu_read_lock_bh(); | 285 | rcu_read_lock_bh(); |
286 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); | 286 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
300 | { | 300 | { |
301 | struct rt_cache_iter_state *st = seq->private; | 301 | struct rt_cache_iter_state *st = seq->private; |
302 | 302 | ||
303 | r = r->dst.rt_next; | 303 | r = rcu_dereference_bh(r->dst.rt_next); |
304 | while (!r) { | 304 | while (!r) { |
305 | rcu_read_unlock_bh(); | 305 | rcu_read_unlock_bh(); |
306 | do { | 306 | do { |
307 | if (--st->bucket < 0) | 307 | if (--st->bucket < 0) |
308 | return NULL; | 308 | return NULL; |
309 | } while (!rt_hash_table[st->bucket].chain); | 309 | } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); |
310 | rcu_read_lock_bh(); | 310 | rcu_read_lock_bh(); |
311 | r = rt_hash_table[st->bucket].chain; | 311 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
312 | } | 312 | } |
313 | return rcu_dereference_bh(r); | 313 | return r; |
314 | } | 314 | } |
315 | 315 | ||
316 | static struct rtable *rt_cache_get_next(struct seq_file *seq, | 316 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context) | |||
721 | for (i = 0; i <= rt_hash_mask; i++) { | 721 | for (i = 0; i <= rt_hash_mask; i++) { |
722 | if (process_context && need_resched()) | 722 | if (process_context && need_resched()) |
723 | cond_resched(); | 723 | cond_resched(); |
724 | rth = rt_hash_table[i].chain; | 724 | rth = rcu_dereference_raw(rt_hash_table[i].chain); |
725 | if (!rth) | 725 | if (!rth) |
726 | continue; | 726 | continue; |
727 | 727 | ||
728 | spin_lock_bh(rt_hash_lock_addr(i)); | 728 | spin_lock_bh(rt_hash_lock_addr(i)); |
729 | #ifdef CONFIG_NET_NS | 729 | #ifdef CONFIG_NET_NS |
730 | { | 730 | { |
731 | struct rtable ** prev, * p; | 731 | struct rtable __rcu **prev; |
732 | struct rtable *p; | ||
732 | 733 | ||
733 | rth = rt_hash_table[i].chain; | 734 | rth = rcu_dereference_protected(rt_hash_table[i].chain, |
735 | lockdep_is_held(rt_hash_lock_addr(i))); | ||
734 | 736 | ||
735 | /* defer releasing the head of the list after spin_unlock */ | 737 | /* defer releasing the head of the list after spin_unlock */ |
736 | for (tail = rth; tail; tail = tail->dst.rt_next) | 738 | for (tail = rth; tail; |
739 | tail = rcu_dereference_protected(tail->dst.rt_next, | ||
740 | lockdep_is_held(rt_hash_lock_addr(i)))) | ||
737 | if (!rt_is_expired(tail)) | 741 | if (!rt_is_expired(tail)) |
738 | break; | 742 | break; |
739 | if (rth != tail) | 743 | if (rth != tail) |
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context) | |||
741 | 745 | ||
742 | /* call rt_free on entries after the tail requiring flush */ | 746 | /* call rt_free on entries after the tail requiring flush */ |
743 | prev = &rt_hash_table[i].chain; | 747 | prev = &rt_hash_table[i].chain; |
744 | for (p = *prev; p; p = next) { | 748 | for (p = rcu_dereference_protected(*prev, |
745 | next = p->dst.rt_next; | 749 | lockdep_is_held(rt_hash_lock_addr(i))); |
750 | p != NULL; | ||
751 | p = next) { | ||
752 | next = rcu_dereference_protected(p->dst.rt_next, | ||
753 | lockdep_is_held(rt_hash_lock_addr(i))); | ||
746 | if (!rt_is_expired(p)) { | 754 | if (!rt_is_expired(p)) { |
747 | prev = &p->dst.rt_next; | 755 | prev = &p->dst.rt_next; |
748 | } else { | 756 | } else { |
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context) | |||
752 | } | 760 | } |
753 | } | 761 | } |
754 | #else | 762 | #else |
755 | rth = rt_hash_table[i].chain; | 763 | rth = rcu_dereference_protected(rt_hash_table[i].chain, |
756 | rt_hash_table[i].chain = NULL; | 764 | lockdep_is_held(rt_hash_lock_addr(i))); |
765 | rcu_assign_pointer(rt_hash_table[i].chain, NULL); | ||
757 | tail = NULL; | 766 | tail = NULL; |
758 | #endif | 767 | #endif |
759 | spin_unlock_bh(rt_hash_lock_addr(i)); | 768 | spin_unlock_bh(rt_hash_lock_addr(i)); |
760 | 769 | ||
761 | for (; rth != tail; rth = next) { | 770 | for (; rth != tail; rth = next) { |
762 | next = rth->dst.rt_next; | 771 | next = rcu_dereference_protected(rth->dst.rt_next, 1); |
763 | rt_free(rth); | 772 | rt_free(rth); |
764 | } | 773 | } |
765 | } | 774 | } |
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
790 | while (aux != rth) { | 799 | while (aux != rth) { |
791 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | 800 | if (compare_hash_inputs(&aux->fl, &rth->fl)) |
792 | return 0; | 801 | return 0; |
793 | aux = aux->dst.rt_next; | 802 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); |
794 | } | 803 | } |
795 | return ONE; | 804 | return ONE; |
796 | } | 805 | } |
@@ -799,7 +808,8 @@ static void rt_check_expire(void) | |||
799 | { | 808 | { |
800 | static unsigned int rover; | 809 | static unsigned int rover; |
801 | unsigned int i = rover, goal; | 810 | unsigned int i = rover, goal; |
802 | struct rtable *rth, **rthp; | 811 | struct rtable *rth; |
812 | struct rtable __rcu **rthp; | ||
803 | unsigned long samples = 0; | 813 | unsigned long samples = 0; |
804 | unsigned long sum = 0, sum2 = 0; | 814 | unsigned long sum = 0, sum2 = 0; |
805 | unsigned long delta; | 815 | unsigned long delta; |
@@ -825,11 +835,12 @@ static void rt_check_expire(void) | |||
825 | 835 | ||
826 | samples++; | 836 | samples++; |
827 | 837 | ||
828 | if (*rthp == NULL) | 838 | if (rcu_dereference_raw(*rthp) == NULL) |
829 | continue; | 839 | continue; |
830 | length = 0; | 840 | length = 0; |
831 | spin_lock_bh(rt_hash_lock_addr(i)); | 841 | spin_lock_bh(rt_hash_lock_addr(i)); |
832 | while ((rth = *rthp) != NULL) { | 842 | while ((rth = rcu_dereference_protected(*rthp, |
843 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
833 | prefetch(rth->dst.rt_next); | 844 | prefetch(rth->dst.rt_next); |
834 | if (rt_is_expired(rth)) { | 845 | if (rt_is_expired(rth)) { |
835 | *rthp = rth->dst.rt_next; | 846 | *rthp = rth->dst.rt_next; |
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
941 | static unsigned long last_gc; | 952 | static unsigned long last_gc; |
942 | static int rover; | 953 | static int rover; |
943 | static int equilibrium; | 954 | static int equilibrium; |
944 | struct rtable *rth, **rthp; | 955 | struct rtable *rth; |
956 | struct rtable __rcu **rthp; | ||
945 | unsigned long now = jiffies; | 957 | unsigned long now = jiffies; |
946 | int goal; | 958 | int goal; |
947 | int entries = dst_entries_get_fast(&ipv4_dst_ops); | 959 | int entries = dst_entries_get_fast(&ipv4_dst_ops); |
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
995 | k = (k + 1) & rt_hash_mask; | 1007 | k = (k + 1) & rt_hash_mask; |
996 | rthp = &rt_hash_table[k].chain; | 1008 | rthp = &rt_hash_table[k].chain; |
997 | spin_lock_bh(rt_hash_lock_addr(k)); | 1009 | spin_lock_bh(rt_hash_lock_addr(k)); |
998 | while ((rth = *rthp) != NULL) { | 1010 | while ((rth = rcu_dereference_protected(*rthp, |
1011 | lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { | ||
999 | if (!rt_is_expired(rth) && | 1012 | if (!rt_is_expired(rth) && |
1000 | !rt_may_expire(rth, tmo, expire)) { | 1013 | !rt_may_expire(rth, tmo, expire)) { |
1001 | tmo >>= 1; | 1014 | tmo >>= 1; |
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head) | |||
1071 | 1084 | ||
1072 | while (rth) { | 1085 | while (rth) { |
1073 | length += has_noalias(head, rth); | 1086 | length += has_noalias(head, rth); |
1074 | rth = rth->dst.rt_next; | 1087 | rth = rcu_dereference_protected(rth->dst.rt_next, 1); |
1075 | } | 1088 | } |
1076 | return length >> FRACT_BITS; | 1089 | return length >> FRACT_BITS; |
1077 | } | 1090 | } |
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head) | |||
1079 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1092 | static int rt_intern_hash(unsigned hash, struct rtable *rt, |
1080 | struct rtable **rp, struct sk_buff *skb, int ifindex) | 1093 | struct rtable **rp, struct sk_buff *skb, int ifindex) |
1081 | { | 1094 | { |
1082 | struct rtable *rth, **rthp; | 1095 | struct rtable *rth, *cand; |
1096 | struct rtable __rcu **rthp, **candp; | ||
1083 | unsigned long now; | 1097 | unsigned long now; |
1084 | struct rtable *cand, **candp; | ||
1085 | u32 min_score; | 1098 | u32 min_score; |
1086 | int chain_length; | 1099 | int chain_length; |
1087 | int attempts = !in_softirq(); | 1100 | int attempts = !in_softirq(); |
@@ -1128,7 +1141,8 @@ restart: | |||
1128 | rthp = &rt_hash_table[hash].chain; | 1141 | rthp = &rt_hash_table[hash].chain; |
1129 | 1142 | ||
1130 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1143 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1131 | while ((rth = *rthp) != NULL) { | 1144 | while ((rth = rcu_dereference_protected(*rthp, |
1145 | lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { | ||
1132 | if (rt_is_expired(rth)) { | 1146 | if (rt_is_expired(rth)) { |
1133 | *rthp = rth->dst.rt_next; | 1147 | *rthp = rth->dst.rt_next; |
1134 | rt_free(rth); | 1148 | rt_free(rth); |
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident); | |||
1324 | 1338 | ||
1325 | static void rt_del(unsigned hash, struct rtable *rt) | 1339 | static void rt_del(unsigned hash, struct rtable *rt) |
1326 | { | 1340 | { |
1327 | struct rtable **rthp, *aux; | 1341 | struct rtable __rcu **rthp; |
1342 | struct rtable *aux; | ||
1328 | 1343 | ||
1329 | rthp = &rt_hash_table[hash].chain; | 1344 | rthp = &rt_hash_table[hash].chain; |
1330 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1345 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1331 | ip_rt_put(rt); | 1346 | ip_rt_put(rt); |
1332 | while ((aux = *rthp) != NULL) { | 1347 | while ((aux = rcu_dereference_protected(*rthp, |
1348 | lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { | ||
1333 | if (aux == rt || rt_is_expired(aux)) { | 1349 | if (aux == rt || rt_is_expired(aux)) { |
1334 | *rthp = aux->dst.rt_next; | 1350 | *rthp = aux->dst.rt_next; |
1335 | rt_free(aux); | 1351 | rt_free(aux); |
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1346 | { | 1362 | { |
1347 | int i, k; | 1363 | int i, k; |
1348 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1364 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1349 | struct rtable *rth, **rthp; | 1365 | struct rtable *rth; |
1366 | struct rtable __rcu **rthp; | ||
1350 | __be32 skeys[2] = { saddr, 0 }; | 1367 | __be32 skeys[2] = { saddr, 0 }; |
1351 | int ikeys[2] = { dev->ifindex, 0 }; | 1368 | int ikeys[2] = { dev->ifindex, 0 }; |
1352 | struct netevent_redirect netevent; | 1369 | struct netevent_redirect netevent; |
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1379 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1396 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], |
1380 | rt_genid(net)); | 1397 | rt_genid(net)); |
1381 | 1398 | ||
1382 | rthp=&rt_hash_table[hash].chain; | 1399 | rthp = &rt_hash_table[hash].chain; |
1383 | 1400 | ||
1384 | while ((rth = rcu_dereference(*rthp)) != NULL) { | 1401 | while ((rth = rcu_dereference(*rthp)) != NULL) { |
1385 | struct rtable *rt; | 1402 | struct rtable *rt; |