aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2010-11-15 01:33:11 -0500
committerChris Wilson <chris@chris-wilson.co.uk>2010-11-15 01:33:11 -0500
commit1bb95834bbcdc969e477a9284cf96c17a4c2616f (patch)
tree9cf66b22a611bb6bc78778c05dac72263bb45a23 /net/ipv4
parent85345517fe6d4de27b0d6ca19fef9d28ac947c4a (diff)
parenta41c73e04673b47730df682446f0d52f95e32a5b (diff)
Merge remote branch 'airlied/drm-fixes' into drm-intel-fixes
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c54
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/inetpeer.c138
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tunnel4.c29
-rw-r--r--net/ipv4/udp.c6
23 files changed, 271 insertions, 185 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
1052 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1052 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1053 hlist_del(node); 1053 hlist_del(node);
1054 fib_table_flush(tb); 1054 fib_table_flush(tb);
1055 kfree(tb); 1055 fib_free_table(tb);
1056 } 1056 }
1057 } 1057 }
1058 kfree(net->ipv4.fib_table_hash); 1058 kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
120 struct fib_node *f; 120 struct fib_node *f;
121 121
122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { 122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
123 struct hlist_head __rcu *new_head; 123 struct hlist_head *new_head;
124 124
125 hlist_del_rcu(&f->fn_hash); 125 hlist_del_rcu(&f->fn_hash);
126 126
127 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 127 new_head = rcu_dereference_protected(fz->fz_hash, 1) +
128 fn_hash(f->fn_key, fz);
128 hlist_add_head_rcu(&f->fn_hash, new_head); 129 hlist_add_head_rcu(&f->fn_hash, new_head);
129 } 130 }
130 } 131 }
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
179 memcpy(&nfz, fz, sizeof(nfz)); 180 memcpy(&nfz, fz, sizeof(nfz));
180 181
181 write_seqlock_bh(&fz->fz_lock); 182 write_seqlock_bh(&fz->fz_lock);
182 old_ht = fz->fz_hash; 183 old_ht = rcu_dereference_protected(fz->fz_hash, 1);
183 nfz.fz_hash = ht; 184 RCU_INIT_POINTER(nfz.fz_hash, ht);
184 nfz.fz_hashmask = new_hashmask; 185 nfz.fz_hashmask = new_hashmask;
185 nfz.fz_divisor = new_divisor; 186 nfz.fz_divisor = new_divisor;
186 fn_rebuild_zone(&nfz, old_ht, old_divisor); 187 fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
236 seqlock_init(&fz->fz_lock); 237 seqlock_init(&fz->fz_lock);
237 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; 238 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
238 fz->fz_hashmask = fz->fz_divisor - 1; 239 fz->fz_hashmask = fz->fz_divisor - 1;
239 fz->fz_hash = fz->fz_embedded_hash; 240 RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
240 fz->fz_order = z; 241 fz->fz_order = z;
241 fz->fz_revorder = 32 - z; 242 fz->fz_revorder = 32 - z;
242 fz->fz_mask = inet_make_mask(z); 243 fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
272 for (fz = rcu_dereference(t->fn_zone_list); 273 for (fz = rcu_dereference(t->fn_zone_list);
273 fz != NULL; 274 fz != NULL;
274 fz = rcu_dereference(fz->fz_next)) { 275 fz = rcu_dereference(fz->fz_next)) {
275 struct hlist_head __rcu *head; 276 struct hlist_head *head;
276 struct hlist_node *node; 277 struct hlist_node *node;
277 struct fib_node *f; 278 struct fib_node *f;
278 __be32 k; 279 __be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
282 seq = read_seqbegin(&fz->fz_lock); 283 seq = read_seqbegin(&fz->fz_lock);
283 k = fz_key(flp->fl4_dst, fz); 284 k = fz_key(flp->fl4_dst, fz);
284 285
285 head = &fz->fz_hash[fn_hash(k, fz)]; 286 head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
286 hlist_for_each_entry_rcu(f, node, head, fn_hash) { 287 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
287 if (f->fn_key != k) 288 if (f->fn_key != k)
288 continue; 289 continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
311 struct fib_info *last_resort; 312 struct fib_info *last_resort;
312 struct fn_hash *t = (struct fn_hash *)tb->tb_data; 313 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
313 struct fn_zone *fz = t->fn_zones[0]; 314 struct fn_zone *fz = t->fn_zones[0];
315 struct hlist_head *head;
314 316
315 if (fz == NULL) 317 if (fz == NULL)
316 return; 318 return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
320 order = -1; 322 order = -1;
321 323
322 rcu_read_lock(); 324 rcu_read_lock();
323 hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) { 325 head = rcu_dereference(fz->fz_hash);
326 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
324 struct fib_alias *fa; 327 struct fib_alias *fa;
325 328
326 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { 329 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
374/* Insert node F to FZ. */ 377/* Insert node F to FZ. */
375static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) 378static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
376{ 379{
377 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 380 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
378 381
379 hlist_add_head_rcu(&f->fn_hash, head); 382 hlist_add_head_rcu(&f->fn_hash, head);
380} 383}
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
382/* Return the node in FZ matching KEY. */ 385/* Return the node in FZ matching KEY. */
383static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) 386static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
384{ 387{
385 struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)]; 388 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
386 struct hlist_node *node; 389 struct hlist_node *node;
387 struct fib_node *f; 390 struct fib_node *f;
388 391
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
662 665
663static int fn_flush_list(struct fn_zone *fz, int idx) 666static int fn_flush_list(struct fn_zone *fz, int idx)
664{ 667{
665 struct hlist_head *head = &fz->fz_hash[idx]; 668 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
666 struct hlist_node *node, *n; 669 struct hlist_node *node, *n;
667 struct fib_node *f; 670 struct fib_node *f;
668 int found = 0; 671 int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
713 return found; 716 return found;
714} 717}
715 718
719void fib_free_table(struct fib_table *tb)
720{
721 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
716 737
717static inline int 738static inline int
718fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, 739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
761 struct fn_zone *fz) 782 struct fn_zone *fz)
762{ 783{
763 int h, s_h; 784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
764 786
765 if (fz->fz_hash == NULL) 787 if (head == NULL)
766 return skb->len; 788 return skb->len;
767 s_h = cb->args[3]; 789 s_h = cb->args[3];
768 for (h = s_h; h < fz->fz_divisor; h++) { 790 for (h = s_h; h < fz->fz_divisor; h++) {
769 if (hlist_empty(&fz->fz_hash[h])) 791 if (hlist_empty(head + h))
770 continue; 792 continue;
771 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) { 793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
772 cb->args[3] = h; 794 cb->args[3] = h;
773 return -1; 795 return -1;
774 } 796 }
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
872 if (!iter->zone->fz_nent) 894 if (!iter->zone->fz_nent)
873 continue; 895 continue;
874 896
875 iter->hash_head = iter->zone->fz_hash; 897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
876 maxslot = iter->zone->fz_divisor; 898 maxslot = iter->zone->fz_divisor;
877 899
878 for (iter->bucket = 0; iter->bucket < maxslot; 900 for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
957 goto out; 979 goto out;
958 980
959 iter->bucket = 0; 981 iter->bucket = 0;
960 iter->hash_head = iter->zone->fz_hash; 982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
961 983
962 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { 984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
963 list_for_each_entry(fa, &fn->fn_alias, fa_list) { 985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
47static inline void fib_result_assign(struct fib_result *res, 47static inline void fib_result_assign(struct fib_result *res,
48 struct fib_info *fi) 48 struct fib_info *fi)
49{ 49{
50 if (res->fi != NULL) 50 /* we used to play games with refcounts, but we now use RCU */
51 fib_info_put(res->fi);
52 res->fi = fi; 51 res->fi = fi;
53 if (fi != NULL)
54 atomic_inc(&fi->fib_clntref);
55} 52}
56 53
57#endif /* _FIB_LOOKUP_H */ 54#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..200eb538fbb3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
1797 return found; 1797 return found;
1798} 1798}
1799 1799
1800void fib_free_table(struct fib_table *tb)
1801{
1802 kfree(tb);
1803}
1804
1800void fib_table_select_default(struct fib_table *tb, 1805void fib_table_select_default(struct fib_table *tb,
1801 const struct flowi *flp, 1806 const struct flowi *flp,
1802 struct fib_result *res) 1807 struct fib_result *res)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
22#include <net/gre.h> 22#include <net/gre.h>
23 23
24 24
25static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; 25static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock); 26static DEFINE_SPINLOCK(gre_proto_lock);
27 27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version) 28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
51 goto err_out; 51 goto err_out;
52 52
53 spin_lock(&gre_proto_lock); 53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto) 54 if (rcu_dereference_protected(gre_proto[version],
55 lockdep_is_held(&gre_proto_lock)) != proto)
55 goto err_out_unlock; 56 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL); 57 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock); 58 spin_unlock(&gre_proto_lock);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
2306 2306
2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2308 (void) ip_mc_leave_src(sk, iml, in_dev); 2308 (void) ip_mc_leave_src(sk, iml, in_dev);
2309 if (in_dev != NULL) { 2309 if (in_dev != NULL)
2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2311 in_dev_put(in_dev);
2312 }
2313 /* decrease mem now to avoid the memleak warning */ 2311 /* decrease mem now to avoid the memleak warning */
2314 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2312 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2315 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2313 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
490{ 490{
491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
492 492
493 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 493 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
494 struct inet_diag_entry entry; 494 struct inet_diag_entry entry;
495 struct rtattr *bc = (struct rtattr *)(r + 1); 495 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
496 sizeof(*r),
497 INET_DIAG_REQ_BYTECODE);
496 struct inet_sock *inet = inet_sk(sk); 498 struct inet_sock *inet = inet_sk(sk);
497 499
498 entry.family = sk->sk_family; 500 entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
512 entry.dport = ntohs(inet->inet_dport); 514 entry.dport = ntohs(inet->inet_dport);
513 entry.userlocks = sk->sk_userlocks; 515 entry.userlocks = sk->sk_userlocks;
514 516
515 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 517 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
516 return 0; 518 return 0;
517 } 519 }
518 520
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
527{ 529{
528 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 530 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
529 531
530 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 532 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
531 struct inet_diag_entry entry; 533 struct inet_diag_entry entry;
532 struct rtattr *bc = (struct rtattr *)(r + 1); 534 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
535 sizeof(*r),
536 INET_DIAG_REQ_BYTECODE);
533 537
534 entry.family = tw->tw_family; 538 entry.family = tw->tw_family;
535#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 539#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
548 entry.dport = ntohs(tw->tw_dport); 552 entry.dport = ntohs(tw->tw_dport);
549 entry.userlocks = 0; 553 entry.userlocks = 0;
550 554
551 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 555 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
552 return 0; 556 return 0;
553 } 557 }
554 558
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
618 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 622 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
619 struct inet_connection_sock *icsk = inet_csk(sk); 623 struct inet_connection_sock *icsk = inet_csk(sk);
620 struct listen_sock *lopt; 624 struct listen_sock *lopt;
621 struct rtattr *bc = NULL; 625 const struct nlattr *bc = NULL;
622 struct inet_sock *inet = inet_sk(sk); 626 struct inet_sock *inet = inet_sk(sk);
623 int j, s_j; 627 int j, s_j;
624 int reqnum, s_reqnum; 628 int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
638 if (!lopt || !lopt->qlen) 642 if (!lopt || !lopt->qlen)
639 goto out; 643 goto out;
640 644
641 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 645 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
642 bc = (struct rtattr *)(r + 1); 646 bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
647 INET_DIAG_REQ_BYTECODE);
643 entry.sport = inet->inet_num; 648 entry.sport = inet->inet_num;
644 entry.userlocks = sk->sk_userlocks; 649 entry.userlocks = sk->sk_userlocks;
645 } 650 }
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
672 &ireq->rmt_addr; 677 &ireq->rmt_addr;
673 entry.dport = ntohs(ireq->rmt_port); 678 entry.dport = ntohs(ireq->rmt_port);
674 679
675 if (!inet_diag_bc_run(RTA_DATA(bc), 680 if (!inet_diag_bc_run(nla_data(bc),
676 RTA_PAYLOAD(bc), &entry)) 681 nla_len(bc), &entry))
677 continue; 682 continue;
678 } 683 }
679 684
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73 73
74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75static const struct inet_peer peer_fake_node = { 76static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty, 77 .avl_left = peer_avl_empty_rcu,
77 .avl_right = peer_avl_empty, 78 .avl_right = peer_avl_empty_rcu,
78 .avl_height = 0 79 .avl_height = 0
79}; 80};
80 81
81static struct { 82static struct {
82 struct inet_peer *root; 83 struct inet_peer __rcu *root;
83 spinlock_t lock; 84 spinlock_t lock;
84 int total; 85 int total;
85} peers = { 86} peers = {
86 .root = peer_avl_empty, 87 .root = peer_avl_empty_rcu,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0, 89 .total = 0,
89}; 90};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
156 */ 157 */
157#define lookup(_daddr, _stack) \ 158#define lookup(_daddr, _stack) \
158({ \ 159({ \
159 struct inet_peer *u, **v; \ 160 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \
160 \ 162 \
161 stackptr = _stack; \ 163 stackptr = _stack; \
162 *stackptr++ = &peers.root; \ 164 *stackptr++ = &peers.root; \
163 for (u = peers.root; u != peer_avl_empty; ) { \ 165 for (u = rcu_dereference_protected(peers.root, \
166 lockdep_is_held(&peers.lock)); \
167 u != peer_avl_empty; ) { \
164 if (_daddr == u->v4daddr) \ 168 if (_daddr == u->v4daddr) \
165 break; \ 169 break; \
166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
168 else \ 172 else \
169 v = &u->avl_right; \ 173 v = &u->avl_right; \
170 *stackptr++ = v; \ 174 *stackptr++ = v; \
171 u = *v; \ 175 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \
172 } \ 177 } \
173 u; \ 178 u; \
174}) 179})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
209/* Called with local BH disabled and the pool lock held. */ 214/* Called with local BH disabled and the pool lock held. */
210#define lookup_rightempty(start) \ 215#define lookup_rightempty(start) \
211({ \ 216({ \
212 struct inet_peer *u, **v; \ 217 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \
213 *stackptr++ = &start->avl_left; \ 219 *stackptr++ = &start->avl_left; \
214 v = &start->avl_left; \ 220 v = &start->avl_left; \
215 for (u = *v; u->avl_right != peer_avl_empty; ) { \ 221 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \
216 v = &u->avl_right; \ 224 v = &u->avl_right; \
217 *stackptr++ = v; \ 225 *stackptr++ = v; \
218 u = *v; \ 226 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \
219 } \ 228 } \
220 u; \ 229 u; \
221}) 230})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
224 * Variable names are the proof of operation correctness. 233 * Variable names are the proof of operation correctness.
225 * Look into mm/map_avl.c for more detail description of the ideas. 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 */ 235 */
227static void peer_avl_rebalance(struct inet_peer **stack[], 236static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
228 struct inet_peer ***stackend) 237 struct inet_peer __rcu ***stackend)
229{ 238{
230 struct inet_peer **nodep, *node, *l, *r; 239 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r;
231 int lh, rh; 241 int lh, rh;
232 242
233 while (stackend > stack) { 243 while (stackend > stack) {
234 nodep = *--stackend; 244 nodep = *--stackend;
235 node = *nodep; 245 node = rcu_dereference_protected(*nodep,
236 l = node->avl_left; 246 lockdep_is_held(&peers.lock));
237 r = node->avl_right; 247 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock));
249 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock));
238 lh = node_height(l); 251 lh = node_height(l);
239 rh = node_height(r); 252 rh = node_height(r);
240 if (lh > rh + 1) { /* l: RH+2 */ 253 if (lh > rh + 1) { /* l: RH+2 */
241 struct inet_peer *ll, *lr, *lrl, *lrr; 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 int lrh; 255 int lrh;
243 ll = l->avl_left; 256 ll = rcu_dereference_protected(l->avl_left,
244 lr = l->avl_right; 257 lockdep_is_held(&peers.lock));
258 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock));
245 lrh = node_height(lr); 260 lrh = node_height(lr);
246 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247 node->avl_left = lr; /* lr: RH or RH+1 */ 262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
248 node->avl_right = r; /* r: RH */ 263 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 node->avl_height = lrh + 1; /* RH+1 or RH+2 */ 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250 l->avl_left = ll; /* ll: RH+1 */ 265 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
251 l->avl_right = node; /* node: RH+1 or RH+2 */ 266 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 l->avl_height = node->avl_height + 1; 267 l->avl_height = node->avl_height + 1;
253 *nodep = l; 268 RCU_INIT_POINTER(*nodep, l);
254 } else { /* ll: RH, lr: RH+1 */ 269 } else { /* ll: RH, lr: RH+1 */
255 lrl = lr->avl_left; /* lrl: RH or RH-1 */ 270 lrl = rcu_dereference_protected(lr->avl_left,
256 lrr = lr->avl_right; /* lrr: RH or RH-1 */ 271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
257 node->avl_left = lrr; /* lrr: RH or RH-1 */ 272 lrr = rcu_dereference_protected(lr->avl_right,
258 node->avl_right = r; /* r: RH */ 273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 node->avl_height = rh + 1; /* node: RH+1 */ 276 node->avl_height = rh + 1; /* node: RH+1 */
260 l->avl_left = ll; /* ll: RH */ 277 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
261 l->avl_right = lrl; /* lrl: RH or RH-1 */ 278 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 l->avl_height = rh + 1; /* l: RH+1 */ 279 l->avl_height = rh + 1; /* l: RH+1 */
263 lr->avl_left = l; /* l: RH+1 */ 280 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
264 lr->avl_right = node; /* node: RH+1 */ 281 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 lr->avl_height = rh + 2; 282 lr->avl_height = rh + 2;
266 *nodep = lr; 283 RCU_INIT_POINTER(*nodep, lr);
267 } 284 }
268 } else if (rh > lh + 1) { /* r: LH+2 */ 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 struct inet_peer *rr, *rl, *rlr, *rll; 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 int rlh; 287 int rlh;
271 rr = r->avl_right; 288 rr = rcu_dereference_protected(r->avl_right,
272 rl = r->avl_left; 289 lockdep_is_held(&peers.lock));
290 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock));
273 rlh = node_height(rl); 292 rlh = node_height(rl);
274 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275 node->avl_right = rl; /* rl: LH or LH+1 */ 294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
276 node->avl_left = l; /* l: LH */ 295 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 node->avl_height = rlh + 1; /* LH+1 or LH+2 */ 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278 r->avl_right = rr; /* rr: LH+1 */ 297 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
279 r->avl_left = node; /* node: LH+1 or LH+2 */ 298 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 r->avl_height = node->avl_height + 1; 299 r->avl_height = node->avl_height + 1;
281 *nodep = r; 300 RCU_INIT_POINTER(*nodep, r);
282 } else { /* rr: RH, rl: RH+1 */ 301 } else { /* rr: RH, rl: RH+1 */
283 rlr = rl->avl_right; /* rlr: LH or LH-1 */ 302 rlr = rcu_dereference_protected(rl->avl_right,
284 rll = rl->avl_left; /* rll: LH or LH-1 */ 303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
285 node->avl_right = rll; /* rll: LH or LH-1 */ 304 rll = rcu_dereference_protected(rl->avl_left,
286 node->avl_left = l; /* l: LH */ 305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 node->avl_height = lh + 1; /* node: LH+1 */ 308 node->avl_height = lh + 1; /* node: LH+1 */
288 r->avl_right = rr; /* rr: LH */ 309 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
289 r->avl_left = rlr; /* rlr: LH or LH-1 */ 310 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 r->avl_height = lh + 1; /* r: LH+1 */ 311 r->avl_height = lh + 1; /* r: LH+1 */
291 rl->avl_right = r; /* r: LH+1 */ 312 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
292 rl->avl_left = node; /* node: LH+1 */ 313 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 rl->avl_height = lh + 2; 314 rl->avl_height = lh + 2;
294 *nodep = rl; 315 RCU_INIT_POINTER(*nodep, rl);
295 } 316 }
296 } else { 317 } else {
297 node->avl_height = (lh > rh ? lh : rh) + 1; 318 node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
303#define link_to_pool(n) \ 324#define link_to_pool(n) \
304do { \ 325do { \
305 n->avl_height = 1; \ 326 n->avl_height = 1; \
306 n->avl_left = peer_avl_empty; \ 327 n->avl_left = peer_avl_empty_rcu; \
307 n->avl_right = peer_avl_empty; \ 328 n->avl_right = peer_avl_empty_rcu; \
308 smp_wmb(); /* lockless readers can catch us now */ \ 329 /* lockless readers can catch us now */ \
309 **--stackptr = n; \ 330 rcu_assign_pointer(**--stackptr, n); \
310 peer_avl_rebalance(stack, stackptr); \ 331 peer_avl_rebalance(stack, stackptr); \
311} while (0) 332} while (0)
312 333
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
330 * We use refcnt=-1 to alert lockless readers this entry is deleted. 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 */ 352 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333 struct inet_peer **stack[PEER_MAXDEPTH]; 354 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
334 struct inet_peer ***stackptr, ***delp; 355 struct inet_peer __rcu ***stackptr, ***delp;
335 if (lookup(p->v4daddr, stack) != p) 356 if (lookup(p->v4daddr, stack) != p)
336 BUG(); 357 BUG();
337 delp = stackptr - 1; /* *delp[0] == p */ 358 delp = stackptr - 1; /* *delp[0] == p */
338 if (p->avl_left == peer_avl_empty) { 359 if (p->avl_left == peer_avl_empty_rcu) {
339 *delp[0] = p->avl_right; 360 *delp[0] = p->avl_right;
340 --stackptr; 361 --stackptr;
341 } else { 362 } else {
342 /* look for a node to insert instead of p */ 363 /* look for a node to insert instead of p */
343 struct inet_peer *t; 364 struct inet_peer *t;
344 t = lookup_rightempty(p); 365 t = lookup_rightempty(p);
345 BUG_ON(*stackptr[-1] != t); 366 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t);
346 **--stackptr = t->avl_left; 368 **--stackptr = t->avl_left;
347 /* t is removed, t->v4daddr > x->v4daddr for any 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 * x in p->avl_left subtree. 370 * x in p->avl_left subtree.
349 * Put t in the old place of p. */ 371 * Put t in the old place of p. */
350 *delp[0] = t; 372 RCU_INIT_POINTER(*delp[0], t);
351 t->avl_left = p->avl_left; 373 t->avl_left = p->avl_left;
352 t->avl_right = p->avl_right; 374 t->avl_right = p->avl_right;
353 t->avl_height = p->avl_height; 375 t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
414struct inet_peer *inet_getpeer(__be32 daddr, int create) 436struct inet_peer *inet_getpeer(__be32 daddr, int create)
415{ 437{
416 struct inet_peer *p; 438 struct inet_peer *p;
417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440
419 /* Look up for the address quickly, lockless. 441 /* Look up for the address quickly, lockless.
420 * Because of a concurrent writer, we might not find an existing entry. 442 * Because of a concurrent writer, we might not find an existing entry.
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1072 break; 1072 break;
1073 } 1073 }
1074 ipgre_tunnel_unlink(ign, t); 1074 ipgre_tunnel_unlink(ign, t);
1075 synchronize_net();
1075 t->parms.iph.saddr = p.iph.saddr; 1076 t->parms.iph.saddr = p.iph.saddr;
1076 t->parms.iph.daddr = p.iph.daddr; 1077 t->parms.iph.daddr = p.iph.daddr;
1077 t->parms.i_key = p.i_key; 1078 t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1324{ 1325{
1325 struct ip_tunnel *tunnel = netdev_priv(dev); 1326 struct ip_tunnel *tunnel = netdev_priv(dev);
1326 struct iphdr *iph = &tunnel->parms.iph; 1327 struct iphdr *iph = &tunnel->parms.iph;
1327 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1328 1328
1329 tunnel->dev = dev; 1329 tunnel->dev = dev;
1330 strcpy(tunnel->parms.name, dev->name); 1330 strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1335 tunnel->hlen = sizeof(struct iphdr) + 4; 1335 tunnel->hlen = sizeof(struct iphdr) + 4;
1336 1336
1337 dev_hold(dev); 1337 dev_hold(dev);
1338 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1339} 1338}
1340 1339
1341 1340
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
1382 if ((err = register_netdev(ign->fb_tunnel_dev))) 1381 if ((err = register_netdev(ign->fb_tunnel_dev)))
1383 goto err_reg_dev; 1382 goto err_reg_dev;
1384 1383
1384 rcu_assign_pointer(ign->tunnels_wc[0],
1385 netdev_priv(ign->fb_tunnel_dev));
1385 return 0; 1386 return 0;
1386 1387
1387err_reg_dev: 1388err_reg_dev:
1388 free_netdev(ign->fb_tunnel_dev); 1389 ipgre_dev_free(ign->fb_tunnel_dev);
1389err_alloc_dev: 1390err_alloc_dev:
1390 return err; 1391 return err;
1391} 1392}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
238 but receiver should be enough clever f.e. to forward mtrace requests, 238 but receiver should be enough clever f.e. to forward mtrace requests,
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain __rcu *ip_ra_chain;
242static DEFINE_SPINLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243 243
244 244
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
253int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
254 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
255{ 255{
256 struct ip_ra_chain *ra, *new_ra, **rap; 256 struct ip_ra_chain *ra, *new_ra;
257 struct ip_ra_chain __rcu **rap;
257 258
258 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 259 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
259 return -EINVAL; 260 return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 262 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
262 263
263 spin_lock_bh(&ip_ra_lock); 264 spin_lock_bh(&ip_ra_lock);
264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 265 for (rap = &ip_ra_chain;
266 (ra = rcu_dereference_protected(*rap,
267 lockdep_is_held(&ip_ra_lock))) != NULL;
268 rap = &ra->next) {
265 if (ra->sk == sk) { 269 if (ra->sk == sk) {
266 if (on) { 270 if (on) {
267 spin_unlock_bh(&ip_ra_lock); 271 spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
676 } 676 }
677 t = netdev_priv(dev); 677 t = netdev_priv(dev);
678 ipip_tunnel_unlink(ipn, t); 678 ipip_tunnel_unlink(ipn, t);
679 synchronize_net();
679 t->parms.iph.saddr = p.iph.saddr; 680 t->parms.iph.saddr = p.iph.saddr;
680 t->parms.iph.daddr = p.iph.daddr; 681 t->parms.iph.daddr = p.iph.daddr;
681 memcpy(dev->dev_addr, &p.iph.saddr, 4); 682 memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
927 private = &tmp; 927 private = &tmp;
928 } 928 }
929#endif 929#endif
930 memset(&info, 0, sizeof(info));
930 info.valid_hooks = t->valid_hooks; 931 info.valid_hooks = t->valid_hooks;
931 memcpy(info.hook_entry, private->hook_entry, 932 memcpy(info.hook_entry, private->hook_entry,
932 sizeof(info.hook_entry)); 933 sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
1124 private = &tmp; 1124 private = &tmp;
1125 } 1125 }
1126#endif 1126#endif
1127 memset(&info, 0, sizeof(info));
1127 info.valid_hooks = t->valid_hooks; 1128 info.valid_hooks = t->valid_hooks;
1128 memcpy(info.hook_entry, private->hook_entry, 1129 memcpy(info.hook_entry, private->hook_entry,
1129 sizeof(info.hook_entry)); 1130 sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
47 return rcu_dereference(nf_nat_protos[protonum]); 47 return rcu_dereference(nf_nat_protos[protonum]);
48} 48}
49 49
50static const struct nf_nat_protocol *
51nf_nat_proto_find_get(u_int8_t protonum)
52{
53 const struct nf_nat_protocol *p;
54
55 rcu_read_lock();
56 p = __nf_nat_proto_find(protonum);
57 if (!try_module_get(p->me))
58 p = &nf_nat_unknown_protocol;
59 rcu_read_unlock();
60
61 return p;
62}
63
64static void
65nf_nat_proto_put(const struct nf_nat_protocol *p)
66{
67 module_put(p->me);
68}
69
70/* We keep an extra hash for each conntrack, for fast searching. */ 50/* We keep an extra hash for each conntrack, for fast searching. */
71static inline unsigned int 51static inline unsigned int
72hash_by_src(const struct net *net, u16 zone, 52hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
588#include <linux/netfilter/nfnetlink.h> 568#include <linux/netfilter/nfnetlink.h>
589#include <linux/netfilter/nfnetlink_conntrack.h> 569#include <linux/netfilter/nfnetlink_conntrack.h>
590 570
571static const struct nf_nat_protocol *
572nf_nat_proto_find_get(u_int8_t protonum)
573{
574 const struct nf_nat_protocol *p;
575
576 rcu_read_lock();
577 p = __nf_nat_proto_find(protonum);
578 if (!try_module_get(p->me))
579 p = &nf_nat_unknown_protocol;
580 rcu_read_unlock();
581
582 return p;
583}
584
585static void
586nf_nat_proto_put(const struct nf_nat_protocol *p)
587{
588 module_put(p->me);
589}
590
591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { 591static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, 592 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, 593 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..1b48eb1ed453 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
59 local_bh_enable(); 59 local_bh_enable();
60 60
61 socket_seq_show(seq); 61 socket_seq_show(seq);
62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 62 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
63 sock_prot_inuse_get(net, &tcp_prot), orphans, 63 sock_prot_inuse_get(net, &tcp_prot), orphans,
64 tcp_death_row.tw_count, sockets, 64 tcp_death_row.tw_count, sockets,
65 atomic_read(&tcp_memory_allocated)); 65 atomic_long_read(&tcp_memory_allocated));
66 seq_printf(seq, "UDP: inuse %d mem %d\n", 66 seq_printf(seq, "UDP: inuse %d mem %ld\n",
67 sock_prot_inuse_get(net, &udp_prot), 67 sock_prot_inuse_get(net, &udp_prot),
68 atomic_read(&udp_memory_allocated)); 68 atomic_long_read(&udp_memory_allocated));
69 seq_printf(seq, "UDPLITE: inuse %d\n", 69 seq_printf(seq, "UDPLITE: inuse %d\n",
70 sock_prot_inuse_get(net, &udplite_prot)); 70 sock_prot_inuse_get(net, &udplite_prot));
71 seq_printf(seq, "RAW: inuse %d\n", 71 seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32 32
33/* 33/*
34 * Add a protocol handler to the hash tables 34 * Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
38{ 38{
39 int hash = protocol & (MAX_INET_PROTOS - 1); 39 int hash = protocol & (MAX_INET_PROTOS - 1);
40 40
41 return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1; 41 return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
42 NULL, prot) ? 0 : -1;
42} 43}
43EXPORT_SYMBOL(inet_add_protocol); 44EXPORT_SYMBOL(inet_add_protocol);
44 45
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
50{ 51{
51 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 52 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
52 53
53 ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1; 54 ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
55 prot, NULL) == prot) ? 0 : -1;
54 56
55 synchronize_net(); 57 synchronize_net();
56 58
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
198 */ 198 */
199 199
200struct rt_hash_bucket { 200struct rt_hash_bucket {
201 struct rtable *chain; 201 struct rtable __rcu *chain;
202}; 202};
203 203
204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ 204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
280 struct rtable *r = NULL; 280 struct rtable *r = NULL;
281 281
282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
283 if (!rt_hash_table[st->bucket].chain) 283 if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
284 continue; 284 continue;
285 rcu_read_lock_bh(); 285 rcu_read_lock_bh();
286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 286 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
300{ 300{
301 struct rt_cache_iter_state *st = seq->private; 301 struct rt_cache_iter_state *st = seq->private;
302 302
303 r = r->dst.rt_next; 303 r = rcu_dereference_bh(r->dst.rt_next);
304 while (!r) { 304 while (!r) {
305 rcu_read_unlock_bh(); 305 rcu_read_unlock_bh();
306 do { 306 do {
307 if (--st->bucket < 0) 307 if (--st->bucket < 0)
308 return NULL; 308 return NULL;
309 } while (!rt_hash_table[st->bucket].chain); 309 } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
310 rcu_read_lock_bh(); 310 rcu_read_lock_bh();
311 r = rt_hash_table[st->bucket].chain; 311 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
312 } 312 }
313 return rcu_dereference_bh(r); 313 return r;
314} 314}
315 315
316static struct rtable *rt_cache_get_next(struct seq_file *seq, 316static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
721 for (i = 0; i <= rt_hash_mask; i++) { 721 for (i = 0; i <= rt_hash_mask; i++) {
722 if (process_context && need_resched()) 722 if (process_context && need_resched())
723 cond_resched(); 723 cond_resched();
724 rth = rt_hash_table[i].chain; 724 rth = rcu_dereference_raw(rt_hash_table[i].chain);
725 if (!rth) 725 if (!rth)
726 continue; 726 continue;
727 727
728 spin_lock_bh(rt_hash_lock_addr(i)); 728 spin_lock_bh(rt_hash_lock_addr(i));
729#ifdef CONFIG_NET_NS 729#ifdef CONFIG_NET_NS
730 { 730 {
731 struct rtable ** prev, * p; 731 struct rtable __rcu **prev;
732 struct rtable *p;
732 733
733 rth = rt_hash_table[i].chain; 734 rth = rcu_dereference_protected(rt_hash_table[i].chain,
735 lockdep_is_held(rt_hash_lock_addr(i)));
734 736
735 /* defer releasing the head of the list after spin_unlock */ 737 /* defer releasing the head of the list after spin_unlock */
736 for (tail = rth; tail; tail = tail->dst.rt_next) 738 for (tail = rth; tail;
739 tail = rcu_dereference_protected(tail->dst.rt_next,
740 lockdep_is_held(rt_hash_lock_addr(i))))
737 if (!rt_is_expired(tail)) 741 if (!rt_is_expired(tail))
738 break; 742 break;
739 if (rth != tail) 743 if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
741 745
742 /* call rt_free on entries after the tail requiring flush */ 746 /* call rt_free on entries after the tail requiring flush */
743 prev = &rt_hash_table[i].chain; 747 prev = &rt_hash_table[i].chain;
744 for (p = *prev; p; p = next) { 748 for (p = rcu_dereference_protected(*prev,
745 next = p->dst.rt_next; 749 lockdep_is_held(rt_hash_lock_addr(i)));
750 p != NULL;
751 p = next) {
752 next = rcu_dereference_protected(p->dst.rt_next,
753 lockdep_is_held(rt_hash_lock_addr(i)));
746 if (!rt_is_expired(p)) { 754 if (!rt_is_expired(p)) {
747 prev = &p->dst.rt_next; 755 prev = &p->dst.rt_next;
748 } else { 756 } else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
752 } 760 }
753 } 761 }
754#else 762#else
755 rth = rt_hash_table[i].chain; 763 rth = rcu_dereference_protected(rt_hash_table[i].chain,
756 rt_hash_table[i].chain = NULL; 764 lockdep_is_held(rt_hash_lock_addr(i)));
765 rcu_assign_pointer(rt_hash_table[i].chain, NULL);
757 tail = NULL; 766 tail = NULL;
758#endif 767#endif
759 spin_unlock_bh(rt_hash_lock_addr(i)); 768 spin_unlock_bh(rt_hash_lock_addr(i));
760 769
761 for (; rth != tail; rth = next) { 770 for (; rth != tail; rth = next) {
762 next = rth->dst.rt_next; 771 next = rcu_dereference_protected(rth->dst.rt_next, 1);
763 rt_free(rth); 772 rt_free(rth);
764 } 773 }
765 } 774 }
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
790 while (aux != rth) { 799 while (aux != rth) {
791 if (compare_hash_inputs(&aux->fl, &rth->fl)) 800 if (compare_hash_inputs(&aux->fl, &rth->fl))
792 return 0; 801 return 0;
793 aux = aux->dst.rt_next; 802 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
794 } 803 }
795 return ONE; 804 return ONE;
796} 805}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
799{ 808{
800 static unsigned int rover; 809 static unsigned int rover;
801 unsigned int i = rover, goal; 810 unsigned int i = rover, goal;
802 struct rtable *rth, **rthp; 811 struct rtable *rth;
812 struct rtable __rcu **rthp;
803 unsigned long samples = 0; 813 unsigned long samples = 0;
804 unsigned long sum = 0, sum2 = 0; 814 unsigned long sum = 0, sum2 = 0;
805 unsigned long delta; 815 unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
825 835
826 samples++; 836 samples++;
827 837
828 if (*rthp == NULL) 838 if (rcu_dereference_raw(*rthp) == NULL)
829 continue; 839 continue;
830 length = 0; 840 length = 0;
831 spin_lock_bh(rt_hash_lock_addr(i)); 841 spin_lock_bh(rt_hash_lock_addr(i));
832 while ((rth = *rthp) != NULL) { 842 while ((rth = rcu_dereference_protected(*rthp,
843 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833 prefetch(rth->dst.rt_next); 844 prefetch(rth->dst.rt_next);
834 if (rt_is_expired(rth)) { 845 if (rt_is_expired(rth)) {
835 *rthp = rth->dst.rt_next; 846 *rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
941 static unsigned long last_gc; 952 static unsigned long last_gc;
942 static int rover; 953 static int rover;
943 static int equilibrium; 954 static int equilibrium;
944 struct rtable *rth, **rthp; 955 struct rtable *rth;
956 struct rtable __rcu **rthp;
945 unsigned long now = jiffies; 957 unsigned long now = jiffies;
946 int goal; 958 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops); 959 int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
995 k = (k + 1) & rt_hash_mask; 1007 k = (k + 1) & rt_hash_mask;
996 rthp = &rt_hash_table[k].chain; 1008 rthp = &rt_hash_table[k].chain;
997 spin_lock_bh(rt_hash_lock_addr(k)); 1009 spin_lock_bh(rt_hash_lock_addr(k));
998 while ((rth = *rthp) != NULL) { 1010 while ((rth = rcu_dereference_protected(*rthp,
1011 lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
999 if (!rt_is_expired(rth) && 1012 if (!rt_is_expired(rth) &&
1000 !rt_may_expire(rth, tmo, expire)) { 1013 !rt_may_expire(rth, tmo, expire)) {
1001 tmo >>= 1; 1014 tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
1071 1084
1072 while (rth) { 1085 while (rth) {
1073 length += has_noalias(head, rth); 1086 length += has_noalias(head, rth);
1074 rth = rth->dst.rt_next; 1087 rth = rcu_dereference_protected(rth->dst.rt_next, 1);
1075 } 1088 }
1076 return length >> FRACT_BITS; 1089 return length >> FRACT_BITS;
1077} 1090}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
1079static int rt_intern_hash(unsigned hash, struct rtable *rt, 1092static int rt_intern_hash(unsigned hash, struct rtable *rt,
1080 struct rtable **rp, struct sk_buff *skb, int ifindex) 1093 struct rtable **rp, struct sk_buff *skb, int ifindex)
1081{ 1094{
1082 struct rtable *rth, **rthp; 1095 struct rtable *rth, *cand;
1096 struct rtable __rcu **rthp, **candp;
1083 unsigned long now; 1097 unsigned long now;
1084 struct rtable *cand, **candp;
1085 u32 min_score; 1098 u32 min_score;
1086 int chain_length; 1099 int chain_length;
1087 int attempts = !in_softirq(); 1100 int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
1128 rthp = &rt_hash_table[hash].chain; 1141 rthp = &rt_hash_table[hash].chain;
1129 1142
1130 spin_lock_bh(rt_hash_lock_addr(hash)); 1143 spin_lock_bh(rt_hash_lock_addr(hash));
1131 while ((rth = *rthp) != NULL) { 1144 while ((rth = rcu_dereference_protected(*rthp,
1145 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1132 if (rt_is_expired(rth)) { 1146 if (rt_is_expired(rth)) {
1133 *rthp = rth->dst.rt_next; 1147 *rthp = rth->dst.rt_next;
1134 rt_free(rth); 1148 rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
1324 1338
1325static void rt_del(unsigned hash, struct rtable *rt) 1339static void rt_del(unsigned hash, struct rtable *rt)
1326{ 1340{
1327 struct rtable **rthp, *aux; 1341 struct rtable __rcu **rthp;
1342 struct rtable *aux;
1328 1343
1329 rthp = &rt_hash_table[hash].chain; 1344 rthp = &rt_hash_table[hash].chain;
1330 spin_lock_bh(rt_hash_lock_addr(hash)); 1345 spin_lock_bh(rt_hash_lock_addr(hash));
1331 ip_rt_put(rt); 1346 ip_rt_put(rt);
1332 while ((aux = *rthp) != NULL) { 1347 while ((aux = rcu_dereference_protected(*rthp,
1348 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1333 if (aux == rt || rt_is_expired(aux)) { 1349 if (aux == rt || rt_is_expired(aux)) {
1334 *rthp = aux->dst.rt_next; 1350 *rthp = aux->dst.rt_next;
1335 rt_free(aux); 1351 rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1346{ 1362{
1347 int i, k; 1363 int i, k;
1348 struct in_device *in_dev = __in_dev_get_rcu(dev); 1364 struct in_device *in_dev = __in_dev_get_rcu(dev);
1349 struct rtable *rth, **rthp; 1365 struct rtable *rth;
1366 struct rtable __rcu **rthp;
1350 __be32 skeys[2] = { saddr, 0 }; 1367 __be32 skeys[2] = { saddr, 0 };
1351 int ikeys[2] = { dev->ifindex, 0 }; 1368 int ikeys[2] = { dev->ifindex, 0 };
1352 struct netevent_redirect netevent; 1369 struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1379 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1396 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1380 rt_genid(net)); 1397 rt_genid(net));
1381 1398
1382 rthp=&rt_hash_table[hash].chain; 1399 rthp = &rt_hash_table[hash].chain;
1383 1400
1384 while ((rth = rcu_dereference(*rthp)) != NULL) { 1401 while ((rth = rcu_dereference(*rthp)) != NULL) {
1385 struct rtable *rt; 1402 struct rtable *rt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..e91911d7aae2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
398 .data = &sysctl_tcp_mem, 398 .data = &sysctl_tcp_mem,
399 .maxlen = sizeof(sysctl_tcp_mem), 399 .maxlen = sizeof(sysctl_tcp_mem),
400 .mode = 0644, 400 .mode = 0644,
401 .proc_handler = proc_dointvec 401 .proc_handler = proc_doulongvec_minmax
402 }, 402 },
403 { 403 {
404 .procname = "tcp_wmem", 404 .procname = "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
602 .data = &sysctl_udp_mem, 602 .data = &sysctl_udp_mem,
603 .maxlen = sizeof(sysctl_udp_mem), 603 .maxlen = sizeof(sysctl_udp_mem),
604 .mode = 0644, 604 .mode = 0644,
605 .proc_handler = proc_dointvec_minmax, 605 .proc_handler = proc_doulongvec_minmax,
606 .extra1 = &zero
607 }, 606 },
608 { 607 {
609 .procname = "udp_rmem_min", 608 .procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..081419969485 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
282struct percpu_counter tcp_orphan_count; 282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count); 283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284 284
285int sysctl_tcp_mem[3] __read_mostly; 285long sysctl_tcp_mem[3] __read_mostly;
286int sysctl_tcp_wmem[3] __read_mostly; 286int sysctl_tcp_wmem[3] __read_mostly;
287int sysctl_tcp_rmem[3] __read_mostly; 287int sysctl_tcp_rmem[3] __read_mostly;
288 288
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
290EXPORT_SYMBOL(sysctl_tcp_rmem); 290EXPORT_SYMBOL(sysctl_tcp_rmem);
291EXPORT_SYMBOL(sysctl_tcp_wmem); 291EXPORT_SYMBOL(sysctl_tcp_wmem);
292 292
293atomic_t tcp_memory_allocated; /* Current allocated memory. */ 293atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
294EXPORT_SYMBOL(tcp_memory_allocated); 294EXPORT_SYMBOL(tcp_memory_allocated);
295 295
296/* 296/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2246 /* Values greater than interface MTU won't take effect. However 2246 /* Values greater than interface MTU won't take effect. However
2247 * at the point when this call is done we typically don't yet 2247 * at the point when this call is done we typically don't yet
2248 * know which interface is going to be used */ 2248 * know which interface is going to be used */
2249 if (val < 8 || val > MAX_TCP_WINDOW) { 2249 if (val < 64 || val > MAX_TCP_WINDOW) {
2250 err = -EINVAL; 2250 err = -EINVAL;
2251 break; 2251 break;
2252 } 2252 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + 259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
260 sizeof(struct sk_buff); 260 sizeof(struct sk_buff);
261 261
262 if (sk->sk_sndbuf < 3 * sndmem) 262 if (sk->sk_sndbuf < 3 * sndmem) {
263 sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); 263 sk->sk_sndbuf = 3 * sndmem;
264 if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
265 sk->sk_sndbuf = sysctl_tcp_wmem[2];
266 }
264} 267}
265 268
266/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) 269/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
396 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 399 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
397 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 400 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
398 !tcp_memory_pressure && 401 !tcp_memory_pressure &&
399 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { 402 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
400 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 403 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
401 sysctl_tcp_rmem[2]); 404 sysctl_tcp_rmem[2]);
402 } 405 }
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
4861 return 0; 4864 return 0;
4862 4865
4863 /* If we are under soft global TCP memory pressure, do not expand. */ 4866 /* If we are under soft global TCP memory pressure, do not expand. */
4864 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) 4867 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
4865 return 0; 4868 return 0;
4866 4869
4867 /* If we filled the congestion window, do not expand. */ 4870 /* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..69ccbc1dde9c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
415 !icsk->icsk_backoff) 415 !icsk->icsk_backoff)
416 break; 416 break;
417 417
418 if (sock_owned_by_user(sk))
419 break;
420
418 icsk->icsk_backoff--; 421 icsk->icsk_backoff--;
419 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 422 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
420 icsk->icsk_backoff; 423 icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 if (remaining) { 432 if (remaining) {
430 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 433 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
431 remaining, TCP_RTO_MAX); 434 remaining, TCP_RTO_MAX);
432 } else if (sock_owned_by_user(sk)) {
433 /* RTO revert clocked out retransmission,
434 * but socket is locked. Will defer. */
435 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
436 HZ/20, TCP_RTO_MAX);
437 } else { 435 } else {
438 /* RTO revert clocked out retransmission. 436 /* RTO revert clocked out retransmission.
439 * Will retransmit now */ 437 * Will retransmit now */
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers __read_mostly; 17static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers __read_mostly; 18static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
22{ 22{
23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
24} 24}
25 25
26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) 26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
27{ 27{
28 struct xfrm_tunnel **pprev; 28 struct xfrm_tunnel __rcu **pprev;
29 struct xfrm_tunnel *t;
30
29 int ret = -EEXIST; 31 int ret = -EEXIST;
30 int priority = handler->priority; 32 int priority = handler->priority;
31 33
32 mutex_lock(&tunnel4_mutex); 34 mutex_lock(&tunnel4_mutex);
33 35
34 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 36 for (pprev = fam_handlers(family);
35 if ((*pprev)->priority > priority) 37 (t = rcu_dereference_protected(*pprev,
38 lockdep_is_held(&tunnel4_mutex))) != NULL;
39 pprev = &t->next) {
40 if (t->priority > priority)
36 break; 41 break;
37 if ((*pprev)->priority == priority) 42 if (t->priority == priority)
38 goto err; 43 goto err;
39 } 44 }
40 45
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
52 57
53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 58int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
54{ 59{
55 struct xfrm_tunnel **pprev; 60 struct xfrm_tunnel __rcu **pprev;
61 struct xfrm_tunnel *t;
56 int ret = -ENOENT; 62 int ret = -ENOENT;
57 63
58 mutex_lock(&tunnel4_mutex); 64 mutex_lock(&tunnel4_mutex);
59 65
60 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 66 for (pprev = fam_handlers(family);
61 if (*pprev == handler) { 67 (t = rcu_dereference_protected(*pprev,
68 lockdep_is_held(&tunnel4_mutex))) != NULL;
69 pprev = &t->next) {
70 if (t == handler) {
62 *pprev = handler->next; 71 *pprev = handler->next;
63 ret = 0; 72 ret = 0;
64 break; 73 break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
110struct udp_table udp_table __read_mostly; 110struct udp_table udp_table __read_mostly;
111EXPORT_SYMBOL(udp_table); 111EXPORT_SYMBOL(udp_table);
112 112
113int sysctl_udp_mem[3] __read_mostly; 113long sysctl_udp_mem[3] __read_mostly;
114EXPORT_SYMBOL(sysctl_udp_mem); 114EXPORT_SYMBOL(sysctl_udp_mem);
115 115
116int sysctl_udp_rmem_min __read_mostly; 116int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
119int sysctl_udp_wmem_min __read_mostly; 119int sysctl_udp_wmem_min __read_mostly;
120EXPORT_SYMBOL(sysctl_udp_wmem_min); 120EXPORT_SYMBOL(sysctl_udp_wmem_min);
121 121
122atomic_t udp_memory_allocated; 122atomic_long_t udp_memory_allocated;
123EXPORT_SYMBOL(udp_memory_allocated); 123EXPORT_SYMBOL(udp_memory_allocated);
124 124
125#define MAX_UDP_PORTS 65536 125#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1413 } 1413 }
1414 } 1414 }
1415 1415
1416 if (sk->sk_filter) { 1416 if (rcu_dereference_raw(sk->sk_filter)) {
1417 if (udp_lib_checksum_complete(skb)) 1417 if (udp_lib_checksum_complete(skb))
1418 goto drop; 1418 goto drop;
1419 } 1419 }